package eu.faircode.email; /* This file is part of FairEmail. FairEmail is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. FairEmail is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FairEmail. If not, see . Copyright 2018-2023 by Marcel Bokhorst (M66B) */ import android.content.Context; import android.net.Uri; import android.text.TextUtils; import android.util.Base64; import android.webkit.URLUtil; import androidx.annotation.NonNull; import androidx.annotation.Nullable; import androidx.core.net.MailTo; import androidx.core.util.PatternsCompat; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; import java.io.BufferedReader; import java.io.InputStream; import java.io.InputStreamReader; import java.net.IDN; import java.net.URLDecoder; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; public class UriHelper { // https://publicsuffix.org/ private static final HashSet suffixList = new HashSet<>(); // https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat private static final String SUFFIX_LIST_NAME = "public_suffix_list.dat"; // https://github.com/svenjacobs/leon // https://github.com/newhouse/url-tracking-stripper // https://maxchadwick.xyz/tracking-query-params-registry/ private static final List PARANOID_QUERY = Collections.unmodifiableList(Arrays.asList( // https://en.wikipedia.org/wiki/UTM_parameters "awt_a", // AWeber "awt_l", // AWeber "awt_m", // AWeber "icid", // Adobe "ef_id", // https://experienceleague.adobe.com/docs/advertising-cloud/integrations/analytics/mc/mc-ids.html "_ga", // Google Analytics "gclid", // Google "gclsrc", // Google ads "dclid", // DoubleClick (Google) "fbclid", // Facebook "igshid", // Instagram "msclkid", // https://help.ads.microsoft.com/apex/index/3/en/60000 "mc_cid", // MailChimp "mc_eid", // MailChimp "zanpid", // Zanox (Awin) "kclickid", // https://support.freespee.com/hc/en-us/articles/202577831-Kenshoo-integration "oly_anon_id", "oly_enc_id", // https://training.omeda.com/knowledge-base/olytics-product-outline/ "_openstat", // https://yandex.com/support/direct/statistics/url-tags.html "vero_conv", "vero_id", // https://help.getvero.com/cloud/articles/what-is-vero_id/ "wickedid", // https://help.wickedreports.com/how-to-manually-tag-a-facebook-ad-with-wickedid "yclid", // https://ads-help.yahoo.co.jp/yahooads/ss/articledetail?lan=en&aid=20442 "__s", // https://ads-help.yahoo.co.jp/yahooads/ss/articledetail?lan=en&aid=20442 "guccounter", "guce_referrer", "guce_referrer_sig", // Yahoo "rb_clickid", // Russian "s_cid", // https://help.goacoustic.com/hc/en-us/articles/360043311613-Track-lead-sources "ml_subscriber", "ml_subscriber_hash", // https://www.mailerlite.com/help/how-to-integrate-your-forms-to-a-wix-website "twclid", // https://business.twitter.com/en/blog/performance-advertising-on-twitter.html "gbraid", "wbraid", // https://support.google.com/google-ads/answer/10417364 "_hsenc", "__hssc", "__hstc", "__hsfp", "hsCtaTracking" // https://knowledge.hubspot.com/reports/what-cookies-does-hubspot-set-in-a-visitor-s-browser )); // https://github.com/snarfed/granary/blob/master/granary/facebook.py#L1789 private static final List FACEBOOK_WHITELIST_PATH = Collections.unmodifiableList(Arrays.asList( "/nd/", "/n/", "/story.php" )); private static final List FACEBOOK_WHITELIST_QUERY = Collections.unmodifiableList(Arrays.asList( "story_fbid", "fbid", "id", "comment_id" )); static String getParentDomain(Context context, String host) { if (host == null) return null; int dot = host.indexOf('.'); if (dot < 0) return null; String parent = host.substring(dot + 1); String tld = getTld(context, host); if (tld == null || tld.equals(parent) || parent.length() < tld.length()) return null; return parent; } static String getRootDomain(Context context, String host) { if (host == null) return null; String tld = getTld(context, host); if (tld == null) return null; if (tld.equalsIgnoreCase(host)) return null; int len = host.length() - tld.length() - 1; if (len < 0) { Log.e("getRootDomain host=" + host + " tld=" + tld); return null; } int dot = host.substring(0, len).lastIndexOf('.'); if (dot < 0) return host; return host.substring(dot + 1); } static boolean isTld(Context context, String host) { if (host == null) return false; String tld = getTld(context, host); return (tld != null && tld.equals(host)); } static boolean hasTld(Context context, String host) { return (host != null && getTld(context, host) != null); } static String getTld(Context context, @NonNull String host) { ensureSuffixList(context); String eval = host.toLowerCase(Locale.ROOT); while (true) { int d = eval.indexOf('.'); String w = (d < 0 ? null : '*' + eval.substring(d)); synchronized (suffixList) { if (suffixList.contains(eval)) return eval; if (suffixList.contains(w)) if (suffixList.contains('!' + eval)) return eval.substring(d + 1); else return eval; } int dot = eval.indexOf('.'); if (dot < 0) return null; eval = eval.substring(dot + 1); } } static String getEmailUser(String address) { if (address == null) return null; int at = address.indexOf('@'); if (at > 0) return address.substring(0, at); return null; } static String getEmailDomain(String address) { if (address == null) return null; int at = address.indexOf('@'); if (at > 0) return address.substring(at + 1); return null; } static @NonNull Uri guessScheme(@NonNull Uri uri) { if (uri.getScheme() != null) return uri; String url = uri.toString(); if (Helper.EMAIL_ADDRESS.matcher(url).matches()) return Uri.parse("mailto:" + url); else if (PatternsCompat.IP_ADDRESS.matcher(url).matches()) return Uri.parse("https://" + url); else if (android.util.Patterns.PHONE.matcher(url).matches()) // Patterns.PHONE (\+[0-9]+[\- \.]*)?(\([0-9]+\)[\- \.]*)?([0-9][0-9\- \.]+[0-9]) // PhoneNumberUtils.isGlobalPhoneNumber() [\+]?[0-9.-]+ return Uri.parse("tel:" + url); else { Uri g = Uri.parse(URLUtil.guessUrl(url)); String scheme = g.getScheme(); if (scheme == null) return uri; else if ("http".equals(scheme)) scheme = "https"; return Uri.parse(scheme + "://" + url); } } static int getSuffixCount(Context context) { ensureSuffixList(context); synchronized (suffixList) { return suffixList.size(); } } private static void ensureSuffixList(Context context) { synchronized (suffixList) { if (suffixList.size() > 0) return; Log.i("Reading " + SUFFIX_LIST_NAME); try (InputStream is = context.getAssets().open(SUFFIX_LIST_NAME)) { BufferedReader br = new BufferedReader(new InputStreamReader((is))); String line; while ((line = br.readLine()) != null) { line = line.trim(); if (TextUtils.isEmpty(line)) continue; if (line.startsWith("//")) continue; suffixList.add(line); try { String ascii = IDN.toASCII(line, IDN.ALLOW_UNASSIGNED); if (!line.equals(ascii)) suffixList.add(line); } catch (Throwable ex) { Log.e(ex); } } Log.i(SUFFIX_LIST_NAME + "=" + suffixList.size()); } catch (Throwable ex) { Log.e(ex); } } } static Uri sanitize(Context context, Uri uri) { if (uri.isOpaque()) return uri; Uri url; boolean changed = false; if (uri.getHost() != null && uri.getHost().endsWith("safelinks.protection.outlook.com") && !TextUtils.isEmpty(uri.getQueryParameter("url"))) { Uri result = Uri.parse(uri.getQueryParameter("url")); changed = (result != null && isHyperLink(result)); url = (changed ? result : uri); } else if ("https".equals(uri.getScheme()) && "smex-ctp.trendmicro.com".equals(uri.getHost()) && "/wis/clicktime/v1/query".equals(uri.getPath()) && !TextUtils.isEmpty(uri.getQueryParameter("url"))) { Uri result = Uri.parse(uri.getQueryParameter("url")); changed = (result != null && isHyperLink(result)); url = (changed ? result : uri); } else if ("https".equals(uri.getScheme()) && "www.google.com".equals(uri.getHost()) && uri.getPath() != null && uri.getPath().startsWith("/amp/")) { // https://blog.amp.dev/2017/02/06/whats-in-an-amp-url/ Uri result = null; String u = uri.toString(); u = u.replace("https://www.google.com/amp/", ""); int p = u.indexOf("/"); while (p > 0) { String segment = u.substring(0, p); if (segment.contains(".")) { result = Uri.parse("https://" + u); break; } u = u.substring(p + 1); p = u.indexOf("/"); } changed = (result != null && isHyperLink(result)); url = (changed ? result : uri); } else if ("https".equals(uri.getScheme()) && uri.getHost() != null && uri.getHost().startsWith("www.google.") && uri.getQueryParameter("url") != null) { // Google non-com redirects Uri result = Uri.parse(uri.getQueryParameter("url")); changed = (result != null && isHyperLink(result)); url = (changed ? result : uri); } else if (uri.getPath() != null && uri.getPath().startsWith("/track/click") && uri.getQueryParameter("p") != null) { Uri result = null; try { // Mandrill String p = new String(Base64.decode(uri.getQueryParameter("p"), Base64.URL_SAFE)); JSONObject json = new JSONObject(p); json = new JSONObject(json.getString("p")); result = Uri.parse(json.getString("url")); } catch (Throwable ex) { Log.i(ex); } changed = (result != null && isHyperLink(result)); url = (changed ? result : uri); } else if (uri.getHost() != null && uri.getHost().endsWith(".awstrack.me")) { // https://docs.aws.amazon.com/ses/latest/dg/configure-custom-open-click-domains.html String path = uri.getPath(); int s = (path == null ? -1 : path.indexOf('/', 1)); Uri result = (s > 0 ? Uri.parse(path.substring(s + 1)) : null); changed = (result != null && isHyperLink(result)); url = (changed ? result : uri); } else { Uri result = getBraveDebounce(context, uri); if (result == null && uri.getQueryParameter("redirectUrl") != null) // https://.../link-tracker?redirectUrl=&sig=...&iat=...&a=...&account=...&email=...&s=...&i=... try { byte[] bytes = Base64.decode(uri.getQueryParameter("redirectUrl"), Base64.URL_SAFE); String u = URLDecoder.decode(new String(bytes), StandardCharsets.UTF_8.name()); result = Uri.parse(u); } catch (Throwable ex) { Log.i(ex); } changed = (result != null && isHyperLink(result)); url = (changed ? result : uri); } if (!changed) { // Sophos Email Appliance // http:///? Uri result = null; try { if (uri.getQueryParameterNames().size() == 1) { String key = uri.getQueryParameterNames().iterator().next(); if (TextUtils.isEmpty(uri.getQueryParameter(key))) { String data = new String(Base64.decode(key, Base64.URL_SAFE)); int v = data.indexOf("ver="); int u = data.indexOf("&&url="); if (v == 0 && u > 0) result = Uri.parse(URLDecoder.decode(data.substring(u + 6), StandardCharsets.UTF_8.name())); } } } catch (Throwable ex) { Log.i(ex); } changed = (result != null && isHyperLink(result)); url = (changed ? result : uri); } if (!changed) { // go.dhlparcel.nl and others // Try base64 last path segment Uri result = null; String path = uri.getPath(); try { if (path != null) { int s = path.lastIndexOf('/'); if (s > 0) { String b = new String(Base64.decode(path.substring(s + 1), Base64.URL_SAFE)); result = Uri.parse(b); } } } catch (Throwable ex) { Log.i(ex); } changed = (result != null && isHyperLink(result)); url = (changed ? result : uri); } if (url.isOpaque() || !isHyperLink(url)) return uri; if (BuildConfig.DEBUG) { Uri result = filterAdguard(context, url); if (result != null) { changed = true; url = result; } } Uri.Builder builder = url.buildUpon(); builder.clearQuery(); String host = uri.getHost(); String path = uri.getPath(); if (host != null) host = host.toLowerCase(Locale.ROOT); if (path != null) path = path.toLowerCase(Locale.ROOT); List clean = getBraveClean(context, url); boolean first = "www.facebook.com".equals(host); for (String key : url.getQueryParameterNames()) { // https://en.wikipedia.org/wiki/UTM_parameters // https://docs.oracle.com/en/cloud/saas/marketing/eloqua-user/Help/EloquaAsynchronousTrackingScripts/EloquaTrackingParameters.htm String lkey = key.toLowerCase(Locale.ROOT); if (PARANOID_QUERY.contains(lkey) || lkey.startsWith("utm_") || lkey.startsWith("elq") || ((host != null && host.endsWith("facebook.com")) && !first && FACEBOOK_WHITELIST_PATH.contains(path) && !FACEBOOK_WHITELIST_QUERY.contains(lkey)) || ("store.steampowered.com".equals(host) && "snr".equals(lkey)) || (clean != null && clean.contains(key))) changed = true; else if (!TextUtils.isEmpty(key)) for (String value : url.getQueryParameters(key)) { Log.i("Query " + key + "=" + value); Uri suri = Uri.parse(value); if (suri != null && isHyperLink(suri)) { Uri s = sanitize(context, suri); return (s == null ? suri : s); } builder.appendQueryParameter(key, value); } first = false; } return (changed ? builder.build() : null); } @Nullable private static Uri filterAdguard(Context context, Uri uri) { if (uri.isOpaque()) return null; String host = uri.getHost(); if (TextUtils.isEmpty(host)) return null; List removes = new ArrayList<>(); // https://github.com/AdguardTeam/FiltersRegistry/blob/master/filters/filter_17_TrackParam/filter.txt try (BufferedReader br = new BufferedReader( new InputStreamReader(context.getAssets().open("adguard_filter.txt")))) { String line; while ((line = br.readLine()) != null) { if (TextUtils.isEmpty(line) || line.startsWith("!")) continue; int dollar = line.indexOf('$'); while (dollar > 0 && line.charAt(dollar - 1) == '\\') dollar = line.indexOf('$', dollar + 1); if (dollar < 0) continue; String expr = line.substring(0, dollar).replace("\\$", "$"); String rest = line.substring(dollar + 1); List commands = new ArrayList<>(); int start = 0; while (start < rest.length()) { int comma = rest.indexOf(',', start); while (comma > 0 && rest.charAt(comma - 1) == '\\') comma = rest.indexOf(',', comma + 1); int end = (comma < 0 ? rest.length() : comma); commands.add(rest.substring(start, end).replace("\\,", ",")); start = (comma < 0 ? end : end + 1); } String remove = null; boolean matches = true; for (String command : commands) { int equal = command.indexOf('='); String c = (equal < 0 ? command : command.substring(0, equal)); String e = (equal < 0 ? "" : command.substring(equal + 1)); if ("removeparam".equals(c)) remove = e; else if ("domain".equals(c)) { // https://adguard.com/kb/general/ad-filtering/create-own-filters/#domain-modifier matches = false; List domains = new ArrayList<>(); start = 0; while (start < e.length()) { int pipe = e.indexOf('|', start); while (pipe > 0 && e.charAt(pipe - 1) == '\\') pipe = e.indexOf('|', pipe + 1); int end = (pipe < 0 ? e.length() : pipe); domains.add(e.substring(start, end).replace("\\|", "|")); start = (pipe < 0 ? end : end + 1); } for (String domain : domains) { boolean not = domain.startsWith("~"); String d = (not ? domain.substring(1) : domain); if (d.contains("*") && !d.endsWith("*")) Log.w("Adguard unexpected domain=" + domain); if (d.endsWith("*")) matches = host.startsWith(d.substring(0, d.length() - 1)); else matches = host.equals(d); if (matches) Log.w("Adguard domain=" + domain + " host=" + host); if (not) matches = !matches; if (matches) break; } } } if (remove == null /* no removeparam */ || !matches) continue; boolean except = false; matches = TextUtils.isEmpty(expr); if (!matches) { if (expr.startsWith("@@")) { except = true; expr = expr.substring(2); } String u = uri.toString(); if (expr.startsWith("||")) { int ss = u.indexOf("//"); if (ss > 0) u = u.substring(ss + 2); expr = expr.substring(2); } // https://adguard.com/kb/general/ad-filtering/create-own-filters/#basic-rules-special-characters StringBuilder b = new StringBuilder(); b.append(".*"); for (char c : expr.toCharArray()) if (c == '*') b.append(".*"); else if (c == '^') b.append("[^0-9a-zA-Z\\_\\-\\.\\%]"); else if (c == '|') Log.w("Adguard unexpected expr=" + expr); else { if ("\\.?![]{}()<>*+-=^$|".indexOf(c) >= 0) b.append("\\"); b.append(c); } if (!expr.endsWith("*")) b.append(".*"); matches = Pattern.compile(b.toString()).matcher(u).matches(); if (matches) Log.w("Adguard expr=" + b + " remove=" + remove); } if (matches) if (except) removes.clear(); else if (!removes.contains(remove)) removes.add(remove); } } catch (Throwable ex) { Log.e(ex); } try { boolean changed = false; Uri.Builder builder = uri.buildUpon(); builder.clearQuery(); if (removes.contains("") /* all */) changed = true; else for (String key : uri.getQueryParameterNames()) { boolean omit = false; for (String remove : removes) if (remove.startsWith("/")) { int end = remove.indexOf('/', 1); if (end > 0) { String regex = remove.substring(1, end); String rest = remove.substring(end + 1); if (!TextUtils.isEmpty(rest)) Log.w("Adguard unexpected remove=" + remove); if (Pattern.compile(regex).matcher(key).matches()) { omit = true; Log.w("Adguard omit regex=" + regex); break; } } } else if (remove.equals(key)) { omit = true; Log.w("Adguard omit key=" + key); break; } if (omit) changed = true; else for (String value : uri.getQueryParameters(key)) builder.appendQueryParameter(key, value); } return (changed ? builder.build() : null); } catch (Throwable ex) { Log.e(ex); return null; } } @Nullable private static List getBraveClean(Context context, Uri uri) { // https://github.com/brave/adblock-lists/blob/master/brave-lists/clean-urls.json try (InputStream is = context.getAssets().open("clean-urls.json")) { String json = Helper.readStream(is); JSONArray jclean = new JSONArray(json); for (int i = 0; i < jclean.length(); i++) { JSONObject jitem = jclean.getJSONObject(i); JSONArray jinclude = jitem.getJSONArray("include"); JSONArray jexclude = jitem.getJSONArray("exclude"); boolean include = false; for (int j = 0; j < jinclude.length(); j++) if (Pattern.matches(escapeStar(jinclude.getString(j)), uri.toString())) { include = true; break; } if (include) for (int j = 0; j < jexclude.length(); j++) if (Pattern.matches(escapeStar(jexclude.getString(j)), uri.toString())) { include = false; break; } if (include) { JSONArray jparams = jitem.getJSONArray("params"); List result = new ArrayList<>(); for (int j = 0; j < jparams.length(); j++) result.add(jparams.getString(j)); return result; } } } catch (Throwable ex) { Log.e(ex); } return null; } @Nullable private static Uri getBraveDebounce(Context context, Uri uri) { // https://github.com/brave/adblock-lists/blob/master/brave-lists/debounce.json try (InputStream is = context.getAssets().open("debounce.json")) { String json = Helper.readStream(is); JSONArray jbounce = new JSONArray(json); for (int i = 0; i < jbounce.length(); i++) { JSONObject jitem = jbounce.getJSONObject(i); JSONArray jinclude = jitem.getJSONArray("include"); JSONArray jexclude = jitem.getJSONArray("exclude"); boolean include = false; for (int j = 0; j < jinclude.length(); j++) if (Pattern.matches(escapeStar(jinclude.getString(j)), uri.toString())) { include = true; break; } if (include) for (int j = 0; j < jexclude.length(); j++) if (Pattern.matches(escapeStar(jexclude.getString(j)), uri.toString())) { include = false; break; } if (include) { String action = jitem.getString("action"); if ("redirect".equals(action) || "base64,redirect".equals(action)) { String name = jitem.getString("param"); String param = uri.getQueryParameter(name); if (!TextUtils.isEmpty(param)) try { if ("base64,redirect".equals(action)) param = new String(Base64.decode(param, Base64.NO_PADDING)); return Uri.parse(param); } catch (Throwable ex) { Log.w(ex); } } else if ("regex-path".equals(action)) { String regex = jitem.getString("param"); String prepend = jitem.optString("prepend_scheme"); String path = uri.getPath(); if (!TextUtils.isEmpty(path)) { Matcher m = Pattern.compile(regex).matcher(path); if (m.matches()) { String param = m.group(1); if (!TextUtils.isEmpty(prepend)) param = prepend + "://" + param; return Uri.parse(param); } } } } } } catch (Throwable ex) { Log.e(ex); } return null; } private static String escapeStar(String regex) { for (char kar : "\\.?![]{}()<>*+-=^$|".toCharArray()) if (kar != '*') regex = regex.replace("" + kar, "\\" + kar); return regex.replace("*", ".*"); } static Uri secure(Uri uri, boolean https) { String scheme = uri.getScheme(); if (https ? "http".equals(scheme) : "https".equals(scheme)) { Uri.Builder builder = uri.buildUpon(); builder.scheme(https ? "https" : "http"); String authority = uri.getEncodedAuthority(); if (authority != null) { authority = authority.replace(https ? ":80" : ":443", https ? ":443" : ":80"); builder.encodedAuthority(authority); } return builder.build(); } else return uri; } static boolean isSecure(Uri uri) { return (!uri.isOpaque() && "https".equalsIgnoreCase(uri.getScheme())); } static boolean isHyperLink(Uri uri) { return (!uri.isOpaque() && ("http".equalsIgnoreCase(uri.getScheme()) || "https".equalsIgnoreCase(uri.getScheme()))); } static Uri fix(Uri uri) { if ((!"http".equals(uri.getScheme()) && "http".equalsIgnoreCase(uri.getScheme())) || (!"https".equals(uri.getScheme()) && "https".equalsIgnoreCase(uri.getScheme()))) { String u = uri.toString(); int semi = u.indexOf(':'); if (semi > 0) return Uri.parse(u.substring(0, semi).toLowerCase(Locale.ROOT) + u.substring(semi)); } return uri; } static String getHost(Uri uri) { if ("mailto".equalsIgnoreCase(uri.getScheme())) { MailTo email = MailTo.parse(uri.toString()); return getEmailDomain(email.getTo()); } else return uri.getHost(); } static void test(Context context) { String[] hosts = new String[]{ "child.parent.example.com", "parent.example.com", "example.com", "com", "child.parent.co.uk", "parent.co.uk", "co.uk", "uk", "child.parent.aaa.ck", "parent.aaa.ck", "aaa.ck", "ck", "child.parent.www.ck", "parent.www.ck", "www.ck", "ck" }; for (String host : hosts) Log.i("PSL " + host + ":" + " tld=" + getTld(context, host) + " root=" + getRootDomain(context, host) + " parent=" + getParentDomain(context, host)); } }