From 3d119f01e2d8c51d1e310a2b1bb39fa798f08359 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=20R=C3=B6bert?= Date: Thu, 24 Aug 2017 00:16:07 +0200 Subject: [PATCH] Add new rules and rewrite old rules + New rules for: + youtube + facebook + imdb + adsensecustomsearchads + new global rules Rewrite old rules regex after the = from "[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?" to "[^&]*(\\?|&(amp;)?)?", because it is stronger. --- data/data.json | 111 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 78 insertions(+), 33 deletions(-) diff --git a/data/data.json b/data/data.json index 6f618b4..da78a76 100644 --- a/data/data.json +++ b/data/data.json @@ -4,17 +4,16 @@ "urlPattern": "(https:\\/\\/||http:\\/\\/).*(\\.amazon\\.)\\w{2,}\\/.*", "completeProvider": false, "rules": [ - "pf_rd_[a-zA-Z]=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "qid=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "sr=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "srs=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - ".*(amazon-adsystem\\.com)\\/.*", + "pf_rd_[a-zA-Z]=[^&]*(\\?|&(amp;)?)?", + "qid=[^&]*(\\?|&(amp;)?)?", + "sr=[^&]*(\\?|&(amp;)?)?", + "srs=[^&]*(\\?|&(amp;)?)?", ".*(adsensecustomsearchads\\.com)\\/.*", - "pd_rd_[a-zA-Z]*=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "__mk_[a-zA-Z]{1,3}_[a-zA-Z]{1,3}=[a-zA-Z0-9\\-\\.\\_\\%]*[\\?|&]?", - "url=[a-zA-Z0-9\\-\\.\\_\\%]*[\\?|&]?", - "spIA=[a-zA-Z0-9\\-\\.\\_\\%]*[\\?|&]?", - "rh=[a-zA-Z0-9\\-\\.\\_\\%]*[\\?|&]?" + "pd_rd_[a-zA-Z]*=[^&]*(\\?|&(amp;)?)?", + "__mk_[a-zA-Z]{1,3}_[a-zA-Z]{1,3}=[^&]*(\\?|&(amp;)?)?", + "url=[^&]*(\\?|&(amp;)?)?", + "spIA=[^&]*(\\?|&(amp;)?)?", + "rh=[^&]*(\\?|&(amp;)?)?" ], "exceptions": [ ".*(amazon\\.)\\w{2,}(\/gp\/).*\\/redirector.html\\/.*" @@ -24,27 +23,27 @@ "urlPattern": "(https:\\/\\/||http:\\/\\/).*(\\.google\\.)\\w{2,}\\/.*", "completeProvider": false, "rules": [ - "ved=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "bi[a-zA-Z]*=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "gfe_[a-zA-Z]*=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "ei=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "source=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "gs_[a-zA-Z]*=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "site=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "&\\.[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "oq=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "esrc=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "uact=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "cd=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "cad=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "gws_[a-zA-Z]*=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "im[a-zA-Z]*=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "atyp=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "vet=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "zx=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "_u=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "je=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?", - "[a-zA-Z\\_]+id=[a-zA-Z0-9\\-\\.\\_]*[\\?|&]?" + "ved=[^&]*(\\?|&(amp;)?)?", + "bi[a-zA-Z]*=[^&]*(\\?|&(amp;)?)?", + "gfe_[a-zA-Z]*=[^&]*(\\?|&(amp;)?)?", + "ei=[^&]*(\\?|&(amp;)?)?", + "source=[^&]*(\\?|&(amp;)?)?", + "gs_[a-zA-Z]*=[^&]*(\\?|&(amp;)?)?", + "site=[^&]*(\\?|&(amp;)?)?", + "&\\.[^&]*(\\?|&(amp;)?)?", + "oq=[^&]*(\\?|&(amp;)?)?", + "esrc=[^&]*(\\?|&(amp;)?)?", + "uact=[^&]*(\\?|&(amp;)?)?", + "cd=[^&]*(\\?|&(amp;)?)?", + "cad=[^&]*(\\?|&(amp;)?)?", + "gws_[a-zA-Z]*=[^&]*(\\?|&(amp;)?)?", + "im[a-zA-Z]*=[^&]*(\\?|&(amp;)?)?", + "atyp=[^&]*(\\?|&(amp;)?)?", + "vet=[^&]*(\\?|&(amp;)?)?", + "zx=[^&]*(\\?|&(amp;)?)?", + "_u=[^&]*(\\?|&(amp;)?)?", + "je=[^&]*(\\?|&(amp;)?)?", + "[a-zA-Z\\_]+id=[^&]*(\\?|&(amp;)?)?" ], "exceptions": [] }, @@ -60,11 +59,26 @@ "rules": [], "exceptions": [] }, - "utm": { + "globalRules": { "urlPattern": ".*", "completeProvider": false, "rules": [ - "utm_[a-zA-Z]*=.*[\\?|&]?" + "utm_[a-zA-Z]*=[^&]*(\\?|&(amp;)?)?", + "ga_source=[^&]*(\\?|&(amp;)?)?", + "ga_medium=[^&]*(\\?|&(amp;)?)?", + "ga_term=[^&]*(\\?|&(amp;)?)?", + "ga_content=[^&]*(\\?|&(amp;)?)?", + "ga_campaign=[^&]*(\\?|&(amp;)?)?", + "ga_place=[^&]*(\\?|&(amp;)?)?", + "yclid=[^&]*(\\?|&(amp;)?)?", + "_openstat=[^&]*(\\?|&(amp;)?)?", + "fb_action_ids=[^&]*(\\?|&(amp;)?)?", + "fb_action_types=[^&]*(\\?|&(amp;)?)?", + "fb_ref=[^&]*(\\?|&(amp;)?)?", + "fb_source=[^&]*(\\?|&(amp;)?)?", + "action_object_map=[^&]*(\\?|&(amp;)?)?", + "action_type_map=[^&]*(\\?|&(amp;)?)?", + "action_ref_map=[^&]*(\\?|&(amp;)?)?" ], "exceptions": [] }, @@ -91,6 +105,37 @@ "completeProvider": true, "rules": [], "exceptions": [] + }, + "adsensecustomsearchads": { + "urlPattern": ".*(adsensecustomsearchads).*", + "completeProvider": true, + "rules": [], + "exceptions": [] + }, + "youtube": { + "urlPattern": "(https:\\/\\/||http:\\/\\/).*(\\.youtube\\.)\\w{2,}\\/.*", + "completeProvider": false, + "rules": [ + "feature=[^&]*(\\?|&(amp;)?)?" + ], + "exceptions": [] + }, + "facebook": { + "urlPattern": "(https:\\/\\/||http:\\/\\/).*(\\.facebook\\.)\\w{2,}\\/.*", + "completeProvider": false, + "rules": [ + "[a-zA-Z]*ref=[^&]*(\\?|&(amp;)?)?", + "hc_location=[^&]*(\\?|&(amp;)?)?" + ], + "exceptions": [] + }, + "imdb": { + "urlPattern": "(https:\\/\\/||http:\\/\\/).*(\\.imdb\\.com)\\/.*", + "completeProvider": false, + "rules": [ + "ref_=[^&]*(\\?|&(amp;)?)?" + ], + "exceptions": [] } } } \ No newline at end of file