Regex Fix

Providers are now matched against the URL's actual domain (host), not against any occurrence of the provider name elsewhere in the URL.

Only URLs that contain fields that could be cleaned are now examined. Every URL that contains fields has at least one "?".
This commit is contained in:
Kevin Röbert 2018-06-23 01:26:04 +02:00
parent 525734c8c2
commit e1aa0f03ab

View File

@ -1,7 +1,7 @@
{ {
"providers": { "providers": {
"amazon": { "amazon": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*([\\.]?amazon\\.).*\\/.*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(amazon)(\\.[a-zA-Z]{2,})(.*\\?.*)",
"completeProvider": false, "completeProvider": false,
"rules": [ "rules": [
"pf_rd_[a-zA-Z]=[^\\/|\\?|&]*(\\/|&(amp;)?)?", "pf_rd_[a-zA-Z]=[^\\/|\\?|&]*(\\/|&(amp;)?)?",
@ -28,14 +28,14 @@
"redirections": [] "redirections": []
}, },
"fls-na.amazon": { "fls-na.amazon": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*(fls-na\\.amazon\\.).*\\/.*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(fls-na\\.amazon)(\\.[a-zA-Z]{2,}).*",
"completeProvider": true, "completeProvider": true,
"rules": [], "rules": [],
"exceptions": [], "exceptions": [],
"redirections": [] "redirections": []
}, },
"google": { "google": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*([\\.]?google\\.).*\\/.*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(google)(\\.[a-zA-Z]{2,})(.*\\?.*)",
"completeProvider": false, "completeProvider": false,
"rules": [ "rules": [
"ved=[^\\/|\\?|&]*(\\/|&(amp;)?)?", "ved=[^\\/|\\?|&]*(\\/|&(amp;)?)?",
@ -80,14 +80,14 @@
] ]
}, },
"googlesyndication": { "googlesyndication": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*(googlesyndication).*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(googlesyndication)(\\.[a-zA-Z]{2,}).*",
"completeProvider": true, "completeProvider": true,
"rules": [], "rules": [],
"exceptions": [], "exceptions": [],
"redirections": [] "redirections": []
}, },
"doubleclick": { "doubleclick": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*(doubleclick).*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(doubleclick)(\\.[a-zA-Z]{2,}).*",
"completeProvider": true, "completeProvider": true,
"rules": [], "rules": [],
"exceptions": [], "exceptions": [],
@ -132,42 +132,42 @@
"redirections": [] "redirections": []
}, },
"adtech": { "adtech": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*(adtech\\.).*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(adtech)(\\.[a-zA-Z]{2,}).*",
"completeProvider": true, "completeProvider": true,
"rules": [], "rules": [],
"exceptions": [], "exceptions": [],
"redirections": [] "redirections": []
}, },
"contentpass.net": { "contentpass.net": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*(contentpass\\.net).*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(contentpass\\.net).*",
"completeProvider": true, "completeProvider": true,
"rules": [], "rules": [],
"exceptions": [], "exceptions": [],
"redirections": [] "redirections": []
}, },
"bf-ad": { "bf-ad": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*(bf-ad).*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(bf-ad)(\\.[a-zA-Z]{2,}).*",
"completeProvider": true, "completeProvider": true,
"rules": [], "rules": [],
"exceptions": [], "exceptions": [],
"redirections": [] "redirections": []
}, },
"amazon-adsystem": { "amazon-adsystem": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*(amazon-adsystem).*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(amazon-adsystem)(\\.[a-zA-Z]{2,}).*",
"completeProvider": true, "completeProvider": true,
"rules": [], "rules": [],
"exceptions": [], "exceptions": [],
"redirections": [] "redirections": []
}, },
"adsensecustomsearchads": { "adsensecustomsearchads": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*(adsensecustomsearchads).*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(adsensecustomsearchads)(\\.[a-zA-Z]{2,}).*",
"completeProvider": true, "completeProvider": true,
"rules": [], "rules": [],
"exceptions": [], "exceptions": [],
"redirections": [] "redirections": []
}, },
"youtube": { "youtube": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*([\\.]?youtube\\.).*\\/.*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(youtube)(\\.[a-zA-Z]{2,})(.*\\?.*)",
"completeProvider": false, "completeProvider": false,
"rules": [ "rules": [
"feature=[^\\/|\\?|&]*(\\/|&(amp;)?)?", "feature=[^\\/|\\?|&]*(\\/|&(amp;)?)?",
@ -178,7 +178,7 @@
"redirections": [] "redirections": []
}, },
"facebook": { "facebook": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*([\\.]?facebook\\.).*\\/.*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(facebook)(\\.[a-zA-Z]{2,})(.*\\?.*)",
"completeProvider": false, "completeProvider": false,
"rules": [ "rules": [
"hc_location=[^\\/|\\?|&]*(\\/|&(amp;)?)?" "hc_location=[^\\/|\\?|&]*(\\/|&(amp;)?)?"
@ -189,7 +189,7 @@
"redirections": [] "redirections": []
}, },
"twitter": { "twitter": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*([\\.]?twitter\\.).*\\/.*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(twitter)(\\.[a-zA-Z]{2,})(.*\\?.*)",
"completeProvider": false, "completeProvider": false,
"rules": [ "rules": [
"(ref_)?src=[^\\/|\\?|&]*(\\/|&(amp;)?)?" "(ref_)?src=[^\\/|\\?|&]*(\\/|&(amp;)?)?"
@ -198,7 +198,7 @@
"redirections": [] "redirections": []
}, },
"reddit": { "reddit": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*([\\.]?reddit\\.)\\w{2,}\\/.*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(reddit)(\\.[a-zA-Z]{2,})(.*\\?.*)",
"completeProvider": false, "completeProvider": false,
"rules": [], "rules": [],
"exceptions": [], "exceptions": [],
@ -208,7 +208,7 @@
} }
, ,
"netflix": { "netflix": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*([\\.]?netflix\\.)\\w{2,}\\/.*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(netflix)(\\.[a-zA-Z]{2,})(.*\\?.*)",
"completeProvider": false, "completeProvider": false,
"rules": [ "rules": [
"trackId=[^\\/|\\?|&]*(\\/|&(amp;)?)?", "trackId=[^\\/|\\?|&]*(\\/|&(amp;)?)?",
@ -218,7 +218,7 @@
"redirections": [] "redirections": []
}, },
"techcrunch": { "techcrunch": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*([\\.]?techcrunch\\.)\\w{2,}\\/.*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?([\\.]?techcrunch\\.com)(.*\\?.*)",
"completeProvider": false, "completeProvider": false,
"rules": [ "rules": [
"ncid=[^\\/|\\?|&]*(\\/|&(amp;)?)?", "ncid=[^\\/|\\?|&]*(\\/|&(amp;)?)?",
@ -229,7 +229,7 @@
"redirections": [] "redirections": []
}, },
"bing": { "bing": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*([\\.]?bing\\.).*\\/.*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(bing)(\\.[a-zA-Z]{2,})(.*\\?.*)",
"completeProvider": false, "completeProvider": false,
"rules": [ "rules": [
"cvid=[^\\/|\\?|&]*(\\/|&(amp;)?)?", "cvid=[^\\/|\\?|&]*(\\/|&(amp;)?)?",
@ -244,17 +244,17 @@
"redirections": [] "redirections": []
}, },
"tweakers": { "tweakers": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*([\\.]?tweakers\\.net)\\/.*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(tweakers\\.net)(.*\\?.*)",
"completeProvider": false, "completeProvider": false,
"rules": [ "rules": [
"nb=[^\\/|\\?|&]*(\\/|&(amp;)?)?", "nb=[^\\/|\\?|&]*(\\/|&(amp;)?)?",
"\\?.*u=[^\\/|\\?|&]*(\\/|&(amp;)?)?" "u=[^\\/|\\?|&]*(\\/|&(amp;)?)?"
], ],
"exceptions": [], "exceptions": [],
"redirections": [] "redirections": []
}, },
"twitch": { "twitch": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*([\\.]?twitch\\.)\\w{2,}\\/.*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(twitch)(\\.[a-zA-Z]{2,})(.*\\?.*)",
"completeProvider": false, "completeProvider": false,
"rules": [ "rules": [
"tt_medium=[^\\/|\\?|&]*(\\/|&(amp;)?)?", "tt_medium=[^\\/|\\?|&]*(\\/|&(amp;)?)?",
@ -264,7 +264,7 @@
"redirections": [] "redirections": []
}, },
"vivaldi": { "vivaldi": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*([\\.]?vivaldi\\.com)\\/.*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(vivaldi\\.com)(.*\\?.*)",
"completeProvider": false, "completeProvider": false,
"rules": [ "rules": [
"pk_campaign=[^\\/|\\?|&]*(\\/|&(amp;)?)?", "pk_campaign=[^\\/|\\?|&]*(\\/|&(amp;)?)?",
@ -274,7 +274,7 @@
"redirections": [] "redirections": []
}, },
"indeed": { "indeed": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*([\\.]?indeed\\.com)\\/.*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(indeed\\.com)(.*\\?.*)",
"completeProvider": false, "completeProvider": false,
"rules": [ "rules": [
"from=[^\\/|\\?|&]*(\\/|&(amp;)?)?", "from=[^\\/|\\?|&]*(\\/|&(amp;)?)?",
@ -285,7 +285,7 @@
"redirections": [] "redirections": []
}, },
"hhdotru": { "hhdotru": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*([\\.]?hh\\.ru)\\/.*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(hh\\.ru)(.*\\?.*)",
"completeProvider": false, "completeProvider": false,
"rules": [ "rules": [
"vss=[^\\/|\\?|&]*(\\/|&(amp;)?)?", "vss=[^\\/|\\?|&]*(\\/|&(amp;)?)?",
@ -301,7 +301,7 @@
"redirections": [] "redirections": []
}, },
"ebay": { "ebay": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*([\\.]?ebay\\.)\\w{2,}\\/.*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(ebay)(\\.[a-zA-Z]{2,})(.*\\?.*)",
"completeProvider": false, "completeProvider": false,
"rules": [ "rules": [
"_trkparms=[^\\/|\\?|&]*(\\/|&(amp;)?)?", "_trkparms=[^\\/|\\?|&]*(\\/|&(amp;)?)?",
@ -311,7 +311,7 @@
"redirections": [] "redirections": []
}, },
"cnet": { "cnet": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*([\\.]?cnet\\.com)\\/.*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(cnet\\.com)(.*\\?.*)",
"completeProvider": false, "completeProvider": false,
"rules": [ "rules": [
"ftag=[^\\/|\\?|&]*(\\/|&(amp;)?)?" "ftag=[^\\/|\\?|&]*(\\/|&(amp;)?)?"
@ -320,7 +320,7 @@
"redirections": [] "redirections": []
}, },
"imdb.com": { "imdb.com": {
"urlPattern": "(https:\\/\\/||http:\\/\\/).*([\\.]?imdb\\.com)\\/.*", "urlPattern": "(https:\\/\\/|http:\\/\\/)([a-zA-Z0-9-]*\\.)?(imdb\\.com)(.*\\?.*)",
"completeProvider": false, "completeProvider": false,
"rules": [ "rules": [
"ref_=[^\\/|\\?|&]*(\\/|&(amp;)?)?", "ref_=[^\\/|\\?|&]*(\\/|&(amp;)?)?",