Add patch from Paul Wise to filter out various unwanted stuff
author joerg <joerg@c2f067bf-98e0-425f-9c98-e76020cfa380>
Thu, 11 Oct 2012 06:53:26 +0000 (06:53 +0000)
committer joerg <joerg@c2f067bf-98e0-425f-9c98-e76020cfa380>
Thu, 11 Oct 2012 06:53:26 +0000 (06:53 +0000)
git-svn-id: svn+ssh://svn.debian.org/svn/planet-debian/trunk@1618 c2f067bf-98e0-425f-9c98-e76020cfa380

config/config.ini
config/config.ini.deriv
config/config.ini.es
config/config.ini.tmpl
filters/remove-trackers-and-ads.plugin [new file with mode: 0644]

index 08d4ea6..35f1d16 100644 (file)
@@ -61,6 +61,9 @@ days_per_page = 0
 encoding = utf-8
 # locale = C
 
+filters = remove-trackers-and-ads.plugin
+filter_directories = filters
+
 [templates/index.html.tmpl]
 date_format = %I:%M%P
 
index a6a5c30..d4da95a 100644 (file)
@@ -93,6 +93,9 @@ days_per_page = 0
 encoding = utf-8
 # locale = C
 
+filters = remove-trackers-and-ads.plugin
+filter_dir = filters
+
 [templates/index.html.dj]
 date_format = %I:%M%P
 
index 5e18ace..0642059 100644 (file)
@@ -92,6 +92,9 @@ days_per_page = 0
 encoding = utf-8
 # locale = C
 
+filters = remove-trackers-and-ads.plugin
+filter_dir = filters
+
 [templates/index.html.dj]
 date_format = %I:%M%P
 
index c28e7a6..0cb1924 100644 (file)
@@ -93,6 +93,9 @@ days_per_page = 0
 encoding = utf-8
 # locale = C
 
+filters = remove-trackers-and-ads.plugin
+filter_dir = filters
+
 [templates/index.html.dj]
 date_format = %I:%M%P
 
diff --git a/filters/remove-trackers-and-ads.plugin b/filters/remove-trackers-and-ads.plugin
new file mode 100644 (file)
index 0000000..84c2a04
--- /dev/null
@@ -0,0 +1,26 @@
+import sys
+import re
+
+data = sys.stdin.read()
+
+regexes = [
+       # Blogger tracking images
+       r"""<img [^>]*?src=["']https?://blogger\.googleusercontent\.com/tracker/[^'"]*['"][^>]*>""",
+       # Feedburner ads
+       r"""<p><a href=['"]https?://[a-zA-Z0-9\-\.]*/~a/[a-zA-Z0-9]*\?a=[a-zA-Z0-9]*['"]><img border=['"]0['"] src=['"]https?://[a-zA-Z0-9\.\-]*/~a/[a-zA-Z0-9/]*\?i=[a-zA-Z0-9]*['"]/></a></p>""",
+       # Feedburner tracking images
+       r"""<img [^>]*?src=['"]https?://feeds\.feedburner\.com/~r/[^"']*['"][^>]*>""",
+       # Wordpress tracking images
+       r"""<img [^>]*?src=['"]https?://stats\.wordpress\.com/b\.gif[^"']*['"][^>]*>""",
+       # Amazon tracking images
+       r"""<img [^>]*?src=['"]https?://www\.assoc-amazon\.com/e/ir[^"']*['"][^>]*>""",
+       # Paypal tracking images
+       r"""<img [^>]*?src=['"]https?://www\.paypalobjects\.com/[^/]*/i/scr/pixel.gif[^"']*['"][^>]*>""",
+       # Other tracking images
+       r'<img [^>]*?width=["']1["'][^>]*?height=["']1["'][^>]*>',
+       r'<img [^>]*?height=["']1["'][^>]*?width=["']1["'][^>]*>',
+]
+
+for regex in regexes: data = re.sub(regex, '', data)
+
+sys.stdout.write(data)