Frederic De Groef committed 18b6846

added a utility to reprocess errors loaded from an error file

Comments (0)

Files changed (1)

scripts/csxj_reprocess_errorlist.py

+import itertools as it
+import json
+from pprint import pprint
+
+from csxj.db import Provider, get_all_provider_names
+from csxj.db import ErrorLogEntry, ErrorLogEntry2
+from csxj.datasources import lesoir, lalibre, dhnet, sudinfo, rtlinfo, lavenir, rtbfinfo, levif, septsursept, sudpresse
+
+
+
+NAME_TO_SOURCE_MODULE_MAPPING = {
+    'lesoir': lesoir,
+    'lalibre': lalibre,
+    'dhnet': dhnet,
+    'sudinfo': sudinfo,
+    'rtlinfo': rtlinfo,
+    'lavenir': lavenir,
+    'rtbfinfo': rtbfinfo,
+    'levif': levif,
+    'septsursept': septsursept,
+    'sudpresse': sudpresse
+}
+
+
+def main(infile):
+    with open(infile, 'r') as f:
+        errors_by_source = json.load(f)
+        for source_name, errors in errors_by_source.iteritems():
+            print "--- {0}: {1} errors".format(source_name, len(errors))
+            datasource = NAME_TO_SOURCE_MODULE_MAPPING[source_name]
+            if source_name == "sudinfo":
+                print "PASS"
+                continue
+            for timestamp, error in errors:
+                if error[0].startswith('http://'):
+                    url, title, stacktrace = error
+                else:
+                    title, url, stacktrace = error
+
+                title=title.strip()
+
+                print u"*** Reprocessing: {0} {1})".format(url, title)
+                try:
+                    article_data, html = datasource.extract_article_data(url)
+                    article_data.print_summary()
+                except:
+                    print "fail"
+
+
+
+
+if __name__=="__main__":
+    main("json_db_0_5_errors.json")
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.