Commits

Lars Yencken committed 582cb8a

Replaces print statements with logging via the consoleLog library.

Comments (0)

Files changed (5)

     url='http://bitbucket.org/lars512/gpalign-py/',
     packages=['gpalign'],
     package_dir={'gpalign': 'src'},
-    install_requires=["cjktools", 'consoleLog'],
+    install_requires=['cjktools', 'consoleLog>=0.2.4', 'simplestats'],
     scripts=['src/align.py'],
 )
 import optparse
 import warnings
 
+from consoleLog import default as _log
+
 from gpalign import potentials
 from gpalign import dictionary
 from gpalign.alignment import AlignmentModel
         taking an edict dictionary as input and producing a segmented output
         for each kanji input row.
     """
-    # read in edict dictionary
+    steps = 4 if options.evaluate else 3
+    _log.start('Aligning %s' % os.path.basename(input_file), nSteps=steps)
+    
+    _log.start('Setup phase', nSteps=4)
+    
+    _log.start('Reading entries')
     format = options.format
     if format == 'simple':
-        print 'Reading evaluation entries'
         entries, num_rejected = dictionary.evaluation_entries(input_file)
     elif format == 'edict':
-        print 'Reading edict entries'
         entries, num_rejected = dictionary.edict_entries(input_file)
     else:
         raise Exception('unknown format: %s' % format)
+    
+    _log.finish('Found %d entries (rejected %d)' % (len(entries),
+            num_rejected))
 
-    print '--> Found %d entries (rejected %d)' % (len(entries), num_rejected)
-
-    print 'Separating long and short entries'
+    _log.start('Separating long and short entries')
     short_entries, long_entries = dictionary.separate_entries(entries,
             options.longest_run)
-    print '--> %d short, %d long' % (len(short_entries), len(long_entries))
+    _log.finish('%d short, %d long' % (len(short_entries), len(long_entries)))
 
     alignment_model = AlignmentModel(output_file, options)
 
         reading_model = ReadingModel()
     else:
         reading_model = None
+    _log.finish()
+    _log.space()
 
-    print 'PASS 1: SHORT ENTRIES'
+    _log.start('Pass 1: short entries')
     _resolve_entries(alignment_model, reading_model, short_entries, options)
+    _log.finish('Finished first pass')
+    _log.space()
 
-    print 'PASS 2: LONG ENTRIES'
+    _log.start('Pass 2: long entries')
     _resolve_entries(alignment_model, reading_model, long_entries, options)
+    _log.finish('Finished second pass')
 
     alignment_model.finish()
 
     if options.evaluate:
+        _log.space()
         evaluate.evaluate_alignment(output_file, output_file + '.eval')
+        
+    _log.finish()
 
 #----------------------------------------------------------------------------#
 
 def _resolve_entries(model, reading_model, entries, options):
-    print 'Generating possible alignments'
+    _log.start('Generating possible alignments')
     unique, ambiguous = potentials.generate_alignments(entries, options)
-    print '--> %d unique, %d ambiguous' % (len(unique), len(ambiguous))
-    print '--> %d overconstrained' % \
-            (len(entries) - (len(unique) + len(ambiguous)))
+    _log.log('%d unique, %d ambiguous' % (len(unique), len(ambiguous)))
+    _log.finish('%d overconstrained' % \
+            (len(entries) - (len(unique) + len(ambiguous))))
 
     if options.use_kanjidic:
-        print 'Disambiguating using kanjidic'
+        _log.start('Disambiguating using kanjidic')
         more_unique, ambiguous = reading_model.prune_alignments(ambiguous)
-        print '--> %d unique, %d ambiguous' % (len(more_unique),
-                len(ambiguous))
-        unique.extend(more_unique); del more_unique
+        _log.finish('%d unique, %d ambiguous' % (len(more_unique),
+                len(ambiguous)))
+        unique.extend(more_unique)
 
-    print 'Disambiguating readings using statistical model'
-    print '--> Processing %d unique entries' % len(unique)
-    model.add_resolved(unique); del unique
-    print '--> Beginning statistical disambiguation of %d entries' % \
-            len(ambiguous)
-    model.disambiguate(ambiguous); del ambiguous
+    _log.start('Disambiguating readings using statistical model', nSteps=2)
+    _log.log('Processing %d unique entries' % len(unique))
+    model.add_resolved(unique)
+    if ambiguous:
+        _log.log('Disambiguating %d entries ' % len(ambiguous), newLine=False)
+        model.disambiguate(ambiguous)
+    _log.finish()
 
 #----------------------------------------------------------------------------#
 # COMMAND-LINE INTERFACE
 
 "This module implements the iterative TF-IDF alignment method."
 
-import potentials
-from frequency import FrequencyMap
+import os
+import math
+import random
+import cPickle as pickle
 
 from cjktools import scripts
 from cjktools.common import sopen
 from consoleLog.progressBar import ProgressBar
+from consoleLog import default as _log
 
-import math
-import random
-import cPickle as pickle
+import potentials
+from frequency import FrequencyMap
 
 # epsilon for testing for zero
 eps = 1e-8
         """ Creates a new instance using the list of correctly aligned
             readings.
         """
-        print 'Initialising alignment model'
+        _log.start('Initialising alignment model')
         if options.model_input:
-            print '--> Seeding from', `options.model_input`
+            _log.log('Seeding from %s' %  
+                    os.path.basename(options.model_input))
             self._unique_counts = pickle.load(open(options.model_input))
         else:
-            print '--> Seeding from empty model'
+            _log.log('Seeding from empty model')
             self._unique_counts = FrequencyMap()
 
         self._ambiguous_counts = FrequencyMap()
         # of that heuristic
         if options.random:
             self._use_random = True
-            print '--> Random model selected'
+            _log.log('Random model selected')
         else:
             self._use_random = False
 
             self._default_idf = 0
     
             if not options.tf_heuristic:
-                print '--> Disabling tf heuristic'
+                _log.finish('Disabling tf heuristic')
                 self._default_tf = 1
     
             elif not options.idf_heuristic:
-                print '--> Disabling idf heuristic'
+                _log.finish('Disabling idf heuristic')
                 self._default_idf = 1
             
             else:
-                print '--> Full TF-IDF enabled'
-
-        return
+                _log.finish('Full TF-IDF enabled')
     
     #------------------------------------------------------------------------#
 
             self._unique_counts.add_counts(entry.alignment)
             print >> self._output, entry.to_line()
 
-        return
-
     #------------------------------------------------------------------------#
 
     def disambiguate(self, ambiguous):
             ostream.close()
 
         assert self._ambiguous_counts._g_size == 0
-
-        return
     
     #------------------------------------------------------------------------#
 
 
             entry.score = 0.0
             entry.scores = [0.0]*len(alignments)
-
-        return
  
     #------------------------------------------------------------------------#
 
         entry.potentials = None
         entry.aligned = True
 
-        return
-
     #------------------------------------------------------------------------#
 
     def _rescore(self, ambiguous):
             entry.score, entry.alignment = max(zip(entry.scores, \
                     entry.potentials))
 
-        return
-
     #------------------------------------------------------------------------#
 
     def _weighted_freqs(self, g_segments, p_segments, index):
             print '----->', other_score,
             potentials.print_alignment(other_alignment)
     
-        return
-
     #------------------------------------------------------------------------#
 
     def _random_alignment(self, entries):
         for ambiguous_entry in entries:
             ambiguous_entry.alignment = random.sample(
                     ambiguous_entry.potentials, 1)[0]
-        return
 
     #------------------------------------------------------------------------#
 
 
 from simplestats import sequences
 from cjktools.common import sopen
+from consoleLog import default as _log
 
 import errors
 import settings
     results['missing'] = missing
 
     _write_results(results, results_file)
-
-    return
+    _log_results(results)
 
 #----------------------------------------------------------------------------#
 
     results['bad'] = bad + missing
 
     _write_results(results, results_file)
+    _log_results(results)
 
 #----------------------------------------------------------------------------#
 
         percent = 100.0*number/5000.0
         print >> summary_stream, '%s    %4d    %6.2f%%' % (key, number,
                 percent)
-        print '%s    %4d    %6.2f%%' % (key, number, percent)
+
         ostream = sopen(results_file + '.' + key, 'w')
         for line in key_entries:
             print >> ostream, ':'.join(line)
         ostream.close()
 
-    return
+#----------------------------------------------------------------------------#
+
+def _log_results(results_dict):
+    _log.start('Evaluating alignments', nSteps=2)
+
+    good = len(results_dict['good'])
+    _log.log('good: %d (%.02f%%)' % (good, 100.0 * good / 5000.0))
+    bad = len(results_dict['bad'])
+    _log.start('bad: %d (%.02f%%)' % (bad, 100.0 * bad / 5000.0))
+    align = len(results_dict['align'])
+    _log.log('bad alignment: %d (%.02f%%)' % (align, 100.0 * align / 5000.0))
+    gapping = len(results_dict['gapping'])
+    _log.log('gapping: %d (%.02f%%)' % (gapping, 100.0 * gapping / 5000.0))
+    missing = len(results_dict['missing'])
+    _log.finish('missing: %d (%.02f%%)' % (missing, 100.0 * missing / 5000.0))
+    
+    _log.finish()
 
 #----------------------------------------------------------------------------#
 
     return parser
 
 def main(argv):
-    """ The main method for this module.
-    """
     parser = create_option_parser()
     (options, args) = parser.parse_args(argv)
 

src/reading_model.py

 from cjktools import scripts, enum, alternations
 from cjktools.resources import kanjidic
 from simplestats.sequences import flatten
+from consoleLog import default as _log
 
 #----------------------------------------------------------------------------#
 
 #----------------------------------------------------------------------------#
 
 class ReadingModel:
-    """ A model of the readings that each kanji takes, and some additional
-        information on their reliability. 
+    """
+    A model of the readings that each kanji takes, and some additional
+    information on their reliability. 
     """
 
     #------------------------------------------------------------------------#
     #
 
     def __init__(self):
-        """ Creates a new instance, populating it with kanjidic's model. The
-            initial model gets cached, so we don't have to get any
-            differences.
         """
-        print 'Initialising reading model'
+        Creates a new instance, populating it with kanjidic's model. The
+        initial model gets cached, so we don't have to get any differences.
+        """
+        _log.log('Initialising reading model')
         self._readings = {}
         self._pool = {}
         self._okuri = {}
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.