Commits

Lars Yencken  committed e8672ae

Aggregates file-based logging into a single output file.

  • Participants
  • Parent commits 56bbfef

Comments (0)

Files changed (7)

File .hgignore

 syntax: glob
 .svn
-logs
 *.pyc
 *.pyo
 build
 overconstrained
 rejected-entries
 *.egg
+*.log
+*.out
+*.orig
+*.swp

File gpalign/align.py

 from gpalign.alignment import AlignmentModel
 from gpalign.reading_model import ReadingModel
 from gpalign import evaluate
+from gpalign import settings
 
 def perform_segmentation(input_file, output_file, options):
     """ The main method for this module. Performs the entire segmentation run,
         sys.exit(1)
     
     if options.evaluate:
-        input_file = os.path.join(os.path.dirname(potentials.__file__), 'data',
-                'eval-alignment.data')
+        input_file = os.path.join(settings.DATA_DIR, 'eval-alignment.data')
         with warnings.catch_warnings():
             warnings.simplefilter('ignore')
             output_file = os.tempnam()    

File gpalign/dictionary.py

 evaluation_entries().
 """
 
-from os import path
-
 from cjktools import scripts
 from cjktools.common import sopen
 
     input file is assumed to be in edict format.
     """
     istream = sopen(input_file)
-    rejection_stream = sopen(path.join(settings.LOG_DIR, 'rejected-entries'),
-            'w')
-
+    log_stream = settings.LogStream.get()
+    
     entries = []
     num_rejected = 0
     for line in istream:
             entries.append(Entry(g_string, p_string))
         else:
             num_rejected += 1
-            rejection_stream.write(line)
+            log_stream.log_rejected(line.rstrip())
 
     return entries, num_rejected
 
     entries = []
     istream = sopen(input_file, 'r')
 
-    rejection_stream = sopen(path.join(settings.LOG_DIR, 'rejected-entries'),
-            'w')
+    log_stream = settings.LogStream.get()
 
     num_rejected = 0
     for line in istream:
             entries.append(Entry(g_string, p_string))
         else:
             num_rejected += 1
-            rejection_stream.write(line)
+            log_stream.log_rejected(line.rstrip())
 
     return entries, num_rejected
 

File gpalign/entry.py

     def __cmp__(self, rhs):
         return cmp(self.score, rhs.score)
 
-    def to_string(self):
+    def __unicode__(self):
         if self.aligned:
             g_segments, p_segments = self.alignment
-            retStr = 'Entry(%s <-> %s)' % \
+            s = u'Entry(%s <-> %s)' % \
                     ('|'.join(g_segments), '|'.join(p_segments))
         elif self.potentials:
-            retStr = 'Entry(%s <-> %s, %d potentials)' % \
+            s = u'Entry(%s <-> %s, %d potentials)' % \
                     (self.g_string, self.p_string, len(self.potentials))
         else:
-            retStr = 'Entry(%s <-> %s)' % (self.g_string, self.p_string)
-        return retStr
+            s = u'Entry(%s <-> %s)' % (self.g_string, self.p_string)
+        return s        
 
     def __str__(self):
-        return self.to_string()
+        return unicode(self).encode('utf8')
     
     def __repr__(self):
-        return self.to_string()
+        return str(self)
 
     def to_line(self):
-        """ Prints the final alignment in our desired output format. 
-        """
+        "Prints the final alignment in our desired output format."
         assert self.aligned
 
         alignment = ' '.join(map(lambda x: '|'.join(x), self.alignment))

File gpalign/evaluate.py

     """ Evaluates the alignments provided in the prediction file, writing the
         results to the results file.
     """
-    current_dir = os.path.dirname(__file__)
-    validation_file = os.path.join(current_dir, 'data', 'eval-alignment.data')
+    validation_file = os.path.join(settings.DATA_DIR, 'eval-alignment.data')
 
     results = {}
 

File gpalign/potentials.py

 generate_alignments() method.
 """
 
-from os import path
 import string
 import sys
 
 from cjktools import scripts, kana_table
-from cjktools.common import sopen
 from simplestats import comb
 
 import settings
         second member is a list of (graphemeString, [potentialAlignments]).
     """
     # we record anything which we've overconstrained and can't solve
-    overconstrained = sopen(path.join(settings.LOG_DIR, 'overconstrained'),
-            'w')
+    log_stream = settings.LogStream.get()
 
     unique_entries = []
     ambiguous_entries = []
             ambiguous_entries.append(entry)
         else:
             # we've overconstrained this entry -- no potential alignments
-            print >> overconstrained, entry.to_string()
+            log_stream.log_overconstrained(entry)
     
     return unique_entries, ambiguous_entries
 

File gpalign/settings.py

 #  Copyright 2009 Lars Yencken. All rights reserved.
 # 
 
-DATA_DIR = ''
-CACHE_DIR = ''
-LOG_DIR = ''
+import os
+from cjktools.common import sopen
+
+DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
+
+class LogStream(object):
+    """An output stream for logging during alignment.
+
+    Collects the diagnostics that were previously written to separate
+    per-category files into one log file. Each ``log_*`` method writes a
+    single line made of a fixed category prefix followed by the item.
+
+    Use ``LogStream.get()`` to obtain the shared instance instead of
+    constructing one directly.
+
+    NOTE(review): nothing visible here closes or flushes ``ostream``
+    explicitly -- confirm that relying on interpreter shutdown is intended.
+    """
+    def __init__(self, filename='align.log'):
+        """Open ``filename`` in write mode ('w') as the log destination.
+
+        The default is a bare file name with no directory component.
+        """
+        # sopen comes from cjktools.common; presumably it returns a
+        # unicode-aware stream -- TODO confirm against cjktools.
+        self.ostream = sopen(filename, 'w')
+    
+    def log_overconstrained(self, entry):
+        """Write ``entry`` on one line, prefixed with "overconstrained: "."""
+        # The unicode format string means ``entry`` is rendered as unicode
+        # text rather than as a byte string.
+        print >> self.ostream, u"overconstrained: %s" % entry
+    
+    def log_excessive(self, entry):
+        """Write ``entry`` on one line, prefixed with "excessive: "."""
+        print >> self.ostream, u"excessive: %s" % entry
+    
+    def log_rejected(self, line):
+        """Write ``line`` on one line, prefixed with "badly formed: ".
+
+        The print statement appends its own newline; the callers visible in
+        this commit pass ``line.rstrip()``.
+        """
+        print >> self.ostream, u"badly formed: %s" % line
+    
+    @classmethod
+    def get(cls):
+        """Return the shared log stream, creating it on first use."""
+        # hasattr() also sees attributes inherited from a base class, and the
+        # instance created is always a plain LogStream (not ``cls``), built
+        # with the default file name.
+        if not hasattr(cls, '_current'):
+            cls._current = LogStream()
+        
+        return cls._current