Commits

Lynn Rees committed df9d6f3

- class it

  • Participants
  • Parent commits 1cde482

Comments (0)

Files changed (2)

 
 class Templar:
     
-    def __init__(self, a, b):
-        self.linejunk, self.charjunk = None, None
-        self.a, self.b = a, b
-        self.smash = SequenceMatcher(self.linejunk, self.a, self.b)
+    def __init__(self):
+        self.linejunk, self.charjunk = None, None       
 
-    def _compare(self, a, b):        
+    def _compare(self, a, b):
+        self.smash = SequenceMatcher(self.linejunk, a, b)        
         for tag, alo, ahi, blo, bhi in self.smash.get_opcodes():
             if tag == 'replace': g = self._near(a, alo, ahi, b, blo, bhi)
             elif tag == 'delete': g = self._tagger('-', a, alo, ahi)
     def _tagger(self, tag, x, lo, hi):
         for i in xrange(lo, hi): yield ' '.join([tag, x[i]])
 
-    def extract(self):
-        return list(self._compare(self.a, self.b))
+    def _extract(self, a, b):
+        """Return, as a list, everything _compare() produces for a and b."""
+        compared = self._compare(a, b)
+        return list(compared)
 
-    def ratio(self):
-        return self.smash.ratio()
+    def _sorter(self, rawtemplate, learn=None):
 
-    def setbest_ratio(self, best_ratio):
-        self.best_ratio = best_ratio
+        def autofield(line, glist, label):
+            glist.append(fragstart % (label, label))
+            glist.append(line)
+            glist.append(fragend)
 
-    def setcutoff(self, cutoff):
-        self.cutoff = cutoff
+        def manfield(tag, glist, count): 
+            if rawtemplate[fullcount-1][:1] != tag:
+                label = str(count)
+                if template[-1].find('<psi:field') == -1:
+                    template.append(fieldtag % label)
+                elif count > int(template[-1][-5]):
+                    template.append(fieldtag % label)
+                else: count += 1
+                glist.append(fragstart % (label, label))            
+            glist.append(i[2:])
+            if rawtemplate[fullcount+1][:1] != tag: glist.append(fragend)
+            return count
         
+        def classifier(choice, fcount):
+            if choice == 1: template.append(aline)
+            elif choice == 2: template.append(bline)
+            elif choice == 3:
+                label = str(fcount)
+                template.append(fieldtag % label)
+                autofield(aline, alist, label)
+                autofield(bline, blist, label)
+                fcount += 1
+            elif choice == 4:
+                mirror, count = [], 0
+                linetemp = listemplar(aline.split(), bline.split())
+                for j in linetemp:
+                    if j[:1] == '1':
+                        label = str(fcount)
+                        aattr, battr = j, linetemp[count+1]
+                        mirror.append(fieldattr % label)                        
+                        autofield(aattr[2:], alist, label)
+                        autofield(battr[2:], blist, label)
+                        fcount += 1
+                    elif j[:1] != '2': mirror.append(j[1:])
+                    count += 1
+                template.append(''.join(mirror))
+            elif choice == 5:
+                alist.append(aline)
+                blist.append(bline)
+            elif choice == 6: return fcount            
+            return fcount
 
-def filetemplar(a, b):
-    tmp = Templar(htmlutils.htmlines(a), htmlutils.htmlines(b))
-    #if tmp.ratio() > 0.5:
-    return tmp.extract()
+        if rawtemplate:
+            fullcount, fcount, template, alist, blist = 0, 1, [], [], []
+            fieldtag, fragend = '<psi:field name="%s" />', '</psi:fragment>'
+            fragstart = '<psi:fragment name="%s" class="%s">'
+            fieldattr = ' psi:field="%s"'
+            if not learn: learn = dict()
+            for i in rawtemplate:
+                if i[:1] == ' ': template.append(i[2:])
+                elif i[:1] == '-': fcount = manfield('-', alist, fcount)
+                elif i[:1] == '+': fcount = manfield('+', blist, fcount)
+                elif i[:1] == '1':
+                    aline, bline = i[2:], rawtemplate[fullcount+1][2:]
+                    if aline in learn: fcount = classifier(learn[aline], fcount)
+                    else:
+                        print 'Choose line to insert in template'
+                        print '1. %s' % aline
+                        print '2. %s' % bline
+                        print '3. Insert field element'
+                        print '4. Insert field attribute'
+                        print '5. Insert in resources without field'
+                        print '6. Discard both lines'
+                        choice = input('Enter a number: ')
+                        fcount = classifier(choice, fcount)
+                        learn[aline] = choice
+                fullcount += 1
+            return template, alist, blist, learn
 
-def listemplar(a, b):
-    tmp = Templar(a, b)
-    #if tmp.ratio() > 0.5:
-    return tmp.extract()
+    def filesort(self, file1, file2, learn=None):
+        """Compare two HTML files and pass the comparison to _sorter().
+        Both files are read through htmlutils.htmlines(), compared with
+        _extract(), and the result is handed to _sorter() together with
+        learn; whatever _sorter() returns is returned unchanged.
+        """
+        a = self._extract(htmlutils.htmlines(file1), htmlutils.htmlines(file2))
+        # sorter() is no longer a module-level function; it is now the
+        # _sorter() method, so it has to be reached through self.
+        return self._sorter(a, learn)
 
-def mixtemplar(file, elist):
-    return Templar(htmlutils.htmlines(file), elist).extract()
+    def listsort(self, la, lb, learn=None):
+        """Compare the sequences la and lb and pass the result to _sorter().
+        learn is forwarded to _sorter() as given; the return value of
+        _sorter() is returned unchanged.
+        """
+        # Reach the sorter through self: the module-level sorter() was
+        # replaced by the _sorter() method.
+        return self._sorter(self._extract(la, lb), learn)
 
-def sorter(rawtemplate, learn=None):
+    def mixsort(self, file, elist, learn=None):
+        """Compare an HTML file against the ready-made sequence elist.
+        Only file goes through htmlutils.htmlines(); elist is compared as
+        given.  The comparison and learn are passed to _sorter() and its
+        return value is returned unchanged.
+        """
+        lines = htmlutils.htmlines(file)
+        # Reach the sorter through self: the module-level sorter() was
+        # replaced by the _sorter() method.
+        return self._sorter(self._extract(lines, elist), learn)
 
-    def autofield(line, glist, label):
-        glist.append(fragstart % (label, label))
-        glist.append(line)
-        glist.append(fragend)
+    def autosort(self, path):
+        from mimetypes import guess_type
+        from random import shuffle
+        import os
 
-    def manfield(tag, glist, count): 
-        if rawtemplate[fullcount-1][:1] != tag:
-            label = str(count)
-            if template[-1].find('<psi:field') == -1:
-                template.append(fieldtag % label)
-            elif count > int(template[-1][-5]):
-                template.append(fieldtag % label)
-            else: count += 1
-            glist.append(fragstart % (label, label))            
-        glist.append(i[2:])
-        if rawtemplate[fullcount+1][:1] != tag: glist.append(fragend)
-        return count
-    
-    def classifier(choice, fcount):
-        if choice == 1: template.append(aline)
-        elif choice == 2: template.append(bline)
-        elif choice == 3:
-            label = str(fcount)
-            template.append(fieldtag % label)
-            autofield(aline, alist, label)
-            autofield(bline, blist, label)
-            fcount += 1
-        elif choice == 4:
-            mirror, count = [], 0
-            linetemp = listemplar(aline.split(), bline.split())
-            for j in linetemp:
-                if j[:1] == '1':
-                    label = str(fcount)
-                    aattr, battr = j, linetemp[count+1]
-                    mirror.append(fieldattr % label)                        
-                    autofield(aattr[2:], alist, label)
-                    autofield(battr[2:], blist, label)
-                    fcount += 1
-                elif j[:1] != '2': mirror.append(j[1:])
-                count += 1
-            template.append(''.join(mirror))
-        elif choice == 5:
-            alist.append(aline)
-            blist.append(bline)
-        elif choice == 6: return fcount            
-        return fcount
+        def flog(olist, mlearn):
+            if len(olist) > 1:
+                nlist = list()
+                shuffle(olist)
+                while len(olist):
+                    extract = listsort(olist.pop(), olist.pop(), mlearn)
+                    mlearn.update(extract[3])
+                    nlist.append(extract[0])
+                flog(nlist, mlearn)
+            else: clist.append(olist[0])
 
-    if rawtemplate:
-        fullcount, fcount, template, alist, blist = 0, 1, [], [], []
-        fieldtag, fragend = '<psi:field name="%s" />', '</psi:fragment>'
-        fragstart = '<psi:fragment name="%s" class="%s">'
-        fieldattr = ' psi:field="%s"'
-        if not learn: learn = dict()
-        for i in rawtemplate:
-            if i[:1] == ' ': template.append(i[2:])
-            elif i[:1] == '-': fcount = manfield('-', alist, fcount)
-            elif i[:1] == '+': fcount = manfield('+', blist, fcount)
-            elif i[:1] == '1':
-                aline, bline = i[2:], rawtemplate[fullcount+1][2:]
-                if aline in learn: fcount = classifier(learn[aline], fcount)
+        def combine():
+            last = str()
+            for line in clist[0]:
+                if last.find('<psi:field') == -1:
+                    last = line
+                    yield line                
                 else:
-                    print 'Choose line to insert in template'
-                    print '1. %s' % aline
-                    print '2. %s' % bline
-                    print '3. Insert field element'
-                    print '4. Insert field attribute'
-                    print '5. Insert in resources without field'
-                    print '6. Discard both lines'
-                    choice = input('Enter a number: ')
-                    fcount = classifier(choice, fcount)
-                    learn[aline] = choice
-            fullcount += 1
-        return template, alist, blist, learn
+                    if line.find('<psi:field') == -1:
+                        last = line
+                        yield line
 
-def filesort(file1, file2, learn=None):
-    return sorter(filetemplar(file1, file2), learn)
+        if path != os.getcwd(): os.chdir(path)
+        hfiles = [i for i in os.listdir(path) if guess_type(i)[0] == 'text/html']
+        mlearn, tlist, clist = dict(), list(), list()
+        shuffle(hfiles)
+        if len(hfiles) % 2 != 0: hfiles.pop()
+        while len(hfiles):
+            extract = filesort(hfiles.pop(), hfiles.pop(), mlearn)
+            if extract:
+                mlearn.update(extract[3])
+                tlist.append(extract[0])
+        if len(tlist) % 2 != 0: tlist.pop()
+        flog(tlist, mlearn)    
+        return list(combine())
 
-def listsort(la, lb, learn=None):
-    return sorter(listemplar(la, lb), learn)
+    def wizsort(self, path):
+        from mimetypes import guess_type
+        from random import shuffle
+        import os
 
-def mixsort(file, elist, learn=None):
-    return sorter(mixtemplar(file, elist), learn)
+        def flog(olist, mlearn):
+            if len(olist) > 1:
+                nlist = list()
+                shuffle(olist)
+                while len(olist):
+                    extract = listsort(olist.pop(), olist.pop(), mlearn)
+                    mlearn.update(extract[3])
+                    nlist.append(extract[0])
+                flog(nlist, mlearn)
+            else: clist.append(olist[0])
 
-def autosort(path):
-    from mimetypes import guess_type
-    from random import shuffle
-    import os
+        def combine():
+            last = str()
+            for line in clist[0]:
+                if last.find('<psi:field') == -1:
+                    last = line
+                    yield line                
+                else:
+                    if line.find('<psi:field') == -1:
+                        last = line
+                        yield line
 
-    def flog(olist, mlearn):
-        if len(olist) > 1:
-            nlist = list()
-            shuffle(olist)
-            while len(olist):
-                extract = listsort(olist.pop(), olist.pop(), mlearn)
+        if path != os.getcwd(): os.chdir(path)
+        hfiles = [i for i in os.listdir(path) if guess_type(i)[0] == 'text/html']
+        hfiles.sort()
+        choices, count = dict(), 1
+        for i in hfiles:
+            choices[count] = i
+            count += 1    
+        for i in choices: print '%s. %s' % (i, choices.get(i))
+        print ''
+        print 'Choose an option'
+        
+        mlearn, tlist, clist = dict(), list(), list()
+        shuffle(hfiles)
+        if len(hfiles) % 2 != 0: hfiles.pop()
+        while len(hfiles):
+            extract = filesort(hfiles.pop(), hfiles.pop(), mlearn)
+            if extract:
                 mlearn.update(extract[3])
-                nlist.append(extract[0])
-            flog(nlist, mlearn)
-        else: clist.append(olist[0])
-
-    def combine():
-        last = str()
-        for line in clist[0]:
-            if last.find('<psi:field') == -1:
-                last = line
-                yield line                
-            else:
-                if line.find('<psi:field') == -1:
-                    last = line
-                    yield line
-
-    hfiles = [i for i in os.listdir(path) if guess_type(i)[0] == 'text/html']
-    mlearn, tlist, clist = dict(), list(), list()
-    shuffle(hfiles)
-    if len(hfiles) % 2 != 0: hfiles.pop()
-    while len(hfiles):
-        extract = filesort(hfiles.pop(), hfiles.pop(), mlearn)
-        if extract:
-            mlearn.update(extract[3])
-            tlist.append(extract[0])
-    if len(tlist) % 2 != 0: tlist.pop()
-    flog(tlist, mlearn)    
-    return list(combine())
+                tlist.append(extract[0])
+        if len(tlist) % 2 != 0: tlist.pop()
+        flog(tlist, mlearn)    
+        return list(combine())
 from xml.sax.saxutils import escape, _outputwrapper, writetext, writeattr
 
 
-class HTMLformat(xml.sax.ContentHandler):
+class SaxHash(xml.sax.ContentHandler):
 
     from htmldefs import inline, empty, head
     
         else: self.content.append(content)
 
 
-def listhtml(html):
+def htmlhash(html):
     from xml.sax.handler import feature_external_ges
     parser = xml.sax.make_parser()
-    h = HTMLformat()
+    h = SaxHash()
     parser.setFeature(feature_external_ges, 0)
     parser.setContentHandler(h)
     source = xml.sax.InputSource(None)
 def htmlprep(file):
     html = htmldom(file)
     widthtags = list(tagsinList(html, tagsWithAttribute('width')))
-    grid = makegrid(html, widthtags)
-    fixwidths(grid)
+    fixwidths(htmlgrid(html, widthtags))
     fixspans(html)
     notrwidth(html)
     return html
 
 def htmlines(file):
-    return listhtml(tostring(htmlprep(file)))
+    return htmlhash(tostring(htmlprep(file)))
 
 def tagsWithAttribute(attr):
     """Return the entries of htmldefs.html2css whose looked-up value contains attr."""
     from htmldefs import html2css
     matches = []
     for name in html2css:
         # Membership is tested on the value stored for this entry.
         if attr in html2css.get(name):
             matches.append(name)
     return matches
 
-def makegrid(doc, structure):
+def htmlgrid(doc, structure):
     grid, temp, blist = [], [], []
     for node in structure:
         if isname(node, 'tr'):