Commits

Anonymous committed 3e992c9

* rename to unihistext (removed the file extension)
* options support
* version printing
* available encodings printing
* help printing

Comments (0)

Files changed (2)

+#!/usr/bin/env python
+
+import sys, os, os.path
+import unicodedata
+import codecs
+
+def unicode_xreadlines(input, encoding = 'utf-8'):
+    return codecs.iterdecode(input.xreadlines(), encoding=encoding)
+
+def main():
+
+    from optparse import OptionParser
+    parser = OptionParser()
+    parser.add_option("-i", "--input", dest="input", help="read Unicode stream from FILE ('-' means stdin, this is the default)", metavar="FILE", default="-")
+    parser.add_option("--encoding", help="set input stream binary encoding ('utf-8' is the default)", default='utf-8')
+    parser.add_option("-l", "--list-encodings", help="list some available encodings and exit", default=False, action="store_true")
+    parser.add_option("-v", "--version", help="print version and exit", default=False, action="store_true")
+
+    run(*parser.parse_args())
+
+def list_encodings():
+    encs = ['ascii', 'big5', 'big5hkscs', 'cp037', 'cp1006', 'cp1026', 'cp1140', 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', 'cp1256', 'cp1257', 'cp1258', 'cp424', 'cp437', 'cp500', 'cp737', 'cp775', 'cp850', 'cp852', 'cp855', 'cp856', 'cp857', 'cp860', 'cp861', 'cp862', 'cp863', 'cp864', 'cp865', 'cp866', 'cp869', 'cp874', 'cp875', 'cp932', 'cp949', 'cp950', 'euc_jis_2004', 'euc_jisx0213', 'euc_jp', 'euc_kr', 'gb18030', 'gb2312', 'gbk', 'hz', 'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2', 'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext', 'iso2022_kr', 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15', 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6', 'iso8859_7', 'iso8859_8', 'iso8859_9', 'johab', 'koi8_r', 'koi8_u', 'latin_1', 'mac_cyrillic', 'mac_greek', 'mac_iceland', 'mac_latin2', 'mac_roman', 'mac_turkish', 'ptcp154', 'shift_jis', 'shift_jis_2004', 'shift_jisx0213', 'utf_16', 'utf_16_be', 'utf_16_le', 'utf_7', 'utf_8']
+    maxlen = max(len(e) + 2 for e in encs)
+    termcols = 80
+    cols = termcols / maxlen
+    for row in range((len(encs) - 1) / cols + 1):
+        for col in range(cols):
+            pos = row * cols + col
+            if pos < len(encs):
+                sys.stdout.write(encs[pos].ljust(maxlen))
+        sys.stdout.write("\n")
+    print
+    print "Any encoding supported by Python's codecs module is suported."
+    print "For a complete list of supported encodings"
+    print "visit http://www.google.com/search?q=python+standard+encodings."
+    print
+
+def print_version():
+    print os.path.basename(sys.argv[0]) + " v0.1"
+
+def run(options, args):
+    if options.version:
+        return print_version()
+    if options.list_encodings:
+        return list_encodings()
+
+    print "Supported Unicode version: " + unicodedata.unidata_version
+
+    for line in unicode_xreadlines(sys.stdin):
+        
+        i = 0
+        while i != len(line):
+            off = i
+            unichr = line[i]
+            i += 1
+
+            # glue with next unichr if it's a Combining Character
+            # TODO support zero width joiner, or zero width non-joiner
+            while i != len(line) and unicodedata.category(line[i])[0] == 'M':
+                unichr += line[i]
+                i += 1
+
+            print off, unichr, repr(unichr)
+
+# Run the command-line interface only when executed as a script, not on import.
+if __name__ == "__main__":
+    main()

unihistext.py

-#!/usr/bin/env python
-
-import unicodedata
-
-def main():
-    pass
-    print "Supported Unicode version: " + unicodedata.unidata_version
-
-if __name__ == "__main__":
-    main()
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.