Commits

Anonymous committed 40821b9

missing files

Comments (0)

Files changed (5)

 
         d = wx.MessageDialog(self, u"IT IS GOING TO FREEZE THE GUI FOR AN HOUR OR SO.\n\n OK ? ", "alpha state")
         ans = d.ShowModal()
-        if not ans in [wx.OK, wx.YES, 5100]:
+        if not ans in [wx.OK, wx.YES, wx.ID_YES, wx.ID_OK]:
             return
 
         self.progress_indicator.Play()
+#coding: UTF8
+"""
+A subclass of wx.ListCtrl for displaying a list of files with icons.
+Windows only.
+
+Most of the list control code gleaned from the excellent wxpython demo.
+
+Image code taken from this post by Mark Hammond:
+http://mail.python.org/pipermail/python-win32/2005-March/003071.html
+
+MIT license.
+"""
+
+__author__ = "Ryan Ginstrom"
+__version__ = "0.1"
+
+import wx
+import sys, glob
+import os
+
+windows = sys.platform.startswith('win')
+if windows:
+    from win32com.shell import shell, shellcon
+    from win32con import FILE_ATTRIBUTE_NORMAL
+
+    def extension_to_bitmap(extension):
+        """dot is mandatory in extension"""
+
+        flags = shellcon.SHGFI_SMALLICON | \
+                shellcon.SHGFI_ICON | \
+                shellcon.SHGFI_USEFILEATTRIBUTES
+
+        retval, info = shell.SHGetFileInfo(extension,
+                                       FILE_ATTRIBUTE_NORMAL,
+                                       flags)
+        # non-zero on success
+        assert retval
+
+        hicon, iicon, attr, display_name, type_name = info
+
+        # Get the bitmap
+        icon = wx.EmptyIcon()
+        icon.SetHandle(hicon)
+        return wx.BitmapFromIcon(icon)
+
+else:   #non windows version
+
+    def extension_to_bitmap(extension):
+        """dot is mandatory in extension"""
+
+        # Get the bitmap
+        icon = wx.EmptyIcon()
+        #icon.SetHandle(hicon)
+        return wx.BitmapFromIcon(icon)
+
+
+
+
+class FileList(wx.ListCtrl):
+    """Subclass of list control that shows a list of files with
+    their file icons"""
+
+    columns = [u"Name", u"Path"]
+
+    def __init__(self, parent, id=-1):
+        """style must be wx.LC_REPORT for now"""
+        self.init_base(parent, id)
+
+        self.extension_images = {}
+
+    def init_base(self, parent, id):
+        """Put this in a method so we can subclass
+        and override for testing"""
+
+        wx.ListCtrl.__init__(self, parent, id, style=wx.LC_REPORT,
+                                pos=wx.Point(8, 224), size=wx.Size(360, 328) )
+
+        for col, text in enumerate(self.columns):
+            self.InsertColumn(col, text)
+
+        for i in range(len(self.columns)):
+            self.SetColumnWidth(i, wx.LIST_AUTOSIZE_USEHEADER)
+
+        self.il = wx.ImageList(16,16, True)
+
+    def get_image_id(self, extension):
+        """Get the id in the image list for the extension.
+        Will add the image if not there already"""
+
+        # Caching
+        if extension in self.extension_images:
+            return self.extension_images[extension]
+
+        bmp = extension_to_bitmap(extension)
+
+        index = self.il.Add(bmp)
+        self.SetImageList(self.il, wx.IMAGE_LIST_SMALL)
+
+        self.extension_images[extension] = index
+
+        return index
+
+    def add_file(self, filename, fullnames=True, autowidth=False):
+        """Add the filenames to the list, and returns the item index"""
+
+        full_path = os.path.abspath(filename)
+        path_part, file_part = os.path.split(full_path)
+
+        # Add the icon
+        extension = os.path.splitext(filename)[-1].lower()
+        img_id = self.get_image_id(extension)
+
+        # Add the file and path names
+        #if fullnames:
+        #    f = full_path
+        #else:
+        #    f = file_part
+        
+        index = self.InsertStringItem(sys.maxint, file_part, img_id)
+        self.SetStringItem(index, 1, path_part)
+        
+        if autowidth:  # performance
+            for i in range(len(self.columns)):
+                self.SetColumnWidth(i, wx.LIST_AUTOSIZE_USEHEADER)
+
+        return index
+
+    def auto_width(self):
+        for i in range(len(self.columns)):
+            self.SetColumnWidth(i, wx.LIST_AUTOSIZE_USEHEADER)
+
+
+
+
+class DemoFrame(wx.Frame):
+    def __init__(self):
+        wx.Frame.__init__(self, None, -1,
+                          u"File List Demo",
+                          size=(600,400))
+
+        self.file_list = FileList(self)
+
+        # add the rows
+        for filename in sorted(glob.glob("*.*")):
+            index = self.file_list.add_file(filename)
+
+
+if __name__ == '__main__':
+    os.chdir( os.path.dirname(__file__))
+
+    app = wx.PySimpleApp(redirect=False, filename="log.txt", useBestVisual=True)
+    frame = DemoFrame()
+    frame.Show()
+    app.MainLoop()
+
+# -*- encoding: utf-8 -*-
+'''
+    project:
+    misc functions library
+
+    in the future, support to wx might be dropped
+    @todo: think of alternative implementations for msgbox
+
+
+'''
+
+#from BeautifulSoup import BeautifulSoup
+from bs4 import BeautifulSoup
+from exceptions import AttributeError
+from os.path import expanduser, isdir, isfile, join, abspath, splitext, walk
+from os.path import isfile, isdir, join, abspath
+from shlex import split as split_shell_args
+import chardet
+import fnmatch
+import imp as _imp
+import json
+import os as _os
+import re
+import string
+import subprocess
+import sys, os, re
+import tempfile
+import time
+import urllib
+import json
+
+
+
+# --- module-wide switches and constants ---
+DEBUG = True
+# When true, run() does not launch the external command.
+DRY_RUN = False
+verbose = True
+valid_picture_files=['JPEG', 'PNG', 'BMP']  #, 'GIF' -- gif contain multiple layers, and crash the detector
+# NOTE(review): the name is misspelled ("ENOCDING"); kept as is because other
+# modules may import it under this name.
+MAX_SIZE_FOR_ENOCDING = 1000000
+# Encoding used by decode_html_to_correct_encoding() when no charset is found.
+DEFAULT_ENCODING = 'cp1255'
+# First three bytes of the 'PK\x03\x04' zip local-file-header signature.
+PKZIP_PREFIX = '\x50\x4B\x03'
+nikud_reg_range = 'ְֱֲֳִֵֶַָֹּׁׂ'
+accents_reg_range = 'çéèàêáïâîąïæÖØœß'
+hebrew_reg_range = 'א-ת'
+latin_reg_range = 'A-Za-z'
+hebrew_and_latin = hebrew_reg_range + latin_reg_range + nikud_reg_range + accents_reg_range
+words_reg = re.compile(r'\b([%s][%s0-9]+)\b' % (hebrew_and_latin, hebrew_and_latin, ))
+SIMPLE = 0
+
+flags = re.I+re.M+re.S
+reg_html_tag = re.compile('<[a-z]+[^>]*>', flags)
+reg_script = re.compile('<script[^>]*>.*?</script>', flags)
+reg_style = re.compile('<style[^>]*>.*?</style>', flags)
+reg_comment = re.compile('<!--[^>]*?-->,', flags)
+decimal_reg = re.compile('\d')
+filesystem_encoding = sys.getfilesystemencoding()
+
+if sys.platform in ['win32', 'win64', 'win', 'windows']:
+    IMAGEMAGICK_BIN='imagemagick.exe'
+else:
+    IMAGEMAGICK_BIN='convert'
+
+
+
+
+def run(cmd, needs_to_be_splitted=True, start_dir='', env='', wait=True):
+    if start_dir and os.path.isdir(start_dir):
+        cur_dir = abspath('.')
+        os.chdir( start_dir)
+    if needs_to_be_splitted:
+        cmd = split_shell_args_safe( cmd )
+    elif type(cmd)==type(''):
+        cmd = [cmd]
+
+
+    if not DRY_RUN:
+        if wait:
+            pid = subprocess.Popen(cmd)
+        else:
+            pid = subprocess.Popen(cmd, shell=True, stdin=None, stdout=None, stderr=None, close_fds=True)
+    if start_dir and os.path.isdir(start_dir):
+        os.chdir( cur_dir )
+
+
+
+
+
+
+def windows_to_browser_filename(f):
+    f = os.path.abspath(f)
+    f = 'file://' + f.replace('\\', '/')
+    return f
+
+
+
+
+def get_drives():
+    if 'win' in sys.platform:
+
+        # i know, iz ugly, but it doens't require external packages, it's fast, and simple
+        # find all currently available drives under windows OS
+        # @todo: try adding UNC paths
+        # @todo: consider win32api.GetLogicalDriveStrings().split("\x00")
+        drives = [c+':\\' for c in string.lowercase if os.path.isdir(c+':\\')]
+        return drives
+
+
+def import_file(fpath):
+    '''
+    fpath - the relative or absolute path to the .py file which is imported.
+
+    Returns the imported module.
+
+    NOTE: if import_file is called twice with the same module, the module is reloaded.
+    '''
+    original_path = _os.path.realpath(_os.path.curdir)
+    dst_path = _os.path.dirname(fpath)
+    if dst_path == '':
+        dst_path = '.'
+
+    # remove the .py suffix
+    script_name = _os.path.basename(fpath)
+    if script_name.endswith('.py'):
+        mod_name = script_name[:-3]
+    else:
+        # Modules without the py extension don't work I believe,
+        # maybe raise an exception?
+        mod_name = script_name
+
+    _os.chdir(dst_path)
+    fhandle = None
+    try:
+        tup = _imp.find_module(mod_name, ['.'])
+        module = _imp.load_module(mod_name, *tup)
+        fhandle = tup[0]
+    finally:
+        _os.chdir(original_path)
+        if fhandle is not None:
+            fhandle.close()
+
+    return module
+
+
+
+
+
+
+def find_user_dir( particular_file='', return_file_or_home='file', accept_current_dir=True, create_if_not_exist=True ):
+    if not particular_file:  return_file_or_home='dir'
+
+    possible_paths = [os.environ[d] for d in ('home', 'USERPROFILE', 'homepath') if d in os.environ]
+    if accept_current_dir: possible_paths.append('.')
+
+    if particular_file:
+        def j(h):   return os.path.join(h, particular_file)
+        def i(h,f): return isfile( os.path.join(h,f) )
+        def r(h):
+            if return_file_or_home=='file':
+                return j(h)
+            else:
+                return h
+
+        for h in possible_paths:
+            if i(h, particular_file):
+                return r(h)
+
+        #if not found particular file in possible homes, then
+        for h in possible_paths:  # in this order!
+            if os.path.isdir(h):
+                if create_if_not_exist:
+                    touch( j(h) )
+                return r(h)
+
+        return r(h)  # even if file does not exist...
+
+    else:
+        if 'home' in os.environ:           h = os.environ['home']
+        elif 'USERPROFILE' in os.environ:  h = os.environ['USERPROFILE']
+        elif 'homepath' in os.environ:     h = os.environ['homepath']
+        else:                              h = '.'
+        return h
+
+    return ''
+
+
+# Resolved once at import time: the user's directory and the log file path in it.
+# NOTE(review): LOGFILE is not defined anywhere in the visible part of this
+# module; unless it is defined elsewhere, the next line raises NameError on
+# import -- confirm and define it.
+user_dir = find_user_dir()
+logfile = os.path.join(user_dir, LOGFILE)
+
+
+
+
+def split_shell_args_safe(cmd):
+    if sys.platform in ['win32', 'win64', 'dos']:
+        return _cmdline2list(cmd)
+    else:
+        return split_shell_args(cmd)
+
+
+def _cmdline2list(cmdline):
+   """Build an argv list from a Microsoft shell style cmdline str
+
+    The reverse of list2cmdline that follows the same MS C runtime
+    rules.
+
+    Java's ProcessBuilder takes a List<String> cmdline that's joined
+    with a list2cmdline-like routine for Windows CreateProcess
+    (which takes a String cmdline). This process ruins String
+    cmdlines from the user with escapes or quotes. To avoid this we
+    first parse these cmdlines into an argv.
+
+    Runtime.exec(String) is too naive and useless for this case.
+    """
+   whitespace = ' \t'
+   # count of preceding '\'
+   bs_count = 0
+   in_quotes = False
+   arg = []
+   argv = []
+
+   for ch in cmdline:
+       if ch in whitespace and not in_quotes:
+           if arg:
+               # finalize arg and reset
+               argv.append(''.join(arg))
+               arg = []
+           bs_count = 0
+       elif ch == '\\':
+           arg.append(ch)
+           bs_count += 1
+       elif ch == '"':
+           if not bs_count % 2:
+               # Even number of '\' followed by a '"'. Place one
+               # '\' for every pair and treat '"' as a delimiter
+               if bs_count:
+                   del arg[-(bs_count / 2):]
+               in_quotes = not in_quotes
+           else:
+               # Odd number of '\' followed by a '"'. Place one '\'
+               # for every pair and treat '"' as an escape sequence
+               # by the remaining '\'
+               del arg[-(bs_count / 2 + 1):]
+               arg.append(ch)
+           bs_count = 0
+       else:
+           # regular char
+           arg.append(ch)
+           bs_count = 0
+
+   # A single trailing '"' delimiter yields an empty arg
+   if arg or in_quotes:
+       argv.append(''.join(arg))
+
+   return argv
+
+
+
+
+
+def clean_broken_closing_tags(html):
+    brokens = (r'<\s', r'</\s', r"</s'", r"</scr'", r"</sc'", r"</scri'", r"</scrip'", )
+    for s in brokens:
+        html=html.replace(s, '</broken_tag>')
+    return html
+
+
+def html2text(html, brutal=False):
+    #html = clean_broken_closing_tags(html)
+
+    if not brutal:
+        html = decode_html_to_correct_encoding(html)
+
+    try:
+        soup = BeautifulSoup(html)
+        # remove script and style
+        [e.extract() for e in soup.findAll('script')]
+        [e.extract() for e in soup.findAll('style')]
+        txt = soup.findAll(text=True)
+        if len(txt)>1 and 'DOCTYPE' in txt[0]: txt=txt[1:]
+        txt = u' '.join(txt)
+
+    except:
+        html = reg_script.sub(' ', html)
+        html = reg_style.sub(' ', html)
+        html = reg_comment.sub(' ', html)
+        txt = reg_html_tag.sub(' ', html)
+        #open('d:\\souperror.html','wb').write(html)
+        #import sys
+        #sys.exit()
+
+    return txt
+
+
+def get_html_headers(html, flat=False):
+    ''' flat=true: return a list of all titles.
+        false=false: return a dict of <h1>s, <h2>s, <h3>s'''
+
+    headers_dict = { 'h1': [], 'h2':[], 'h3': [], }
+    headers_array=[]
+    #html = clean_broken_closing_tags(html)
+    try:
+        for x in headers_dict:
+            hs = [unicode(h.strip()) for h in soup.findAll(x)]
+            headers_dict[x] = hs
+            headers_array.extend(hs)
+        soup = BeautifulSoup(html)
+
+    except:
+        for h in headers_dict:
+            reg=re.compile('<%s[^>]*?>([^<]*?)<' % h, flags)
+            titles = reg.findall(html)
+            titles = [t.strip() for t in titles]
+            headers_dict[h].extend(titles)
+            headers_array.extend(titles)
+
+    if flat:
+        return headers_array
+
+    return headers_dict
+
+
+
+def detect_encoding(x):
+    try:
+        # @todo  check if still this is a name of an existing file somehow
+        if os.path.isfile(x):
+            x = open(x,'rb').read()
+    except:
+        pass
+    ret = chardet.detect(x)
+    enc = ret['encoding']
+    prob = ret['confidence']
+    return enc
+
+
+
+def decode_html_to_correct_encoding(html, return_encoding=False):
+    '''
+        return_encoding:
+            if true, return the text encoding
+            if false, return html
+
+    '''
+
+    if len(html)<5 or type(html) is unicode : return html
+    reg = re.compile(r'<meta\s+http-equiv="\s*Content-Type\s*"\s+content="\s*text/html;\s*charset=([^"]+?)"', flags)
+    enc = DEFAULT_ENCODING
+    encs = reg.findall(html)
+    if encs:
+        enc = encs[0]
+    else:
+        ret = chardet.detect(html)
+        if ret['confidence']>0.6:
+            enc = ret['encoding']
+    if not enc:
+
+    html = unicode(html, enc, 'replace')
+    if return_encoding:
+        return enc
+
+    return html
+
+
+
+
+
+def decode_bencode(data):
+    '''Main function to decode bencoded data'''
+    chunks = list(data)
+    chunks.reverse()
+    root = _bencode_decode_chunk(chunks)
+    return root
+
+def _bencode_decode_chunk(chunks):
+    item = chunks.pop()
+
+    if item == 'd':
+        item = chunks.pop()
+        hash = {}
+        while item != 'e':
+            chunks.append(item)
+            key = _bencode_decode_chunk(chunks)
+            hash[key] = _bencode_decode_chunk(chunks)
+            item = chunks.pop()
+        return hash
+    elif item == 'l':
+        item = chunks.pop()
+        list = []
+        while item != 'e':
+            chunks.append(item)
+            list.append(_bencode_decode_chunk(chunks))
+            item = chunks.pop()
+        return list
+    elif item == 'i':
+        item = chunks.pop()
+        num = ''
+        while item != 'e':
+            num  += item
+            item = chunks.pop()
+        return int(num)
+    elif decimal_reg.search(item):
+        num = ''
+        while decimal_reg.search(item):
+            num += item
+            item = chunks.pop()
+        line = ''
+        for i in range(int(num)):
+            line += chunks.pop()
+        return line
+    raise "Invalid input!"
+
+
+
+
+def encode_command_arguments(cmd):
+    return [arg.encode(filesystem_encoding) if isinstance(arg,unicode) else arg for arg in cmd]
+
+
+def decode_command_arguments(cmd_list):
+    #cmd = [arg.decode(filesystem_encoding) if isinstance(arg, unicode) else arg for arg in cmd]
+    cmd = [unicode(arg, filesystem_encoding, 'replace') if not isinstance(arg, unicode) else arg for arg in cmd]
+    return cmd
+
+def decode_filename(fname):
+    if isinstance(fname, unicode) or type(fname)==unicode:
+        return fname
+    else:
+        #return fname.decode(filesystem_encoding)    # tends to fail
+        return unicode(s, filesystem_encoding, 'replace') # failsafe
+
+    return fname
+
+
+def intersection(first, *others):
+    return set(first).intersection(*others)
+
+
+
+def transient(msg="", title="", timeout=10, okname="Ok", ):
+    ''' auto-dismiss after X seconds '''
+
+    class MyDialog:
+        def __init__(self, parent):
+            top = self.top = Toplevel(parent)
+            Label(top, text=msg).pack()
+            self.e = Entry(top)
+            self.e.bind('<KeyPress>', self.keyb)
+            self.e.pack(padx=5)
+            b = Button(top, text=okname, command=self.ok)
+            b.bind('<KeyPress>', self.keyb)
+            b.pack(pady=5)
+            root.after(timeout*1000, self.zehu)
+            parent.title("zuk")
+            self.title("pppo")
+        def ok(self):
+            print "value is", self.e.get()
+            self.top.destroy()
+        def keyb(self, event):
+            if ord(event.char)==13:
+                self.ok()
+            elif ord(event.char)==27:
+                self.top.destroy()
+        def zehu(self, **a):
+            print a
+            self.top.destroy()
+            #
+            #
+    root = Tk()
+    root.withdraw()
+    d = MyDialog(root)
+    root.wait_window(d.top)
+
+
+def file_extension( f ):
+    ext = os.path.splitext(f)[1]
+    if ext and ext[0]=='.':
+        ext = ext[1:]
+    return ext
+
+
+def generate_html_header( title='index', header1='index', subtitle='hello' ):
+    style="body { background-color: black; color: #FFFF99; } a { color: yellow; }"
+    html = '''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"><HTML><HEAD><TITLE>TITLESTR</TITLE> <meta http-equiv="Content-type" content="text/html; charset=UTF-8"> <style>%s</style></HEAD><BODY><h3>SUBTITLESTR</h3><h1>HEADERSTR</h1>''' % style
+    html = html.replace( 'SUBTITLESTR', subtitle)
+    print type(title)
+    print title
+    html = html.replace( 'TITLESTR', title)
+    html = html.replace( 'HEADERSTR', header1)
+    return html
+
+
+def filelist_to_html_index( files, **params ):
+
+    html = generate_html_header( **params )
+    html += '<table border=0 align=center>'
+    for f in files:
+        basename = os.path.basename(f).encode('utf-8')
+        ext = file_extension(f).strip().lower()
+        fu = f.encode('utf-8')
+        fup = os.path.dirname(f).encode('utf-8')
+        if isfile(fu): fu = 'file://' + fu
+        if isdir(fup): fup = 'file://' + fup
+        fu = fu.replace('\\', '/')
+        fup = fup.replace('\\', '/')
+        thumbwidth, thumbheight = 90, 50
+        iconfile = 'icon-'+ext+'.gif'
+        if ext in ('png', 'gif', 'jpg', 'ico', 'jpeg', 'jpe', 'bmp', 'wmf', 'svg', 'tif' ):
+            icon = '<img border=0 src="%s" width="%d" height="%d" />' % (fu, thumbwidth, thumbheight)
+        elif ext in ('swf', 'avi', 'mpg', 'mov' ):
+            icon = '<embed src="%s" width="%d" height="%d"></embed>' % (fu, thumbwidth, thumbheight)
+        #elif isfile(iconfile):
+        #    iconfile=
+        #    icon=os.path.abspath(
+        else:
+            icon = ext
+
+        try:
+            line = '<tr><td width="10%%">%s</td><td><a href="%s">%s</a></td><td><a href="%s">%s</a></td></tr>\n' % (icon, fup, fup, fu, basename)
+            html+= line
+        except:
+            print 'encoding fucking error for file:' + fu
+    html +="</table></body></html>"
+    return html
+
+
+def reverse_dict(d):
+    newd = {}
+    for k in d:
+        newd[d[k]] = k
+    return newd
+
+
+def int2bin(n, count=24):
+    return "".join([str((n >> y) & 1) for y in range(count-1, -1, -1)])
+
+
+def latest_file_name(path, pattern):
+    ''' e.g.
+    latest_file_name( r'd:\temp\backup', 'run*.avi')
+    '''
+
+    from fnmatch import fnmatch
+    files = [ f for f in os.listdir(path) if fnmatch(f, pattern) ]
+    files.sort()
+    return os.path.join(path, files[-1])
+
+
+def convert_percent_chars(s):
+	import sys
+	import urllib
+	return urllib.url2pathname(a)
+
+def reverse_words(s):
+    words = s.split(' ')
+    words.reverse()
+    s = ' '.join(words)
+    return words
+
+
+
+
+
+
+def create_timestamp_file( suffix='', folder='.', prefix='', timestamp=None):
+    if not timestamp:
+        timestamp=now()
+    return abspath( join( folder, prefix + '_' + timestamp  + '_' + prefix ) )
+
+
+def now(with_ms=0, datesep='', timesep='', datetimesep=''):
+    pattern = '%Y'+datesep+'%m'+datesep+'%d'+datetimesep+'%H'+timesep+'%M'+timesep+'%S'
+    return time.strftime(pattern)
+
+
+
+
+
+def verify_dir_exists(dirname):
+    "If given directory doesn't exist, creates it (even if it's a deep subdir under nonexisting dirs)"
+    if not isdir(dirname):
+        os.makedirs(dirname)
+
+
+
+
+
+
+def regex_find_or_none(reg, in_string, flags=0):
+    ''' accepts string pattern or compiled regex '''
+    if type(reg) is str:
+        reg = re.compile(pattern, flags)
+    s = reg.search(in_string)
+    if not s:
+        return None
+    gs=s.groups()
+    if not gs:
+        return None
+    return gs[0]
+
+
+
+
+def open_any_file(f):
+    # --- depends on the OS,
+    # starts a file with its associated program
+    import subprocess, os
+    if sys.platform.startswith('darwin'):
+        subprocess.call(('open', f))
+    elif os.name == 'nt':
+        os.startfile(f)
+    elif os.name == 'posix':
+        subprocess.call(('xdg-open', f))
+
+def mmm():
+    print "ahhh"
+import re
+import xlrd
+import docx
+import sys
+import chardet
+try:
+    import win32com.client
+    xlApp = win32com.client.Dispatch("Excel.Application")
+except:
+    xlApp = False
+
+
+from library import decode_html_to_correct_encoding, html2text, is_unicode_file, SIMPLE
+flags = re.I+re.M+re.S
+
+
+def err(s):  
+    sys.stderr.write(unicode(s) + u'\n')
+
+
+def docx2text(f):
+    d = docx.opendocx(f)
+    text = '\n'.join( [ t for t in d.itertext()] )
+    ## Print our documnts test with two newlines under each paragraph
+    return unicode( text )
+
+
+def excel_html_to_text(f):
+    #todo
+    #html=open(f).read()
+    #html = decode_html_to_correct_encoding(html)
+    return html2text(open(f).read())
+
+
+def xls2tab(f, line_header=False):
+    line = open(f).readline()
+    if is_unicode_file(f, inspection_level=SIMPLE) or 'xmlns:o' in line or '<html' in line:
+        #oops, this is a disguised html page
+        return excel_html_to_text(f)
+    try:
+        wb = xlrd.open_workbook(f)
+    except:  # struct.error, NameError; 
+             #or check for file size (94152) not 512 + multiple of sector size (512)
+        err('error in excel - probably old format!')
+        return u''
+
+    err('sheet_count='+str(len(wb.sheets())))
+
+    lines=[]
+    for sheet in wb.sheets():
+        for r in range(sheet.nrows):
+            if line_header:
+                line = sheet.name
+            else:
+                line=''
+            for c in range(sheet.ncols):
+                val = sheet.cell_value(r,c)  
+                if type(val)==type('a'):
+                    val = val.replace('\n', '{NEWLINE}') 
+                #if val not in ['',]:
+                #    val = unicode( val  )
+                val = unicode( val  )
+                line += '\t' + val
+            lines.append(line)
+
+        lines.append('\n\n') # new sheet...
+
+    return unicode('\n'.join(lines))

persistant_dict.py

+'''
+
+persistant_dict.py
+
+contact:    https://bitbucket.org/yodeleyihu/eyfo
+license:    LGPL
+created:    2013-02-03
+version:    0.9 (beta)
+
+description:
+    a simple configuration file hack
+    reasons:
+        1. config file format should be readable by humans and kittens
+        2. performance should be fast
+        3. (no need to support more than 2-5kb)
+        4. i want the changes to be written onChange, so in case the computer crashes, the settings so far are still saved (that might fuck up ACIDity, but i can live with it)
+
+    why not (shelve, configobj, blah)
+        i don't like their syntax
+        they dont solve #4
+
+    why not filedict?
+        http://erez.wikidot.com/filedict-0-2-code
+        erez is saving everything to mysql db. this is excellent! just doesn't answer #1, and i don't care about scalability, it's just a small settings file
+
+    why not using a python module as settings file?
+        i'd like to, but it is more difficult to save values this way, especially solving #4 above.
+
+
+TODO:
+    use python module as settings file
+    or access keys via object-notation (yeah, some of you dont like it, but it's SO MUCH EASIER to code this way.
+    prevent save()s during update and update_missing loops!
+
+USAGE:
+
+    >>> conf = persistant_dict(fname)
+
+    that's it!
+    your values are read from fname, if it exists,
+    and if not, it is created now.
+
+    >>> conf['x'] = 'yyy'
+
+    that's it!
+    the dictionary is saved in fname!
+
+'''
+
+from os.path import expanduser, isdir, isfile, join, basename, abspath, splitext
+import json
+
+class persistant_dict(dict):
+
+    def __init__(self, fname):
+        self.fname=fname
+        if not isfile(fname):
+            open(fname,'w').write('{}')
+        else:
+            self.load()
+
+    def __setitem__(self, key, value):
+        x = super(persistant_dict, self).__setitem__(key, value)
+        self.save()
+        return x
+
+    def __delitem__(self, key):
+        x = super(persistant_dict, self).__delitem__(key)
+        self.save()
+        return x
+
+    def __missing__(self, key):
+        return None
+
+    def update(self, *args, **kwargs):
+        super(persistant_dict, self).update(*args, **kwargs)
+        self.save()
+
+    def update_missing(self, d):
+        ''' update values only if they do not exist in object to update '''
+        for k in d:
+            if k not in self:
+                #self.__setitem__( k, d[k] )
+                self[k] = d[k]
+        self.save()
+
+    # --- additions to dict ------
+
+    def save(self):
+        s = json.dumps(self, sort_keys=True, indent=4)
+        open(self.fname,'w').write(s)
+
+    def load(self):
+        try:
+            self.update(json.loads(open(self.fname).read()))
+        except:
+            print "Raise error in file format (it must be a JSON dictionary). Fix the file manually or delete it, and try again"
+