Commits

Jure Žbontar committed f0e3413

Reorganize directory structure

Comments (0)

Files changed (8)

gui_vim.py

-from subprocess import Popen, PIPE
-import os
-import glob
-import re
-import sqlite3
-import sys
-import vim
-import collections
-
-import ref
-
-
-def search_documents(query):
-    global last_select_cmd
-
-    if not query:
-        reload_main()
-        return
-
-    last_select_cmd = lambda: search_documents(query)
-    del main_buf[:]
-    for field, docs in ref.search_documents(headers, query, order):
-        docs = map(str_document, docs)
-        if docs:
-            heading = '# {}'.format(field.upper())
-            if len(main_buf) == 1:
-                main_buf[:] = [heading]
-            else:
-                main_buf.append('')
-                main_buf.append(heading)
-            main_buf[len(main_buf):] = docs
-
-
-def parse_info():
-    bibtex, rest, notes = '\n'.join(info_buf).split('\n---')
-    doc = ref.parse_bibtex(bibtex)
-    doc.update(dict(re.findall(r'(\w+)=(.*)', rest)))
-    doc['bibtex'] = bibtex
-    doc['docid'] = int(doc['docid'])
-    doc['notes'] = notes.strip()
-    doc.update(next(ref.select_documents(('filename',), (doc['docid'],))))
-    tags.update(doc['tags'].split('; '))
-    return doc
-
-
-def write_info(doc):
-    if not doc:
-        info_buf[:] = []
-        return
-    buf = (doc['bibtex'] or '').splitlines()
-    if not buf:
-        buf = ['@{', '  title=' + (doc['title'] or ''), '}']
-    buf.append('---')
-    for attr in ('docid', 'tags', 'rating'):
-        buf.append('{}={}'.format(attr, doc[attr] or ''))
-    buf.append('---')
-    buf.extend(doc['notes'].splitlines())
-    info_buf[:] = buf
-
-
-def save_info(doc):
-    ref.update_document(doc)
-    update_main((doc['docid'],))
-
-
-def get_docid(line):
-    try:
-        return int(line.split()[0])
-    except (ValueError, IndexError):
-        return None
-
-
-def str_document(doc):
-    return '  '.join(
-        (str(doc[h] or '')[:col_size[h]].ljust(col_size[h]) for h in headers))
-
-
-def selected_document():
-    docid = get_docid(main_buf[main_win.cursor[0] - 1])
-    if docid:
-        fields = headers + ('bibtex', 'tags', 'filename', 'notes')
-        docs = list(ref.select_documents(fields, (docid,)))
-        if docs:
-            return docs[0]
-
-
-def resize():
-    global col_size
-
-    info_win.height = 15
-    col_size = {'year': 4, 'docid': 5, 'rating': 2, 'author': 30}
-    col_size['title'] = main_win.width - sum(col_size.values()) - 2 * len(col_size)
-
-    update_main()
-
-
-def update_main(docids=None):
-    if not docids:
-        docids = filter(None, (get_docid(line) for line in main_buf))
-        if not docids:
-            return
-    cur = ref.select_documents(headers, docids)
-    docs = {doc['docid']: str_document(doc) for doc in cur}
-
-    for i, line in enumerate(main_buf):
-        id = get_docid(line)
-        if id in docs:
-            main_buf[i] = docs[id]
-
-
-def reload_main():
-    global last_select_cmd
-
-    last_select_cmd = reload_main
-    docs = list(map(str_document, ref.select_documents(headers, order=order)))
-    main_buf[:] = docs
-
-
-def fetch_bibtex():
-    doc = parse_info()
-    doc['bibtex'] = ref.fetch_bibtex(doc['title'])
-    doc.update(ref.parse_bibtex(doc['bibtex']))
-    save_info(doc)
-    write_info(doc)
-
-
-def open_document():
-    filename = selected_document()['filename']
-    Popen(['xdg-open', os.path.join(ref.DOCUMENT_DIR, filename)], stderr=PIPE, stdout=PIPE)
-
-
-def add_document(fname):
-    docid = ref.insert_document(fname)
-    if docid:
-        doc = next(ref.select_documents(headers, (docid,)))
-        main_buf[:0] = [str_document(doc)]
-    main_win.cursor = (1, 0)
-
-
-def export_bib(fname):
-    ref.export_bib(fname)
-
-
-def delete_document(lineFrom, lineTo):
-    if vim.current.buffer != main_buf:
-        print 'Deletion is only possible from the main buffer'
-        return
-    docids = set()
-    for line in main_buf[lineFrom - 1:lineTo]:
-        docid = get_docid(line)
-        ref.delete_document(docid)
-        docids.add(docid)
-
-    for i, line in enumerate(main_buf):
-        id = get_docid(line)
-        if id in docids:
-            del main_buf[i]
-
-
-def insert_tag(tag):
-    for i, line in enumerate(info_buf):
-        if line.startswith('tags='):
-            info_buf[i] += '{}; '.format(tag)
-    save_info(parse_info())
-
-
-def toggle_unread():
-    for i, line in enumerate(info_buf):
-        if line.startswith('rating='):
-            info_buf[i] = 'rating=' + ('' if info_buf[i].endswith('U') else 'U')
-    save_info(parse_info())
-    
-
-
-def order_documents(o):
-    global order
-
-    order = o
-    last_select_cmd()
-
-ref.init()
-
-order = 'docid DESC'
-headers = 'docid', 'rating', 'author', 'title', 'year'
-tags = ref.get_tags()
-col_size = {}
-
-c = vim.command
-c('set buftype=nofile')
-c('set bufhidden=hide')
-c('setlocal noswapfile')
-c('file main')
-main_buf, main_win = vim.current.buffer, vim.current.window
-c('below new info') 
-c('set buftype=nofile')
-c('set bufhidden=hide')
-c('setlocal noswapfile')
-info_buf, info_win = vim.current.buffer, vim.current.window
-c(':1winc w')
-
-resize()
-reload_main()
-#ref.check_filenames()
-
-c('autocmd CursorMoved main python write_info(selected_document())')
-c('autocmd BufLeave,VimLeave info python save_info(parse_info())')
-c('autocmd VimResized * python resize()')
-c('set cursorline')
-c('set wildmode=longest,list')
-c('map q :qa!<CR>')
-c('map <c-o> :python open_document()<CR>')
-c('map <c-u> :python toggle_unread()<CR>')
-c('map <c-w>o <NOP>')
-c('map // :Search ')
-c('com Fetch py fetch_bibtex()')
-c('com -nargs=1 -complete=customlist,Tag Tag py insert_tag("<args>")')
-c("com -nargs=? -complete=customlist,Tag Search py search_documents('''<args>''')")
-c('com -nargs=? -complete=customlist,Column Order py order_documents("<args>")')
-c('com -nargs=1 -complete=file Add py add_document("<args>")')
-c('com -nargs=1 -complete=file Export py export_bib("<args>")')
-c('com -range Delete py delete_document(<line1>, <line2>)')
-
-c('''function Tag(ArgLead, CmdLine, CursorPos)
-    python c('let xs = {}'.format(list(tags)))
-    return filter(xs, 'a:ArgLead == strpart(v:val, 0, strlen(a:ArgLead))')
-endfunction''')
-
-c('''function Column(ArgLead, CmdLine, CursorPos)
-    let xs = {}
-    return filter(xs, 'a:ArgLead == strpart(v:val, 0, strlen(a:ArgLead))')
-endfunction'''.format([h for h in headers]))
-

ref

-#! /bin/sh
-
-vim -c "python from gui_vim import *"

ref.py

-from subprocess import Popen, PIPE
-import collections
-import filecmp
-import htmlentitydefs
-import itertools
-import os
-import random
-import re
-import shutil
-import sqlite3
-import struct
-import sys
-import tempfile
-import time
-import urllib2
-import HTMLParser
-
-
-documents_fields = (
-    ('docid', 'INTEGER PRIMARY KEY'), ('tags', 'TEXT'), ('title', 'TEXT'), 
-    ('author', 'TEXT'), ('year', 'INTEGER'), ('rating', 'INTEGER'), 
-    ('journal', 'TEXT'), ('filename', 'TEXT'), ('notes', 'TEXT'), 
-    ('bibtex', 'TEXT')
-)
-
-
-def import_dir(dir):
-    for base in os.listdir(dir):
-        print base
-        insert_document(os.path.join(dir, base))
-
-
-def check_filenames():
-    filenames = set(os.listdir(DOCUMENT_DIR))
-    for row in con.execute('SELECT filename FROM documents'):
-        if row['filename'] not in filenames:
-            raise IOError('Filename not found ' + row['filename'])
-        filenames.remove(row['filename'])
-    for filename in filenames:
-        raise IOError('Filename not in database ' + filename)
-
-
-def create_tables():
-    c = con.execute("SELECT rowid FROM sqlite_master WHERE type='table'")
-    if len(c.fetchall()) == 0:
-        fields = ','.join(name + ' ' + type for name, type in documents_fields)
-        con.execute('CREATE TABLE documents ({})'.format(fields))
-        con.execute('CREATE VIRTUAL TABLE fulltext.fulltext USING fts4')
-
-
-def select_documents(fields, docids=None, order='docid DESC'):
-    sql = 'SELECT {} FROM documents'.format(','.join(fields))
-    if docids:
-       sql += ' WHERE docid in ({})'.format(','.join(map(str, docids)))
-    sql += ' ORDER BY ' + order
-    return con.execute(sql)
-
-
-def update_document(doc):
-    filename = get_filename(doc)
-    if doc['filename'] != filename:
-        src = os.path.join(DOCUMENT_DIR, doc['filename'])
-        dst = os.path.join(DOCUMENT_DIR, filename)
-        os.rename(src, dst)
-        doc['filename'] = filename
-
-    fs = ','.join(name + '=?' for name, _ in documents_fields[1:])
-    vs = [doc[name] for name, _ in documents_fields[1:]] + [doc['docid']]
-    try:
-        con.execute('SAVEPOINT update_document')
-        con.execute('UPDATE documents SET {} WHERE docid=?'.format(fs), vs)
-        con.execute('RELEASE SAVEPOINT update_document')
-    except:
-        con.execute('ROLLBACK TO update_document')
-        raise
-        
-class DuplicateError(Exception):
-    pass
-    
-def insert_document(fname, fetch=True):
-    if not os.path.isfile(fname):
-        raise IOError('{} is not a file'.format(fname))
-
-    for base2 in os.listdir(DOCUMENT_DIR):
-        fname2 = os.path.join(DOCUMENT_DIR, base2)
-        if filecmp.cmp(fname, fname2):
-            raise DuplicateError(base2)
-
-    ext = os.path.splitext(fname)[1]
-    extract_funs = {'.pdf': extract_pdf, '.chm': extract_chm, '.djvu': extract_djvu}
-    if ext not in extract_funs:
-        raise ValueError('Unsupported file type {}'.format(ext))
-
-    doc = collections.defaultdict(str)
-    title, doc['fulltext'] = extract_funs[ext](fname)
-    doc['title'] = title[:127]
-    doc['rating'] = 'U'
-    if fetch:
-        doc['bibtex'] = fetch_bibtex(doc['title'])
-        doc.update(parse_bibtex(doc['bibtex']))
-    
-    try:
-        con.execute('SAVEPOINT insert_document')
-        ft_c = con.execute('INSERT INTO fulltext VALUES (?)', (doc['fulltext'],))
-        c = con.execute('INSERT INTO documents DEFAULT VALUES')
-        assert c.lastrowid == ft_c.lastrowid
-
-        doc['docid'] = c.lastrowid
-        doc['filename'] = fname  # setup arguments for get_filename
-        doc['filename'] = get_filename(doc)
-        update_document(doc)
-        shutil.copy(fname, os.path.join(DOCUMENT_DIR, doc['filename']))
-        con.execute('RELEASE SAVEPOINT insert_document')
-    except:
-        con.execute('ROLLBACK TO insert_document')
-        raise
-
-    return doc['docid']
-
-
-def delete_document(docid):
-    doc = next(select_documents(('docid', 'filename'), (docid,)))
-    try:
-        con.execute('BEGIN')
-        con.execute('DELETE FROM documents WHERE docid=?', (doc['docid'],))
-        con.execute('DELETE FROM fulltext WHERE docid=?', (doc['docid'],))
-        os.remove(os.path.join(DOCUMENT_DIR, doc['filename']))
-        con.execute('COMMIT')
-    except:
-        con.execute('ROLLBACK')
-        raise
-
-
-def search_documents(fields, query, order='docid DESC'):
-    res = []
-    for field in ('tags', 'title', 'author', 'journal', 'notes'):
-        cur = con.execute('''SELECT {} FROM documents WHERE {} LIKE ? 
-            ORDER BY {}'''.format(','.join(fields), field, order), 
-            ('%' + query + '%',))
-        res.append((field, cur))
-    cur = con.execute('''SELECT {} FROM documents JOIN 
-        (SELECT docid FROM fulltext WHERE content MATCH ?)
-        USING(docid) ORDER BY {}'''.format(','.join(fields), order), (query,))
-    res.append(('fulltext', cur))
-    return res
-        
-
-def get_filename(doc):
-    if doc['author'].count(', ') > 2:
-        author = doc['author'].split(', ')[0] + ' et al'
-    else:
-        author = doc['author']
-    fields = (author, doc['year'], doc['title'], doc['docid']) 
-    filename = ' - '.join(re.sub(r'[^-\w,. ]', '', str(f)) for f in fields if f)
-    filename += os.path.splitext(doc['filename'])[1]
-    return filename
-
-
-def parse_bibtex(bibtex):
-    d = collections.defaultdict(str)
-    reg = r'^\s*(\w+)\s*=\s*{*(.+?)}*,?$'
-    d.update(dict(re.findall(reg, bibtex, re.MULTILINE)))
-    for k, v in d.items():
-        d[k] = re.sub(r'[\'"{}\\=]', '', v)
-    d['author'] = ', '.join(a.split(',')[0] for a in d['author'].split(' and '))
-    if 'journal' not in d:
-        d['journal'] = d.get('booktitle', '')
-    return d
-
-
-def get_tags():
-    tags = set()
-    for row in con.execute('SELECT tags FROM documents'):
-        tags.update(tag for tag in row['tags'].split(';') if tag)
-    return tags
-
-
-def extract_djvu(fname):
-    fulltext = Popen(['djvutxt', fname], stdout=PIPE).communicate()[0]
-    title = re.match(r'(.*?)\n\n', fulltext, re.DOTALL).group(0)
-    title = re.sub(r'\s+', ' ', title).strip()
-    if len(title) > 100:
-        title = ''
-    return title, fulltext
-
-
-def extract_chm(fname):
-    dir = tempfile.mkdtemp(prefix='ref.')
-    Popen(['extract_chmLib', fname, dir], stdout=PIPE).communicate()
-    for base in os.listdir(dir):
-        name, ext = os.path.splitext(base)
-        if ext == '.hhc':
-            hhc = open(os.path.join(dir, base)).read()
-            title = re.search(r'name="Name" value="([^"]+)"', hhc).group(1)
-            fulltext = ''
-            for html in re.findall(r'"({}/[^"]+)"'.format(name), hhc):
-                fulltext += striptags(open(os.path.join(dir, html)).read())
-            break
-    shutil.rmtree(dir)
-    return title, fulltext
-        
-
-def extract_pdf(fname):
-    cmd = ['pdftotext', '-enc', 'ASCII7', fname, '-']
-    fulltext = Popen(cmd, stdout=PIPE).communicate()[0]
-
-    cmd = ['pdftohtml', '-enc', 'ASCII7', '-xml', '-stdout', '-l', '3', fname]
-    xml = Popen(cmd, stdout=PIPE).communicate()[0]
-
-    fontspec = re.findall(r'<fontspec id="([^"]+)" size="([^"]+)"', xml)
-    font_size = {id: int(size) for id, size in fontspec}
-
-    chunks = []
-    for id, text in re.findall(r'font="([^"]+)">(.*)</text>', xml):
-        chunks.append((font_size[id], id, striptags(text).strip()))
-
-    groups = []
-    for size_id, group in itertools.groupby(chunks, lambda xs: xs[:2]):
-        size, id = size_id
-        text_size = size + text.startswith('<b>') * 0.5
-        groups.append((text_size, list(group)))
-
-    title = ''
-    for _, group in sorted(groups, key=lambda xs: xs[0], reverse=True):
-        title = ' '.join(map(lambda xs: xs[2], group)).strip()
-        bad = ('abstract', 'introduction', 'relatedwork', 'originalpaper', 'bioinformatics')
-        if len(title) >= 5 and re.sub(r'[\d\s]', '', title).lower() not in bad:
-            break
-    return title, fulltext
-
-
-def fetch_bibtex(title):
-    try:
-        url = '/scholar?q=allintitle:' + urllib2.quote(title)
-        match = re.search(r'<a href="(/scholar.bib[^"]+)', scholar_read(url))
-        if not match:
-            raise ValueError('Title not found')
-        return scholar_read(match.group(1))
-    except urllib2.HTTPError:
-        return '@{{\n  title={}\n}}\n'.format(title)
-
-
-def delay(n, interval):
-    def decorator(f):
-        call_times = collections.deque()
-        def helper(*args):
-            if len(call_times) == n:
-                time.sleep(max(0, interval + call_times.pop() - time.time()))
-            call_times.appendleft(time.time())
-            return f(*args)
-        return helper
-    return decorator
-
-
-@delay(2, 3)
-def scholar_read(url):
-    id = ''.join(random.choice('0123456789abcdef') for i in range(16))
-    cookie = 'GSP=ID={}:CF=4;'.format(id)
-    h = {'User-agent': 'Mozilla/5.0', 'Cookie': cookie}
-    req = urllib2.Request('http://scholar.google.com' + url, headers=h)
-    return unescape(urllib2.urlopen(req).read().decode('utf8')).encode('utf8')
-
-
-def striptags(html):
-    return unescape(re.sub(r'<[^>]+>', '', html))
-
-
-unescape = HTMLParser.HTMLParser().unescape
-
-
-def export_bib(fname):
-    rows = con.execute('SELECT bibtex FROM documents')
-    open(fname, 'w').write('\n\n'.join(row['bibtex'] for row in rows))
-
-
-def init(base_dir=os.path.expanduser('~/.ref')):
-    global con, BASE_DIR, DOCUMENT_DIR
-
-    BASE_DIR = base_dir
-    DOCUMENT_DIR = os.path.join(BASE_DIR, 'documents')
-
-    for dir in (BASE_DIR, DOCUMENT_DIR):
-        if not os.path.exists(dir):
-            os.mkdir(dir)
-     
-    con = sqlite3.connect(os.path.join(BASE_DIR, 'documents.sqlite3'))
-    con.isolation_level = None
-    con.row_factory = sqlite3.Row
-    con.text_factory = str
-    con.execute("ATTACH '{}' as fulltext".format(os.path.join(BASE_DIR, 'fulltext.sqlite3')))
-    create_tables()
     url='https://bitbucket.org/jzbontar/ref/',
     description='A usable reference manager.',
     py_modules=['ref', 'gui_vim'],
-    scripts=['ref'],
+    package_dir={'': 'src'},
+    scripts=['src/ref'],
     cmdclass={'test': TestCommand})
+from subprocess import Popen, PIPE
+import os
+import glob
+import re
+import sqlite3
+import sys
+import vim
+import collections
+
+import ref
+
+
def search_documents(query):
    """Run a search and rebuild the main buffer with grouped results.

    Results are grouped under '# FIELD' headings, one group per matched
    field. An empty query restores the unfiltered document list instead.
    """
    global last_select_cmd

    if not query:
        reload_main()
        return

    # Remember the query so order_documents() can re-run the same view.
    last_select_cmd = lambda: search_documents(query)
    del main_buf[:]
    for field, docs in ref.search_documents(headers, query, order):
        docs = map(str_document, docs)
        if docs:
            heading = '# {}'.format(field.upper())
            # A cleared vim buffer still holds one empty line, so the first
            # heading overwrites it rather than appending below it.
            if len(main_buf) == 1:
                main_buf[:] = [heading]
            else:
                main_buf.append('')
                main_buf.append(heading)
            main_buf[len(main_buf):] = docs
+
+
def parse_info():
    """Parse the info buffer back into a document dict.

    Buffer layout: bibtex block, '---', key=value lines, '---', notes.
    Returns the merged dict with 'filename' re-read from the database.
    """
    bibtex, rest, notes = '\n'.join(info_buf).split('\n---')
    doc = ref.parse_bibtex(bibtex)
    doc.update(dict(re.findall(r'(\w+)=(.*)', rest)))
    doc['bibtex'] = bibtex
    doc['docid'] = int(doc['docid'])
    doc['notes'] = notes.strip()
    # filename is not shown in the info buffer; fetch it so a later
    # ref.update_document() sees the current value.
    doc.update(next(ref.select_documents(('filename',), (doc['docid'],))))
    tags.update(doc['tags'].split('; '))
    return doc
+
+
def write_info(doc):
    """Render *doc* into the info buffer: bibtex, attributes, notes."""
    if not doc:
        info_buf[:] = []
        return
    lines = (doc['bibtex'] or '').splitlines()
    if not lines:
        # No bibtex yet: show a stub entry holding just the title.
        lines = ['@{', '  title=' + (doc['title'] or ''), '}']
    lines.append('---')
    lines.extend('{}={}'.format(a, doc[a] or '') for a in ('docid', 'tags', 'rating'))
    lines.append('---')
    lines.extend(doc['notes'].splitlines())
    info_buf[:] = lines
+
+
def save_info(doc):
    """Persist *doc* and refresh its line in the main buffer."""
    ref.update_document(doc)
    update_main((doc['docid'],))
+
+
def get_docid(line):
    """Return the integer docid at the start of *line*, or None if absent."""
    tokens = line.split()
    try:
        return int(tokens[0])
    except IndexError:
        return None  # blank line
    except ValueError:
        return None  # heading or non-numeric first column
+
+
def str_document(doc):
    """Format *doc* as one main-buffer row: fixed-width columns, 2-space gaps."""
    return '  '.join(
        (str(doc[h] or '')[:col_size[h]].ljust(col_size[h]) for h in headers))
+
+
def selected_document():
    """Return the full record for the document under the cursor, or None."""
    docid = get_docid(main_buf[main_win.cursor[0] - 1])
    if docid:
        fields = headers + ('bibtex', 'tags', 'filename', 'notes')
        docs = list(ref.select_documents(fields, (docid,)))
        if docs:
            return docs[0]
+
+
def resize():
    """Recompute column widths for the current window size and redraw."""
    global col_size

    info_win.height = 15
    # Fixed-width columns; title absorbs whatever window width remains,
    # minus the two-space separator after each column.
    col_size = {'year': 4, 'docid': 5, 'rating': 2, 'author': 30}
    col_size['title'] = main_win.width - sum(col_size.values()) - 2 * len(col_size)

    update_main()
+
+
def update_main(docids=None):
    """Re-render the main-buffer lines for *docids* (default: all lines).

    Lines with no leading docid (headings, blanks) are left untouched.
    """
    if not docids:
        # A list comprehension instead of filter(): filter() is lazy on
        # Python 3, which would make the emptiness test below always pass.
        docids = [d for d in (get_docid(line) for line in main_buf) if d]
        if not docids:
            return
    cur = ref.select_documents(headers, docids)
    docs = {doc['docid']: str_document(doc) for doc in cur}

    for i, line in enumerate(main_buf):
        docid = get_docid(line)  # renamed from 'id', which shadowed the builtin
        if docid in docs:
            main_buf[i] = docs[docid]
+
+
def reload_main():
    """Fill the main buffer with all documents in the current sort order."""
    global last_select_cmd

    last_select_cmd = reload_main
    docs = list(map(str_document, ref.select_documents(headers, order=order)))
    main_buf[:] = docs
+
+
def fetch_bibtex():
    """Fetch bibtex for the document in the info buffer, save and redisplay."""
    doc = parse_info()
    doc['bibtex'] = ref.fetch_bibtex(doc['title'])
    doc.update(ref.parse_bibtex(doc['bibtex']))
    save_info(doc)
    write_info(doc)
+
+
def open_document():
    """Open the selected document's file with the desktop's default viewer."""
    filename = selected_document()['filename']
    # Pipes silence xdg-open's output; no wait -- the viewer runs detached.
    Popen(['xdg-open', os.path.join(ref.DOCUMENT_DIR, filename)], stderr=PIPE, stdout=PIPE)
+
+
def add_document(fname):
    """Import the file *fname* and show it at the top of the main buffer."""
    docid = ref.insert_document(fname)
    if docid:
        doc = next(ref.select_documents(headers, (docid,)))
        main_buf[:0] = [str_document(doc)]
    main_win.cursor = (1, 0)
+
+
def export_bib(fname):
    """Export all bibtex entries to *fname* (thin wrapper around ref)."""
    ref.export_bib(fname)
+
+
+def delete_document(lineFrom, lineTo):
+    if vim.current.buffer != main_buf:
+        print 'Deletion is only possible from the main buffer'
+        return
+    docids = set()
+    for line in main_buf[lineFrom - 1:lineTo]:
+        docid = get_docid(line)
+        ref.delete_document(docid)
+        docids.add(docid)
+
+    for i, line in enumerate(main_buf):
+        id = get_docid(line)
+        if id in docids:
+            del main_buf[i]
+
+
def insert_tag(tag):
    """Append *tag* to the 'tags=' line of the info buffer and save."""
    for i, line in enumerate(info_buf):
        if line.startswith('tags='):
            info_buf[i] += '{}; '.format(tag)
    save_info(parse_info())
+
+
def toggle_unread():
    """Flip the 'U' (unread) marker on the info buffer's rating line.

    NOTE(review): the whole rating value is replaced with '' or 'U', so any
    other rating content is discarded -- confirm ratings only hold this flag.
    """
    for i, line in enumerate(info_buf):
        if line.startswith('rating='):
            info_buf[i] = 'rating=' + ('' if info_buf[i].endswith('U') else 'U')
    save_info(parse_info())
+    
+
+
def order_documents(o):
    """Set the sort order (an SQL ORDER BY expression) and refresh the view."""
    global order

    order = o
    last_select_cmd()
+
ref.init()

# View state: current sort order, visible columns, known tags, and the
# per-column widths (filled in by resize()).
order = 'docid DESC'
headers = 'docid', 'rating', 'author', 'title', 'year'
tags = ref.get_tags()
col_size = {}

# Build the two-window layout: document list on top, info pane below.
c = vim.command
c('set buftype=nofile')
c('set bufhidden=hide')
c('setlocal noswapfile')
c('file main')
main_buf, main_win = vim.current.buffer, vim.current.window
c('below new info') 
c('set buftype=nofile')
c('set bufhidden=hide')
c('setlocal noswapfile')
info_buf, info_win = vim.current.buffer, vim.current.window
c(':1winc w')

resize()
reload_main()
#ref.check_filenames()

# Keep the info pane in sync with the cursor; save edits when leaving it.
c('autocmd CursorMoved main python write_info(selected_document())')
c('autocmd BufLeave,VimLeave info python save_info(parse_info())')
c('autocmd VimResized * python resize()')
c('set cursorline')
c('set wildmode=longest,list')
# Key bindings and user commands.
c('map q :qa!<CR>')
c('map <c-o> :python open_document()<CR>')
c('map <c-u> :python toggle_unread()<CR>')
c('map <c-w>o <NOP>')
c('map // :Search ')
c('com Fetch py fetch_bibtex()')
c('com -nargs=1 -complete=customlist,Tag Tag py insert_tag("<args>")')
c("com -nargs=? -complete=customlist,Tag Search py search_documents('''<args>''')")
c('com -nargs=? -complete=customlist,Column Order py order_documents("<args>")')
c('com -nargs=1 -complete=file Add py add_document("<args>")')
c('com -nargs=1 -complete=file Export py export_bib("<args>")')
c('com -range Delete py delete_document(<line1>, <line2>)')

# Command-line completion helpers: complete tag names and column names.
c('''function Tag(ArgLead, CmdLine, CursorPos)
    python c('let xs = {}'.format(list(tags)))
    return filter(xs, 'a:ArgLead == strpart(v:val, 0, strlen(a:ArgLead))')
endfunction''')

c('''function Column(ArgLead, CmdLine, CursorPos)
    let xs = {}
    return filter(xs, 'a:ArgLead == strpart(v:val, 0, strlen(a:ArgLead))')
endfunction'''.format([h for h in headers]))
+
#! /bin/sh
# Launcher: start Vim and import gui_vim, which builds the UI at import time.

vim -c "python from gui_vim import *"
+from subprocess import Popen, PIPE
+import collections
+import filecmp
+import htmlentitydefs
+import itertools
+import os
+import random
+import re
+import shutil
+import sqlite3
+import struct
+import sys
+import tempfile
+import time
+import urllib2
+import HTMLParser
+
+
# Schema of the 'documents' table as (column name, SQLite type) pairs.
# 'docid' must stay first: update_document() skips it when building the
# SET clause, and its rowid links the row to its fulltext entry.
documents_fields = (
    ('docid', 'INTEGER PRIMARY KEY'), ('tags', 'TEXT'), ('title', 'TEXT'), 
    ('author', 'TEXT'), ('year', 'INTEGER'), ('rating', 'INTEGER'), 
    ('journal', 'TEXT'), ('filename', 'TEXT'), ('notes', 'TEXT'), 
    ('bibtex', 'TEXT')
)
+
+
def import_dir(dir):
    """Bulk-import every file in *dir*, printing each name as it goes."""
    for base in os.listdir(dir):
        print base
        insert_document(os.path.join(dir, base))
+
+
def check_filenames():
    """Verify DB filenames and files on disk match one-to-one, else IOError."""
    on_disk = set(os.listdir(DOCUMENT_DIR))
    for row in con.execute('SELECT filename FROM documents'):
        name = row['filename']
        if name not in on_disk:
            raise IOError('Filename not found ' + name)
        on_disk.remove(name)
    for orphan in on_disk:
        raise IOError('Filename not in database ' + orphan)
+
+
def create_tables():
    """Create the documents table and fulltext index on first run.

    A no-op when the database already contains any table.
    """
    c = con.execute("SELECT rowid FROM sqlite_master WHERE type='table'")
    if len(c.fetchall()) == 0:
        fields = ','.join(name + ' ' + type for name, type in documents_fields)
        con.execute('CREATE TABLE documents ({})'.format(fields))
        con.execute('CREATE VIRTUAL TABLE fulltext.fulltext USING fts4')
+
+
def select_documents(fields, docids=None, order='docid DESC'):
    """Return a cursor over *fields* for *docids* (all documents when None)."""
    query = 'SELECT {} FROM documents'.format(','.join(fields))
    if docids:
        query += ' WHERE docid in ({})'.format(','.join(map(str, docids)))
    query += ' ORDER BY ' + order
    return con.execute(query)
+
+
def update_document(doc):
    """Write *doc* back to the database, renaming its file if needed.

    The canonical filename depends on author/year/title, so it is
    recomputed and the file on disk renamed before the row is updated.
    """
    filename = get_filename(doc)
    if doc['filename'] != filename:
        src = os.path.join(DOCUMENT_DIR, doc['filename'])
        dst = os.path.join(DOCUMENT_DIR, filename)
        os.rename(src, dst)
        doc['filename'] = filename

    # Update every column except docid (the key), inside a savepoint so a
    # failed UPDATE leaves the row untouched.
    fs = ','.join(name + '=?' for name, _ in documents_fields[1:])
    vs = [doc[name] for name, _ in documents_fields[1:]] + [doc['docid']]
    try:
        con.execute('SAVEPOINT update_document')
        con.execute('UPDATE documents SET {} WHERE docid=?'.format(fs), vs)
        con.execute('RELEASE SAVEPOINT update_document')
    except:
        con.execute('ROLLBACK TO update_document')
        raise
+        
class DuplicateError(Exception):
    """Raised when an imported file is byte-identical to one already stored."""
    pass
+    
def insert_document(fname, fetch=True):
    """Import *fname*: extract text, optionally fetch bibtex, store a copy.

    Raises IOError for non-files, DuplicateError for files already in the
    store, ValueError for unsupported extensions. Returns the new docid.
    """
    if not os.path.isfile(fname):
        raise IOError('{} is not a file'.format(fname))

    # Reject byte-for-byte duplicates of anything already stored.
    for base2 in os.listdir(DOCUMENT_DIR):
        fname2 = os.path.join(DOCUMENT_DIR, base2)
        if filecmp.cmp(fname, fname2):
            raise DuplicateError(base2)

    ext = os.path.splitext(fname)[1]
    extract_funs = {'.pdf': extract_pdf, '.chm': extract_chm, '.djvu': extract_djvu}
    if ext not in extract_funs:
        raise ValueError('Unsupported file type {}'.format(ext))

    doc = collections.defaultdict(str)
    title, doc['fulltext'] = extract_funs[ext](fname)
    doc['title'] = title[:127]
    doc['rating'] = 'U'  # new documents start as unread
    if fetch:
        doc['bibtex'] = fetch_bibtex(doc['title'])
        doc.update(parse_bibtex(doc['bibtex']))

    # Insert both rows in a savepoint so a failure rolls everything back.
    try:
        con.execute('SAVEPOINT insert_document')
        ft_c = con.execute('INSERT INTO fulltext VALUES (?)', (doc['fulltext'],))
        c = con.execute('INSERT INTO documents DEFAULT VALUES')
        # documents and fulltext rows are linked by sharing the same rowid
        assert c.lastrowid == ft_c.lastrowid

        doc['docid'] = c.lastrowid
        doc['filename'] = fname  # setup arguments for get_filename
        doc['filename'] = get_filename(doc)
        update_document(doc)
        shutil.copy(fname, os.path.join(DOCUMENT_DIR, doc['filename']))
        con.execute('RELEASE SAVEPOINT insert_document')
    except:
        con.execute('ROLLBACK TO insert_document')
        raise

    return doc['docid']
+
+
def delete_document(docid):
    """Remove document *docid*: its rows in both databases and its file."""
    doc = next(select_documents(('docid', 'filename'), (docid,)))
    try:
        con.execute('BEGIN')
        con.execute('DELETE FROM documents WHERE docid=?', (doc['docid'],))
        con.execute('DELETE FROM fulltext WHERE docid=?', (doc['docid'],))
        os.remove(os.path.join(DOCUMENT_DIR, doc['filename']))
        con.execute('COMMIT')
    except:
        con.execute('ROLLBACK')
        raise
+
+
def search_documents(fields, query, order='docid DESC'):
    """Search *query* in each metadata field and in the fulltext index.

    Returns a list of (field name, cursor) pairs, one per searched field,
    so the caller can group results under per-field headings.
    """
    res = []
    for field in ('tags', 'title', 'author', 'journal', 'notes'):
        cur = con.execute('''SELECT {} FROM documents WHERE {} LIKE ? 
            ORDER BY {}'''.format(','.join(fields), field, order), 
            ('%' + query + '%',))
        res.append((field, cur))
    # fulltext rows share their docid (rowid) with the documents table
    cur = con.execute('''SELECT {} FROM documents JOIN 
        (SELECT docid FROM fulltext WHERE content MATCH ?)
        USING(docid) ORDER BY {}'''.format(','.join(fields), order), (query,))
    res.append(('fulltext', cur))
    return res
+        
+
def get_filename(doc):
    """Build 'Author - Year - Title - docid.ext' from *doc*'s fields.

    Long author lists collapse to 'First et al'; empty fields are dropped;
    characters awkward in filenames are stripped; the original extension
    is kept.
    """
    author = doc['author']
    if author.count(', ') > 2:
        author = author.split(', ')[0] + ' et al'
    parts = (author, doc['year'], doc['title'], doc['docid'])
    stem = ' - '.join(re.sub(r'[^-\w,. ]', '', str(p)) for p in parts if p)
    return stem + os.path.splitext(doc['filename'])[1]
+
+
def parse_bibtex(bibtex):
    """Parse 'key = {value},' lines of a bibtex entry into a defaultdict(str).

    Values are stripped of quotes/braces/backslashes; 'author' is reduced
    to a comma-separated list of surnames; 'journal' falls back to
    'booktitle' when absent.
    """
    fields = collections.defaultdict(str)
    fields.update(re.findall(r'^\s*(\w+)\s*=\s*{*(.+?)}*,?$', bibtex, re.MULTILINE))
    for key in list(fields):
        fields[key] = re.sub(r'[\'"{}\\=]', '', fields[key])
    surnames = (a.split(',')[0] for a in fields['author'].split(' and '))
    fields['author'] = ', '.join(surnames)
    if 'journal' not in fields:
        fields['journal'] = fields.get('booktitle', '')
    return fields
+
+
def get_tags():
    """Collect the set of all tags used by any document."""
    tags = set()
    for row in con.execute('SELECT tags FROM documents'):
        # tags are stored ';'-separated; drop empty fragments
        tags.update(tag for tag in row['tags'].split(';') if tag)
    return tags
+
+
def extract_djvu(fname):
    """Extract (title, fulltext) from a DjVu file via djvutxt.

    The title is guessed as the first paragraph of the text; guesses over
    100 characters are discarded as unlikely titles.
    """
    fulltext = Popen(['djvutxt', fname], stdout=PIPE).communicate()[0]
    title = re.match(r'(.*?)\n\n', fulltext, re.DOTALL).group(0)
    title = re.sub(r'\s+', ' ', title).strip()
    if len(title) > 100:
        title = ''
    return title, fulltext
+
+
def extract_chm(fname):
    """Extract (title, fulltext) from a .chm file via extract_chmLib.

    The title and the content-page list come from the .hhc table of
    contents; each referenced HTML page is stripped of tags and
    concatenated into the fulltext.
    """
    dir = tempfile.mkdtemp(prefix='ref.')
    try:
        Popen(['extract_chmLib', fname, dir], stdout=PIPE).communicate()
        # Fixed: title/fulltext were unbound (NameError) when no .hhc file
        # was extracted; now an empty result is returned instead.
        title, fulltext = '', ''
        for base in os.listdir(dir):
            name, ext = os.path.splitext(base)
            if ext == '.hhc':
                hhc = open(os.path.join(dir, base)).read()
                title = re.search(r'name="Name" value="([^"]+)"', hhc).group(1)
                for html in re.findall(r'"({}/[^"]+)"'.format(name), hhc):
                    fulltext += striptags(open(os.path.join(dir, html)).read())
                break
    finally:
        # Remove the temp dir even if extraction/parsing fails.
        shutil.rmtree(dir)
    return title, fulltext
+        
+
def extract_pdf(fname):
    """Extract (title, fulltext) from a PDF.

    fulltext comes from pdftotext; the title is guessed from the first
    three pages by ranking same-font text runs by font size and picking
    the largest one that does not look like a section heading.
    """
    cmd = ['pdftotext', '-enc', 'ASCII7', fname, '-']
    fulltext = Popen(cmd, stdout=PIPE).communicate()[0]

    cmd = ['pdftohtml', '-enc', 'ASCII7', '-xml', '-stdout', '-l', '3', fname]
    xml = Popen(cmd, stdout=PIPE).communicate()[0]

    # Map font ids to point sizes so text runs can be ranked by size.
    fontspec = re.findall(r'<fontspec id="([^"]+)" size="([^"]+)"', xml)
    font_size = {id: int(size) for id, size in fontspec}

    chunks = []
    for id, text in re.findall(r'font="([^"]+)">(.*)</text>', xml):
        chunks.append((font_size[id], id, striptags(text).strip()))

    # Group consecutive runs that share (size, font id).
    groups = []
    for size_id, group in itertools.groupby(chunks, lambda xs: xs[:2]):
        size, id = size_id
        # NOTE(review): 'text' here is left over from the loop above, so the
        # bold bonus is computed from the document's *last* text run for every
        # group -- presumably the group's own text was intended; confirm.
        text_size = size + text.startswith('<b>') * 0.5
        groups.append((text_size, list(group)))

    title = ''
    for _, group in sorted(groups, key=lambda xs: xs[0], reverse=True):
        title = ' '.join(map(lambda xs: xs[2], group)).strip()
        # Reject headings that are commonly typeset larger than the title.
        bad = ('abstract', 'introduction', 'relatedwork', 'originalpaper', 'bioinformatics')
        if len(title) >= 5 and re.sub(r'[\d\s]', '', title).lower() not in bad:
            break
    return title, fulltext
+
+
def fetch_bibtex(title):
    """Look *title* up on Google Scholar and return its bibtex entry.

    Falls back to a minimal stub entry when Scholar answers with an
    HTTP error.
    """
    try:
        url = '/scholar?q=allintitle:' + urllib2.quote(title)
        match = re.search(r'<a href="(/scholar.bib[^"]+)', scholar_read(url))
        if not match:
            raise ValueError('Title not found')
        return scholar_read(match.group(1))
    except urllib2.HTTPError:
        return '@{{\n  title={}\n}}\n'.format(title)
+
+
def delay(n, interval):
    """Decorator factory: limit a function to *n* calls per *interval* seconds.

    Keeps the timestamps of the last n calls; once the window is full, each
    call sleeps until *interval* seconds have passed since the oldest one.
    """
    def decorator(func):
        history = collections.deque()

        def rate_limited(*args):
            if len(history) == n:
                oldest = history.pop()
                time.sleep(max(0, interval + oldest - time.time()))
            history.appendleft(time.time())
            return func(*args)

        return rate_limited
    return decorator
+
+
@delay(2, 3)
def scholar_read(url):
    """GET scholar.google.com *url*; rate-limited to 2 requests per 3 s.

    Sends a browser user-agent and a random GSP cookie (the CF=4 setting
    presumably enables bibtex export links -- confirm); returns the page
    body as utf8-encoded, entity-unescaped bytes.
    """
    id = ''.join(random.choice('0123456789abcdef') for i in range(16))
    cookie = 'GSP=ID={}:CF=4;'.format(id)
    h = {'User-agent': 'Mozilla/5.0', 'Cookie': cookie}
    req = urllib2.Request('http://scholar.google.com' + url, headers=h)
    return unescape(urllib2.urlopen(req).read().decode('utf8')).encode('utf8')
+
+
def striptags(html):
    """Remove markup tags and unescape entities from an HTML snippet."""
    return unescape(re.sub(r'<[^>]+>', '', html))
+
+
# Single shared parser instance provides HTML entity unescaping.
unescape = HTMLParser.HTMLParser().unescape
+
+
def export_bib(fname):
    """Write every document's bibtex entry to *fname*, blank-line separated."""
    rows = con.execute('SELECT bibtex FROM documents')
    # 'with' closes the file deterministically (the original leaked the handle).
    with open(fname, 'w') as f:
        f.write('\n\n'.join(row['bibtex'] for row in rows))
+
+
def init(base_dir=os.path.expanduser('~/.ref')):
    """Open (creating on first run) the databases and document directory.

    Sets the module globals BASE_DIR, DOCUMENT_DIR and con.
    """
    global con, BASE_DIR, DOCUMENT_DIR

    BASE_DIR = base_dir
    DOCUMENT_DIR = os.path.join(BASE_DIR, 'documents')

    for dir in (BASE_DIR, DOCUMENT_DIR):
        if not os.path.exists(dir):
            os.mkdir(dir)

    con = sqlite3.connect(os.path.join(BASE_DIR, 'documents.sqlite3'))
    con.isolation_level = None  # autocommit; transactions are explicit in callers
    con.row_factory = sqlite3.Row
    con.text_factory = str
    # Parameter binding instead of string formatting: a base dir containing a
    # quote previously broke (or injected into) the ATTACH statement.
    con.execute('ATTACH ? AS fulltext',
                (os.path.join(BASE_DIR, 'fulltext.sqlite3'),))
    create_tables()
 import re
 import filecmp
 import textwrap
+import sys
 from time import time
 from random import randint, choice, sample, random
 from pprint import pprint
 
+sys.path.append('../src')
 import ref
 
 class Test(unittest.TestCase):
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.