#!/usr/bin/env python3
import re
import sys
import magic
import shutil
import subprocess
import os.path as path
import pygit2 as git
import matplotlib.pyplot as plot
from itertools import chain
from datetime import datetime
# Path of the `detex` executable as found on PATH, or None when it is not
# installed; tex_middleware checks this before calling detex().
DETEX = shutil.which("detex")
class MimeDispatcher:
    """Route a blob to the handler registered for its detected MIME type.

    Handlers are callables taking ``(entry, data)``; they are stored in
    ``self.mapping`` keyed by MIME type string.
    """

    def __init__(self):
        # MIME type string -> handler callable.
        self.mapping = {}

    def __call__(self, entry, data):
        """Detect the MIME type of *data* and invoke the matching handler.

        Args:
            entry: Passed through to the handler unchanged.
            data: Raw content handed to ``magic.from_buffer``.

        Returns:
            A ``(mime, handler_result)`` tuple, or ``None`` when no handler
            is registered for the detected type.
        """
        mime = magic.from_buffer(data, mime=True)
        # Older python-magic releases return bytes here while current ones
        # return str; an unconditional .decode() raises on the latter.
        if isinstance(mime, bytes):
            mime = mime.decode("utf-8")
        fn = self.mapping.get(mime)
        if fn is None:
            return None
        return mime, fn(entry, data)

    def register(self, mime, fn):
        """Register *fn* as the handler for *mime*, replacing any earlier one."""
        self.mapping[mime] = fn
def detex(bytes):
    """Pipe *bytes* through ``detex -n`` and return what it writes to stdout.

    stderr of the child process is discarded. A non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    completed = subprocess.run(
        [DETEX, "-n"],
        input=bytes,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        check=True,
    )
    return completed.stdout
def words(str):
    """Return the number of whitespace-separated words in *str*.

    Empty or whitespace-only input yields 0, and leading/trailing whitespace
    does not add phantom words (splitting on whitespace would count the empty
    strings at either end).
    """
    # NOTE: the parameter name shadows the builtin; it is kept so existing
    # keyword callers keep working.
    return len(re.findall(r"\S+", str))
def tex_middleware(fn):
    """Wrap handler *fn* so its data is passed through detex() first.

    When no detex executable was found (DETEX is falsy) the data reaches
    *fn* unchanged.
    """
    def wrap(entry, data):
        stripped = detex(data) if DETEX else data
        return fn(entry, stripped)

    return wrap
def count_handler(entry, data):
    """Decode *data* as UTF-8 and return its word count; *entry* is unused."""
    text = data.decode("utf-8")
    return words(text)
# Module-level dispatcher; word_chart() passes it on to statistics().
handler = MimeDispatcher()
# Blobs detected as TeX go through tex_middleware before count_handler
# counts their words.
handler.register("text/x-tex", tex_middleware(count_handler))
def walk_tree(repo, tree):
    """Yield every non-tree object reachable from *tree*, breadth-first.

    Args:
        repo: Repository used to resolve entry ids via ``repo.get``.
        tree: Root tree; iterating it must yield entries with an ``id``.

    Yields:
        ``(entry, obj.type, obj.data)`` for each entry whose object is not
        a tree. Entries whose object cannot be resolved are skipped.
    """
    # Subtrees are appended while the list is being iterated; the list
    # iterator picks up the new items, giving a breadth-first traversal.
    pending = [tree]
    for subtree in pending:
        for entry in subtree:
            obj = repo.get(entry.id)
            if obj is None:
                # The entry points at an object that is not stored in this
                # repository (e.g. a submodule commit), so there is nothing
                # to read.
                continue
            if obj.type == git.GIT_OBJ_TREE:
                pending.append(obj)
            else:
                yield entry, obj.type, obj.data
def process_tree(repo, tree, handler):
    """Run *handler* over every blob under *tree* and yield its truthy results.

    Objects that are not blobs are ignored, as are blobs for which the
    handler returns a falsy value.
    """
    for entry, kind, payload in walk_tree(repo, tree):
        if kind != git.GIT_OBJ_BLOB:
            continue
        result = handler(entry, payload)
        if result:
            yield result
def statistics(repo, handler):
    """Yield ``(commit_time, total)`` for each commit reachable from HEAD.

    Commits are walked with ``GIT_SORT_TIME | GIT_SORT_REVERSE``. The total
    is the sum of the second element of every value that process_tree()
    yields for the commit's tree.

    Args:
        repo: Repository to walk, starting at ``repo.head.target``.
        handler: Callable passed through to process_tree().

    Yields:
        ``(datetime, total)`` tuples; the datetime is naive and in UTC.
    """
    # Imported locally so this function does not depend on a change to the
    # module's import block.
    from datetime import timezone

    order = git.GIT_SORT_TIME | git.GIT_SORT_REVERSE
    for commit in repo.walk(repo.head.target, order):
        # datetime.utcfromtimestamp() is deprecated since Python 3.12.
        # Convert via an aware datetime and drop tzinfo, which yields the
        # same naive UTC value as before.
        when = datetime.fromtimestamp(
            commit.commit_time, tz=timezone.utc
        ).replace(tzinfo=None)
        total = sum(
            count for _mime, count in process_tree(repo, commit.tree, handler)
        )
        yield when, total
def word_chart(repo, name, file):
    """Plot the per-commit word totals of *repo* and save the chart as a PDF.

    Args:
        repo: Repository handed to statistics() together with the
            module-level ``handler``.
        name: Project name shown in the figure title.
        file: Destination passed to ``plot.savefig``.
    """
    samples = list(statistics(repo, handler))
    dates, sums = zip(*samples)

    plot.style.use("bmh")
    figure = plot.figure()
    title = "Progress of {}".format(name)
    figure.suptitle(title, fontweight="bold")

    axes = figure.add_subplot(1, 1, 1)
    figure.autofmt_xdate()
    axes.set_ylabel("Words")
    axes.yaxis.grid(True)
    axes.plot(dates, sums, marker="o", color="g")

    plot.savefig(file, format="pdf")
def main(args):
    """Command-line entry point: chart the word count of a repository.

    Args:
        args: Argument vector in ``sys.argv`` layout: program name,
            repository path, output file.

    Raises:
        SystemExit: With status 1 when arguments are missing or the
            repository is empty.
    """
    if len(args) < 3:
        print("Usage: {} repository output".format(args[0]), file=sys.stderr)
        # sys.exit rather than the site-provided exit(), which is absent
        # when the interpreter runs without the site module.
        sys.exit(1)
    repo = git.Repository(args[1])
    # workdir is None for bare repositories; fall back to the repository
    # path so the name can still be derived.
    location = repo.workdir or repo.path
    name = path.basename(path.normpath(location))
    if repo.is_empty:
        # Errors go to stderr, matching the usage message above.
        print("{} is empty".format(name), file=sys.stderr)
        sys.exit(1)
    word_chart(repo, name, args[2])
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main(sys.argv)