Snippets

Samuel Laurén Writing progress

Created by Samuel Laurén last modified
#!/usr/bin/env python3

import re
import sys
import magic
import shutil
import subprocess
import os.path as path
import pygit2 as git
import matplotlib.pyplot as plot
from itertools import chain
from datetime import datetime

# Path of the `detex` executable as located on PATH, or None when it is not
# installed; tex_middleware() checks this before stripping TeX markup.
DETEX = shutil.which("detex")

class MimeDispatcher():
	"""Route blob contents to a handler selected by their detected MIME type.

	Handlers are callables taking ``(entry, data)``; they are registered per
	MIME type with :meth:`register` and invoked by calling the dispatcher.
	"""

	def __init__(self):
		# Maps a MIME type string to the handler registered for it.
		self.mapping = {}

	@staticmethod
	def _as_text(mime):
		"""Return *mime* as ``str``, decoding it as UTF-8 when it is bytes."""
		# python-magic returns bytes in some releases and str in others;
		# calling .decode() unconditionally fails on the str variant.
		if isinstance(mime, bytes):
			return mime.decode("utf-8")
		return mime

	def __call__(self, entry, data):
		"""Detect the MIME type of *data* and run the matching handler.

		Returns a ``(mime, handler_result)`` tuple, or ``None`` when no
		handler is registered for the detected type.
		"""
		mime = self._as_text(magic.from_buffer(data, mime=True))
		fn = self.mapping.get(mime)
		if fn:
			return mime, fn(entry, data)
		return None

	def register(self, mime, fn):
		"""Register *fn* as the handler for *mime*, replacing any previous one."""
		self.mapping[mime] = fn

def detex(bytes):
	"""Pipe *bytes* through the ``detex`` executable and return its stdout.

	The program's stderr is discarded. A non-zero exit status raises
	``subprocess.CalledProcessError``.
	"""
	completed = subprocess.run(
		[DETEX, "-n"],
		input=bytes,
		stdout=subprocess.PIPE,
		stderr=subprocess.DEVNULL,
		check=True,
	)
	return completed.stdout

def words(str):
	"""Return the number of whitespace-separated words in *str*.

	Leading, trailing and repeated whitespace never produce empty "words",
	so an empty or all-whitespace string counts as 0.
	"""
	# re.split(r"\s+", ...) keeps the empty strings at the edges, which
	# over-counts any text that starts or ends with whitespace (for example
	# a trailing newline) and reports 1 for an empty string.
	return len(str.split())

def tex_middleware(fn):
	"""Wrap handler *fn* so its data is passed through detex() first.

	When no ``detex`` executable was found (``DETEX`` is falsy) the data is
	handed to *fn* unchanged.
	"""
	def wrap(entry, data):
		stripped = detex(data) if DETEX else data
		return fn(entry, stripped)
	return wrap

def count_handler(entry, data):
	"""Return the word count of the byte string *data*.

	*entry* is accepted to match the handler signature but is not used.
	"""
	# A strict decode would abort the whole run on a single blob that is not
	# valid UTF-8; replacing undecodable bytes keeps the count usable.
	return words(data.decode("utf-8", errors="replace"))

# Module-level dispatcher used by word_chart(): blobs detected as TeX are
# passed through tex_middleware and then word-counted by count_handler.
handler = MimeDispatcher()
handler.register("text/x-tex", tex_middleware(count_handler))

def walk_tree(repo, tree):
	"""Yield ``(entry, obj.type, obj.data)`` for every non-tree object under *tree*.

	Sub-trees are traversed breadth-first. Entries whose object cannot be
	looked up in *repo* are skipped.
	"""
	pending = [tree]
	# chain.from_iterable walks `pending` lazily, so trees appended while
	# iterating are picked up once the earlier ones are exhausted.
	for entry in chain.from_iterable(pending):
		obj = repo.get(entry.id)
		if obj is None:
			# repo.get() returns None for ids missing from the object
			# database (e.g. submodule entries); without this check the
			# attribute access below raises AttributeError.
			continue
		if obj.type == git.GIT_OBJ_TREE:
			pending.append(obj)
		else:
			yield entry, obj.type, obj.data

def process_tree(repo, tree, handler):
	"""Yield each truthy result of ``handler(entry, data)`` for the blobs under *tree*.

	Objects reported by walk_tree() that are not blobs are ignored.
	"""
	for entry, kind, payload in walk_tree(repo, tree):
		if kind != git.GIT_OBJ_BLOB:
			continue
		result = handler(entry, payload)
		if result:
			yield result
			
def statistics(repo, handler):
	"""Yield ``(time, total)`` for each commit reachable from HEAD.

	Commits are walked with time sorting in reversed order. *time* is a naive
	datetime expressing the commit time in UTC, and *total* is the sum of the
	values that process_tree() yields for the commit's tree.
	"""
	# Only `datetime` is imported at file level, so bring `timezone` in here.
	from datetime import timezone

	order = git.GIT_SORT_TIME | git.GIT_SORT_REVERSE
	for commit in repo.walk(repo.head.target, order):
		# datetime.utcfromtimestamp() is deprecated since Python 3.12; this
		# produces the same naive UTC value without the deprecation warning.
		when = datetime.fromtimestamp(commit.commit_time, tz=timezone.utc).replace(tzinfo=None)
		total = sum(count for _mime, count in process_tree(repo, commit.tree, handler))
		yield when, total

def word_chart(repo, name, file):
	"""Plot the per-commit word totals of *repo* and save the chart as a PDF.

	*name* is shown in the figure title and *file* is the savefig target.
	The totals come from statistics() using the module-level ``handler``.
	"""
	stats = list(statistics(repo, handler))
	# zip(*[]) produces nothing, so the two-way unpack would raise ValueError
	# when there are no statistics; fall back to an empty series instead.
	dates, sums = zip(*stats) if stats else ((), ())
	plot.style.use("bmh")
	fig = plot.figure()
	try:
		fig.suptitle("Progress of {}".format(name), fontweight="bold")
		ax = fig.add_subplot(1, 1, 1)
		fig.autofmt_xdate()
		ax.set_ylabel("Words")
		ax.yaxis.grid(True)
		ax.plot(dates, sums, marker="o", color="g")
		fig.savefig(file, format="pdf")
	finally:
		# pyplot keeps every figure alive until it is closed explicitly.
		plot.close(fig)

def main(args):
	"""Command-line entry point: ``args`` is ``[program, repository, output]``.

	Opens the repository at ``args[1]`` and writes the word chart to
	``args[2]``. Exits with status 1 when arguments are missing or the
	repository is empty.
	"""
	if len(args) < 3:
		print("Usage: {} repository output".format(args[0]), file=sys.stderr)
		# sys.exit rather than the `exit` helper, which the site module only
		# installs for interactive use and which may be absent.
		sys.exit(1)
	repo = git.Repository(args[1])
	# workdir is None for a bare repository; fall back to the repository path
	# so that normpath() is never handed None.
	name = path.basename(path.normpath(repo.workdir or repo.path))
	if repo.is_empty:
		# Report the error on stderr, like the usage message above.
		print("{} is empty".format(name), file=sys.stderr)
		sys.exit(1)
	word_chart(repo, name, args[2])

# Run main() with the process arguments only when executed as a script.
if __name__ == "__main__":
	main(sys.argv)

Comments (0)

HTTPS SSH

You can clone a snippet to your computer for local editing. Learn more.