Source

pycon2013 / ana.py

#!/usr/bin/env python

import re
from collections import Counter
from itertools import combinations


def tokenize(text):
    """Return the set of distinct words of four or more letters in *text*.

    A word is a maximal run matched by ``[a-z]+`` under case-insensitive
    matching; every word is lowercased before it is added to the result.
    """
    found = set()
    for match in re.finditer('[a-z]+', text, re.I):
        token = match.group()
        # Short words are dropped before lowercasing; length is unaffected
        # by the case of the letters.
        if len(token) >= 4:
            found.add(token.lower())
    return found


def histogram(w1, w2):
    """Return a ``Counter`` of the items of ``w1 + w2``.

    For two strings this is the number of times each character occurs in
    the two words taken together.
    """
    combined = w1 + w2
    counts = Counter()
    counts.update(combined)
    return counts


def is_anagram(w1, w2, w3, w4):
    """Return True when ``w1 + w2`` and ``w3 + w4`` have equal histograms.

    The two pairs are compared through ``histogram``, so the order of the
    letters and the way they are split between the words do not matter.
    """
    first_pair = histogram(w1, w2)
    second_pair = histogram(w3, w4)
    return first_pair == second_pair


def find_2anagrams(words):
    """Yield ``(w1, w2, w3, w4)`` for every two-word anagram among *words*.

    A result means that the letters of ``w1 + w2`` are a rearrangement of
    the letters of ``w3 + w4`` and that the four words are all distinct.
    Each match is produced twice, once with either pair in front.  Results
    follow the iteration order of *words*; no other ordering is promised.

    Rather than testing every pair against every other pair, all pairs are
    first indexed by their combined letter counts, so a pair is only ever
    compared with pairs already known to use the same letters.  The index
    holds every pair at once, so memory grows quadratically with the number
    of words.

    *words* may be any iterable of strings (not only a set); duplicate
    words are ignored.
    """
    # Materialise once so the input can be walked twice, and drop duplicates
    # so a word can never be paired with itself.
    unique = list(dict.fromkeys(words))

    def letters(first, second):
        # Hashable stand-in for the letter histogram of the two words:
        # two pairs get equal keys exactly when their Counters are equal.
        return frozenset(Counter(first + second).items())

    by_letters = {}
    for pair in combinations(unique, 2):
        by_letters.setdefault(letters(*pair), []).append(pair)

    for w1, w2 in combinations(unique, 2):
        for w3, w4 in by_letters[letters(w1, w2)]:
            # Skip the pair itself and any pair that shares a word with it.
            if w3 not in (w1, w2) and w4 not in (w1, w2):
                yield w1, w2, w3, w4


def process(fo):
    """Read all text from *fo* and print the two-word anagrams found in it.

    The text is split into words with ``tokenize`` and searched with
    ``find_2anagrams``.  A match is printed as ``w1 w2, w3 w4`` only if none
    of its four words has appeared in an earlier printed line, so every
    word shows up in the output at most once.
    """
    used = set()
    for quad in find_2anagrams(tokenize(fo.read())):
        # Print only matches that reuse no word from an earlier line.
        if used.isdisjoint(quad):
            print('{} {}, {} {}'.format(*quad))
            used.update(quad)


if __name__ == '__main__':
    # Script entry point: search the text supplied on standard input.
    import sys
    process(sys.stdin)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.