Commits

Brendan Howell committed 0ed3231

cleanups and CFG experiment

  • Participants
  • Parent commits d896de6

Comments (0)

Files changed (6)

File danja.pdf

Binary file added.

File gordo.pdf

Binary file added.
+#paranoid text generator
+#
+# Rewrites the Poe text in purloined.txt word by word: every token is swapped
+# for a random word with the same part-of-speech tag taken from the e-mails of
+# one list member, and the result is written to <username>.tex.
+import random
+import nltk
+import glob
+import mailbox
+
+print "tagging Poe"
+with open("purloined.txt") as poe_file:
+    poe = poe_file.read()
+poe = nltk.word_tokenize(poe)
+poe = nltk.pos_tag(poe)
+
+# Characters that get a backslash prefix before they are written to the .tex
+# file. The backslash comes first so the backslashes inserted for the other
+# characters are not escaped a second time.
+# NOTE(review): in LaTeX "\\" is a line break and "\^" / "\~" are accent
+# commands, so those three do not print as literal characters - confirm
+# whether that matters for the generated text.
+esc_chars = ["\\", '#','$','%','^','&','_','{',"}","~"]
+
+username = "gordo"
+
+
+#train from email
+train_lines = []
+for archive in glob.glob("w7*"):
+    mbox = mailbox.mbox(archive)
+    for msg in mbox:
+        # msg["from"] is None when the header is missing; treat that as
+        # "not from this user" instead of raising TypeError
+        if username not in (msg["from"] or ""):
+            continue
+        for part in msg.walk():
+            if part.get_content_type() != "text/plain":
+                continue
+            chunk = part.get_payload(decode=True)
+            for line in chunk.splitlines():
+                # lines starting with ">" are quoted replies, not the
+                # user's own words
+                if not line.startswith(">"):
+                    train_lines.append(line)
+train_txt = "".join([line + "\n" for line in train_lines])
+
+print "tagging user"
+train_txt = nltk.word_tokenize(train_txt)
+train_txt = nltk.pos_tag(train_txt)
+
+print "building pos dict"
+# maps a part-of-speech tag to every word the user wrote with that tag
+# (duplicates are kept, so frequent words are picked more often)
+user = {}
+for token, tag in train_txt:
+    user.setdefault(tag, []).append(token)
+
+
+print "writing story"
+pieces = []
+for token, tag in poe:
+    if tag in user:
+        newword = random.choice(user[tag])
+    else:
+        # the user never produced this tag: keep Poe's own word
+        newword = token
+    # escape both substituted and fallback words so the .tex file stays valid
+    for ch in esc_chars:
+        newword = newword.replace(ch, "\\" + ch)
+    pieces.append(newword + " ")
+out = "".join(pieces)
+
+# open the output only once the text exists, so a failed run does not leave
+# a truncated file behind
+with open(username + ".tex", "w") as outfile:
+    outfile.write(out)
 
 train_txt = ""
 
-username = "brendan"
+username = "julian"
 
 #train from email
 for archive in glob.glob("w7*"):
+#paranoid text generator
+#
+# CFG experiment (work in progress): picks a short random message from one
+# list member's e-mails and builds a part-of-speech table of the Poe text in
+# purloined.txt. The table sizes are printed; no story is generated yet.
+import random
+import nltk
+import glob
+import mailbox
+
+print "tagging Poe"
+with open("purloined.txt") as poe_file:
+    poe = poe_file.read()
+poe = nltk.word_tokenize(poe)
+poe = nltk.pos_tag(poe)
+
+# Characters that get a backslash prefix so the tokens can go into a .tex
+# file. The backslash comes first so inserted backslashes are not escaped
+# a second time.
+esc_chars = ["\\", '#','$','%','^','&','_','{',"}","~"]
+
+username = "servando"
+
+
+#train from email
+# the user's own (non-quoted) mail lines, one list entry per line
+train_txt = []
+for archive in glob.glob("w7*"):
+    mbox = mailbox.mbox(archive)
+    for msg in mbox:
+        # msg["from"] is None when the header is missing; treat that as
+        # "not from this user" instead of raising TypeError
+        if username not in (msg["from"] or ""):
+            continue
+        for part in msg.walk():
+            if part.get_content_type() != "text/plain":
+                continue
+            chunk = part.get_payload(decode=True)
+            for line in chunk.splitlines():
+                # lines starting with ">" are quoted replies
+                if not line.startswith(">"):
+                    train_txt.append(line)
+
+user = {}
+
+# random.choice() on an empty list raises a bare IndexError; fail with a
+# message that says what is actually missing
+if not train_txt:
+    raise SystemExit("no text/plain mail lines found for " + username)
+
+# glue random mail lines together until the message is at least 20 chars long
+in_message = ""
+while len(in_message) < 20:
+    in_message += random.choice(train_txt).strip() + " "
+
+print in_message
+
+# maps a part-of-speech tag to every (escaped) Poe token carrying that tag
+table = {}
+for token, pos in poe:
+    for ch in esc_chars:
+        token = token.replace(ch, "\\" + ch)
+    table.setdefault(pos, []).append(token)
+
+for pos in table:
+    print "pos: " + pos + " - " + str(len(table[pos]))
+
+# TODO: the story text is not generated in this experiment yet. The script
+# used to write an undefined name "out" here and died with a NameError;
+# an empty string keeps the resulting (empty) .tex file the same without
+# the crash.
+out = ""
+
+with open(username + ".tex", "w") as outfile:
+    outfile.write(out)

File template.tex

 \documentclass[11pt,notitlepage,a4paper]{memoir}
 \begin{document}
-\include{danja}
+\include{gordo}
 
 \end{document}