Commits

Brendan Howell  committed c575e69

added bengt - doing some less crazy POS convolution

  • Participants
  • Parent commits 0ed3231

Comments (0)

Files changed (1)

+#paranoid text generator
+import random
+import nltk
+import glob
+import mailbox
+
+print "tagging Poe"
+poe = open("purloined.txt").read()
+poe = nltk.word_tokenize(poe)
+poe = nltk.pos_tag(poe)
+
+esc_chars = ['#','$','%','^','&','_','{',"}","~","\"]
+
+train_txt = ""
+
+username = "danja"
+
+#train from email
+for archive in glob.glob("w7*"):
+    mbox = mailbox.mbox(archive)
+    for msg in mbox:
+        if username in msg["from"]:
+            for part in msg.walk():
+                if part.get_content_type() == "text/plain":
+                    chunk = part.get_payload(decode=True)
+                    for line in chunk.splitlines():
+                        if not(line.startswith(">")):
+                            train_txt += line + "\n"
+                            #print line
+
+user = {}
+
+print "tagging user"
+train_txt = nltk.word_tokenize(train_txt)
+train_txt = nltk.pos_tag(train_txt)
+
+print "building pos dict"
+for tword in train_txt:
+    if not(tword[1] in user):
+        user[tword[1]] = [tword[0]]
+    else:
+        user[tword[1]].append(tword[0])
+
+
+print "writing story"
+out = ""
+for word in poe:
+    pos = word[1]
+    newword = ""
+    try:
+        newword = random.choice(user[word[1]]) + " "
+        for ch in esc_chars:
+            newword = newword.replace(ch,"\"+ch)
+    except:
+        newword = word[0]
+        
+    out += newword + " "
+        
+print out