Commits

Alexandru Moșoi committed 8b6d152

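Better handling of c: the c→k substitutions now also cover câ, cî and ch, and run before the î/â and trailing-e rewrites.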

Comments (0)

Files changed (1)

 
 import re
 import sys
-import random
+
+from random import randrange, choice, sample
 
 
 def insert(count, word):
-  l = sorted([0] + random.sample(range(1, len(word)), count) + [len(word)])
+  l = sorted([0] + sample(range(1, len(word)), count) + [len(word)])
   result = ''
   for i in xrange(count + 1):
     result += '-' + word[l[i]:l[i+1]]
   original = m.group(0)
   word = re.sub(r"-", r"", original)
   count = original.count("-")
-  return insert(random.randrange(count + 1), word)
+  return insert(randrange(count + 1), word)
+
+
+def pick(arg):
+  # Build a replacement callback for re.sub: the returned function ignores
+  # the match object it receives and returns a random element of `arg`
+  # (a fresh random.choice on every call, i.e. once per match).
+  def f(m):
+    return choice(arg)
+  return f
 
 
 def analfabetize(text):
+  # Pass `text` through the regex rewrites below, in order, then print it.
+  # Python 2 syntax throughout (ur"" literals, print statement).
-  text = re.sub(ur"(\w+)((-\w+)+)", cratima, text,
-                flags=re.UNICODE | re.IGNORECASE) 
+  # Hyphenated words (\w+ followed by one or more -\w+ parts) are handed to
+  # the `cratima` callback.
+  text = re.sub(ur"(\w+)((-\w+)+)", cratima,
+                text, flags=re.UNICODE | re.IGNORECASE) 
 
+  # Each listed word (matched whole, case-insensitively) goes through
+  # insert() with a count of 0 or 1, since randrange(2) yields 0 or 1.
   text = re.sub(ur"\b((al)|(care)|(mai)|(neam)|(sa)|(sar)|(sau)|(vau)|(va)|(vati))\b",
-                lambda m: insert(random.randrange(2), m.group(0)), text,
-                flags=re.UNICODE | re.IGNORECASE) 
+                lambda m: insert(randrange(2), m.group(0)),
+                text, flags=re.UNICODE | re.IGNORECASE) 
 
-  text = re.sub(ur"î|â", lambda x: random.choice(u"îâ"), text)
-  text = re.sub(ur"Î|Â", lambda x: random.choice(u"ÎÂ"), text)
-  text = re.sub(ur"\Bl\b", lambda x: random.choice((u"l", u"-l")), text,
-                flags=re.UNICODE)
-  text = re.sub(ur"\Bii\b", lambda x: random.choice((u"i", u"ii")), text,
-                flags=re.UNICODE)
-  text = re.sub(ur"(?=\B[^i])i\b", lambda x: random.choice((u"i", u"ii")), text,
-                flags=re.UNICODE)
+  # "nu mai" / "numai" (the space is optional in the pattern) is replaced by
+  # a random one of the two spellings; the capitalised form is handled
+  # separately so the capital is kept.
+  text = re.sub(ur"nu ?mai", pick(("numai", "nu mai")), text)
+  text = re.sub(ur"Nu ?mai", pick(("Numai", "Nu mai")), text)
+
+  # c -> k. First, "c" followed by a, â, î or h is replaced together with
+  # that following letter. Then any remaining "c" whose next character is
+  # not e or i becomes "k"; the lookahead needs a next character, so a "c"
+  # at the very end of the text is left as is. These run before the î/â
+  # and â -> a rewrites below, so they see the original diacritics.
+  text = re.sub(ur"c(a|â|î|h)", r"k", text)
+  text = re.sub(ur"C(a|â|î|h)", r"K", text)
+  text = re.sub(ur"c(?=[^ei])", r"k", text)
+  text = re.sub(ur"C(?=[^ei])", r"K", text)
+
+  # Every î or â is replaced by a random pick between î and â (likewise for
+  # the uppercase pair).
+  text = re.sub(ur"î|â", pick(u"îâ"), text)
+  text = re.sub(ur"Î|Â", pick(u"ÎÂ"), text)
+  # A word-final "l" that is not also the first letter of the word randomly
+  # stays "l" or becomes "-l".
+  text = re.sub(ur"\Bl\b", pick((u"l", u"-l")),
+                text, flags=re.UNICODE)
+  # A word-final "ii" preceded by another word character randomly becomes
+  # "i" or stays "ii".
+  text = re.sub(ur"\Bii\b", pick((u"i", u"ii")),
+                text, flags=re.UNICODE)
+  # NOTE(review): the lookahead requires the next character to be something
+  # other than "i", and the pattern then consumes an "i" at that same
+  # position, so this substitution looks like it can never match. A
+  # lookbehind, (?<=...), was probably intended - confirm with the author.
+  text = re.sub(ur"(?=\B[^i])i\b", pick((u"i", u"ii")),
+                text, flags=re.UNICODE)
 
+  # Fold ă/â to plain a (and Ă/Â to A); î/Î are not touched here.
   text = re.sub(ur"ă|â", r"a", text)
   text = re.sub(ur"Ă|Â", r"A", text)
+  # The standalone word "Tu" becomes "U".
   text = re.sub(ur"\bTu\b", "U", text)
 
+  # A word-final run of <listed consonant>+"e" pairs keeps only every other
+  # character starting at index 0, i.e. the consonants; the e's are dropped.
   text = re.sub(ur"([cdlmnprstvCDLMNPRSTV]e)+\b",
-                lambda m: m.group(0)[0::2], text,
-                flags=re.UNICODE)
-  text = re.sub(ur"ca", r"k", text)
-  text = re.sub(ur"Ca", r"K", text)
-  text = re.sub(ur"c(?=[^ei])", r"k", text)
-  text = re.sub(ur"C(?=[^ei])", r"K", text)
+                lambda m: m.group(0)[0::2],
+                text, flags=re.UNICODE)
 
   print text