Commits

Chris Grubbs committed f446239

Naive first stab at differentiating ingredients from instructions in the Smitten Kitchen (sk) output.

  • Participants
  • Parent commits f545106

Comments (0)

Files changed (2)

File phlombay/analyzer/analyzer.py

+import json
+import nltk
+import sys
+
+
+def ingredient_special_cases(sentence):
+    """Return True if ``sentence`` matches a hand-written ingredient pattern.
+
+    The only special case so far is a parenthesised "optional" marker,
+    matched case-insensitively: any whitespace-separated word beginning
+    with ``(optional``, e.g. ``(optional)`` or ``(optional, but good)``.
+
+    :param sentence: one line of recipe text.
+    :returns: ``True`` when the marker is present, ``False`` otherwise.
+    """
+    # Prefix match rather than exact-token match: after splitting on
+    # whitespace the marker normally carries trailing punctuation
+    # ("(optional)", "(optional,"), which an equality test against
+    # '(optional' would never see.
+    return any(word.startswith('(optional')
+               for word in sentence.lower().split())
+
+
+if __name__ == '__main__':
+    # Load the recipes; the ``with`` block closes the file handle even if
+    # JSON decoding fails.
+    try:
+        with open('smittenkitchen.json', 'r') as recipe_file:
+            recipes = json.load(recipe_file)
+    except IOError:
+        sys.exit('Recipe file not found.')
+
+    # A line is reported as an ingredient when its first token is tagged
+    # 'CD' or 'LS' by nltk.pos_tag (presumably the Penn Treebank
+    # cardinal-number / list-item-marker tags -- confirm against the
+    # tagger in use), or when it hits one of the hand-written special cases.
+    ingredient_tags = {'CD', 'LS'}
+
+    for recipe in recipes:
+        # Only the first entry of recipe['content'] is analysed.
+        body = recipe['content'][0]
+        if not body:
+            continue
+        for sentence in body.split('\n'):
+            tokens = nltk.word_tokenize(sentence)
+            # Blank lines produce no tokens; skip them so the
+            # tagged_sentence[0] lookup below cannot raise IndexError.
+            if not tokens:
+                continue
+            tagged_sentence = nltk.pos_tag(tokens)
+            first_tag = tagged_sentence[0][1]
+            if first_tag in ingredient_tags or ingredient_special_cases(sentence):
+                print(u'This is an ingredient: {}'.format(sentence))
+            else:
+                print(u'This is not an ingredient: {}'.format(sentence))

File requirements.txt

-Django==1.4
+Django==1.4
+nltk
+numpy
+Scrapy==0.14.4