Commits

rsvp committed c4f76cc Draft

Append to banalset in keywords.

Comments (0)

Files changed (1)

 #!/usr/bin/env bash
-#              bash 4.1.5(1)     Linux Ubuntu 10.04           Date : 2012-03-25
+#              bash 4.1.5(1)     Linux Ubuntu 10.04           Date : 2012-07-11
 #
 # _______________|  keywords : extract keywords from text.
 #
 
 #  CHANGE LOG  LATEST version available:   https://bitbucket.org/rsvp/gists/src
 #
+#  2012-07-11  Add to banalset.
 #  2012-03-25  Commit keywords and colline to gists repository.
 #  2012-03-13  Add digest and hash function for top keywords.
 #                 Accept pdf file as argument (set pdfconverter variable).
 { python <<EOHereDoc
 
 banalset = frozenset( [
+"@_@",
+"__@__",
 "000",
 "00",
 "0",
 "anything",
 "apparently",
 "appear",
+"application",
+"applications",
 "approve",
 "are",
 "aren't",
 "arms",
 "around",
 "arrange",
+"array",
 "arrive",
 "arrived",
 "article",
 "average",
 "avoid",
 "away",
+"b",
 "baby",
 "back",
 "bad",
 "berkeley's",
 "beside",
 "best",
+"beta",
 "better",
 "between",
 "beyond",
 "board",
 "boat",
 "body",
+"bold",
+"boldsymbol",
 "bone",
 "bones",
 "book",
 "books",
+"booksources",
 "born",
 "both",
 "bottom",
 "cases",
 "cat",
 "catch",
+"category",
 "cattle",
 "caught",
 "cause",
+"cdot",
 "cell",
 "cells",
 "cent",
 "chord",
 "chose",
 "circle",
+"cite",
 "city",
 "claim",
 "class",
 "cry",
 "current",
 "cut",
+"d",
+"d0",
+"d1",
 "dad",
 "daily",
 "dance",
 "difference",
 "different",
 "difficult",
+"digital",
 "dir",
 "direct",
 "direction",
 "doesn",
 "doesn't",
 "dog",
+"doi",
 "doing",
 "dollar",
 "dollars",
 "done",
 "don't",
 "door",
+"dot",
+"dots",
 "double",
 "doubt",
 "down",
 "duck",
 "due",
 "during",
+"dx",
+"e",
+"e0",
 "each",
 "ear",
 "earlier",
 "echo",
 "edge",
 "edit",
+"edu",
 "education",
 "effect",
+"effects",
 "effective",
 "effort",
 "efforts",
 "email",
 "emails",
 "employ",
+"en",
 "end",
 "enemy",
 "energy",
 "extent",
 "eye",
 "eyes",
+"f",
 "face",
 "fact",
 "factories",
 "functions",
 "further",
 "future",
+"g",
 "game",
 "garden",
 "gas",
 "gone",
 "good",
 "got",
+"gov",
 "govern",
 "grand",
 "grass",
 "house",
 "how",
 "however",
+"htm",
 "html",
 "http",
 "https",
 "ice",
 "idea",
 "ideas",
+"identifier",
 "if",
 "i'll",
 "i'm",
 "increase",
 "increased",
 "indeed",
+"index",
 "indicate",
 "industrial",
 "industry",
 "knows",
 "knowledge",
 "known",
+"l",
 "lack",
 "lady",
 "lake",
 "master",
 "match",
 "material",
+"mathbf",
 "matter",
 "may",
 "maybe",
 "nations",
 "natural",
 "nature",
+"ncbi",
 "near",
 "nearly",
 "necessary",
 "next",
 "nice",
 "night",
+"nih",
 "nine",
+"nlm",
 "no",
 "noise",
 "none",
 "observation",
 "observations",
 "observe",
+"observed",
 "obtained",
 "obviously",
 "occur",
 "operate",
 "operating",
 "operation",
+"operation",
+"operator",
+"operatorname",
 "opinion",
 "opportunity",
 "opposite",
 "pair",
 "paper",
 "paragraph",
+"parameter",
+"parameters",
 "parent",
 "park",
 "part",
 "person",
 "personal",
 "persons",
+"phi",
 "phrase",
+"php",
 "pick",
 "picked",
 "picture",
 "play",
 "please",
 "plural",
+"pmid",
 "point",
 "points",
 "pole",
 "price",
 "print",
 "printed",
+"prior",
 "private",
 "probable",
 "probably",
 "provide",
 "provided",
 "public",
+"pubmed",
 "pull",
 "pulled",
 "purpose",
 "quiet",
 "quite",
 "quotient",
+"r",
 "race",
 "radio",
 "rail",
 "science",
 "scientists",
 "score",
+"script",
+"scriptstyle",
 "sea",
 "search",
 "season",
 "sent",
 "sentence",
 "separate",
+"sequence",
 "serious",
 "serve",
 "served",
 "signal",
 "silent",
 "silver",
+"sim",
 "similar",
 "simple",
 "simply",
 "teeth",
 "tell",
 "temperature",
+"template",
 "ten",
 "term",
 "terms",
 "that",
 "that's",
 "the",
+"theta",
 "their",
 "them",
 "themselves",
 "times",
 "tiny",
 "tire",
+"title",
 "to",
 "today",
 "together",
 "vote",
 "voted",
 "vowel",
+"w",
 "wait",
 "waiting",
 "walk",
 "wide",
 "wife",
 "wild",
+"wiki",
+"wikipedia",
 "will",
 "win",
 "wind",