Commits

Miki Tebeka committed d1b6dbe

stop words

Comments (0)

Files changed (1)

 	"os"
 	"regexp"
 	"sort"
+	"strings"
 )
 
 type Token struct {
-	tok string
+	tok   string
 	count int
 }
 
 
 	for _, question := range reply.Questions {
 		for _, tok := range re.FindAllString(question.Title, -1) {
+			tok = strings.ToLower(tok)
+			if stopWords[tok] {
+				continue
+			}
 			historam[tok] = historam[tok] + 1
 		}
 	}
 	sort.Sort(toks)
 	i = 0
 	for _, tok := range toks {
-		fmt.Printf("%v\n", tok)
-		i ++
+		fmt.Printf("%s: %d\n", tok.tok, tok.count)
+		i++
 		if i > 10 {
 			break
 		}
 
 }
 
+var stopWords = map[string]bool{
+	// English
+	"a": true, "about": true, "above": true,
+	"after": true, "again": true, "against": true, "all": true, "am": true, "an": true,
+	"and": true, "any": true, "are": true, "aren't": true, "as": true, "at": true,
+	"be": true, "because": true, "been": true, "before": true, "being": true,
+	"below": true, "between": true, "both": true, "but": true, "by": true, "can't": true,
+	"cannot": true, "could": true, "couldn't": true, "did": true, "didn't": true,
+	"do": true, "does": true, "doesn't": true, "doing": true, "don't": true,
+	"down": true, "during": true, "each": true, "few": true, "for": true, "from": true,
+	"further": true, "had": true, "hadn't": true, "has": true, "hasn't": true,
+	"have": true, "haven't": true, "having": true, "he": true, "he'd": true,
+	"he'll": true, "he's": true, "her": true, "here": true, "here's": true, "hers": true,
+	"herself": true, "him": true, "himself": true, "his": true, "how": true,
+	"how's": true, "i": true, "i'd": true, "i'll": true, "i'm": true, "i've": true,
+	"if": true, "in": true, "into": true, "is": true, "isn't": true, "it": true,
+	"it's": true, "its": true, "itself": true, "let's": true, "me": true, "more": true,
+	"most": true, "mustn't": true, "my": true, "myself": true, "no": true, "nor": true,
+	"not": true, "of": true, "off": true, "on": true, "once": true, "only": true,
+	"or": true, "other": true, "ought": true, "our": true, "ours": true,
+	"ourselves": true, "out": true, "over": true, "own": true, "same": true,
+	"shan't": true, "she": true, "she'd": true, "she'll": true, "she's": true,
+	"should": true, "shouldn't": true, "so": true, "some": true, "such": true,
+	"than": true, "that": true, "that's": true, "the": true, "their": true,
+	"theirs": true, "them": true, "themselves": true, "then": true, "there": true,
+	"there's": true, "these": true, "they": true, "they'd": true, "they'll": true,
+	"they're": true, "they've": true, "this": true, "those": true, "through": true,
+	"to": true, "too": true, "under": true, "until": true, "up": true, "very": true,
+	"was": true, "wasn't": true, "we": true, "we'd": true, "we'll": true, "we're": true,
+	"we've": true, "were": true, "weren't": true, "what": true, "what's": true,
+	"when": true, "when's": true, "where": true, "where's": true, "which": true,
+	"while": true, "who": true, "who's": true, "whom": true, "why": true, "why's": true,
+	"with": true, "won't": true, "would": true, "wouldn't": true, "you": true,
+	"you'd": true, "you'll": true, "you're": true, "you've": true, "your": true,
+	"yours": true, "yourself": true, "yourselves": true,
+
+	// Go specific
+	"go": true, "golang": true}