Commits

Ross Light committed 1ab0ec8

Fix behavior of tokenization on tags

  • Parent commits 4f615b9

Files changed (2)

File catalog/search/search.go

 type textSearch struct {
 	c    catalog.Catalog
 	i    map[string][]indexEntry
+	tags map[string][]string
 	list []string
 }
 
 	ts := &textSearch{
 		c:    cat,
 		i:    make(map[string][]indexEntry),
+		tags: make(map[string][]string),
 		list: names,
 	}
 	for _, n := range names {
 }
 
 func (ts *textSearch) searchToken(q token, results resultMap) {
-	tsi := ts.i[sanitizeTerm(string(q))]
-	if len(tsi) == 0 {
+	qr := fold([]rune(string(q)))
+	for _, ent := range ts.i[string(qr)] {
+		results.Get(ent.shortName).Relevance += ent.kind.Weight()
+	}
+	stripped := stripNonToken(qr)
+	if len(stripped) == len(qr) {
 		return
 	}
-	for _, ent := range tsi {
+	for _, ent := range ts.i[string(stripped)] {
 		results.Get(ent.shortName).Relevance += ent.kind.Weight()
 	}
 	// XXX(light): should results be normalized?
 }
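
searchToken now does a two-pass lookup: it first queries the index with the case-folded term, and only if stripping non-token runes actually shortens the term does it query again with the stripped form. A minimal sketch of that shape follows; foldTerm and stripTerm are illustrative stand-ins for the package's fold and stripNonToken, and entry weights are ignored.

    package main

    import (
        "fmt"
        "strings"
        "unicode"
    )

    // foldTerm and stripTerm are stand-ins for the package's fold and
    // stripNonToken; only the two-pass lookup shape matters here.
    func foldTerm(s string) string { return strings.ToLower(s) }

    func stripTerm(s string) string {
        return strings.Map(func(r rune) rune {
            if unicode.IsLetter(r) || unicode.IsDigit(r) {
                return r // keep token runes
            }
            return -1 // drop separators such as '-' or '.'
        }, s)
    }

    // lookup mirrors searchToken: exact folded term first, then the stripped
    // form, but only when stripping actually removed something.
    func lookup(index map[string][]string, query string) []string {
        q := foldTerm(query)
        hits := append([]string(nil), index[q]...)
        if s := stripTerm(q); s != q {
            hits = append(hits, index[s]...)
        }
        return hits
    }

    func main() {
        index := map[string][]string{"lang-c": {"go"}} // folded tag, as indexed
        fmt.Println(lookup(index, "Lang-C")) // [go]: exact folded term matches
        fmt.Println(lookup(index, "langc"))  // []: no reverse fallback to "lang-c"
    }

Because the fallback only fires when stripping changes the query, a bare "langc" no longer reaches the "lang-c" entries, which is what the updated test cases below expect.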
 
 func (ts *textSearch) searchTagAtom(q tagAtom, results resultMap) {
-	for _, ent := range ts.i[string(fold([]rune(string(q))))] {
-		if ent.kind == kindTag {
-			results.Put(&Result{ShortName: ent.shortName, Relevance: 1.0})
-		}
+	for _, sn := range ts.tags[string(fold([]rune(string(q))))] {
+		results.Put(&Result{ShortName: sn, Relevance: 1.0})
 	}
 }
 
-func sanitizeTerm(s string) string {
-	return string(stripNonToken(fold([]rune(s))))
-}
-
 func stripNonToken(r []rune) []rune {
 	// TODO(light): this function should have a better name
 
 	ts.index(sn, kindDescription, tokenize(fold([]rune(p.Description))))
 	for _, tag := range p.Tags {
 		t := fold([]rune(tag))
+		ts.indexTag(sn, t)
 		ts.index(sn, kindTag, [][]rune{t})
-		if parts := tokenize(t); len(parts) > 1 {
-			ts.index(sn, kindTagPart, parts)
-		}
-		t = stripNonToken(t)
-		ts.index(sn, kindTag, [][]rune{t})
+		ts.index(sn, kindTagPart, tokenize(t))
 	}
 	return nil
 }
 
+func (ts *textSearch) indexTag(sn string, tag []rune) {
+	stag := string(tag)
+	indexed := ts.tags[stag]
+	for _, indexedShortName := range indexed {
+		if indexedShortName == sn {
+			return
+		}
+	}
+	indexed = append(indexed, sn)
+	ts.tags[stag] = indexed
+}
+
 func (ts *textSearch) index(sn string, kind entryKind, words [][]rune) {
 	for _, w := range words {
 		if len(w) > 0 {

File catalog/search/search_test.go

 			[]string{"go"},
 		},
 		{
+			"Compiler",
+			mockCatalog{
+				"go": &catalog.Project{
+					ShortName:   "go",
+					Name:        "Go",
+					Tags:        catalog.TagSet{"compiler", "external", "lang-c", "lang-go", "language"},
+					Description: "Go is an open source programming environment that makes it easy to build simple, reliable, and efficient software.",
+				},
+			},
+			[]string{"go"},
+		},
+		{
 			"lang-c",
 			mockCatalog{
 				"go": &catalog.Project{
 			[]string{"go"},
 		},
 		{
-			// XXX(light): not sure whether this is desired, but documenting as a test case.
+			"langc",
+			mockCatalog{
+				"go": &catalog.Project{
+					ShortName:   "go",
+					Name:        "Go",
+					Tags:        catalog.TagSet{"compiler", "external", "lang-c", "lang-go", "language"},
+					Description: "Go is an open source programming environment that makes it easy to build simple, reliable, and efficient software.",
+				},
+			},
+			[]string{},
+		},
+		{
 			"tag:langc",
 			mockCatalog{
 				"go": &catalog.Project{
 					Description: "Go is an open source programming environment that makes it easy to build simple, reliable, and efficient software.",
 				},
 			},
-			[]string{"go"},
+			[]string{},
 		},
 		{
 			"programming",
 	b.SetBytes(n)
 }
 
-func TestSanitizeTerm(t *testing.T) {
+func TestStripNonToken(t *testing.T) {
 	tests := []struct {
 		term string
 		s    string
 	}{
 		{"", ""},
 		{"A", "A"},
-		{"a", "A"},
-		{"a.", "A"},
+		{"a", "a"},
+		{"a.", "a"},
 	}
 	for _, test := range tests {
-		s := sanitizeTerm(test.term)
+		s := string(stripNonToken([]rune(test.term)))
 		if s != test.s {
-			t.Errorf("sanitizeTerm(%q) = %q; want %q", test.term, s, test.s)
+			t.Errorf("stripNonToken(%q) = %q; want %q", test.term, s, test.s)
 		}
 	}
 }