snowball / example / stem.go

/* Example of using the Snowball stemmer.

This program reads a file, then prints "word -> stem(word)" for every word in the file.
*/
package main

import (
	"bytes"
	"flag"
	"fmt"
	"io/ioutil"
	"os"
	"regexp"

	"bitbucket.org/tebeka/snowball"
)

// main parses the command line, creates a stemmer for the language given by
// the -lang flag (default "english"), reads the file named by the single
// positional argument, and prints one "word -> stem" line per word found.
//
// Any failure (wrong argument count, stemmer creation error, unreadable
// file) is reported on stderr and the process exits with status 1.
func main() {
	flag.Usage = func() {
		fmt.Fprintf(os.Stderr, "usage: %s FILENAME\n", os.Args[0])
		flag.PrintDefaults()
	}
	lang := flag.String("lang", "english", "stemmer language")
	flag.Parse()

	if flag.NArg() != 1 {
		fmt.Fprintf(os.Stderr, "error: wrong number of arguments\n")
		// Show the expected invocation so the user can correct the call.
		flag.Usage()
		os.Exit(1)
	}

	stmr, err := snowball.New(*lang)
	if err != nil {
		fmt.Fprintf(os.Stderr, "error: %s\n", err)
		os.Exit(1)
	}

	data, err := ioutil.ReadFile(flag.Arg(0))
	if err != nil {
		fmt.Fprintf(os.Stderr, "error: can't open %s - %s\n", flag.Arg(0), err)
		os.Exit(1)
	}

	// Match runs of Unicode letters rather than ASCII letters only: the
	// stemmer language is selectable, and an ASCII-only class would split
	// words such as "été" or "über" at every accented letter.
	re := regexp.MustCompile(`\p{L}+`)

	for _, field := range re.FindAll(data, -1) {
		// Lower-case each word before stemming; the lowered form is also
		// what is printed on the left-hand side.
		word := string(bytes.ToLower(field))
		fmt.Printf("%s -> %s\n", word, stmr.Stem(word))
	}
}
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.