Commits

Michael Granger committed 5e5e6e7

Adding support for Ruby-Stemmer

  • Participants
  • Parent commits 39f3367

Comments (0)

Files changed (4)

 linkparser -v1.1.3
 simplecov -v0.6.4
 wordnet -v1.0.0
+ruby-stemmer -v0.9.3
 	self.dependency 'hoe-deveiate', '~> 0.1', :development
 	self.dependency 'linkparser', '~> 1.1', :development
 	self.dependency 'wordnet', '~> 0.99', :development
+	self.dependency 'ruby-stemmer', '~> 0.9', :development
 
 	self.spec_extras[:licenses] = ["BSD"]
 	self.spec_extras[:rdoc_options] = ['-f', 'fivefish', '-t', 'Ruby Linguistics Toolkit']

lib/linguistics/en/stemmer.rb

+#!/usr/bin/env ruby
+
+require 'linguistics/en' unless defined?( Linguistics::EN )
+
+# Ruby-Stemmer support for the English-language Linguistics module. It
+# requires the Ruby-Stemmer gem to be installed; if it is not
+# installed, calling the functions defined by this file will re-raise
+# the LoadError that occurred when trying to load it.
+#
+#   # Test to be sure the Stemmer gem loaded okay.
+#   Linguistics::EN.has_stemmer?
+#   # => true
+#
+module Linguistics::EN::Stemmer
+
+	# Module instance variables -- copied over to the EN module when registered
+	@has_stemmer   = false
+	@stemmer_error = nil
+	@stemmer       = nil
+
+	# Load Ruby-Stemmer if possible, saving the error that occurs if anything goes wrong.
+	begin
+		require 'lingua/stemmer'
+		@has_stemmer = true
+	rescue LoadError => err
+		@stemmer_error = err
+	end
+
+
+	# Container for methods intended to extend the EN module as singleton methods.
+	module SingletonMethods
+
+		### Returns +true+ if Ruby-Stemmer was loaded okay
+		def has_stemmer? ; @has_stemmer; end
+
+		### If #has_stemmer? returns +false+, this can be called to fetch the
+		### exception which was raised when Ruby-Stemmer was loaded.
+		def stemmer_error ; @stemmer_error; end
+
+	end # module SingletonMethods
+	extend SingletonMethods
+
+
+	# Register this module to the list of modules to include
+	Linguistics::EN.register_extension( self )
+
+	#################################################################
+	###	M O D U L E   M E T H O D S
+	#################################################################
+
+	### The instance of the Lingua::Stemmer used for all Linguistics Stemmer
+	### functions.
+	def self::stemmer
+		raise self.stemmer_error unless self.has_stemmer?
+		@stemmer ||= Lingua::Stemmer.new
+	end
+
+
+	#################################################################
+	###	S T E M M E R   I N T E R F A C E
+	#################################################################
+
+	######
+	public
+	######
+
+
+	### Return the stem of the receiving word.
+	def stem
+		return Linguistics::EN::Stemmer.stemmer.stem( self.obj.to_s )
+	end
+
+end # module Linguistics::EN::Stemmer
+

spec/linguistics/en/stemmer_spec.rb.rb

+#!/usr/bin/env spec -cfs
+
+BEGIN {
+	require 'pathname'
+	basedir = Pathname.new( __FILE__ ).dirname.parent.parent.parent
+
+	libdir = basedir + "lib"
+
+	$LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
+	$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
+}
+
+require 'rspec'
+require 'spec/lib/helpers'
+
+require 'linguistics'
+require 'linguistics/en/stemmer'
+
+
+describe Linguistics::EN::Stemmer do
+
+	before( :all ) do
+		setup_logging( :debug )
+		Linguistics.use( :en )
+	end
+
+	after( :all ) do
+		reset_logging()
+	end
+
+
+	it "adds EN::Stemmer to the list of English language modules" do
+		# Use a matcher so the example can actually fail; a bare #include?
+		# call just returns a boolean that nothing checks.
+		Linguistics::EN::MODULES.should include( Linguistics::EN::Stemmer )
+	end
+
+
+	describe "on a system that has the 'ruby-stemmer' library installed" do
+
+		before( :each ) do
+			pending "installation of the ruby-stemmer library" unless
+				Linguistics::EN.has_stemmer?
+		end
+
+		it "can fetch the stem of a word" do
+			"communication".en.stem.should == 'communic'
+		end
+
+	end
+
+
+	describe "on a system that doesn't have the 'ruby-stemmer' library" do
+
+		before( :all ) do
+			# Remember the real load state so it can be restored afterwards.
+			@real_has_stemmer   = Linguistics::EN::Stemmer.instance_variable_get( :@has_stemmer )
+			@real_stemmer_error = Linguistics::EN::Stemmer.instance_variable_get( :@stemmer_error )
+
+			# If the system *does* have stemmer support, pretend it doesn't.
+			if Linguistics::EN.has_stemmer?
+				error = LoadError.new( "no such file to load -- lingua/stemmer" )
+				Linguistics::EN::Stemmer.instance_variable_set( :@has_stemmer, false )
+				Linguistics::EN::Stemmer.instance_variable_set( :@stemmer_error, error )
+			end
+		end
+
+		after( :all ) do
+			# Undo the faked "not installed" state so it doesn't leak into
+			# any specs that run after this group.
+			Linguistics::EN::Stemmer.instance_variable_set( :@has_stemmer, @real_has_stemmer )
+			Linguistics::EN::Stemmer.instance_variable_set( :@stemmer_error, @real_stemmer_error )
+		end
+
+		it "raises a LoadError when you try to use stemmer functionality" do
+			expect {
+				"communication".en.stem
+			}.to raise_error( LoadError, %r{lingua/stemmer}i )
+		end
+
+	end
+
+end
+