Commits

Michael Granger committed a63acb0

- Fixed up subversion constants
- Merged contributions from Francis Hwang <sera@fhwang.net>
(#camel_case_to_english, #english_to_camel_case, #proper_noun). Closes #2
- Set svn:keywords

Comments (0)

Files changed (10)

lib/linguistics.rb

 # 
 # == Version
 #
-#  $Id: linguistics.rb,v 1.6 2003/09/11 04:55:11 deveiant Exp $
+#  $Id$
 # 
 
 require 'linguistics/iso639'
 	### Class constants
 
 	# Subversion revision
-	SVNRev = /([\d\.]+)/.match( %q{$Rev: 1.6 $} )[1]
+	SVNRev = %q$Rev$
 
 	# Subversion ID
-	SVNid = %q$Id: linguistics.rb,v 1.6 2003/09/11 04:55:11 deveiant Exp $
+	SVNid = %q$Id$
 
 	# Language module implementors should do something like:
 	#   Linguistics::DefaultLanguages.push( :ja ) # or whatever

lib/linguistics/en.rb

 # 
 # == Copyright
 #
-# This module is copyright (c) 2003 The FaerieMUD Consortium. All rights
+# This module is copyright (c) 2003-2005 The FaerieMUD Consortium. All rights
 # reserved.
 # 
 # This module is free software. You may use, modify, and/or redistribute this
 #  $Id: en.rb,v 1.8 2003/09/14 10:47:12 deveiant Exp $
 # 
 
-require 'hashslice'
-
-
-module Linguistics
 
 ### This module contains English-language linguistics functions accessible from
 ### the Linguistics module, or as a standalone function library.
-module EN
+module Linguistics::EN
 
 	begin
 		require 'crosscase'
 	require 'linguistics/en/wordnet'
 	require 'linguistics/en/linkparser'
 
-	# CVS version tag
-	Version = /([\d\.]+)/.match( %q{$Revision: 1.8 $} )[1]
+	# Subversion revision
+	SVNRev = %q$Rev$
 
-	# CVS revision tag
-	Rcsid = %q$Id: en.rb,v 1.8 2003/09/14 10:47:12 deveiant Exp $
+	# Subversion revision tag
+	SVNId = %q$Id: en.rb,v 1.8 2003/09/14 10:47:12 deveiant Exp $
 
 	# Add 'english' to the list of default languages
 	Linguistics::DefaultLanguages.push( :en )
 	###	C O N S T A N T S
 	#################################################################
 
+	# :stopdoc:
+
 	#
 	# Plurals
 	#
 	}
 
 	# Ordinal word parts
-	Ordinals = {}
-	Ordinals[ *(%w(ty    one   two    three five  eight  nine  twelve )) ] =
-		      %w[tieth first second third fifth eighth ninth twelfth]
+	Ordinals = {
+		'ty' => 'tieth',
+		'one' => 'first',
+		'two' => 'second',
+		'three' => 'third',
+		'five' => 'fifth',
+		'eight' => 'eighth',
+		'nine' => 'ninth',
+		'twelve' => 'twelfth',
+	}
 	OrdinalSuffixes = Ordinals.keys.join("|") + "|"
 	Ordinals[""] = 'th'
 
 	}
 
 
+	#
+	# Title case
+	#
+
+	# "In titles, capitalize the first word, the last word, and all words in
+	# between except articles (a, an, and the), prepositions under five letters
+	# (in, of, to), and coordinating conjunctions (and, but). These rules apply
+	# to titles of long, short, and partial works as well as your own papers"
+	# (Anson, Schwegler, and Muth. The Longman Writer's Companion 240).
+	
+	# Build the list of exceptions to title-capitalization: words that stay
+	# lowercase unless they are the first or last word of the title.
+
+	# The articles named in the rule quoted above: a, an, and the.
+	Articles = %w[a an the]
+
+	# Prepositions of fewer than five letters.
+	ShortPrepositions = ["amid", "at", "but", "by", "down", "from", "in",
+		"into", "like", "near", "of", "off", "on", "onto", "out", "over",
+		"past", "save", "till", "to", "unto", "up", "upon", "with"]
+
+	# Conjunctions that are left uncapitalized.
+	CoordConjunctions = %w[and but as]
+
+	# Union of the three lists; Array#| drops words that appear in more than
+	# one of them (e.g. "but").
+	TitleCaseExceptions = Articles | ShortPrepositions | CoordConjunctions
+
+
+	# :startdoc:
 
 	#################################################################
 	###	" B A C K E N D "   F U N C T I O N S
 	#################################################################
 
+
 	###############
 	module_function
 	###############
 	###	P U B L I C   F U N C T I O N S
 	#################################################################
 
-	###############
-	module_function
-	###############
-
 	### Return the name of the language this module is for.
 	def language
 		"English"
 				strip
 		end
 	end
+	alias_method :NUMWORDS, :numwords
 
 
 	### Transform the given +number+ into an ordinal word. The +number+ object
 		return phrases.join( sep )
 	end
 
-end # module EN
-end # module Linguistics
+
+	### Convert a camel-cased +string+ (e.g., "camelCaseToEnglish") into
+	### lowercase, space-separated English ("camel case to english").
+	def camel_case_to_english( string )
+		# Put a space at every lowercase-to-uppercase transition, then
+		# decapitalize the whole phrase.
+		spaced = string.to_s.gsub( /([a-z])([A-Z])/, '\1 \2' )
+		return spaced.downcase
+	end
+
+
+	### Join the words of an English-language +string+ into a single
+	### camel-cased word by removing each run of whitespace and upcasing the
+	### lowercase letter that follows it.
+	def english_to_camel_case( string )
+		phrase = string.to_s
+
+		# Each match is "whitespace + one lowercase letter"; keep only the
+		# letter, upcased.
+		return phrase.gsub( /\s+([a-z])/ ) {|match| match[-1, 1].upcase }
+	end
+
+
+	### This method doesn't work quite right yet. It does okay for simple cases,
+	### but it misses more complex ones, e.g. 'as' used as a coordinating
+	### conjunction in "A Portrait of the Artist as a Young Man". Perhaps after
+	### there's a working (non-leaking) LinkParser for Ruby, this can be fixed
+	### up. Until then it'll just be undocumented.
+
+	### Returns the given +string+ as a title-cased phrase: the first and last
+	### words are always capitalized, and every word in between is capitalized
+	### unless it is listed in TitleCaseExceptions or is the trailing part of a
+	### contraction or possessive (the "s" in "arthur's"). Only the first letter
+	### of a word is changed; the case of its other letters is preserved. An
+	### empty +string+, or one containing no words, is returned unchanged.
+	def titlecase( string ) # :nodoc:
+
+		# Split on word-boundaries. The tokens are either words or the
+		# whitespace/punctuation between them.
+		words = string.to_s.split( /\b/ )
+
+		# A token is the tail of a contraction if it directly follows an
+		# apostrophe that itself follows a word. The index guard keeps the
+		# negative offsets from wrapping around to the end of the array.
+		contraction_tail = lambda {|i|
+			i >= 2 && words[i - 1] == "'" && /\w/.match( words[i - 2] )
+		}
+
+		# Indexes of the tokens that are real words
+		word_indexes = (0...words.length).select {|i|
+			/^\w+$/.match( words[i] ) && !contraction_tail.call( i )
+		}
+		return words.join if word_indexes.empty?
+
+		first_word = word_indexes.first
+		last_word = word_indexes.last
+
+		word_indexes.each do |i|
+			word = words[i]
+
+			# Skip exception-words, except in the first and last positions
+			unless i == first_word || i == last_word
+				next if TitleCaseExceptions.include?( word )
+			end
+
+			# Have to do it this way instead of capitalize! because that method
+			# also downcases all other letters.
+			word.gsub!( /^(\w)(.*)/ ) { $1.upcase + $2 }
+		end
+
+		return words.join
+	end
+
+
+	### Build the proper-noun form of +string+: every word that starts with a
+	### lowercase letter is capitalized, except for "and", "the", and "of".
+	### Words are delimited by runs of spaces and periods, which are kept as-is.
+	###
+	### Examples:
+	###   English.proper_noun("bosnia and herzegovina") ->
+	###     "Bosnia and Herzegovina"
+	###   English.proper_noun("macedonia, the former yugoslav republic of") ->
+	###     "Macedonia, the Former Yugoslav Republic of"
+	###   English.proper_noun("virgin islands, u.s.") ->
+	###     "Virgin Islands, U.S."
+	def proper_noun( string )
+		uncapitalized = %w{and the of}
+
+		# The capture group makes #split keep the separators in the result, so
+		# joining the tokens restores the original spacing and punctuation.
+		tokens = string.split( /([ .]+)/ )
+
+		cased = tokens.map do |token|
+			if /^[a-z]/.match( token ) && !uncapitalized.include?( token )
+				token.capitalize
+			else
+				token
+			end
+		end
+
+		return cased.join
+	end
+
+end # module Linguistics::EN
+
 
 ### Add the #separate and #separate! methods to Array.
 class Array # :nodoc:

lib/linguistics/en/infinitive.rb

 # 
 # == Copyright
 #
-# Copyright (c) 2003 The FaerieMUD Consortium. All rights reserved.
+# Copyright (c) 2003-2005 The FaerieMUD Consortium. All rights reserved.
 # 
 # This module is free software. You may use, modify, and/or redistribute this
 # software under the terms of the Perl Artistic License. (See
 #  $Id: infinitive.rb,v 1.2 2003/09/14 10:35:32 deveiant Exp $
 # 
 
-module Linguistics
-module EN
+module Linguistics::EN
+
+	# :stopdoc:
 
 	# Irregular words => infinitive forms
 	IrregularInfinitives = {
 	}
 	InfSuffixRuleOrder = InfSuffixRules.keys.sort_by {|rule| InfSuffixRules[rule][:order]}
 
+	# :startdoc:
 
 	### The object class for the result returned from calling
 	### Linguistics::EN::infinitive.
 		return Infinitive::new( word1, word2, suffix, rule )
 	end
 
-end # module EN
-end # module Linguistics
+end # module Linguistics::EN
 

redist/crosscase.rb

 #
 # == Version
 #
-#  $Id: crosscase.rb,v 1.3 2003/07/31 13:25:42 deveiant Exp $
+#  $Id$
 # 
 
 
 module CrossCase
 
 	### Versioning constants
-	Version = /([\d\.]+)/.match( %q{$Revision: 1.3 $} )[1]
-	Rcsid = %q$Id: crosscase.rb,v 1.3 2003/07/31 13:25:42 deveiant Exp $
+	Version = /([\d\.]+)/.match( %q{$Revision$} )[1]
+	Rcsid = %q$Id$
 
 	### The inclusion callback -- uses the Ouroboros trick to extend including
 	### classes.

redist/hashslice.rb

-#!/usr/bin/ruby
-#
-# = Ruby-HashSlice
-# 
-# Adds slicing to Ruby Hashes
-# 
-# == Synopsis
-# 
-#   require "hashslice"
-# 
-#   hash = {
-#     "this" => "is",
-#     "a" => "test",
-#     "hash" => "for",
-#     "the" => "synopsis"
-#   }
-#   hash[ "this", "hash" ]
-#   #  ==>["is", "for"]
-# 
-#   hash[ "a", "hash" ] = "something", "else"
-# 
-# == Description
-# 
-# This module adds slicing to Ruby hashes, similar to Perl's hash slices. If the
-# argument to (({Hash#[]})) is an (({Array})) object with one or more keys, the
-# return value will be an array of the corresponding values.
-# 
-# == Authors
-# 
-# * Michael Granger <ged@FaerieMUD.org>
-# 
-# == License
-#
-# Copyright (c) 2001, 2002 The FaerieMUD Consortium. All rights reserved.
-# 
-# This module is free software. You may use, modify, and/or redistribute this
-# software under the terms of the Perl Artistic License. (See
-# http://language.perl.com/misc/Artistic.html)
-# 
-
-class Hash
-
-	### Alias the regular methods out of the way so we can override 'em.
-	alias :__bracketBracket__ :[]
-	alias :__bracketBracketEq__ :[]=
-
-	### Add slicing to element reference operator
-	def []( *sliceKeys )
-		if sliceKeys.length == 1
-			return __bracketBracket__( sliceKeys[0] )
-		end
-		return sliceKeys.collect {|k| __bracketBracket__( k )}
-	end
-
-	### Add slicing to element assignment operator
-	def []=( *args )
-		if args.length <= 2
-			return __bracketBracketEq__( *args )
-		end
-		aVals = args.pop
-		aVals = [aVals] unless aVals.kind_of?( Array )
-		args.each_index {|i| __bracketBracketEq__( args[i], aVals[i] )}
-	end
-
-end
-
-

tests/en/lafcadio.tests.rb

+#!/usr/bin/ruby -w
+#
+# Unit test for additions donated by Francis Hwang, author of Lafcadio
+# $Id$
+#
+# Converted from ts_english.rb.
+# 
+
+unless defined? Linguistics::TestCase
+	testsdir = File::dirname( File::dirname(File::expand_path( __FILE__ )) )
+	$LOAD_PATH.unshift testsdir unless $LOAD_PATH.include?( testsdir )
+
+	require 'lingtestcase'
+end
+
+### This test case tests the camel-case, plural, and proper-noun functions of the English module, using strings and expected results from the Lafcadio additions.
+class LafcadioAdditionsTestCase < Linguistics::TestCase
+
+	Linguistics::use( :en )
+	include Linguistics::EN
+
+	CamelCaseStrings = [
+		["productCategory", 	"product category"],
+		["ProductCategory", 	"product category"],
+		["catalogOrder",		"catalog order"],
+		["product",				"product"],
+	]
+
+	ProperNouns = {
+		"albania" => "Albania",
+		"bosnia and herzegovina" => "Bosnia and Herzegovina",
+		"faroe islands" => "Faroe Islands",
+		"macedonia, the former yugoslav republic of" =>
+			"Macedonia, the Former Yugoslav Republic of",
+		"virgin islands, u.s." => "Virgin Islands, U.S.",
+	}
+
+	
+
+	#################################################################
+	###	T E S T S
+	#################################################################
+
+	def test_camelCaseToEnglish
+		printTestHeader "Lafcadio Additions: CamelCase to English"
+		res = nil
+
+		CamelCaseStrings.each do |src, dst|
+			assert_nothing_raised { res = src.en.camel_case_to_english }
+			assert_equal dst, res
+		end
+	end
+
+	### Pluralization already worked before the additions, but it is tested
+	### here as well, using more of the same kind of input.
+	def test_plural
+		assert_equal "product categories", "product category".en.plural
+		assert_equal "products", "product".en.plural
+		assert_equal 'addresses', 'address'.en.plural
+		assert_equal 'taxes', 'tax'.en.plural
+	end
+
+
+	### Check #proper_noun against each input => expected pair in ProperNouns.
+	def test_properNoun
+		printTestHeader "Lafcadio Additions: Proper Nouns"
+
+		ProperNouns.each do |key,expected|
+			input = key.dup # Work on an unfrozen copy, since hash keys are frozen
+			debugMsg "Trying %p, expect: %p" % [input, expected]
+			assert_equal expected, input.en.proper_noun
+		end
+	end
+
+end
+

tests/en/linkparser.tests.rb

 # Unit test for English link grammar
 # $Id: linkparser.tests.rb,v 1.2 2003/09/14 10:30:38 deveiant Exp $
 #
-# Copyright (c) 2003 The FaerieMUD Consortium.
+# Copyright (c) 2003-2005 The FaerieMUD Consortium.
 #
 
-if !defined?( Linguistics ) || !defined?( Linguistics::TestCase )
-	if File::exists?( "lib/linguistics.rb" )
-		require 'tests/lingtestcase'
-	else
-		require 'lingtestcase'
-	end
+unless defined? Linguistics::TestCase
+	testsdir = File::dirname( File::dirname(File::expand_path( __FILE__ )) )
+	$LOAD_PATH.unshift testsdir unless $LOAD_PATH.include?( testsdir )
+
+	require 'lingtestcase'
 end
 
 ### This test case tests the English language link grammar extension of

tests/en/titlecase.tests.rb

+#!/usr/bin/ruby -w
+#
+# Unit test for English language module's title case method 
+# $Id$
+#
+# Copyright (c) 2005 The FaerieMUD Consortium.
+# 
+
+unless defined? Linguistics::TestCase
+	testsdir = File::dirname( File::dirname(File::expand_path( __FILE__ )) )
+	$LOAD_PATH.unshift testsdir unless $LOAD_PATH.include?( testsdir )
+
+	require 'lingtestcase'
+end
+
+### This test case tests the #titlecase method of the english-language extension
+### for the Linguistics library.
+class TitleCaseTestCase < Linguistics::TestCase
+
+	Linguistics::use( :en )
+
+	# Expected titles, one per line, taken from the text that follows the
+	# final end-of-script marker in this file. File::read closes the file once
+	# its contents are read instead of leaving an open handle behind.
+	Titles = File::read( __FILE__ ).split("__END__").last.split("\n")
+
+	### Placeholder test; it only emits a debugging message saying why the
+	### titlecase tests are not run.
+	def test_nothing
+		debugMsg "Not tested, as the functionality it tests hasn't been released yet."
+	end
+
+	### Downcase each title, run it back through #titlecase, and expect the
+	### original title. The method name lacks the 'test_' prefix, which
+	### presumably keeps the test runner from picking it up -- confirm against
+	### the test harness.
+	def dont_test_titles
+		printTestHeader "TitleCase: Titles"
+		rval = nil
+
+		debugMsg "Titles = %p" % [Titles]
+
+		Titles.each do |title|
+			# Skip blank lines
+			next if !/\w/.match( title )
+
+			assert_nothing_raised do
+				rval = title.downcase.en.titlecase
+			end
+
+			assert_instance_of String, rval
+			assert_equal title, rval
+		end
+	end
+
+end
+
+
+__END__
+A Certain Kind of Longing
+A Connecticut Yankee in King Arthur's Court
+A Portrait of the Artist as a Young Man
+Alice in Wonderland
+Catcher in the Rye
+Crime and Punishment
+Death of a Salesman
+Dr. Jekyll and Mr. Hyde
+Gone With the Wind
+Gone but Not Forgotten
+Gulliver's Travels
+Last of the Mohicans
+Lord of the Flies
+Love in the Time of Cholera
+Maggie, A Girl of the Streets
+Notes from the Underground
+Of Mice and Men
+Pride and Prejudice
+The Adventures of Don Quixote
+The Good Earth
+The Heart of Darkness
+The Lord of the Rings
+The Old Man and the Sea
+Mrs. Frisby and the Rats of N.I.M.H.

tests/lingtestcase.rb

 # 
 # == Rcsid
 # 
-# $Id: lingtestcase.rb,v 1.3 2003/09/11 05:00:56 deveiant Exp $
+# $Id$
 # 
 # == Authors
 # 

tests/use.tests.rb

 #!/usr/bin/ruby -w
 #
 # Unit test for the 'use' function of the Linguistics module.
-# $Id: use.tests.rb,v 1.4 2003/09/11 05:02:04 deveiant Exp $
+# $Id$
 #
 # Copyright (c) 2003 The FaerieMUD Consortium.
 #