Commits

Jason Baldridge committed 74bb0f0

Moved scala.WordCount to example.WordCountScala, and ensured dependencies are included in CLASSPATH for fogbow script.

  • Participants
  • Parent commit c1e5858

Comments (0)

Files changed (5)

 
 To do Scala word count, run:
 
-$ fogbow run fogbow.scala.WordCount pg1661.txt wc_out_holmes_scala
+$ fogbow run fogbow.example.WordCountScala pg1661.txt wc_out_holmes_scala
 
 
 Using the Fogbow assembly jar and calling it with Hadoop
 
 To do Scala word count, run:
 
-$ hadoop jar $FOGBOW_DIR/target/fogbow-assembly.jar fogbow.scala.WordCount pg1661.txt wc_out_holmes_scala_assembly
+$ hadoop jar $FOGBOW_DIR/target/fogbow-assembly.jar fogbow.example.WordCountScala pg1661.txt wc_out_holmes_scala_assembly
 
 
 Now what?
 #!/bin/bash
 
-JARS=`echo $FOGBOW_DIR/lib/*.jar $FOGBOW_DIR/target/*.jar $HADOOP_DIR/*.jar | tr ' ' ':'`
+JARS=`echo $FOGBOW_DIR/lib/*.jar $FOGBOW_DIR/target/*.jar $HADOOP_DIR/*.jar $HADOOP_DIR/lib/*.jar | tr ' ' ':'`
+JARS_MANAGED=`find $FOGBOW_DIR/lib_managed -name '*.jar' -print | tr '\n' ':'`
+
 SCALA_LIB="$FOGBOW_DIR/project/boot/scala-2.9.0/lib/scala-library.jar"
 
-CP=$FOGBOW_DIR/target/classes:$JARS:$SCALA_LIB:$CLASSPATH
+CP=$FOGBOW_DIR/target/classes:$JARS:$JARS_MANAGED:$SCALA_LIB:$CLASSPATH
 
 if [ -z $JAVA_MEM_FLAG ] 
 then
 
 crossPaths := false
 
+retrieveManaged := true
+
 libraryDependencies += "org.apache.hadoop" % "hadoop-core" % "0.20.2"
 
 seq(sbtassembly.Plugin.assemblySettings: _*)

File src/main/scala/fogbow/example/WordCount.scala

+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package fogbow.example;
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.Path
+import org.apache.hadoop.io.{IntWritable,Text}
+import org.apache.hadoop.mapreduce.{Job,Mapper,Reducer}
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
+import org.apache.hadoop.util.GenericOptionsParser
+import scala.collection.JavaConversions._
+
+// Map phase: emits (token, 1) for every whitespace-separated token of the input line.
+class TokenizerMapper extends Mapper[Object, Text, Text, IntWritable] {
+  
+  // Reusable Writable instances — standard Hadoop pattern to avoid per-record allocation.
+  val one = new IntWritable(1)
+  // NOTE(review): could be a val — the Text object is only mutated via set(), never reassigned.
+  var word = new Text
+
+  override
+  def map (key: Object, value: Text, context: Mapper[Object,Text,Text,IntWritable]#Context) {
+    // NOTE(review): split("\\s") yields empty tokens for consecutive/leading whitespace
+    // (unlike the StringTokenizer used by the stock Java example) — confirm intended.
+    value.toString.split("\\s").foreach { token => word.set(token); context.write(word, one) }
+  }
+}
+
+// Reduce phase (also registered as the combiner): sums the counts emitted for each token.
+class IntSumReducer extends Reducer[Text,IntWritable,Text,IntWritable] {
+  
+  // Reusable output Writable, overwritten per key.
+  val result = new IntWritable()
+  
+  // foldLeft over the java.lang.Iterable works via the JavaConversions import above.
+  override
+  def reduce (key: Text, values: java.lang.Iterable[IntWritable], 
+              context: Reducer[Text,IntWritable,Text,IntWritable]#Context) {
+    result set(values.foldLeft(0) { _ + _.get })
+    context write(key, result)
+  }
+}
+
+// Driver: wires the mapper/combiner/reducer into a Hadoop Job.
+// Usage: WordCountScala <input path> <output path>
+object WordCountScala {
+
+  def main (args: Array[String]) {
+    val conf = new Configuration()
+    val job = new Job(conf, "word count")
+    // setJarByClass locates the containing jar so these classes ship to the cluster.
+    job.setJarByClass(classOf[TokenizerMapper])
+    job.setMapperClass(classOf[TokenizerMapper])
+    // The reducer doubles as combiner — safe because integer addition is associative.
+    job.setCombinerClass(classOf[IntSumReducer])
+    job.setReducerClass(classOf[IntSumReducer])
+    job.setOutputKeyClass(classOf[Text])
+    job.setOutputValueClass(classOf[IntWritable])
+    // NOTE(review): args are consumed directly — GenericOptionsParser is imported but
+    // never used, so generic Hadoop options (-D, -files, ...) are not stripped from args.
+    FileInputFormat.addInputPath(job, new Path(args(0)))
+    FileOutputFormat.setOutputPath(job, new Path(args(1)))
+    // Exit status 0 on success, 1 on failure, matching the stock Hadoop WordCount example.
+    System.exit(if(job.waitForCompletion(true)) 0 else 1)
+  }
+
+}

File src/main/scala/fogbow/scala/WordCount.scala

-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package fogbow.scala;
-
-import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.Path
-import org.apache.hadoop.io.{IntWritable,Text}
-import org.apache.hadoop.mapreduce.{Job,Mapper,Reducer}
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
-import org.apache.hadoop.util.GenericOptionsParser
-import scala.collection.JavaConversions._
-
-class TokenizerMapper extends Mapper[Object, Text, Text, IntWritable] {
-  
-  val one = new IntWritable(1)
-  var word = new Text
-
-  override
-  def map (key: Object, value: Text, context: Mapper[Object,Text,Text,IntWritable]#Context) {
-    value.toString.split("\\s").foreach { token => word.set(token); context.write(word, one) }
-  }
-}
-
-class IntSumReducer extends Reducer[Text,IntWritable,Text,IntWritable] {
-  
-  val result = new IntWritable()
-  
-  override
-  def reduce (key: Text, values: java.lang.Iterable[IntWritable], 
-              context: Reducer[Text,IntWritable,Text,IntWritable]#Context) {
-    result set(values.foldLeft(0) { _ + _.get })
-    context write(key, result)
-  }
-}
-
-object WordCount {
-
-  def main (args: Array[String]) {
-    val conf = new Configuration()
-    val job = new Job(conf, "word count")
-    job.setJarByClass(classOf[TokenizerMapper])
-    job.setMapperClass(classOf[TokenizerMapper])
-    job.setCombinerClass(classOf[IntSumReducer])
-    job.setReducerClass(classOf[IntSumReducer])
-    job.setOutputKeyClass(classOf[Text])
-    job.setOutputValueClass(classOf[IntWritable])
-    FileInputFormat.addInputPath(job, new Path(args(0)))
-    FileOutputFormat.setOutputPath(job, new Path(args(1)))
-    System.exit(if(job.waitForCompletion(true)) 0 else 1)
-  }
-
-}