Commits

Russell Power committed a8891c0

Update documentation.

Comments (0)

Files changed (2)

 Starting your cluster::
   
   # list each machine and the number of cores to use
-  cluster = mycloud.Cluster([('machine1', 4), ('machine2', 4)]
-                            fs_prefix='/path/to/store/results')
+  cluster = mycloud.Cluster([('machine1', 4),
+                             ('machine2', 4)],
+                             tmp_prefix='/path/to/store/results')
 
 Invoke a function over a list of inputs::
   
 Use the MapReduce interface to easily handle processing of larger datasets::
   
   from mycloud.resource import CSV  
-  input_desc = [CSV('my_input_%d.csv' % i for i in range(100)]
-  output_desc = [CSV('my_output_file_%d.csv' % i) for i in range(1)]
+  input_desc = [CSV('/path/to/my_input_%d.csv' % i) for i in range(100)]
+  output_desc = [CSV('/path/to/my_output_file.csv')]
    
-  def map_identity(k, v):
-    yield (k, int(v[0]))
+  def map_identity(k, v, output):
+    output(k, int(v[0]))
   
-  def reduce_sum(k, values):
-    yield (k, sum(values))
+  def reduce_sum(k, values, output):
+    output(k, sum(values))
   
   mr = mycloud.mapreduce.MapReduce(cluster,
                                    map_identity,
   # list each machine and the number of cores to use
   cluster = mycloud.Cluster([('machine1', 4),
                              ('machine2', 4)],
-                            fs_prefix='/path/to/store/results')
+                             tmp_prefix='/path/to/store/results')
 
 Invoke a function over a list of inputs::
   
 Use the MapReduce interface to easily handle processing of larger datasets::
   
   from mycloud.resource import CSV  
-  input_desc = [CSV('my_input_%d.csv' % i for i in range(100)]
-  output_desc = [CSV('my_output_file.csv']
+  input_desc = [CSV('/path/to/my_input_%d.csv' % i) for i in range(100)]
+  output_desc = [CSV('/path/to/my_output_file.csv')]
    
-  def map_identity(k, v):
-    yield (k, int(v[0]))
+  def map_identity(k, v, output):
+    output(k, int(v[0]))
   
-  def reduce_sum(k, values):
-    yield (k, sum(values))
+  def reduce_sum(k, values, output):
+    output(k, sum(values))
   
   mr = mycloud.mapreduce.MapReduce(cluster,
                                    map_identity,
     author="Russell Power",
     author_email="power@cs.nyu.edu",
     license="BSD",
-    version="0.24",
+    version="0.25",
     url="http://rjpower.org/mycloud",
     package_dir={ '' : 'src' },
     packages=[ 'mycloud' ],