Anonymous avatar Anonymous committed 6aaf460

Adding my updates to js-mapreduce.org detailing named functions and pre-defined built ins

Comments (0)

Files changed (4)

    Assuming you have a working Erlang (R13B03 or later) installation,
    building Riak should be as simple as:
 
+
   $ cd $RIAK
   $ make all rel
 
-
 2.2 Starting Riak 
 ==================
 
    Once you have successfully built Riak, you can start the server with the
    following commands:
 
+
   $ cd $RIAK/rel/riak
   $ bin/riak start
 
-
    Now, verify that the server started up cleanly and is working:
 
    $ bin/riak-admin test
    Now that you have a functional server, let's try storing some data in
    it. First, start up an Erlang node using our embedded version of Erlang:
 
+
   $ erts-<vsn>/bin/erl -name riaktest -setcookie riak
   
   Eshell V5.7.4  (abort with ^G)
   (riaktest@example.com)1>
 
+   Now construct the node name of Riak server and make sure we can talk to it:
 
-   Now construct the node name of Riak server and make sure we can talk to it:
 
   (riaktest@example.com)4> RiakNode = riak_util:str_to_node(riak).
   
   (riaktest@example.com)2> net_adm:ping(RiakNode).
   pong
   (riaktest@example.com)2>
-
    
    We are now ready to start the Riak client:
 
+
   (riaktest@example.com)2> {ok, C} = riak:client_connect(RiakNode).
   {ok,{riak_client,'riak@example.com',<<4,136,81,151>>}}
 
+   Let's create a shopping list for bread at /groceries/mine:
 
-   Let's create a shopping list for bread at /groceries/mine:
 
   (riaktest@example.com)6> O0 = riak_object:new(<<"groceries">>, <<"mine">>, ["bread"]).
   O0 = riak_object:new(<<"groceries">>, <<"mine">>, ["bread"]).
          undefined}
   
    (riaktest@example.com)3> C:put(O0, 1).
-
     
     Now, read the list back from the Riak server and extract the value
 
+
   (riaktest@example.com)4> {ok, O1} = C:get(<<"groceries">>, <<"mine">>, 1).
   {ok,{r_object,<<"groceries">>,<<"mine">>,
             [{r_content,{dict,2,16,16,8,80,48,
    (riaktest@example.com)5> V = riak_object:get_value(O1).
    ["bread"]
 
+     Add milk to our list of groceries and write the new value to Riak:
 
-     Add milk to our list of groceries and write the new value to Riak:
 
   (riaktest@example.com)6> %% add milk to the list
   (riaktest@example.com)6> O2 = riak_object:update_value(O1, ["milk" | V]).
   (riaktest@example.com)7> C:put(O2, 1).
   ok
 
+     Finally, see what other keys are available in groceries bucket:
 
-     Finally, see what other keys are available in groceries bucket:
 
   (riaktest@example.com)8> C:list_keys(<<"groceries">>).
   {ok,[<<"mine">>]}
 
 
-
 3 Server Management 
 ~~~~~~~~~~~~~~~~~~~~
 
 
     To join a new Riak node to an existing cluster:
 
+
   $ bin/riak start # If a local server is not already running
   $ bin/riak-admin join <node in cluster>
 
-
     (Note that you must have a local node already running for this to work)
     
     To verify that the local Riak node is able to read/write data:

apps/js_data/src/mrstress.erl

     %M = <<"function(v, _, _) { var value = v[\"values\"][0][\"data\"]; return [parseInt(value)]; }">>,
     R = <<"function(v, _) { var sum = 0; v.forEach(function(x) { sum = sum + x; }); return [sum]; }">>,
     R1 = <<"function(values, _) { return values.map(function(v) { return parseInt(v); }); }">>,
-    Selected = select_inputs(Inputs, InputSize, []),
+    %Selected = select_inputs(Inputs, InputSize, []),
+    Selected = <<"stress">>,
     Start = erlang:now(),
-    case Client:mapred(Selected, [{map, {jsfun, <<"Riak.mapValues">>}, none, false},
-                                  {reduce, {jsanon, R1}, none, false},
-                                  {reduce, {jsanon, R}, none, true}]) of
+    case Client:mapred_bucket_stream(Selected, [{map, {jsfun, <<"Riak.mapValues">>}, none, false},
+                                                {reduce, {jsanon, R1}, none, false},
+                                                {reduce, {jsanon, R}, none, true}]) of
+%%     case Client:mapred(Selected, [{map, {jsfun, <<"Riak.mapValues">>}, none, false},
+%%                                   {reduce, {jsanon, R1}, none, false},
+%%                                   {reduce, {jsanon, R}, none, true}]) of
         {ok, [InputSize]} ->
             End = erlang:now(),
             stress_collector:log(timer:now_diff(End, Start), 0),

apps/riak/priv/mapred_builtins.js

       }
       return undefined;
     },
-    mapValues: function(value, key_data, arg) {
+    mapValues: function(value, keyData, arg) {
       var data = value["values"][0]["data"];
       if (Riak.getClassName(data) !== "Array") {
 	return [data];
       else {
 	return data;
       }},
-     mapValuesJson: function(value, key_data, arg) {
-      return [JSON.parse(value)];
+     mapValuesJson: function(value, keyData, arg) {
+      var newValues = Riak.mapValues(value, keyData, arg);
+      return newValues.map(function(nv) { return JSON.parse(nv); });
     },
     reduceSum: function(values, arg) {
       return [values.reduce(function(prev, curr, index, array) { return prev + curr; })];

doc/js-mapreduce.org

       the documents.
 
 * Query Syntax
-  
+
   Map/Reduce queries are issued over HTTP via a POST to the /mapred
   resource.  The body should be =application/json= of the form
   ={"inputs":[...inputs...],"query":[...query...]}=.
     A map phase should produce a list of results.  You will see errors
     if the output of your map function is not a list.  Return the
     empty list if your map function chooses not to produce output.
-    
 *** Reduce functions
 
     Reduce functions are passed two parameters: a list of inputs to
     "Following a link" means adding it to the output list of this
     phase.  The output of this phase is often most useful as input to
     a map phase, or another reduce phase.
+*** Using Named Functions
+    Riak can also use pre-defined named functions for map and reduce phase
+    processing. Named functions are invoked with the following form:
+
+#+BEGIN_EXAMPLE
+{"map": {"language": "javascript", "name": "Riak.mapValues", "keep": true}}
+
+{"reduce": {"language": "javascript", "name": "Riak.reduceSort", "keep": true}}
+#+END_EXAMPLE
+
+    The key =name= in both examples points to the name of the function to
+    be used. Riak expects the function to be defined prior to the execution
+    of the phase using it.
+**** Defining Named Functions
+     Defining a named function for Riak is a simple process.
+
+     1. Create a Javascript source file containing the definitions for
+	all the functions you'd like Riak to pre-define.
+     2. Edit the =app.config= of your Riak nodes and add the line
+	={js_source_dir, <path_to_source_dir>}= to
+	the =riak= configuration block. =<path_to_source_dir>= should point to
+	the directory where the file created in step #1 was saved.
+     3. Start using the functions in your map/reduce jobs.
+
+     When =js_source_dir= is enabled, Riak scans the directory for files
+     ending in =.js=. These files are then loaded into each Javascript VM
+     when it is created.
+
+     NOTE: Named functions must be available on all nodes in a cluster for
+     proper map/reduce results.
+**** Why Use Named Functions?
+     Named functions can be better than anonymous functions in certain
+     situations. Since named functions live in a file they can be
+     managed using source code control and deployed automatically using
+     tools such as Chef or Puppet. This can be a significant advantage
+     when administering large Riak clusters.
+
+     More important, though, is the fact that named functions execute much
+     faster than the equivalent anonymous functions. Invoking anonymous
+     functions requires Riak to ensure the anonymous function is defined
+     before invoking it. Named functions allow Riak to skip the definition
+     check and execute the function call immediately.
+
+     Also, since named functions do not change between invocations, Riak
+     is able to cache named function call results and short circuit the
+     call entirely. Currently, Riak performs this optimization on named
+     functions executed during map phases only.
+
+     In general, anonymous functions should be used during development and
+     named functions should be used for production deployments where
+     possible. This combination provides the maximum flexibility and
+     performance.
+**** Riak Supplied Functions
+     Riak supplies several named functions out of the box. These functions
+     are defined on a global Javascript object named =Riak= and should not
+     be modified or overridden. These functions, along with descriptions and
+     notes on their use, are described in the next two sections.
+***** Named Map Functions
+      + =Riak.mapValues(values, keyData, arg)=
+	Extracts and returns only the values contained in a bucket and key.
+
+      + =Riak.mapValuesJson(values, keyData, arg)=
+	Same as =mapValues= except the values are passed through a JSON
+	decoder first.
+***** Named Reduce Functions
+      + =Riak.reduceSum(values, arg)=
+	Returns the sum of =values=
+
+      + =Riak.reduceMin(values, arg)=
+	Returns the minimum value from =values=
+
+      + =Riak.reduceMax(values, arg)=
+	Returns the maximum value from =values=
+
+      + =Riak.reduceSort(values, arg)=
+	Returns the sorted version of =values=. If =arg= is the source to a Javascript
+	function, it will be eval'd and used to control the sort via =Array.sort=.
+
+      + =Riak.reduceLimit(values, arg)=
+	Returns the leftmost n members of values where =arg= is used as n.
+
+      + =Riak.reduceSlice(values, arg)=
+	Returns a slice of the values array. =arg= must be a two element array
+	containing the starting and ending positions for the slice.
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.