Commits

Anonymous committed 4964693 Merge

Merge branch 'gpascale-presort-patch'

Comments (0)

Files changed (3)

apps/riak_search/src/riak_search_client.erl

     %% Searching...
     parse_query/2,
     search/5,
+    search/6,
     search_fold/5,
     search_doc/5,
+    search_doc/6,
 
     %% Indexing...
     index_doc/2,
 %% Timeout is in milliseconds.
 %% Return the {Length, Results}.
 search(IndexOrSchema, QueryOps, QueryStart, QueryRows, Timeout) ->
+    search(IndexOrSchema, QueryOps, QueryStart, QueryRows, score, Timeout).
+
+search(IndexOrSchema, QueryOps, QueryStart, QueryRows, PresortBy, Timeout)
+  when PresortBy == key; PresortBy == score ->
     %% Execute the search.
     SearchRef1 = stream_search(IndexOrSchema, QueryOps),
 
                 end
         end,
     {ok, SearchRef2, {Results, _}} = fold_results(SearchRef1, Timeout, F, {[], 0}),
-    SortedResults = sort_by_score(SearchRef2, Results),
+
+    case PresortBy of
+        key ->
+            SortedResults = sort_by_key(SearchRef2, Results);
+        _ ->
+            SortedResults = sort_by_score(SearchRef2, Results)
+    end,
+             
 
     %% Dedup, and handle start and max results. Return matching
     %% documents.
     AccOut.
 
 search_doc(IndexOrSchema, QueryOps, QueryStart, QueryRows, Timeout) ->
+    search_doc(IndexOrSchema, QueryOps, QueryStart, QueryRows, score, Timeout).
+
+search_doc(IndexOrSchema, QueryOps, QueryStart, QueryRows, PresortBy, Timeout)
+  when PresortBy == key; PresortBy == score ->
     %% Get results...
-    {Length, Results} = search(IndexOrSchema, QueryOps, QueryStart, QueryRows, Timeout),
+    {Length, Results} = search(IndexOrSchema, QueryOps, QueryStart, QueryRows, PresortBy, Timeout),
     MaxScore = case Results of
                    [] ->
                        "0.0";
 get_scoring_props_1(_) ->
     [].
 
-sort_by_score(#riak_search_ref{querynorm=QNorm, termcount=TermCount}, Results) ->
-    SortedResults = lists:sort(calculate_scores(QNorm, TermCount, Results)),
+sort_by_key(SearchRef, Results) ->          % presort results by document key instead of score
+    sort_results(SearchRef, Results, 3).    % 3 = DocID slot of the scored {NegScore, Index, DocID, Props} tuple; Index is not part of the key
+
+sort_by_score(SearchRef, Results) ->        % presort results by relevance score (the default ordering)
+    sort_results(SearchRef, Results, 1).    % 1 = negated-score slot, so an ascending sort puts the highest score first
+
+sort_results(#riak_search_ref{querynorm=QNorm, termcount=TermCount},  % score the results, sort on one tuple element, then drop the score
+             Results, Element) ->                                      % Element: 1 sorts by negated score, 3 sorts by DocID
+    SortedResults = lists:keysort(Element,                             % keysort is stable and compares only Element, so ties keep their incoming order
+                                  calculate_scores(QNorm, TermCount, Results)),
     [{Index, DocID, Props} || {_, Index, DocID, Props} <- SortedResults].  % strip the leading score, returning {Index, DocID, Props} triples
 
 calculate_scores(QueryNorm, NumTerms, [{Index, DocID, Props}|Results]) ->
     [{-1 * Score, Index, DocID, NewProps}|calculate_scores(QueryNorm, NumTerms, Results)];
 calculate_scores(_, _, []) ->
     [].
+

apps/riak_search/src/riak_search_utils.erl

 %% has some subtle bugs because messages are not tagged with
 %% Refs. This causes heisenbugs.
 ptransform(F, List) ->
+    %% Preserve input order: tag each element with its position, apply F
+    %% to the tagged pairs, then sort by position and strip the tags.
+    WrappedF = fun({Pos, X}) -> {Pos, F(X)} end,                 % applies F to the payload and carries the position through untouched
+    WrappedList = lists:zip(lists:seq(1, length(List)), List),   % [{1, E1}, {2, E2}, ...]
+
+    %% Use one worker per scheduler reported by the VM.
     Schedulers = erlang:system_info(schedulers),
-    ptransform(F, List, Schedulers).
+    Results = ptransform(WrappedF, WrappedList, Schedulers),     % presumably ptransform/3 may return results out of order -- its body is not shown here
+
+    %% Positions are unique, so sorting the {Pos, Value} pairs restores
+    %% the original order; then drop the positions.
+    [X || {_,X} <- lists:sort(Results)].
 
 %% Run a map operation in parallel.
 ptransform(F, List, NumProcesses) ->

apps/riak_solr/src/riak_solr_searcher_wm.erl

                 schema,
                 squery,
                 query_ops,
-                sort}).
+                sort,
+                presort
+}).
 
 -define(DEFAULT_RESULT_SIZE, 10).
 -define(DEFAULT_TIMEOUT, 60000).
                         {ok, QueryOps} = Client:parse_query(Schema, SQuery#squery.q),
                         {false, Req, State#state{schema=Schema, squery=SQuery, query_ops=QueryOps,
                                                  sort=wrq:get_qs_value("sort", "none", Req),
-                                                 wt=wrq:get_qs_value("wt", "standard", Req)}}
+                                                 wt=wrq:get_qs_value("wt", "standard", Req),
+                                                 presort=to_atom(string:to_lower(wrq:get_qs_value("presort", "score", Req)))}}
                     catch _ : Error ->
                         {true, riak_solr_error:log_error(Req, Error), State}
                     end;
     {riak_solr_output:xml_response(Schema, SortBy, ElapsedTime, SQuery, NumFound, MaxScore, Docs), Req, State}.
 
 run_query(#state{client=Client, schema=Schema, squery=SQuery,
-                 query_ops=QueryOps}) ->
+                 query_ops=QueryOps, presort=Presort}) ->
     #squery{query_start=QStart, query_rows=QRows}=SQuery,
 
     %% Run the query and time it. Returns
     %% {ElapsedTime, NumFound, MaxScore, Docs}.
     StartTime = erlang:now(),
-    {NumFound, MaxScore, Docs} = Client:search_doc(Schema, QueryOps, QStart, QRows, ?DEFAULT_TIMEOUT),
+    {NumFound, MaxScore, Docs} = Client:search_doc(Schema, QueryOps, QStart, QRows, Presort, ?DEFAULT_TIMEOUT),  % NOTE(review): Presort is built from the "presort" query-string value; if Client is riak_search_client, search_doc/6 only accepts key | score -- confirm other values are rejected upstream
     ElapsedTime = erlang:round(timer:now_diff(erlang:now(), StartTime) / 1000),  % now_diff is in microseconds; / 1000 gives milliseconds
     {ElapsedTime, NumFound, MaxScore, Docs}.
 
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search terms with spaces, e.g. /ssh pom.xml, to search for src/ssh/pom.xml.
Tip: Use the ↑ and ↓ arrow keys to navigate, and press Return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.