Commits

Anonymous committed 3229f70

Presort by key patch, originally from gpascale in https://github.com/basho/riak_search/pull/54

Comments (0)

Files changed (2)

apps/riak_search/src/riak_search_client.erl

 
     %% Searching...
     parse_query/2,
-    search/5,
+    search/6,
     search_fold/5,
-    search_doc/5,
+    search_doc/6,
 
     %% Indexing...
     index_doc/2,
 %% Run the Query, return the list of keys.
 %% Timeout is in milliseconds.
 %% Return the {Length, Results}.
-search(IndexOrSchema, QueryOps, QueryStart, QueryRows, Timeout) ->
+search(IndexOrSchema, QueryOps, QueryStart, QueryRows, PresortBy, Timeout) ->
     %% Execute the search.
     SearchRef1 = stream_search(IndexOrSchema, QueryOps),
 
                 end
         end,
     {ok, SearchRef2, {Results, _}} = fold_results(SearchRef1, Timeout, F, {[], 0}),
-    SortedResults = sort_by_score(SearchRef2, Results),
+
+    case PresortBy of
+        key ->
+            SortedResults = sort_by_key(Results);
+        score ->
+            SortedResults = sort_by_score(SearchRef2, Results);
+        _ ->
+            SortedResults = sort_by_score(SearchRef2, Results)
+    end,
+             
 
     %% Dedup, and handle start and max results. Return matching
     %% documents.
     {ok, _NewSearchRef, AccOut} = fold_results(SearchRef, Timeout, Fun, AccIn),
     AccOut.
 
-search_doc(IndexOrSchema, QueryOps, QueryStart, QueryRows, Timeout) ->
+search_doc(IndexOrSchema, QueryOps, QueryStart, QueryRows, PresortBy, Timeout) ->
     %% Get results...
-    {Length, Results} = search(IndexOrSchema, QueryOps, QueryStart, QueryRows, Timeout),
+    {Length, Results} = search(IndexOrSchema, QueryOps, QueryStart, QueryRows, PresortBy, Timeout),
     MaxScore = case Results of
                    [] ->
                        "0.0";
 get_scoring_props_1(_) ->
     [].
 
+sort_by_key(Results) ->
+      %% Order {Index, DocID, Props} results by DocID (ties fall back to Index): DocID is moved to the front so lists:sort/1 compares it first, then the original field order is restored.
+      %% The incoming props are discarded and every result gets a placeholder [{score, 0.0}], because the caller expects a score in this format. Could potentially squeeze out some perf by modifying the calling code to expect DocID first.
+      SortedList = lists:sort([{DocID, Index, [{score, 0.0}]} || {Index, DocID, _} <- Results]),
+      [{Index, DocID, Props} || {DocID, Index, Props} <- SortedList].
+
 %% Order Results by score. The query norm and term count are read from the
 %% #riak_search_ref{} record and handed to calculate_scores/3, which returns
 %% 4-tuples whose first element acts as the sort key. In the clause visible
 %% below that key is the negated score, so the ascending lists:sort/1 puts
 %% the highest score first. The key is dropped again before returning
 %% {Index, DocID, Props} tuples.
 sort_by_score(#riak_search_ref{querynorm=QNorm, termcount=TermCount}, Results) ->
     SortedResults = lists:sort(calculate_scores(QNorm, TermCount, Results)),
     [{Index, DocID, Props} || {_, Index, DocID, Props} <- SortedResults].
     [{-1 * Score, Index, DocID, NewProps}|calculate_scores(QueryNorm, NumTerms, Results)];
 calculate_scores(_, _, []) ->
     [].
+

apps/riak_solr/src/riak_solr_searcher_wm.erl

                 schema,
                 squery,
                 query_ops,
-                sort}).
+                sort,
+                presort
+}).
 
 %% Default result count. Its use is not visible in this hunk; presumably the
 %% number of rows returned when the request does not specify one - confirm.
 -define(DEFAULT_RESULT_SIZE, 10).
 %% Timeout passed to Client:search_doc by run_query. The search client
 %% documents its timeout as milliseconds, so this is 60 seconds.
 -define(DEFAULT_TIMEOUT, 60000).
                         {ok, QueryOps} = Client:parse_query(Schema, SQuery#squery.q),
                         {false, Req, State#state{schema=Schema, squery=SQuery, query_ops=QueryOps,
                                                  sort=wrq:get_qs_value("sort", "none", Req),
-                                                 wt=wrq:get_qs_value("wt", "standard", Req)}}
+                                                 wt=wrq:get_qs_value("wt", "standard", Req),
+                                                 presort=to_atom(string:to_lower(wrq:get_qs_value("presort", "score", Req)))}}
                     catch _ : Error ->
                         {true, riak_solr_error:log_error(Req, Error), State}
                     end;
     {riak_solr_output:xml_response(Schema, SortBy, ElapsedTime, SQuery, NumFound, MaxScore, Docs), Req, State}.
 
 %% Execute the already-parsed query held in the request state and time it.
 %% Returns {ElapsedTime, NumFound, MaxScore, Docs}, where ElapsedTime is the
 %% wall-clock duration of the search_doc call in whole milliseconds.
 %% Presort (taken from the state) is forwarded to search_doc, which uses it
 %% to choose between key ordering and score ordering of the results.
 run_query(#state{client=Client, schema=Schema, squery=SQuery,
-                 query_ops=QueryOps}) ->
+                 query_ops=QueryOps, presort=Presort}) ->
     #squery{query_start=QStart, query_rows=QRows}=SQuery,
 
     %% Run the query, bracketing it with timestamps.
     StartTime = erlang:now(),
-    {NumFound, MaxScore, Docs} = Client:search_doc(Schema, QueryOps, QStart, QRows, ?DEFAULT_TIMEOUT),
+    {NumFound, MaxScore, Docs} = Client:search_doc(Schema, QueryOps, QStart, QRows, Presort, ?DEFAULT_TIMEOUT),
     %% timer:now_diff/2 yields microseconds; divide by 1000 and round to get milliseconds.
     ElapsedTime = erlang:round(timer:now_diff(erlang:now(), StartTime) / 1000),
     {ElapsedTime, NumFound, MaxScore, Docs}.