HTML in JSON output when encountering an error

Issue #1 new
Édouard Hue created an issue

When attempting to start reconciling data (a catalog of an entomology collection with tens of taxa to match against Wikidata), OpenRefine 2.6-beta.1 hangs while trying to guess the column type. After a long time (30 seconds or more), a stack trace appears in the console, indicating a JSON parsing error. It looks like the reconcile service returns a mix of HTML and JSON instead of a plain JSON document. I guess the HTML is produced by an uncaught PHP error, apparently caused by a timeout when querying WDQ.

As OpenRefine doesn't recover properly and keeps spinning indefinitely, this is a blocker. I am also filing an issue against OpenRefine so that it handles garbage in the input better.

00:51:07.167 [                  command] Exception caught (162529ms)
org.json.JSONException: <br />
<font size='1'><table class='xdebug-error xe-warning' dir='ltr' border='1' cellspacing='0' cellpadding='1'>
<tr><th align='left' bgcolor='#f57900' colspan="5"><span style='background-color: #cc0000; color: #fce94f; font-size: x-large;'>( ! )</span> Warning: file_get_contents(http://wdq.wmflabs.org/api?q=items%5B1320946%2C21352088%2C21361448%5D&amp;amp;props=31): failed to open stream: HTTP request failed!  in /data/project/wikidata-reconcile/public_html/index.php on line <i>147</i></th></tr>
<tr><th align='left' bgcolor='#e9b96e' colspan='5'>Call Stack</th></tr>
<tr><th align='center' bgcolor='#eeeeec'>#</th><th align='left' bgcolor='#eeeeec'>Time</th><th align='left' bgcolor='#eeeeec'>Memory</th><th align='left' bgcolor='#eeeeec'>Function</th><th align='left' bgcolor='#eeeeec'>Location</th></tr>
<tr><td bgcolor='#eeeeec' align='center'>1</td><td bgcolor='#eeeeec' align='center'>0.0029</td><td bgcolor='#eeeeec' align='right'>231720</td><td bgcolor='#eeeeec'>{main}(  )</td><td title='/data/project/wikidata-reconcile/public_html/index.php' bgcolor='#eeeeec'>../index.php<b>:</b>0</td></tr>
<tr><td bgcolor='#eeeeec' align='center'>2</td><td bgcolor='#eeeeec' align='center'>0.0182</td><td bgcolor='#eeeeec' align='right'>271000</td><td bgcolor='#eeeeec'>runQuery(  )</td><td title='/data/project/wikidata-reconcile/public_html/index.php' bgcolor='#eeeeec'>../index.php<b>:</b>255</td></tr>
<tr><td bgcolor='#eeeeec' align='center'>3</td><td bgcolor='#eeeeec' align='center'>0.3288</td><td bgcolor='#eeeeec' align='right'>314496</td><td bgcolor='#eeeeec'><a href='http://www.php.net/function.file-get-contents' target='_new'>file_get_contents</a>(  )</td><td title='/data/project/wikidata-reconcile/public_html/index.php' bgcolor='#eeeeec'>../index.php<b>:</b>147</td></tr>
</table></font>
<br />
<font size='1'><table class='xdebug-error xe-warning' dir='ltr' border='1' cellspacing='0' cellpadding='1'>
<tr><th align='left' bgcolor='#f57900' colspan="5"><span style='background-color: #cc0000; color: #fce94f; font-size: x-large;'>( ! )</span> Warning: file_get_contents(http://wdq.wmflabs.org/api?q=items%5B1479779%5D&amp;amp;props=31): failed to open stream: HTTP request failed!  in /data/project/wikidata-reconcile/public_html/index.php on line <i>147</i></th></tr>
<tr><th align='left' bgcolor='#e9b96e' colspan='5'>Call Stack</th></tr>
<tr><th align='center' bgcolor='#eeeeec'>#</th><th align='left' bgcolor='#eeeeec'>Time</th><th align='left' bgcolor='#eeeeec'>Memory</th><th align='left' bgcolor='#eeeeec'>Function</th><th align='left' bgcolor='#eeeeec'>Location</th></tr>
<tr><td bgcolor='#eeeeec' align='center'>1</td><td bgcolor='#eeeeec' align='center'>0.0029</td><td bgcolor='#eeeeec' align='right'>231720</td><td bgcolor='#eeeeec'>{main}(  )</td><td title='/data/project/wikidata-reconcile/public_html/index.php' bgcolor='#eeeeec'>../index.php<b>:</b>0</td></tr>
<tr><td bgcolor='#eeeeec' align='center'>2</td><td bgcolor='#eeeeec' align='center'>87.0809</td><td bgcolor='#eeeeec' align='right'>305016</td><td bgcolor='#eeeeec'>runQuery(  )</td><td title='/data/project/wikidata-reconcile/public_html/index.php' bgcolor='#eeeeec'>../index.php<b>:</b>255</td></tr>
<tr><td bgcolor='#eeeeec' align='center'>3</td><td bgcolor='#eeeeec' align='center'>87.2549</td><td bgcolor='#eeeeec' align='right'>329992</td><td bgcolor='#eeeeec'><a href='http://www.php.net/function.file-get-contents' target='_new'>file_get_contents</a>(  )</td><td title='/data/project/wikidata-reconcile/public_html/index.php' bgcolor='#eeeeec'>../index.php<b>:</b>147</td></tr>
</table></font>
{"q0":{"result":[{"id":"Q1320946","score":1,"match":true,"type":[],"name":"Charaxes jasius"},{"id":"Q21352088","score":0.33333333333333,"match":false,"type":[],"name":"Charaxes jasius jasius"},{"id":"Q21361448","score":0.25,"match":false,"type":[],"name":"Charaxes jasiuspagenstecheri"}],"total_search_results":7},"q1":{"result":[{"id":"Q503989","score":1,"match":true,"type":["Q16521"],"name":"Nymphalis antiopa"}],"total_search_results":1},"q2":{"result":[{"id":"Q156454","score":0.5,"match":false,"type":["Q16521"],"name":"Red Admiral"},{"id":"Q10088108","score":0.33333333333333,"match":false,"type":["Q4167836"],"name":"Categor\u00eda:Imaxes de Vanessa atalanta"},{"id":"Q1638584","score":0.25,"match":false,"type":["Q16521"],"name":"Vanessa vulcania"}],"total_search_results":3},"q3":{"result":[{"id":"Q158699","score":0.5,"match":false,"type":["Q16521"],"name":"european Peacock"},{"id":"Q10051246","score":0.33333333333333,"match":false,"type":["Q4167836"],"name":"Categor\u00eda:Imaxes de Inachis io de Galicia"}],"total_search_results":2},"q4":{"result":[{"id":"Q1462214","score":1,"match":true,"type":["Q16521"],"name":"Samia cynthia"}],"total_search_results":1},"q5":{"result":[{"id":"Q2191566","score":1,"match":true,"type":["Q16521"],"name":"Colias wiskotti"}],"total_search_results":1},"q6":{"result":[{"id":"Q531946","score":0.5,"match":false,"type":["Q16521"],"name":"Poplar Admiral"},{"id":"Q21366871","score":0.33333333333333,"match":false,"type":["Q16521"],"name":"Limenitis populi populi"},{"id":"Q21357825","score":0.25,"match":false,"type":["Q16521"],"name":"Limenitis populiszechwanica"}],"total_search_results":4},"q7":{"result":[{"id":"Q13580251","score":1,"match":true,"type":["Q16521"],"name":"Davidina armandi"}],"total_search_results":1},"q8":{"result":[{"id":"Q26764","score":1,"match":true,"type":["Q16521","Q16521"],"name":"Pieris brassicae"},{"id":"Q1235485","score":0.33333333333333,"match":false,"type":["Q16521"],"name":"Madeiran Large 
White"},{"id":"Q21349573","score":0.25,"match":false,"type":["Q16521"],"name":"Pieris brassicae brassicae"}],"total_search_results":12},"q9":{"result":[{"id":"Q1479779","score":0.5,"match":false,"type":[],"name":"Purple-Shot Copper"}],"total_search_results":1}} couldn't be parsed as JSON object
        at com.google.refine.util.ParsingUtilities.evaluateJsonStringToObject(ParsingUtilities.java:124)
        at com.google.refine.commands.recon.GuessTypesOfColumnCommand.guessTypes(GuessTypesOfColumnCommand.java:196)
        at com.google.refine.commands.recon.GuessTypesOfColumnCommand.doPost(GuessTypesOfColumnCommand.java:89)
        at com.google.refine.RefineServlet.service(RefineServlet.java:177)
        at javax.servlet.http.HttpServlet.service(HttpServlet.java:820)
        at org.mortbay.jetty.servlet.ServletHolder.handle(ServletHolder.java:511)
        at org.mortbay.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1166)
        at org.mortbay.servlet.UserAgentFilter.doFilter(UserAgentFilter.java:81)
        at org.mortbay.servlet.GzipFilter.doFilter(GzipFilter.java:132)
        at org.mortbay.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1157)
        at org.mortbay.jetty.servlet.ServletHandler.handle(ServletHandler.java:388)
        at org.mortbay.jetty.security.SecurityHandler.handle(SecurityHandler.java:216)
        at org.mortbay.jetty.servlet.SessionHandler.handle(SessionHandler.java:182)
        at org.mortbay.jetty.handler.ContextHandler.handle(ContextHandler.java:765)
        at org.mortbay.jetty.webapp.WebAppContext.handle(WebAppContext.java:418)
        at org.mortbay.jetty.handler.HandlerWrapper.handle(HandlerWrapper.java:152)
        at org.mortbay.jetty.Server.handle(Server.java:326)
        at org.mortbay.jetty.HttpConnection.handleRequest(HttpConnection.java:542)
        at org.mortbay.jetty.HttpConnection$RequestHandler.headerComplete(HttpConnection.java:923)
        at org.mortbay.jetty.HttpParser.parseNext(HttpParser.java:547)
        at org.mortbay.jetty.HttpParser.parseAvailable(HttpParser.java:212)
        at org.mortbay.jetty.HttpConnection.handle(HttpConnection.java:404)
        at org.mortbay.jetty.bio.SocketConnector$Connection.run(SocketConnector.java:228)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
        at java.lang.Thread.run(Unknown Source)