Commits

Lars Yencken committed 055e801

Fixes #2, implementing Q-learning update for nodes visited.

Comments (0)

Files changed (7)

simsearch/media/js/search.js

 function initLookup(pivotKanjiVal) {
     if (currentIndex != null) {
         var previousKanji = historyStore[currentIndex].pivotKanji;
-        var newDoc = loadJSONDoc(
-                g_pivotPath,
-                {pivot: pivotKanjiVal, previous: previousKanji}
-            );
+        var newDoc = loadJSONDoc(g_pivotPath + pivotKanjiVal + "/", {});
     } else {
-        var newDoc = loadJSONDoc(g_pivotPath, {pivot: pivotKanjiVal});
+        var newDoc = loadJSONDoc(g_pivotPath + pivotKanjiVal + "/", {});
     }
 
     var success = function(obj) {
     var lookupPlane = getLookupPlane();
     var pivotLoc = toCornerLoc(lookupPlane.center);
     var pivotKanji = historyStore[currentIndex].pivot_kanji;
+    var path = ""
+    for (var i = 0; i < historyStore.length; i++) {
+        path += historyStore[i]['pivot_kanji']
+    }
+
     newPivot = DIV(
             {
                 id: "pivotKanji",
                 style: locToStyle(pivotLoc)
             }, 
             A(
-                {href: g_translatePath + pivotKanji + "/"},
+                {href: g_translatePath + pivotKanji + "/?path=" + path},
                 pivotKanji
             )
         );

simsearch/search/models.py

 class Neighbour(mongoengine.EmbeddedDocument):
     "A weighted graph edge."
     kanji = mongoengine.StringField(max_length=1)
-    weight = mongoengine.FloatField(min_value=0.0, max_value=1.0)
+    weight = mongoengine.FloatField(min_value=0.0)
+
+    def __cmp__(self, rhs):
+        "Orders neighbours by their edge weight."
+        return cmp(self.weight, rhs.weight)
+
+    def __unicode__(self):
+        "Represents the edge by the kanji it stores."
+        return self.kanji
 
 class Node(mongoengine.Document):
     """
             Neighbour))
     n_updates = mongoengine.IntField(default=0, min_value=0)
 
+    def at(self, kanji):
+        "Gets the neighbour described by the given kanji."
+        for neighbour in self.neighbours:
+            if neighbour.kanji == kanji:
+                return neighbour
+
+        raise KeyError(kanji)
+
     @classmethod
     def build(cls, cache=None):
         "Builds the initial graph for Q learning."
         db = cls.objects._collection
         return set(r['_id'] for r in db.find({}, fields=['_id']))
 
+    @classmethod
+    def update(cls, path):
+        nodes = cls.objects.filter(pivot__in=list(path))
+        if len(nodes) != len(path):
+            raise ValueError('path not found in database')
+
+        # cache Q(s, a) for the subgraph we're interested in
+        q = cls._cache_subgraph(nodes)
+
+        # Calculate Q'(s, a) in reverse order along the path
+        # Q'(s, a) = (1 - A(s))Q(s, a) + A(s)*(r(a) + G * max_a Q(s', a))
+        gamma = settings.UPDATE_GAMMA
+        for i in xrange(len(path) - 2, -1, -1):
+            s = path[i]
+            q_s = q[s]
+            alpha = 1.0 / (4.0 + 0.5 * q_s.n_updates)
+            
+            # update very action available from state s
+            for a in sorted(q_s.neighbours, key=lambda n: n.weight,
+                    reverse=True):
+                q_old = a.weight
+
+                r_a = (1 if a.kanji == path[-1] else 0)
+                q_opt = r_a + gamma * max(q[a.kanji].neighbours).weight
+                
+                a.weight = (1.0 - alpha) * q_old + alpha * q_opt
+                print u"Q(%s, %s): %.02f --> %0.02f" % (s, a.kanji, q_old,
+                        a.weight)
+
+            q_s.n_updates += 1
+            q_s.save()
+
+    @classmethod
+    def _cache_subgraph(cls, nodes):
+        q = {}
+        missing_neighbours = set()
+        for node in nodes:
+            q[node.pivot] = node
+            missing_neighbours.update(n.kanji for n in node.neighbours)
+        missing_neighbours.discard(node.pivot for node in nodes)
+
+        extra_nodes = cls.objects.filter(pivot__in=missing_neighbours)
+        if len(extra_nodes) != len(missing_neighbours):
+            raise ValueError('cannot cache subgraph -- neighbours missing')
+
+        for node in extra_nodes:
+            q[node.pivot] = node
+
+        return q
+
+    def __unicode__(self):
+        "Represents the node by its pivot."
+        return self.pivot
+
 class Trace(mongoengine.Document):
     "A search path through the graph, as taken by a user."
     ip_address = mongoengine.StringField(max_length=15)

simsearch/search/templates/search/index.html

 {% extends "base.html" %}
 
+{% block headers %}
+  <link href="{{MEDIA_URL}}css/lookup.css" rel="stylesheet" type="text/css">
+  <script type="text/javascript" src="{{MEDIA_URL}}js/MochiKit.js"></script>
+  <script type="text/javascript">
+    var g_pivotPath = "{% url search_json %}";
+    var g_translatePath = "{% url search_target %}";
+  </script>
+  <script type="text/javascript" src="{{MEDIA_URL}}js/search.js"></script>
+{% endblock %}
+
+{% block body_tags %}onload="initInterface()"{% endblock %}
+
 {% block content %}
-<h1>Kanji similarity search</h1>
+  <div id="lookupBorder" style="position:absolute;top:0;bottom:0;left:0;right:0;width:80%; height:80%; margin:auto; opacity:0.0;">
+  </div>
+  
+  <!-- Add a hidden dummy pivot to start with, to determine size -->
+  <div style="position:absolute; top:30px; left:30px; opacity:0.0;" id="pivotKanji">鮨</div>
+  
+  <!-- Add hidden navigation links -->
+  <a href="javascript:;"
+    onmouseover="roll('backControl', '{{MEDIA_URL}}img/lookup_back_hover.png')"
+    onmouseout="roll('backControl', '{{MEDIA_URL}}img/lookup_back_static.png')"
+    onclick="previousPivot()"
+    >
+    <div style="opacity:0; position:absolute;" id="backControl">
+      <img src="{{MEDIA_URL}}img/lookup_back_static.png" name="backControl">
+    </div>
+  </a>
+  <a href="javascript:;"
+    onmouseover="roll('resetControl', '{{MEDIA_URL}}img/lookup_reset_hover.png')"
+    onmouseout="roll('resetControl', '{{MEDIA_URL}}img/lookup_reset_static.png')"
+    onclick="switchState('seeding')"
+    >
+    <div style="opacity:0; position:absolute;" id="resetControl">
+      <img src="{{MEDIA_URL}}img/lookup_reset_static.png" name="resetControl">
+    </div>
+  </a>
+  <a href="javascript:;"
+    onmouseover="roll('forwardControl', '{{MEDIA_URL}}img/lookup_forward_hover.png')"
+    onmouseout="roll('forwardControl', '{{MEDIA_URL}}img/lookup_forward_static.png')"
+    onclick="nextPivot()"
+    >
+    <div style="opacity:0; position:absolute;" id="forwardControl">
+      <img src="{{MEDIA_URL}}img/lookup_forward_static.png" name="forwardControl">
+    </div>
+  </a>
 
-<form method="GET">
-  <input type="text" value="{{query}}" name="query" />
-  <input type="submit" value="Search" />
-</form>
+  <!-- Add a simple submission form -->
+  <form name="seedForm" onsubmit="return submitSeed()">
+    <div id="seedLookup">
+      <input type="text" name="seedKanji" size="10"/>
+      <a href="javascript:;" onclick="submitSeed()">
+        <img src="{{MEDIA_URL}}img/magnifier.png" name="magnifier">
+      </a>
+    </div>
+  </form>
 
-{% if messages %}
-<ul class="messages">
-  {% for message in messages %}
-  <li{% if message.tags %} class="{{message.tags}}"{% endif %}>{{message}}</li>
-  {% endfor %}
-</ul>
-{% endif %}
+  <div class="error" id="errorMessage" style="opacity:0.00"></div>
 
-{% if results %}
-<hr/>
-<table>
-  <thead>
-    <tr>
-      <th>Score</th><th>Neighbour</th><th>Translate</th>
-    </tr>
-  </thead>
-  <tbody>
-  {% for result in results %}
-  <tr>
-    <td>{{result.weight|floatformat:3}}</td>
-    <td>
-      <a href="?query={{result.kanji}}&trace={{trace}}">{{result.kanji}}</a>
-    </td>
-    <td>
-      <a
-      href="http://kub.gakusha.info/kanji/translate/?kanji={{result.kanji}}">translate</a>
-    </td>
-  </tr>
-  {% endfor %}
-  </tbody>
-</table>
-{% endif %}
+  <p id="copy">
+    Visual kanji search (<a
+    href="http://bitbucket.org/lars512/simsearch/">r{{revision.number}}:{{revision.short}}</a>)
+  </p>
 {% endblock %}

simsearch/search/templates/search/old.html

-{% extends "base.html" %}
-
-{% block headers %}
-  <link href="{{MEDIA_URL}}css/lookup.css" rel="stylesheet" type="text/css">
-  <script type="text/javascript" src="{{MEDIA_URL}}js/MochiKit.js"></script>
-  <script type="text/javascript">
-    var g_pivotPath = "{% url old_search_xhr %}";
-    var g_translatePath = "/translate/";
-  </script>
-  <script type="text/javascript" src="{{MEDIA_URL}}js/search.js"></script>
-{% endblock %}
-
-{% block body_tags %}onload="initInterface()"{% endblock %}
-
-{% block content %}
-  <div id="lookupBorder" style="position:absolute;top:0;bottom:0;left:0;right:0;width:80%; height:80%; margin:auto; opacity:0.0;">
-  </div>
-  
-  <!-- Add a hidden dummy pivot to start with, to determine size -->
-  <div style="position:absolute; top:30px; left:30px; opacity:0.0;" id="pivotKanji">鮨</div>
-  
-  <!-- Add hidden navigation links -->
-  <a href="javascript:;"
-    onmouseover="roll('backControl', '{{MEDIA_URL}}img/lookup_back_hover.png')"
-    onmouseout="roll('backControl', '{{MEDIA_URL}}img/lookup_back_static.png')"
-    onclick="previousPivot()"
-    >
-    <div style="opacity:0; position:absolute;" id="backControl">
-      <img src="{{MEDIA_URL}}img/lookup_back_static.png" name="backControl">
-    </div>
-  </a>
-  <a href="javascript:;"
-    onmouseover="roll('resetControl', '{{MEDIA_URL}}img/lookup_reset_hover.png')"
-    onmouseout="roll('resetControl', '{{MEDIA_URL}}img/lookup_reset_static.png')"
-    onclick="switchState('seeding')"
-    >
-    <div style="opacity:0; position:absolute;" id="resetControl">
-      <img src="{{MEDIA_URL}}img/lookup_reset_static.png" name="resetControl">
-    </div>
-  </a>
-  <a href="javascript:;"
-    onmouseover="roll('forwardControl', '{{MEDIA_URL}}img/lookup_forward_hover.png')"
-    onmouseout="roll('forwardControl', '{{MEDIA_URL}}img/lookup_forward_static.png')"
-    onclick="nextPivot()"
-    >
-    <div style="opacity:0; position:absolute;" id="forwardControl">
-      <img src="{{MEDIA_URL}}img/lookup_forward_static.png" name="forwardControl">
-    </div>
-  </a>
-
-  <!-- Add a simple submission form -->
-  <form name="seedForm" onsubmit="return submitSeed()">
-    <div id="seedLookup">
-      <input type="text" name="seedKanji" size="10"/>
-      <a href="javascript:;" onclick="submitSeed()">
-        <img src="{{MEDIA_URL}}img/magnifier.png" name="magnifier">
-      </a>
-    </div>
-  </form>
-
-  <div class="error" id="errorMessage" style="opacity:0.00"></div>
-
-  <p id="copy">
-    Visual kanji search (<a
-    href="http://bitbucket.org/lars512/simsearch/">r{{revision.number}}:{{revision.short}}</a>)
-  </p>
-{% endblock %}

simsearch/search/urls.py

 from django.conf.urls.defaults import patterns, url
 
 urlpatterns = patterns('simsearch.search.views',
-    url(r'^raw/$',      'raw_search',       name='raw_search'),
-    url(r'^$',          'old_search',       name='old_search'),
-    url(r'^old/xhr/$',  'old_search_xhr',   name='old_search_xhr'),
+    url(r'^$',                      'search',       name='search_index'),
+    url(r'^xhr/$',                  'search_json',  name='search_json'),
+    url(r'^xhr/(?P<pivot>.*)/$',    'search_json',  name='search_json_kanji'),
+    url(r'^target/$',               'translate',    name='search_target'),
+    url(r'^target/(?P<kanji>.*)/$', 'translate',    name='search_target_kanji'),
 )
 
 # vim: ts=4 sw=4 sts=4 et tw=78:

simsearch/search/views.py

 #
 
 """
+Views for the search app.
 """
 
 from django.shortcuts import render_to_response
 from django.template import RequestContext
-from django.contrib import messages
 from django.utils import simplejson
-from django.http import HttpResponse
+from django.http import HttpResponse, Http404, HttpResponseRedirect
 from django.conf import settings
+from django.core.urlresolvers import reverse
 
 from cjktools import scripts
-from mongoengine.queryset import DoesNotExist
 
 import models
 
-def raw_search(request):
-    context = {}
-    kanji = request.GET.get('query') or ''
-    trace = request.GET.get('trace', '') + kanji
-    context['query'] = kanji
-    context['trace'] = trace
-
-    if kanji:
-        if _is_valid_query(kanji):
-            try:
-                node = models.Node.objects.get(pivot=kanji)
-                context['results'] = node.neighbours
-            except DoesNotExist:
-                messages.add_message(request, messages.INFO,
-                        'Sorry, no matching results for this query.')
-
-        else:
-            messages.add_message(request, messages.WARNING,
-                    'The query should be a single kanji only.')
-
-    return render_to_response('search/index.html', context,
+def search(request):
+    "Renders the initial search display."
+    return render_to_response('search/index.html', {},
             context_instance=RequestContext(request))
 
-def old_search(request):
-    return render_to_response('search/old.html', {},
-            context_instance=RequestContext(request))
+def translate(request, kanji=None):
+    "Updates the query model before redirecting to the real translation."
+    kanji = kanji or request.GET.get('kanji')
+    if not _is_kanji(kanji):
+        raise Http404
 
-def old_search_xhr(request):
-    pivot = request.GET.get('pivot')
-    neighbours = [n.kanji for n in models.Node.objects.get(
-            pivot=pivot).neighbours][:settings.N_NEIGHBOURS_RECALLED]
+    path = request.GET.get('path')
+    if path and len(path) > 1 and all(map(_is_kanji, path)) \
+            and path.endswith(kanji):
+        models.Node.update(path)
+
+    return HttpResponseRedirect(reverse('translate_kanji', args=[kanji]))
+
+def search_json(request, pivot=None):
+    "Returns the search display data as JSON."
+    pivot = pivot or request.GET.get('pivot')
+    node = models.Node.objects.get(pivot=pivot)
+    neighbours = [n.kanji for n in sorted(node.neighbours, reverse=True)]
+    neighbours = neighbours[:settings.N_NEIGHBOURS_RECALLED]
+
     response_dict = {
                 'pivot_kanji': pivot,
                 'tier1': neighbours[:4],
             mimetype='application/javascript',
         )
 
-def _is_valid_query(kanji):
+def _is_kanji(kanji):
+    "Checks that the argument has length one and is of the kanji script."
     return len(kanji) == 1 and scripts.script_type(kanji) == \
             scripts.Script.Kanji
 
 # vim: ts=4 sw=4 sts=4 et tw=78:
-

simsearch/settings.py

 
 N_NEIGHBOURS_RECALLED = 15
 
+# Discount factor (gamma) for the Q-learning update: the tradeoff between
+# Pr(a|s) and the likelihood of reaching a further target from s'
+UPDATE_GAMMA = 0.7
+
 # Local time zone for this installation. Choices can be found here:
 # http://en.wikipedia.org/wiki/List_of_tz_zones_by_name
 # although not all choices may be available on all operating systems.