# orange-text/_text/widgets/OWTextDistance.py

"""
<name>Distance</name>
<description>Computes distances between documents.</description>
<icon>icons/TextDistance.svg</icon>
<contact></contact> 
<priority>2200</priority>
"""

from __future__ import absolute_import

from Orange.OrangeWidgets.OWWidget import *
from Orange.OrangeWidgets import OWGUI

from .. import orngText

class OWTextDistance(OWWidget):
    """Widget that computes a cosine-based matrix between documents.

    Input:  "Example Table"   -- a bag-of-words table whose words are stored
                                 as meta attributes under orngText.TEXTMETAID.
    Output: "Distance Matrix" -- the result of orange.textCos with the input
                                 table attached as its ``items`` attribute, or
                                 None when there is nothing to compute.
    """

    def __init__(self, parent=None, signalManager=None):
        """Declare the signals, restore settings and build the control area."""
        # NOTE(review): the title passed here is "Preprocess" although the
        # widget is registered as "Distance" -- looks like a copy/paste
        # leftover. Left unchanged because the title may be used to locate
        # stored settings; confirm before renaming.
        OWWidget.__init__(self, parent, signalManager, "Preprocess")
        self.inputs = [("Example Table", ExampleTable, self.dataset)]
        self.outputs = [("Distance Matrix", orange.SymMatrix)]
        self.data = None
        # Index of the selected radio button below
        # (0 = similarity, 1 = distance, following the order of btnLabels).
        self.distanceType = 0
        self._clearInfo()
        self.loadSettings()

        box = OWGUI.widgetBox(self.controlArea, "Info", addSpace=True)
        OWGUI.label(box, self, "Documents: %(nDocuments)s")
        OWGUI.label(box, self, "Words: %(nWords)s")

        OWGUI.radioButtonsInBox(
            self.controlArea, self, "distanceType", box="Matrix type",
            btnLabels=["Similarity [cos(fi)]", "Distance [1/cos(fi)]"],
            addSpace=True)
        OWGUI.button(self.controlArea, self, "Apply", self.apply)
        OWGUI.rubber(self.controlArea)
        self.adjustSize()

    def _clearInfo(self):
        """Reset the counters shown in the Info box to their "no data" state."""
        self.nDocuments = "N/A"
        self.nWords = "N/A"

    def dataset(self, data):
        """Handle a new (or removed) input table.

        A table without any bag-of-words meta attributes is rejected with an
        error message and None is sent on the output. Otherwise the table is
        stored, the Info box is refreshed and the matrix is recomputed.
        """
        # Drop any error left over from a previously rejected table so that
        # it does not linger once valid data (or no data) arrives.
        self.error()

        if data:
            words = len(data.domain.getmetas(orngText.TEXTMETAID))
            if words == 0:
                self.error("Bag-of-words data set empty (no words)")
                self.send("Distance Matrix", None)
                self._clearInfo()
                self.data = None
                return
            self.nDocuments = len(data)
            self.nWords = words
        else:
            # Input removed or empty: do not keep showing the old counts.
            self._clearInfo()

        self.data = data
        self.apply()

    def apply(self):
        """Compute the matrix for the current data and send it to the output.

        Sends None when no data is available.
        """
        if not self.data:
            self.send("Distance Matrix", None)
            return

        # orange.textCos is called without a progress callback, so the bar
        # can only be advanced once before the call and finished afterwards.
        pb = OWGUI.ProgressBar(self, iterations=2)
        try:
            pb.advance()
            dist = orange.textCos(self.data, self.distanceType,
                                  orngText.TEXTMETAID)
            dist.setattr("items", self.data)
            self.send("Distance Matrix", dist)
        finally:
            # Always close the progress bar, even if the computation fails.
            pb.finish()
# Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
# Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
# Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
# Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
# Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
# Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
# Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.