Commits

Anonymous committed 29fdfc7

Release v1.4.2 -- Bug fixes

* Fixed a bug preventing the Extract XML widget from sending any data to output.
* Fixed a bug preventing the Select widget from properly handling conditions
bearing on missing annotation keys.
* Fixed a minor bug lurking in the Table class.
* Added a new 'presence/absence' mode of normalization for pivot tables
in the Convert widget.
* Minor updates to the documentation.

Comments (0)

Files changed (12)

README.rst

 Orange Textable
 ===============
 
-Textable is an add-on for Orange_ data mining software package. It enables users to build data
-tables on the basis of text data, by means of a flexible and intuitive
-interface. It offers in particular the following features:
+Orange Textable documentation
+=============================
+
+Orange Textable is an add-on for Orange_ data mining software package. It
+enables users to build data tables on the basis of text data, by means of a
+flexible and intuitive interface. Look at the following :doc:`example
+<illustration>` to see it in typical action.
+
+Orange Textable offers in particular the following features:
 
 - import text data from various sources
 - apply systematic recoding operations
 - apply analytical processes such as segmentation and annotation
+- extract and exploit XML-encoded annotations
 - manually, automatically or randomly select unit subsets
 - build concordances and collocation lists
 - compute quantitative indices such as frequency and complexity measures
 
 http://orange-textable.readthedocs.org/
 
-Textable was designed and implemented by `LangTech Sarl <http://langtech.ch>`_ on behalf of the
-department of language and information sciences (SLI_) at the `University of Lausanne <http://www.unil.ch>`_.
+Textable was designed and implemented by `LangTech Sarl <http://langtech.ch>`_
+on behalf of the department of language and information
+sciences (SLI_) at the `University of Lausanne <http://www.unil.ch>`_ (see
+:doc:`Credits <credits>` and :doc:`How to cite Orange Textable <citing>`).
 
 .. _SLI: http://www.unil.ch/sli
 

_textable/widgets/LTTL/Segmenter.py

 #=============================================================================
-# Class LTTL.Segmenter, v0.19
+# Class LTTL.Segmenter, v0.20
 # Copyright 2012-2014 LangTech Sarl (info@langtech.ch)
 #=============================================================================
 # This file is part of the LTTL package v1.4
             if copy_annotations:
                 old_segment_annotation_copy = segment.annotations.copy()
             if annotation_key:
-                match = regex.search(segment.annotations[annotation_key])
+                if annotation_key in segment.annotations:
+                    match = regex.search(segment.annotations[annotation_key])
+                else:
+                    match = None
             else:
                 match = regex.search(segment.get_content())
             address = segment.address

_textable/widgets/LTTL/Table.py

 #=============================================================================
-# Module LTTL.Table, v0.07
+# Module LTTL.Table, v0.08
 # Copyright 2012-2014 LangTech Sarl (info@langtech.ch)
 #=============================================================================
 # This file is part of the LTTL package v1.4
                 self.cached_row_id,
         ))
 
-    def to_normalized(self, mode='row', type='l1'):
+    def to_normalized(self, mode='rows', type='l1'):
         """Return a sorted copy of the crosstab"""
         new_values = {}
         denominator = 0
                         ]),
                         [0 for v in values]
                 ))
+        elif mode == 'presence/absence':
+            row_ids   = self.row_ids
+            col_ids   = self.col_ids
+            for col_id in col_ids:
+                for row_id in row_ids:
+                    try:
+                        value = self.values[(row_id, col_id)]
+                        new_values[(row_id, col_id)] = 1 if value > 0 else 0
+                    except KeyError:
+                        pass
         elif mode == 'quotients':
             row_ids   = self.row_ids
             col_ids   = self.col_ids

_textable/widgets/OWTextableConvert.py

 #=============================================================================
-# Class OWTextableConvert, v0.10
+# Class OWTextableConvert, v0.12
 # Copyright 2012-2014 LangTech Sarl (info@langtech.ch)
 #=============================================================================
 # This file is part of the Textable (v1.4) extension to Orange Canvas.
                 self,
                 parent,
                 signalManager,
-                'TextableConvert_0_10',
+                'TextableConvert_0_12',
                 wantMainArea=0,
         )
         
                                             u'rows',
                                             u'columns',
                                             u'table',
-                                            u'quotients'
+                                            u'quotients',
+                                            u'presence/absence'
                                     ],
                 sendSelectedValue   = True,
                 callback            = self.sendButton.settingsChanged,
                 tooltip             = (
                         u"Select the units to which normalization will be\n"
-                        u"applied: rows, columns, or the entire table;\n"
-                        u"in 'quotients' mode, the count stored in each\n"
+                        u"applied: rows, columns, or the entire table.\n\n"
+                        u"In 'quotients' mode, the count stored in each\n"
                         u"cell is divided by the corresponding theoretical\n"
                         u"count under independence: the result is greater\n"
                         u"than 1 in case of attraction between line and\n"
                         u"column, lesser than 1 in case of repulsion, and\n"
-                        u"1 if there is no specific interaction between them."
+                        u"1 if there is no specific interaction between\n"
+                        u"them.\n\n"
+                        u"In 'presence/absence' mode, counts greater than 0\n"
+                        u"are replaced with value 1."
                 ),
         )
         self.normalizeModeCombo.setMinimumWidth(150)
                         self.transformBoxLine4.setDisabled(False)
                         if self.normalize:
                             self.normalizeModeCombo.setDisabled(False)
-                            if self.normalizeMode != u'quotients':
+                            if (
+                                    self.normalizeMode != u'quotients'
+                                and self.normalizeMode != u'presence/absence'
+                            ):
                                 self.normalizeTypeCombo.setDisabled(False)
                             self.transformBoxLine5.setDisabled(True)
                         else:

_textable/widgets/OWTextableExtractXML.py

 #=============================================================================
-# Class OWTextableExtractXML, v0.11
+# Class OWTextableExtractXML, v0.12
 # Copyright 2012-2014 LangTech Sarl (info@langtech.ch)
 #=============================================================================
 # This file is part of the Textable (v1.4) extension to Orange Canvas.
                 self,
                 parent,
                 signalManager,
-                'TextableExtractXML_0_11',
+                'TextableExtractXML_0_12',
                 wantMainArea=0,
         )
 
         # Check that there's something on input...
         if not self.inputSegmentation:
             self.infoBox.noDataSent(u'No input.')
-            self.send('Extracted Data', None, self)
+            self.send('Extracted data', None, self)
             return
 
         # Check that element field is not empty...
         if not self.element:
             self.infoBox.noDataSent(u'No XML element was specified.')
-            self.send('Extracted Data', None, self)
+            self.send('Extracted data', None, self)
             return
 
         # Check that label is not empty...
         if not self.label:
             self.infoBox.noDataSent(u'No label was provided.')
-            self.send('Extracted Data', None, self)
+            self.send('Extracted data', None, self)
             return
 
         # Check that importElementAs is not empty (if necessary)...
                 self.infoBox.noDataSent(
                         u'No annotation key was provided for element import.'
                 )
-                self.send('Extracted Data', None)
+                self.send('Extracted data', None)
                 return
         else:
             importElementAs = None
                 self.infoBox.noDataSent(
                         u'No annotation key was provided for auto-numbering.'
                 )
-                self.send('Extracted Data', None, self)
+                self.send('Extracted data', None, self)
                 return
         else:
             autoNumberKey = None
         message = pluralize(message, len(xml_extracted_data))
         self.infoBox.dataSent(message)
 
-        self.send( 'Extracted Data', xml_extracted_data, self)
+        self.send( 'Extracted data', xml_extracted_data, self)
         self.sendButton.resetSettingsChangedFlag()
 
 

docs/rst/citing.rst

+Citing
+======
+
+If Orange Textable has been useful in preparing a scientific publication of
+yours, a citation would be a great way to say so. Here is the relevant
+bibliographic reference (to appear in June 2014):
+
+Xanthos, Aris (in press). Textable: programmation visuelle pour l'analyse de
+données textuelles. In *Actes des 12èmes Journées internationales d'analyse
+statistique des données textuelles (JADT 2014)*.

docs/rst/index.rst

 - import text data from various sources
 - apply systematic recoding operations
 - apply analytical processes such as segmentation and annotation
+- extract and exploit XML-encoded annotations
 - manually, automatically or randomly select unit subsets
 - build concordances and collocation lists
 - compute quantitative indices such as frequency and complexity measures
 Orange Textable was designed and implemented by `LangTech Sàrl 
 <http://langtech.ch>`_ on behalf of the department of language and information 
 sciences (SLI_) at the `University of Lausanne <http://www.unil.ch>`_ (see 
-:doc:`Credits <credits>`).
+:doc:`Credits <credits>` and :doc:`How to cite Orange Textable <citing>`).
 
 .. _SLI: http://www.unil.ch/sli
 
     Widget reference <widget_reference>
     Cookbook <cookbook>
     Case studies <case_studies>
+    How to cite Orange Textable <citing>
     Credits <credits>

docs/rst/segment.rst

 applied to each segment of the input segmentation. It also allows the user to
 specify if a given regular expression describes the form of the targeted
 segments (**Tokenize** mode) or rather the form of the separators in-between
-these segments (**Split** mode).
+these segments (**Split** mode). [#]_
 
 .. _segment_fig2:
 
        activated by default (see `Python documentation
        <http://docs.python.org/library/re.html>`_).
 
+.. [#] NB: in **Split** mode, empty segments that might occur between two
+       consecutive occurrences of separators are automatically removed.

docs/user_guide/figures/Thumbs.db

Binary file modified.

docs/user_guide/guide_utilisateur.doc

Binary file modified.

docs/user_guide/guide_utilisateur.pdf

Binary file modified.

setup.py

 #!/usr/bin/env python
 
 #=============================================================================
-# File setup.py, v0.10
+# File setup.py, v0.11
 # Copyright 2012-2014 LangTech Sarl (info@langtech.ch)
 #=============================================================================
 # This file is part of the Textable (v1.4) extension to Orange Canvas.
 NAME = 'Orange-Textable'
 DOCUMENTATION_NAME = 'Orange Textable'
 
-VERSION = '1.4.1'
+VERSION = '1.4.2'
 
 DESCRIPTION = 'Orange Textable add-on for Orange data mining software package.'
 LONG_DESCRIPTION = open(os.path.join(os.path.dirname(__file__), 'README.rst')).read()