Commits

Anonymous committed ad857b7

checkpoint - refactoring _enum_terms() to be right-side-out

Comments (0)

Files changed (1)

         return [t] + cls.sub_fields.get((t, f), []) + [f]
 
     def curated_terms(self):
-        folders_parts = [
-            (t, parents[:np + 1])
-            for ((t, f), parents) in self.sub_fields.items()
-            for np in range(len(parents))]
-
-        folders = [I2B2MetaData.term(pfx=['', self.root],
-                                     audit=self.audit,
-                                     parts=[t] + curated,
-                                     name=curated[0],  # kludge
-                                     code=None,
-                                     viz='FAE')
-                   for (t, curated) in folders_parts]
+        tree = [(t, parents[:np + 1])
+                for ((t, f), parents) in self.sub_fields.items()
+                for np in range(len(parents))]
+        folders = I2B2MetaData.folders(
+            pfx=['', self.root], audit=self.audit,
+            path_parts=[[t] + curated for (t, curated) in tree],
+            names=[curated[0] for (t, curated) in tree],  # kludge
+            tooltips=['' for _ in tree],
+            vizs=['FAE' for _ in tree])
 
         distinct_paths = dict([(I2B2MetaData.fullname(term), term)
                                for term in folders])
         return distinct_paths.values()
 
     def scalar_terms(self):
-        field_parts = [(field['FIELD_NAME'],
-                        field['TABLE_NAME'],
-                        self._value_info(field['DATA_TYPE'],
-                                         field['UNIT_OF_MEASURE']),
-                        field['UNIT_OF_MEASURE'],
-                        field['DEFINITION'])
-                       for field in self.fields()]
+        discrete = self._has_values()
+        scalar_fields = [(t, n, f) for (t, n, f) in self.fields()
+                         if n not in self.excluded_fields
+                         and (t, n) not in discrete]
 
-        discrete = self._has_values()
-        return [I2B2MetaData.term(pfx=['', self.root],
-                                  audit=self.audit,
-                                  parts=self.path_parts(t, n),
-                                  code=n + ':', name=n,
-                                  c_columndatatype=ct,
-                                  c_metadataxml=I2B2MetaData.metadataxml(
-                                      datatype=datatype,
-                                      normalunits=units),
-                                  viz=self.mkviz(n),
-                                  hint=hint)
-                for (n, t, (ct, datatype), units, hint) in field_parts
-                if n not in self.excluded_fields
-                and (t, n) not in discrete]
+        return I2B2MetaData.scalars(
+            pfx=['', self.root], audit=self.audit,
+            field_parts=[self.path_parts(t, n) for (t, n, _) in scalar_fields],
+            codes=[n + ':' for (t, n, f) in scalar_fields],
+            names=[n for (t, n, f) in scalar_fields],
+            value_info=[self._value_info(field['DATA_TYPE'],
+                                         field['UNIT_OF_MEASURE'])
+                   for (_, _, field) in scalar_fields],
+            vizs=[self.mkviz(n) for (t, n, f) in scalar_fields],
+            tooltips=[field['DEFINITION'] for (t, n, field) in scalar_fields])
 
     @classmethod
     def mkviz(cls, name, c1='L'):
         '''Convert datatype, units from CDM norms to i2b2 norms.
 
         >>> CDM._value_info('TEXT(x)', 'TIME')
-        ('T', 'String')
+        ('T', 'String', 'TIME')
         >>> CDM._value_info('TEXT(x)', 'DATE')
-        ('D', 'String')
+        ('D', 'String', 'DATE')
         >>> CDM._value_info('NUMBER(8)', 'INCH')
-        ('N', 'Float')
+        ('N', 'Float', 'INCH')
 
         >>> CDM._value_info('OOPS', 'INCH')
         Traceback (most recent call last):
 
         datatype = 'Float' if is_num else 'String'
 
-        return coltype, datatype
+        return coltype, datatype, units
 
     @classmethod
     def fields(cls):
         col_names = row_text(cls.sheet.row(0))
         fields = [dict(zip(col_names, row_text(cls.sheet.row(ix))))
                   for ix in range(1, cls.sheet.nrows)]
-        return [f for f in fields
+        return [(f['TABLE_NAME'], f['FIELD_NAME'], f) for f in fields
                 if f['REPLICATED'] != 'YES']
 
     @classmethod
     def _has_values(cls):
-        return set((field['TABLE_NAME'], field['FIELD_NAME'])
-                   for field in cls.fields()
+        return set((t, n)
+                   for (t, n, field) in cls.fields()
                    if field['VALUE_DESCRIPTION'].strip())
 
     def discrete_terms(self):
-        return self._enum_terms(
-            field_ok=lambda t, n: (n not in self.excluded_fields
-                                   and (t, n) in self._has_values()),
-            pfx=['', self.root])
+        discrete = self._has_values()
+        discrete_fields = [(t, n, f) for (t, n, f) in self.fields()
+                           if n not in self.excluded_fields
+                           and (t, n) in discrete]
 
-    def _enum_terms(self, field_ok, pfx,
-                    leaf='L', folder='F',
-                    f_parts=lambda t, f: CDM.path_parts(t, f),
-                    v_parts=lambda v: ([v[k]
-                                        for k in ['table', 'field', 'code']]),
-                    applies=lambda t, f: []):
-        fields = self.fields()
-        field_parts = [(field['FIELD_NAME'],
-                        field['TABLE_NAME'],
-                        field['DEFINITION'])
-                       for field in fields]
-        values = [
-            dict(table=field['TABLE_NAME'],
-                 field=field['FIELD_NAME'],
-                 code=code,
-                 label=label)
-            for field in fields
-            for code, label in discrete_values(field['VALUE_DESCRIPTION'])]
+        value_fields = self._elaborate(discrete_fields)
 
-        folders = [I2B2MetaData.term(pfx=pfx,
-                                     audit=self.audit,
-                                     parts=f_parts(t, n),
-                                     name=n,
-                                     viz=self.mkviz(n, folder),
-                                     applies_to=applies(t, n),
-                                     hint=hint)
-                   for (n, t, hint) in field_parts
-                   if field_ok(t, n)]
+        value_terms = self._enum_terms(value_fields)
 
-        v_code = lambda v: ('%(label)s:' % v
-                            if v['field'] in self.terminology_fields
-                            else '%(field)s:%(code)s' % v)
-        value_terms = [
-            I2B2MetaData.term(pfx=pfx,
-                              audit=self.audit,
-                              parts=v_parts(value),
-                              code=v_code(value),
-                              applies_to=applies(value['table'],
-                                                 value['field']),
-                              name=value['label'],
-                              viz=self.mkviz(value['field'], leaf))
-            for value in values
-            if field_ok(value['table'], value['field'])]
+        folders = self._folders(discrete_fields)
 
         return folders + value_terms
 
+    @classmethod
+    def _elaborate(cls, named_fields):
+        return [
+            (t, n, f, code, label) for (t, n, f) in named_fields
+            for code, label in discrete_values(f['VALUE_DESCRIPTION'])]
+
+    def _folders(self, named_fields,
+                 path_parts=None, vizs=None, pfx=None, applies=None):
+        return I2B2MetaData.folders(
+            pfx=pfx or ['', self.root], audit=self.audit,
+            path_parts=path_parts or [
+                CDM.path_parts(t, n) for (t, n, f) in named_fields],
+            mapp=applies,
+            vizs=vizs or ['FAE' for _ in named_fields],
+            names=[n for (t, n, f) in named_fields],
+            tooltips=[f['DEFINITION'] for (t, n, f) in named_fields])
+
+    def _enum_terms(self, value_fields, pfx=None,
+                    leaf='L', path_parts=None, applies=None, codes=None):
+        return I2B2MetaData.discrete_terms(
+            pfx=pfx or ['', self.root], audit=self.audit,
+            path_parts=path_parts or [
+                [t, n, code]
+                for (t, n, f, code, l) in value_fields],
+            codes=codes or [
+                '%s:%s' % (n, code)
+                for (t, n, f, code, l) in value_fields],
+            applies=applies or [[] for _ in value_fields],
+            names=[label for (t, n, f, c, label) in value_fields],
+            vizs=[self.mkviz(n, leaf) for (t, n, f, c, l) in value_fields])
+
     def terminology_stubs(self):
-        return self._enum_terms(
+        ts_fields = [(t, n, f) for (t, n, f) in self.fields()
+                     if n in self.terminology_fields]
+
+        value_fields = self._elaborate(ts_fields)
+
+        value_terms = self._enum_terms(
+            value_fields,
             leaf='F',
-            field_ok=lambda t, n: n in self.terminology_fields,
-            pfx=['', self.root])
+            codes=['%s:' % label
+                   for (t, n, f, code, label) in value_fields])
+
+        folders = self._folders(ts_fields)
+
+        return folders + value_terms
 
     def modifiers(self):
         mods = self.modifier_fields
-        return self._enum_terms(
-            folder='D', leaf='R',
-            field_ok=lambda t, n: n in mods,  # i.e. in mods.keys()
-            applies=lambda t, f: ['', self.root] + mods[f],
-            f_parts=lambda t, n: [n],
-            v_parts=lambda v: ([v[k]
-                                for k in ['field', 'code']]),
-            pfx=['', self.root + '_MOD'])
+        mod_fields = [(t, n, f) for (t, n, f) in self.fields()
+                      if n in mods]  # i.e. in mods.keys()
+
+        value_fields = self._elaborate(mod_fields)
+
+        value_terms = self._enum_terms(
+            value_fields,
+            leaf='R',
+            pfx=['', self.root + '_MOD'],
+            path_parts=[[n, c] for (t, n, f, c, l) in value_fields],
+            applies=[['', self.root] + mods[n]
+                     for (t, n, f, c, l) in value_fields])
+
+        folders = self._folders(
+            mod_fields,
+            pfx=['', self.root + '_MOD'],
+            path_parts=[[n] for (t, n, f) in mod_fields],
+            vizs=['DAE' for _ in mod_fields],
+            applies=[['', self.root] + mods[n] for (t, n, f) in mod_fields])
+
+        return folders + value_terms
 
 
 def _nodups(l):
     @classmethod
     def term(cls, pfx, parts, name,
              audit,
-             code=None, viz='CAE', applies_to=[], hint=None,
+             code=None, viz='CAE', applies_to=[], tooltip=None,
              c_synonym_cd='N',
              c_metadataxml=None,
              c_operator='like',
              c_columndatatype='@',
              c_columnname='concept_path',
              c_tablename='concept_dimension',
-             c_facttablecolumn='concept_cd'):
+             c_facttablecolumn='concept_cd',
+             max_tooltip_len=850, encoding='utf-8'):
+        # TODO: move pfx to caller's responsibility
+        # TODO: use namedtuples rather than lists
+        # TODO: postpone serialization?
         hlevel, path = len(parts), '\\'.join(pfx + parts + [''])
         m_path = '\\'.join(applies_to + ['%']) if applies_to else '@'
-        tooltip = (hint or '')[:850].encode('utf-8')
+        tooltip = (tooltip or '')[:max_tooltip_len].encode(encoding)
         update_date, sourcesystem_cd = audit
         return [
             hlevel, path,
         return cls.term(parts=[], audit=audit, pfx=['', root], name=root)
 
     @classmethod
+    def folders(cls, pfx, audit, path_parts, names, tooltips, vizs,
+                mapp=None):
+        return [cls.term(pfx=pfx,
+                         audit=audit,
+                         parts=parts,
+                         applies_to=app,
+                         name=n,
+                         viz=viz,
+                         tooltip=tooltip)
+                for (parts, n, viz, tooltip, app)
+                in zip(path_parts, names, vizs, tooltips,
+                       mapp or [[] for _ in path_parts])]
+
+    @classmethod
+    def discrete_terms(cls, pfx, audit,
+                       path_parts, codes, applies, names, vizs):
+        return [
+            cls.term(pfx=pfx, audit=audit,
+                     parts=parts,
+                     code=code,
+                     applies_to=applies_to,
+                     name=name,
+                     viz=viz)
+            for (parts, code, applies_to, name, viz)
+            in zip(path_parts, codes, applies, names, vizs)]
+
+    @classmethod
+    def scalars(cls, pfx, audit, field_parts, codes,
+                names, value_info, vizs, tooltips):
+        return [cls.term(pfx=pfx, audit=audit,
+                         parts=parts, code=code, name=name,
+                         c_columndatatype=ct,
+                         c_metadataxml=cls.metadataxml(
+                             datatype=datatype,
+                             normalunits=units),
+                         viz=viz,
+                         tooltip=tooltip)
+                for (parts, code, name, (ct, datatype, units), viz, tooltip)
+                in zip(field_parts, codes, names, value_info, vizs, tooltips)]
+
+    @classmethod
     def modifier(cls, root, parts, name, code, viz, hint=None):
         return cls.term(
             root,