Commits

AJ R committed 1d8d48b

simplified reportSpssDataDictionary method

Comments (0)

Files changed (4)

savReaderWriter/doc_tests/test_SavHeaderReader_report1.txt

 >>> with SavHeaderReader(savFileName) as header: # doctest: +ELLIPSIS
 ...     print(str(header))
 #ALIGNMENTS
+AGE2 -- right
 AGE3 -- right
-DATE_ -- left
-weightVar -- right
-aShortStringVar -- left
-aLongStringVar -- left
+Age -- left
 AvgIncome -- right
+DATE_ -- left
+ID -- left
+Income1 -- right
+Income2 -- right
+Income3 -- center
+MONTH_ -- right
 MaxIncome -- right
+QUARTER_ -- right
+Region -- left
 SEX -- right
 V1 -- right
 V2 -- right
 V3 -- right
-Region -- left
-QUARTER_ -- right
-Income1 -- right
-Income2 -- right
-Income3 -- center
-Age -- left
-ID -- left
 YEAR_ -- right
-MONTH_ -- right
+aLongStringVar -- left
+aShortStringVar -- left
 someDate -- right
-AGE2 -- right
+weightVar -- right
 #CASEWEIGHTVAR
 weightVar
 #COLUMNWIDTHS
+AGE2 -- 10
 AGE3 -- 10
-DATE_ -- 10
-weightVar -- 11
-aShortStringVar -- 17
-aLongStringVar -- 26
+Age -- 10
 AvgIncome -- 11
+DATE_ -- 10
+ID -- 10
+Income1 -- 14
+Income2 -- 14
+Income3 -- 15
+MONTH_ -- 8
 MaxIncome -- 11
+QUARTER_ -- 10
+Region -- 10
 SEX -- 10
 V1 -- 10
 V2 -- 10
 V3 -- 10
-Region -- 10
-QUARTER_ -- 10
-Income1 -- 14
-Income2 -- 14
-Income3 -- 15
-Age -- 10
-ID -- 10
 YEAR_ -- 10
-MONTH_ -- 8
+aLongStringVar -- 26
+aShortStringVar -- 17
 someDate -- 13
-AGE2 -- 10
+weightVar -- 11
 #FILEATTRIBUTES
-$VariableView2[13] -- @DerivedFrom
-$VariableView2[10] -- measure
-$VariableView2[11] -- role
 $VariableView2[01] -- name
-VersionNumber -- 1
-$VariableView2[03] -- width
-$VariableView2[09] -- alignment
 $VariableView2[02] -- type
-$VariableView2[08] -- columns
-$VariableView2[05] -- label
-$VariableView2[14] -- @Notes
+$VariableView2[03] -- width
 $VariableView2[04] -- decimals
+$VariableView2[05] -- label
+$VariableView2[06] -- values
 $VariableView2[07] -- missing
+$VariableView2[08] -- columns
+$VariableView2[09] -- alignment
+$VariableView2[10] -- measure
+$VariableView2[11] -- role
 $VariableView2[12] -- @Formula
-$VariableView2[06] -- values
+$VariableView2[13] -- @DerivedFrom
+$VariableView2[14] -- @Notes
+VersionNumber -- 1
 #FILELABEL
 This is a file label
 #FORMATS
+AGE2 -- F8.2
 AGE3 -- F8.2
-DATE_ -- A8
-weightVar -- F8.2
-aShortStringVar -- A1
-aLongStringVar -- A100
+Age -- F3
 AvgIncome -- F8.2
+DATE_ -- A8
+ID -- N6
+Income1 -- F8.2
+Income2 -- F8.2
+Income3 -- F8.2
+MONTH_ -- F2
 MaxIncome -- F8.2
+QUARTER_ -- F1
+Region -- F8.2
 SEX -- F8.2
 V1 -- F8.2
 V2 -- F8.2
 V3 -- F8.2
-Region -- F8.2
-QUARTER_ -- F1
-Income1 -- F8.2
-Income2 -- F8.2
-Income3 -- F8.2
-Age -- F3
-ID -- N6
 YEAR_ -- F8
-MONTH_ -- F2
+aLongStringVar -- A100
+aShortStringVar -- A1
 someDate -- ADATE40
-AGE2 -- F8.2
+weightVar -- F8.2
 #MEASURELEVELS
+AGE2 -- ratio
 AGE3 -- ratio
-DATE_ -- nominal
-weightVar -- nominal
-aShortStringVar -- nominal
-aLongStringVar -- nominal
+Age -- ratio
 AvgIncome -- ratio
+DATE_ -- nominal
+ID -- nominal
+Income1 -- ratio
+Income2 -- ratio
+Income3 -- ratio
+MONTH_ -- ordinal
 MaxIncome -- ratio
+QUARTER_ -- ordinal
+Region -- nominal
 SEX -- nominal
 V1 -- nominal
 V2 -- nominal
 V3 -- nominal
-Region -- nominal
-QUARTER_ -- ordinal
-Income1 -- ratio
-Income2 -- ratio
-Income3 -- ratio
-Age -- ratio
-ID -- nominal
 YEAR_ -- ordinal
-MONTH_ -- ordinal
+aLongStringVar -- nominal
+aShortStringVar -- nominal
 someDate -- ratio
-AGE2 -- ratio
+weightVar -- nominal
 #MISSINGVALUES
-aShortStringVar: values -- x, y
-Income1: lower -- -1.797...e+308
+Age: lower -- 0.0
+Age: upper -- 18.0
+Income1: lower -- -1.797...+308
 Income1: upper -- -1.0
-Income2: lower -- -1.797...e+308
+Income2: lower -- -1.797...+308
 Income2: upper -- -1.0
 Income2: value -- 999.0
 Income3: values -- 999.0, 888.0, 777.0
-Age: lower -- 0.0
-Age: upper -- 18.0
+aShortStringVar: values -- x, y
 #MULTRESPDEFS
 V: countedValue -- 1
 V: label -- 
 V: setType -- D
-V: varNames -- v1, v2, v3
+V: varNames -- V1, V2, V3
 ages: label -- the ages
 ages: setType -- C
-ages: varNames -- age, age2, age3
+ages: varNames -- Age, AGE2, AGE3
 incomes: label -- three kinds of income
 incomes: setType -- C
-incomes: varNames -- income1, income2, income3, age, age2, age3
+incomes: varNames -- Income1, Income2, Income3, Age, AGE2, AGE3
 #VALUELABELS
 Age: 27.0 -- 27 y.o. 
 Age: 34.0 -- 34 y.o.
 MaxIncome: DerivedFrom[3] -- Income3
 MaxIncome: Formula -- max(Income1, Income2, Income3)
 #VARLABELS
+AGE2 -- 
+AGE3 -- 
+Age -- How old are you?
+AvgIncome -- 
 DATE_ -- Date.  Format:  "MMM YYYY"              
-aShortStringVar -- Some mysterious short stringVar
-aLongStringVar -- Some mysterious long stringVar
-Region -- What region do you live
+ID -- 
+Income1 -- 
+Income2 -- 
+Income3 -- 
+MONTH_ -- MONTH, period 12
+MaxIncome -- 
 QUARTER_ -- QUARTER, period 4
-Age -- How old are you?
+Region -- What region do you live
+SEX -- 
+V1 -- 
+V2 -- 
+V3 -- 
 YEAR_ -- YEAR, not periodic
-MONTH_ -- MONTH, period 12
+aLongStringVar -- Some mysterious long stringVar
+aShortStringVar -- Some mysterious short stringVar
+someDate -- 
+weightVar -- 
 #VARNAMES
 ID
 Age
 MONTH_
 DATE_
 #VARROLES
+AGE2 -- input
 AGE3 -- input
-DATE_ -- input
-weightVar -- input
-aShortStringVar -- input
-aLongStringVar -- input
+Age -- input
 AvgIncome -- input
+DATE_ -- input
+ID -- input
+Income1 -- target
+Income2 -- target
+Income3 -- target
+MONTH_ -- input
 MaxIncome -- input
+QUARTER_ -- input
+Region -- partition
 SEX -- input
 V1 -- input
 V2 -- input
 V3 -- input
-Region -- partition
-QUARTER_ -- input
-Income1 -- target
-Income2 -- target
-Income3 -- target
-Age -- input
-ID -- input
 YEAR_ -- input
-MONTH_ -- input
+aLongStringVar -- input
+aShortStringVar -- input
 someDate -- input
-AGE2 -- input
+weightVar -- input
 #VARSETS
 #VARTYPES
+AGE2 -- 0
 AGE3 -- 0
-DATE_ -- 8
-weightVar -- 0
-aShortStringVar -- 1
-aLongStringVar -- 100
+Age -- 0
 AvgIncome -- 0
+DATE_ -- 8
+ID -- 0
+Income1 -- 0
+Income2 -- 0
+Income3 -- 0
+MONTH_ -- 0
 MaxIncome -- 0
+QUARTER_ -- 0
+Region -- 0
 SEX -- 0
 V1 -- 0
 V2 -- 0
 V3 -- 0
-Region -- 0
-QUARTER_ -- 0
-Income1 -- 0
-Income2 -- 0
-Income3 -- 0
-Age -- 0
-ID -- 0
 YEAR_ -- 0
-MONTH_ -- 0
+aLongStringVar -- 100
+aShortStringVar -- 1
 someDate -- 0
-AGE2 -- 0
+weightVar -- 0

savReaderWriter/doc_tests/test_SavHeaderReader_report2.txt

-##############################################################################
-## Print a report of the header information
-##############################################################################
-
->>> from __future__ import print_function
->>> from savReaderWriter import *
-
->>> savFileName = "../savReaderWriter/test_data/Employee data.sav"
->>> with SavHeaderReader(savFileName) as header: # doctest: +ELLIPSIS
-...     print(str(header))
-#ALIGNMENTS
-salary -- right
-jobcat -- right
-bdate -- right
-minority -- right
-prevexp -- right
-gender -- left
-salbegin -- right
-jobtime -- right
-educ -- right
-id -- right
-#CASEWEIGHTVAR
-#COLUMNWIDTHS
-salary -- 8
-jobcat -- 8
-bdate -- 13
-minority -- 8
-prevexp -- 8
-gender -- 1
-salbegin -- 8
-jobtime -- 8
-educ -- 8
-id -- 8
-#FILEATTRIBUTES
-#FILELABEL
-05.00.00
-#FORMATS
-salary -- DOLLAR8
-jobcat -- F1
-bdate -- ADATE10
-minority -- F1
-prevexp -- F6
-gender -- A1
-salbegin -- DOLLAR8
-jobtime -- F2
-educ -- F2
-id -- F4
-#MEASURELEVELS
-salary -- ratio
-jobcat -- ordinal
-bdate -- ratio
-minority -- ordinal
-prevexp -- ratio
-gender -- nominal
-salbegin -- ratio
-jobtime -- ratio
-educ -- ordinal
-id -- ratio
-#MISSINGVALUES
-salary: values -- 0.0
-jobcat: values -- 0.0
-minority: values -- 9.0
-salbegin: values -- 0.0
-jobtime: values -- 0.0
-educ: values -- 0.0
-#MULTRESPDEFS
-#VALUELABELS
-salary: 0.0 -- missing
-jobcat: 0.0 -- 0 (Missing)
-jobcat: 1.0 -- Clerical
-jobcat: 2.0 -- Custodial
-jobcat: 3.0 -- Manager
-salbegin: 0.0 -- missing
-minority: 0.0 -- No
-minority: 1.0 -- Yes
-minority: 9.0 -- 9 (Missing)
-prevexp: 0.0 -- missing
-gender: f -- Female
-gender: m -- Male
-jobtime: 0.0 -- missing
-educ: 0.0 -- 0 (Missing)
-educ: 8.0 -- 8
-educ: 12.0 -- 12
-educ: 14.0 -- 14
-educ: 15.0 -- 15
-educ: 16.0 -- 16
-educ: 17.0 -- 17
-educ: 18.0 -- 18
-educ: 19.0 -- 19
-educ: 20.0 -- 20
-educ: 21.0 -- 21
-#VARATTRIBUTES
-#VARLABELS
-salary -- Current Salary
-jobcat -- Employment Category
-bdate -- Date of Birth
-minority -- Minority Classification
-prevexp -- Previous Experience (months)
-gender -- Gender
-salbegin -- Beginning Salary
-jobtime -- Months since Hire
-educ -- Educational Level (years)
-id -- Employee Code
-#VARNAMES
-id
-gender
-bdate
-educ
-jobcat
-salary
-salbegin
-jobtime
-prevexp
-minority
-#VARROLES
-salary -- input
-jobcat -- input
-bdate -- input
-minority -- input
-prevexp -- input
-gender -- input
-salbegin -- input
-jobtime -- input
-educ -- input
-id -- input
-#VARSETS
-SALARY -- salbegin, salary
-DEMOGR -- gender, minority, educ
-#VARTYPES
-salary -- 0
-jobcat -- 0
-bdate -- 0
-minority -- 0
-prevexp -- 0
-gender -- 1
-salbegin -- 0
-jobtime -- 0
-educ -- 0
-id -- 0

savReaderWriter/doc_tests/test_SavHeaderReader_report_utf8.txt

-##############################################################################
-## Print a report of the header information (ioUtf8=True)
-##############################################################################
-
->>> # -*- coding: utf-8 -*-
->>> import pprint
->>> from savReaderWriter import *
-
-# Unicode and doctest is a little awkward. This could be done in a better way, I guess
->>> savFileName = "../savReaderWriter/test_data/greetings.sav"
->>> with SavHeaderReader(savFileName, ioUtf8=True) as header:
-...     pprint.pprint( repr(str(header)).split("\n") )  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
-['u"File \'greetings.sav\' built using SavReaderWriter.py version ... (... ... ... ...:...:... ...)\\n#ALIGNMENTS\\nline -- left\\ngreeting -- left\\nBondjo\\xfb -- left\\n#CASEWEIGHTVAR\\n#COLUMNWIDTHS\\nline -- 8\\ngreeting -- 50\\nBondjo\\xfb -- 20\\n#FILEATTRIBUTES\\n#FILELABEL\\nFile created by user \'Administrator\' at Thu Jan 17 16:35:14 2013\\n#FORMATS\\nline -- F8.2\\ngreeting -- A50\\nBondjo\\xfb -- A20\\n#MEASURELEVELS\\nline -- unknown\\ngreeting -- unknown\\nBondjo\\xfb -- unknown\\n#MISSINGVALUES\\nline: lower -- 0.0\\nline: upper -- 9.0\\nBondjo\\xfb: values -- \\xa1hola! \\n#MULTRESPDEFS\\n#VALUELABELS\\nBondjo\\xfb: Thai -- \\u0e2a\\u0e27\\u0e31\\u0e2a\\u0e14\\u0e35\\n#VARATTRIBUTES\\n#VARLABELS\\ngreeting -- \\u0627\\u0644\\u0633\\u0644\\u0627\\u0645 \\u0639\\u0644\\u064a\\u0643\\u0645\\n#VARNAMES\\nline\\nBondjo\\xfb\\ngreeting\\n#VARROLES\\nline -- input\\ngreeting -- input\\nBondjo\\xfb -- input\\n#VARSETS\\n#VARTYPES\\nline -- 0\\ngreeting -- 50\\nBondjo\\xfb -- 20"']

savReaderWriter/savHeaderReader.py

             return Meta(*metadata.values())
         return metadata
 
+    def __getEntry(self, varName, k, v, enc):
+        """Helper function for reportSpssDataDictionary.
+
+        Builds one report line of the form "<varName>: <k> -- <v>" after
+        coercing the key `k` and the value `v` into printable form.
+
+        varName -- variable name, used as-is in the output line
+        k       -- key of the entry; decoded with `enc` and stripped when
+                   self.ioUtf8 is false and `k` has a .decode method
+        v       -- value of the entry; may be a bytes-like object, a list,
+                   a map object, or anything else "%s" can format
+        enc     -- encoding name passed to .decode()
+
+        Returns the formatted string.
+        """
+        # Decode the key unless we are in ioUtf8 mode; keys without a
+        # .decode attribute raise AttributeError and are kept unchanged.
+        try:
+            k = k if self.ioUtf8 else k.decode(enc).strip()
+        except AttributeError:
+            pass
+        # Materialize a map object into a list. Where `map` is not a type,
+        # isinstance() raises TypeError and `v` is left untouched.
+        try:
+           v = list(v) if isinstance(v, map) else v
+        except TypeError:
+           pass  # python 2
+        # Decode the value unless in ioUtf8 mode. Values without .decode
+        # fall through to the handler: lists are joined with ", " after
+        # str() conversion of each item, everything else is kept as-is.
+        try:
+            v =  v if self.ioUtf8 else v.decode(enc)
+        except AttributeError:
+            v = ", ".join(map(str, v)) if isinstance(v, list) else v
+        # Last resort: evaluate the string form of `v` and join the result
+        # with ", "; any failure leaves `v` as it was.
+        # NOTE(review): eval() runs on text derived from the file header, and
+        # the bare except hides every error (including KeyboardInterrupt) --
+        # presumably meant to unpack a stringified list; confirm and consider
+        # ast.literal_eval with a narrower except clause.
+        try:
+            v = ", ".join(eval(str(v)))  # ??
+        except:
+            pass
+        return "%s: %s -- %s" % (varName,k, v)
+
     def reportSpssDataDictionary(self, dataDict):
         """ This function reports information from the Spss dictionary
         of the active Spss dataset. The parameter 'dataDict' is the return
         value of dataDictionary()"""
         # Yeah I know: what a mess! ;-)
-        report = []
-        enc = self.fileEncoding
+        report, enc = [], self.fileEncoding
         for kwd, allValues in sorted(dataDict.items()):
             report.append("#" + kwd.upper())
             if hasattr(allValues, "items"):
                 for varName, values in sorted(allValues.items()):
                     varName =  varName if self.ioUtf8 else varName.decode(enc)
                     if hasattr(values, "items"):
-                        isList = kwd in ("missingValues", "multRespDefs")
                         for k, v in sorted(values.items()):
-                            if isList and isinstance(v, (list, tuple)):
-                                # missings (discrete str values), multRespDefs
-                                vStr = [unicode(item) for item in v]
-                                k = k if self.ioUtf8 else k.decode(enc)
-                                entry = "%s: %s -- %s" 
-                                args = (varName, k, ", ".join(vStr))
-                                report.append(entry % args)
-                            else:
-                                # value label, missings (discrete num values)
-                                try:            # values vallabels str vars
-                                    k = k if self.ioUtf8 else k.decode(enc).strip()
-                                except AttributeError:   
-                                    k = str(k)  # values vallabels num vars
-                                v =  v if self.ioUtf8 else v.decode(enc)
-                                try:
-                                   v = list(v) if isinstance(v, map) else v
-                                except TypeError:
-                                   pass  # python 2
-                                v = ", ".join(v) if isinstance(v, list) else v
-                                report.append("%s: %s -- %s" % (varName, k, v))
+                            report.append(self.__getEntry(varName, k, v, enc))
                     else:
                         # varsets
                         if isinstance(values, list):
                     if isinstance(varName, bytes):
                         varName = varName.decode(enc)
                     report.append(varName)
-
-        #import pprint; pprint.pprint(report)
         return os.linesep.join(report)