Commits

AJ R committed 66a6fdc

SavReader file report now works for Python 2 and 3, added unit_tests/test_SavReader_file_report.py

Comments (0)

Files changed (5)

savReaderWriter/doc_tests/test_SavReader_cmp.txt

-##############################################################################
-## Read a file and use operators (__cmp__)
-##############################################################################
-
->>> import sys
->>> sys.stdout.write('<skipline> '); from savReaderWriter import * # doctest: +ELLIPSIS
-<skipline> ...
-
-## ... get number of cases by using __len__
->>> savFileName = "../savReaderWriter/test_data/Employee data.sav"
->>> try:    # doctest: +ELLIPSIS
-...     reader = SavReader(savFileName)        
-...     print reader == reader, reader > reader, reader < reader
-...     print reader == 474, reader > 474, reader < 474
-... finally:
-...     reader.close()
-...
-True False False
-True False False
-

savReaderWriter/doc_tests/test_SavReader_file_report.txt

-##############################################################################
-## Read a file and get some basic file info (reports)
-##############################################################################
-
->>> import pprint
->>> import sys
->>> sys.stdout.write('<skipline> '); from savReaderWriter import * # doctest: +ELLIPSIS
-<skipline> ...
-
-## ... Get some basic file info
->>> savFileName = "../savReaderWriter/test_data/Employee data.sav"
->>> try:  # doctest: +ELLIPSIS
-...     reader = SavReader(savFileName, ioLocale='en_US.UTF-8')      
-...     print str(reader)
-...     print unicode(reader)
-... finally:
-...     reader.close()
-...
-**********************************************************************
-*File '../savReaderWriter/test_data/Employee data.sav' (24.32 kB) has 10 columns (variables) and 474 rows (4740 values)
-*The file was created with SPSS version: MS Windows Release 11.0 spssio32.dll (11.1.0)
-*The interface locale is: 'en_US.UTF-8'
-*The interface mode is: Codepage (UTF-8)
-*The file encoding is: 'utf_8' (Code page: 65001)
-*File encoding and the interface encoding are compatible: Yes
-*Your computer's locale is: '...' (Code page: ...)
-*The file contains the following variables:
-  01. id (F4 - numerical)
-  02. gender (A1 - string)
-  03. bdate (ADATE10 - numerical)
-  04. educ (F2 - numerical)
-  05. jobcat (F1 - numerical)
-  06. salary (DOLLAR8 - numerical)
-  07. salbegin (DOLLAR8 - numerical)
-  08. jobtime (F2 - numerical)
-  09. prevexp (F6 - numerical)
-  10. minority (F1 - numerical)
-**********************************************************************
-<BLANKLINE>
-**********************************************************************
-*File '../savReaderWriter/test_data/Employee data.sav' (24.32 kB) has 10 columns (variables) and 474 rows (4740 values)
-*The file was created with SPSS version: MS Windows Release 11.0 spssio32.dll (11.1.0)
-*The interface locale is: 'en_US.UTF-8'
-*The interface mode is: Codepage (UTF-8)
-*The file encoding is: 'utf_8' (Code page: 65001)
-*File encoding and the interface encoding are compatible: Yes
-*Your computer's locale is: '...' (Code page: ...)
-*The file contains the following variables:
-  01. id (F4 - numerical)
-  02. gender (A1 - string)
-  03. bdate (ADATE10 - numerical)
-  04. educ (F2 - numerical)
-  05. jobcat (F1 - numerical)
-  06. salary (DOLLAR8 - numerical)
-  07. salbegin (DOLLAR8 - numerical)
-  08. jobtime (F2 - numerical)
-  09. prevexp (F6 - numerical)
-  10. minority (F1 - numerical)
-**********************************************************************
-<BLANKLINE>
-

savReaderWriter/generic.py

             retcode = func(c_int(self.fh), byref(pszEncoding))
             checkErrsWarns("Problem getting file encoding", retcode)
             iana_codes = encodings.aliases.aliases
-            rawEncoding = pszEncoding.value.lower()
+            rawEncoding = pszEncoding.value.lower().decode("utf-8")
             if rawEncoding.replace("-", "") in iana_codes:
                 iana_code = rawEncoding.replace("-", "")
             else:

savReaderWriter/py3k.py

                                 " and __bytes__ methods, respectively, in "
                                 "Python 3")
 
+# implement rich comparison operators in Python 3 (SavReader)
 if isPy3k:
     def rich_comparison(cls):
         assert hasattr(cls, "__cmp__")

savReaderWriter/savReader.py

     def __str__(self):
         """This function returns a concise file report of the SPSS data file
         For example str(SavReader(savFileName))"""
-        return self.__unicode__.encode(self.fileEncoding)
+        return self.__unicode__().encode(self.fileEncoding)
 
     def __unicode__(self):
         """This function returns a concise file report of the SPSS data file,
         For example unicode(SavReader(savFileName))"""
-        self.fileReport = self.getFileReport()
-        return self.fileReport
+        return self.getFileReport()
 
     @property
     def shape(self):
 
     def getFileReport(self):
         """ This function returns a report about basic file characteristics """
-        bytes = os.path.getsize(self.savFileName)
-        kb = float(bytes) / 2**10
-        mb = float(bytes) / 2**20
+        filesize = os.path.getsize(self.savFileName)
+        kb = float(filesize) / 2**10
+        mb = float(filesize) / 2**20
         (fileSize, label) = (mb, "MB") if mb > 1 else (kb, "kB")
-        systemString = self.systemString
+        systemString = self.systemString.decode(self.fileEncoding)
         spssVersion = ".".join(map(str, self.spssVersion))
         lang, cp = locale.getlocale()
         intEnc = "Utf-8/Unicode" if self.ioUtf8 else "Codepage (%s)" % cp
         line = "  %%0%sd. %%s (%%s - %%s)" % len(str(len(self.varNames) + 1))
         for cnt, varName in enumerate(self.varNames):
             lbl = "string" if self.varTypes[varName] > 0 else "numerical"
-            format_ = self.formats[varName]
+            format_ = self.formats[varName].decode(self.fileEncoding)
+            varName = varName.decode(self.fileEncoding) 
             varlist.append(line % (cnt + 1, varName, format_, lbl))
         info = {"savFileName": self.savFileName,
                 "fileSize": fileSize,
                 "nCols": len(self.varNames),
                 "nValues": self.nCases * len(self.varNames),
                 "spssVersion": "%s (%s)" % (systemString, spssVersion),
-                "ioLocale": self.ioLocale,
+                "ioLocale": self.ioLocale.decode(self.fileEncoding),
                 "ioUtf8": intEnc,
                 "fileEncoding": self.fileEncoding,
                 "fileCodePage": self.fileCodePage,
                 "sep": os.linesep,
                 "asterisks": 70 * "*"}
         report = ("%(asterisks)s%(sep)s" +
-                  "*File %(savFileName)r (%(fileSize)3.2f %(label)s) has " +
+                  "*File '%(savFileName)s' (%(fileSize)3.2f %(label)s) has " +
                   "%(nCols)s columns (variables) and %(nCases)s rows " +
                   "(%(nValues)s values)%(sep)s" +
                   "*The file was created with SPSS version: %(spssVersion)s%" +
                   "(sep)s" +
-                  "*The interface locale is: %(ioLocale)r%(sep)s" +
+                  "*The interface locale is: '%(ioLocale)s'%(sep)s" +
                   "*The interface mode is: %(ioUtf8)s%(sep)s" +
-                  "*The file encoding is: %(fileEncoding)r (Code page: " +
+                  "*The file encoding is: '%(fileEncoding)s' (Code page: " +
                   "%(fileCodePage)s)%(sep)s" +
                   "*File encoding and the interface encoding are compatible:" +
                   " %(isCompatible)s%(sep)s" +
-                  "*Your computer's locale is: %(local_language)r (Code " +
+                  "*Your computer's locale is: '%(local_language)s' (Code " +
                   "page: %(local_encoding)s)%(sep)s" +
                   "*The file contains the following variables:%(sep)s" +
-                  "%(varlist)s%(sep)s%(asterisks)s%(sep)s")
-        return report % info
+                  "%(varlist)s%(sep)s%(asterisks)s%(sep)s") % info
+        if hasattr(report, "decode"):
+            report = report.decode(self.fileEncoding)
+        return report
 
     def getHeader(self, selectVars):
         """This function returns the variable names, or a selection thereof