Commits

Panagiotis Mavrogiorgos  committed dce9e49

Minor Changes in the API + clean-up

* removed "has_header_row". The attribute "number_of_header_rows" is used instead.
* cleaned-up the validation of Column constraints.

  • Participants
  • Parent commits f63f327

Comments (0)

Files changed (4)

File csvmodels/base.py

 class RowModel(metaclass=RowModelMeta):
     """
     The configuration of **csvmodels** involves a combination of ``RowModel``
-    class objects and ``Column`` instances.
+    class objects and ``BaseColumn`` instances.
 
     More specifically, each model is a class object that uses as a Base
     ``csvmodels.RowModel`` and as class attributes instances of
-    ``csvmodels.Column`` and its subclasses (e.g. ``StringColumn``,
+    ``csvmodels.BaseColumn`` and its subclasses (e.g. ``StringColumn``,
     ``IntegerColumn``, ``FloatColumn`` etc).
 
-    There is also a special attribute named ``_dialect``, being used to pass
-    additional configuration options, which must be a Mapping (e.g.
-    a ``dict``).
+    There is also a special attribute named ``_dialect``, which is used
+    to pass additional configuration options. The ``_dialect`` must be a
+    Mapping (e.g. a ``dict``).
 
     A simple example is the following one::
 
             setattr(self, column.name, value)
 
     def __repr__(self):
-        # One liner (aka hack). It calculates the class name as a string.
+        # One liner (aka hack). It returns the class name as a string.
         row_name = str(self.__class__).split(".")[-1][:-2]
 
         row_values = {name: getattr(self, name) for name in self._column_names}
     def __init__(self, row_cls, file):
         self.row_cls = row_cls
         self.csv_reader = csv.reader(file, **row_cls._dialect.csv_dialect)
-        self.skip_header_row = row_cls._dialect.has_header_row
+        self.skip_number_of_rows = row_cls._dialect.number_of_header_rows
 
     def __iter__(self):
         """
         linenumber and it is reset each time a new `Reader` instance is created.
 
         """
-        # Skip the first row if it's a header.
-        # Since we process a single line of the csv file we increment `_linenumber`
-        # by one. We also set `skip_header_row` to False in
-        if self.skip_header_row:
-            next(self.csv_reader)
-            self.skip_header_row = False
-            self.row_cls._linenumber += 1
+        # If there are header rows, skip them.
+        # For each line we process, we increment `_linenumber` by one.
+        # The header rows are skipped only while `_linenumber` is still 0.
+        if self.row_cls._linenumber == 0:
+            for i in range(self.skip_number_of_rows):
+                next(self.csv_reader)
+                self.row_cls._linenumber += 1
 
         csv_row = next(self.csv_reader)
         self.row_cls._linenumber += 1
     def writerows(self, rows):
         for row in rows:
             self.writerow(row)
-
-

File csvmodels/columns.py

 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
+_ERROR_MESSAGE = "<Column: {0:s}> - <Row: {1:d}> - <Value: {2:s}>"
+
 
 class ConstraintError(Exception):
     pass
 
 
-class Column:
+class BaseColumn(object):
     """
     An abstract class representing an individual column within a CSV file.  It
     serves as a base for attributes and methods that are common to all types of
-    columns. Subclasses of ``Column`` will define behavior for more specific
+    columns. Subclasses of ``BaseColumn`` will define behavior for more specific
     data types (e.g. ``StringColumn``, ``FloatColumn`` etc).
 
     :param bool required:
         python object.  (e.g. on ``Writer``). It defaults to ``None``.
 
     """
-    def __init__(self, required=True, default=None, format=None):
+    def __init__(self, required=True, default=None, none_value=None, format=None):
         self.required = required
         self.default = default
+        self.none_value = none_value
         self.format = format
 
     def attach_to_class(self, cls, name, dialect):
 
         dialect.add_column(self)
 
-    def check_value(self, value):
-        """
-        Returns value. If there is no input value (e.g. input is an empty
-        string) and there is a default value, it sets input equal to the
-        default value.
-
-        If a value is required and there is no default value, then, if there is
-        no input value it raises a ValueError.
-        """
-        if self.default and value == "":
-            value = self.default
-        elif self.required and value == "":
-            msg = "Column values cannot be emptly."
-            msg += "<Column: {0:s}> - <Row: {1:d}> - <Value: {2:s}>"
-            raise ValueError(msg.format(self.name, self.cls._linenumber, str(value)))
-
-        if not self.required:
-            if self.default:
-                value = self.default
-
-        return value
-
     def to_python(self, value):
         raise NotImplementedError
 
         raise NotImplementedError
 
 
-class StringColumn(Column):
+class StringColumn(BaseColumn):
     """
-    A string column within a CSV file. It extends Column and it support all the
-    arguments that ``Column`` supports plus the following ones:
+    A string column within a CSV file. It extends ``BaseColumn``. It supports
+    all the arguments that ``BaseColumn`` supports plus the following ones:
 
     :param int max_length:
         Maximum length constraint. Must be a positive integer.  Defaults to
         raised.
 
     """
-    def __init__(self, required=True, default=None, format="%s",
+    def __init__(self, required=True, default=None, none_value=None, format=None,
                  min_length=None, max_length=None):
-        super().__init__(required=required, default=default, format=format)
+        super().__init__(required=required, default=default,
+                         none_value=none_value, format=format)
 
         self.min_length = min_length
         self.max_length = max_length
 
-        self.validate_constraints()
+        self.are_constraints_valid()
 
-    def validate_constraints(self):
-        """ Constraint Validation. """
+    def are_constraints_valid(self):
+        """ Check if the constraints are self-conflicting. """
         # Ensure that length constraints are positive numbers
         if (self.min_length is not None) and self.min_length <= 0:
             msg = "min_length constraint must be a positive integer."
             raise ConstraintError(msg)
 
         # Ensure that max_length > min_length
-        if self.min_length and self.max_length:
+        if (self.min_length is not None) and (self.max_length is not None):
             if self.min_length > self.max_length:
                 msg = ("Column '{0:s}' has conflicting constraints. "
                        "Minimum length exceeds maximum length.")
                 raise ConstraintError(msg.format(self.name))
 
+        return False
+
     def are_constraints_satisfied(self, value):
+        """ Check if column value satisfies the given constraints. """
         error = False
         if self.min_length and len(value) < self.min_length:
             error = True
             msg = "Column values must have less than %d characters. " % self.max_length
 
         if error:
-            msg += "<Column: {0:s}> - <Row: {1:d}> - <Value: {2:s}>"
+            msg += _ERROR_MESSAGE
             raise ValueError(msg.format(self.name, self.cls._linenumber, str(value)))
         else:
             return True
 
     def to_python(self, value):
-        value = self.check_value(value)
+        # If the value matches the specified ``none_value`` then we return
+        # ``None``.
+        if self.none_value is not None:
+            if value == self.none_value:
+                return None
+
+        # If there is no input value (e.g. input is an empty string) and there
+        # is a default value, we set value equal to the default.
+        # If there is no input value and input is required, then a ``ValueError``
+        # is raised, unless a default value has been specified.
+        # If there is no input value, there is no default value and an input
+        # value is not required we return ``None``.
+        # Essentially, if there is a default value, then the ``required``
+        # constraint is ignored.
+        if value == "":
+            if self.default is not None:
+                value = self.default
+            elif self.required == True:
+                msg = "Column values cannot be empty." + _ERROR_MESSAGE
+                raise ValueError(msg.format(self.name, self.cls._linenumber, str(value)))
+            else:
+                return None
 
         if self.are_constraints_satisfied(value):
             return value
 
 
-class NumericColumn(Column):
+class BaseNumericColumn(BaseColumn):
     """
     An abstract numeric column within a CSV file. It extends
-    :py:class:`~csvmodels.columns.Column` and it serves as a Base for
+    :py:class:`~csvmodels.columns.BaseColumn` and it serves as a Base for
     :py:class:`~csvmodels.columns.IntegerColumn`,
     :py:class:`~csvmodels.columns.FloatColumn` and
     :py:class:`~csvmodels.columns.DecimalColumn`. It supports all the arguments
         factor. It defaults to ``None``.
 
     """
-    def __init__(self, required=True, default=None, format=None,
+    def __init__(self, required=True, default=None, none_value=None, format=None,
                  positive=False, negative=False, non_zero=False,
                  maximum=None, minimum=None, multiplier=None):
 
-        super().__init__(required=required, default=default, format=format)
+        super().__init__(required=required, default=default,
+                         none_value=none_value, format=format)
 
         self.positive = positive
         self.negative = negative
         self.minimum = minimum
         self.multiplier = multiplier
 
-        self.validate_constraints()
+        self.are_constraints_valid()
 
-    def validate_constraints(self):
+    def are_constraints_valid(self):
         # try to convert constraint to a numeral
         if self.multiplier is not None:
             try:
                 raise ConstraintError("Conflicting constraints. "
                                       "Minimum value must be lower than maximum.")
 
+        return True
+
     def are_constraints_satisfied(self, value):
         error = False
         if self.positive and value < 0:
             msg = "Column values must not be lower than <%f>. " % self.minimum
 
         if error:
-            msg += "<Column: {0:s}> - <Row: {1:d}> - <Value: {2:s}>"
+            msg += _ERROR_MESSAGE
             raise ValueError(msg.format(self.name, self.cls._linenumber, str(value)))
         else:
             return True
 
 
-class IntegerColumn(NumericColumn):
+class IntegerColumn(BaseNumericColumn):
     """
     An integer column within a CSV file. It supports all the arguments that are
     supported by :py:class:`BaseNumericColumn`.
     """
-    def __init__(self, required=True, default=None, format="%d",
+    def __init__(self, required=True, default=None, none_value=None, format="%d",
                  positive=False, negative=False, non_zero=False,
                  maximum=None, minimum=None, multiplier=None):
-        super().__init__(required=required, default=default, format=format,
+        super().__init__(required=required, default=default,
+                         none_value=none_value, format=format,
                          positive=positive, negative=negative, non_zero=non_zero,
                          maximum=maximum, minimum=minimum, multiplier=multiplier)
 
     def to_python(self, value):
-        value = self.check_value(value)
+        """
+        Convert column value to an Integer. If the conversion fails it raises a
+        ``ValueError``.
 
+        :param str value:
+            Column value
+
+        """
+        # If the value matches the specified ``none_value`` then we return
+        # ``None``.
+        if self.none_value is not None:
+            if value == self.none_value:
+                return None
+
+        # If there is no input value (e.g. input is an empty string) and there
+        # is a default value, we set value equal to the default.
+        # If there is no input value and input is required, then a ``ValueError``
+        # is raised, unless a default value has been specified.
+        # If there is no input value, there is no default value and an input
+        # value is not required we return ``None``.
+        # Essentially, if there is a default value, then the ``required``
+        # constraint is ignored.
+        if value == "":
+            if self.default is not None:
+                value = self.default
+            elif self.required == True:
+                msg = "Column values cannot be empty." + _ERROR_MESSAGE
+                raise ValueError(msg.format(self.name, self.cls._linenumber, str(value)))
+            else:
+                return None
+
+        # Convert value to integer.
         try:
             value = int(value)
         except ValueError:
-            msg = ("Couldn't convert value to Integer. "
-                   "<Column: {0:s}> - <Row: {1:d}> - <Value: {2:s}>")
+            msg = "Couldn't convert value to Integer. " + _ERROR_MESSAGE
             raise ValueError(msg.format(self.name, self.cls._linenumber, str(value)))
 
         if self.are_constraints_satisfied(value):
             return value
 
 
-class FloatColumn(NumericColumn):
+class FloatColumn(BaseNumericColumn):
     """
     A float column within a CSV file. It supports all the arguments that are
     supported by :py:class:`BaseNumericColumn`.
     """
-    def __init__(self, required=True, default=None, format="%.3f",
+    def __init__(self, required=True, default=None, none_value=None, format="%.3f",
                  positive=False, negative=False, non_zero=False,
                  maximum=None, minimum=None, multiplier=None):
-        super().__init__(required=required, default=default, format=format,
+        super().__init__(required=required, default=default,
+                         none_value=none_value, format=format,
                          positive=positive, negative=negative, non_zero=non_zero,
                          maximum=maximum, minimum=minimum, multiplier=multiplier)
 
     def to_python(self, value):
-        value = self.check_value(value)
+        """
+        Convert column value to a Float. If the conversion fails it raises a
+        ``ValueError``.
 
-        # Lots of files use comma as a decimal sign.
-        # I am changing this to period.
-        # Perhaps this is going to pose problems down the road,
-        # but I am not sure if there is a clean solution.
-        value = value.replace(",", ".")
+        :param str value:
+            Column value
 
+        """
+        # Some csv files use comma as a decimal sign. I am converting them to period.
+        # Perhaps this is going to pose problems down the road, but I am not
+        # sure if there is a clean solution. Maybe something with regex would
+        # work better.
+        try:
+            value = value.replace(",", ".")
+        except AttributeError:
+            pass
+
+        # If the value matches the specified ``none_value`` then we return
+        # ``None``.
+        if self.none_value is not None:
+            if value == self.none_value:
+                return None
+
+        # If there is no input value (e.g. input is an empty string) and there
+        # is a default value, we set value equal to the default.
+        # If there is no input value and input is required, then a ``ValueError``
+        # is raised, unless a default value has been specified.
+        # If there is no input value, there is no default value and an input
+        # value is not required we return ``None``.
+        # Essentially, if there is a default value, then the ``required``
+        # constraint is ignored.
+        if value == "":
+            if self.default is not None:
+                value = self.default
+            elif self.required == True:
+                msg = "Column values cannot be empty." + _ERROR_MESSAGE
+                raise ValueError(msg.format(self.name, self.cls._linenumber, str(value)))
+            else:
+                return None
+
+        # Convert value to float.
         try:
             value = float(value)
         except ValueError:
-            msg = ("Couldn't convert value to Float. "
-                   "<Column: {0:s}> - <Row: {1:d}> - <Value: {2:s}>")
+            msg = "Couldn't convert value to Float. " + _ERROR_MESSAGE
             raise ValueError(msg.format(self.name, self.cls._linenumber, str(value)))
 
-        if self.multiplier:
+        # apply the multiplier
+        if self.multiplier is not None:
             value *= self.multiplier
 
         if self.are_constraints_satisfied(value):

File csvmodels/options.py

         header line or not. It defaults to False.
 
     """
-    def __init__(self, has_header_row=False, **kwargs):
-        self.has_header_row = has_header_row
+    def __init__(self, number_of_header_rows=1, **kwargs):
+        self.number_of_header_rows = number_of_header_rows
         self.csv_dialect = kwargs
         self.columns = []
 

File tests_csvmodels/test_columns.py

 import unittest
 
 from csvmodels import RowModel
-from csvmodels import StringColumn, IntegerColumn, FloatColumn
-from csvmodels.columns import Column, NumericColumn, ConstraintError
+from csvmodels import StringColumn, IntegerColumn, FloatColumn, BooleanColumn
+from csvmodels.columns import BaseColumn, BaseNumericColumn, ConstraintError
+
+
+class RequiredAndDefaultMixin(object):
+    def test_values_are_required_and_there_is_input(self):
+        """ If required = True and value not empty, return value. """
+        self.column.required = True
+        return_value = self.column.to_python(self.valid_input)
+        self.assertEqual(return_value, self.valid_output)
+
+    def test_values_are_required_and_there_is_no_input(self):
+        """ If required = True and value is empty, raise ValueError. """
+        self.column.required = True
+        self.assertRaises(ValueError, self.column.to_python, "")
+
+    def test_values_are_not_required_and_there_is_input(self):
+        """ If required = False and value not empty, return value. """
+        self.column.required = False
+        return_value = self.column.to_python(self.valid_input)
+        self.assertEqual(return_value, self.valid_output)
+
+    def test_values_are_not_required_and_there_is_no_input(self):
+        """ If required = False and value is empty, return None. """
+        self.column.required = False
+        return_value = self.column.to_python(self.empty_string)
+        self.assertEqual(return_value, None)
+
+    def test_default_is_specified_and_there_is_input(self):
+        """ If default exists and value not empty, return value. """
+        self.column.default = self.default
+        return_value = self.column.to_python(self.valid_input)
+        self.assertEqual(return_value, self.valid_output)
+
+    def test_default_is_specified_and_there_is_no_input(self):
+        """ If default exists and value is empty, return default. """
+        default_value = self.column.default = self.default
+        return_value = self.column.to_python(self.empty_string)
+        self.assertEqual(return_value, default_value)
+
+    def test_none_value_is_specified_and_the_value_matches_it(self):
+        """ If none_value != None and value == none_value return None. """
+        self.column.none_value = "-"
+        return_value = self.column.to_python("-")
+        self.assertEqual(return_value, None)
+
+    def test_to_python(self):
+        """ Convert input to a valid python object with the correct type. """
+        return_value = self.column.to_python(self.valid_input)
+        self.assertEqual(return_value, self.valid_output)
 
 
 class ColumnTestCase(unittest.TestCase):
         self.column.cls = RowModel()
         self.column.cls._linenumber = 152
 
+
+class StringColumnTests(ColumnTestCase, RequiredAndDefaultMixin):
     def setUp(self):
-        self.create_column_object(Column)
+        self.create_column_object(BaseColumn)
 
         self.empty_string = ""
         self.valid_input = "aaaa"
         self.valid_output = "aaaa"
         self.default = "bbbb"
 
-    def test_values_are_required_and_there_is_input(self):
-        """ If required = True and value not empty, return value. """
-        self.column.required = True
-        value = self.valid_input
-        return_value = self.column.check_value(value)
-        self.assertEqual(return_value, value)
-
-    def test_values_are_required_and_there_is_no_input(self):
-        """ If required = True and value is empty, raise ValueError. """
-        self.column.required = True
-        value = self.empty_string
-        self.assertRaises(ValueError, self.column.check_value, value)
-
-    def test_values_are_not_required_and_there_is_input(self):
-        """ If required = False and value not empty, return value. """
-        self.column.required = False
-        value = self.valid_input
-        return_value = self.column.check_value(value)
-        self.assertEqual(return_value, value)
-
-    def test_values_are_not_required_and_there_is_no_input(self):
-        """ If required = False and value is empty, return empty . """
-        self.column.required = False
-        value = self.empty_string
-        return_value = self.column.check_value(value)
-        self.assertEqual(return_value, value)
-
-    def test_default_is_specified_and_there_is_input(self):
-        """ If default exists and value not empty, return value. """
-        default = self.column.default = self.default
-        value = self.valid_input
-        return_value = self.column.check_value(value)
-        self.assertEqual(return_value, value)
-
-    def test_default_is_specified_and_there_is_no_input(self):
-        """ If default exists and value is empty, return default. """
-        default = self.column.default = self.default
-        value = self.empty_string
-        return_value = self.column.check_value(value)
-        self.assertEqual(return_value, default)
-
-    def test_to_python(self):
-        """ Convert input to a valid python object with the correct type. """
-        value = self.valid_input
-        output = self.valid_output
-        return_value = self.column.to_python(value)
-        self.assertEqual(return_value, output)
-
-
-class StringColumnTests(ColumnTestCase):
-    def setUp(self):
-        super().setUp()
-
         self.create_column_object(StringColumn)
 
     def test_non_positive_min_length(self):
         """ If min_length <= 0 raise a ConstraintError. """
         for min_length in (0, -1):
             self.column.min_length = min_length
-            self.assertRaises(ConstraintError, self.column.validate_constraints)
+            self.assertRaises(ConstraintError, self.column.are_constraints_valid)
 
     def test_non_positive_max_length(self):
         """ If max_length <= 0 raise a ConstraintError. """
         for max_length in (0, -1):
             self.column.max_length = max_length
-            self.assertRaises(ConstraintError, self.column.validate_constraints)
+            self.assertRaises(ConstraintError, self.column.are_constraints_valid)
 
     def test_min_length_greater_than_max_length(self):
         """ If min_length > max_length raise a ConstraintError. """
         self.column.min_length = 5
         self.column.max_length = 3
-        self.assertRaises(ConstraintError, self.column.validate_constraints)
+        self.assertRaises(ConstraintError, self.column.are_constraints_valid)
 
     def test_min_length_violation(self):
         """ If min_length is violated raise a ValueError. """
 
 class NumericColumnTests(ColumnTestCase):
     def setUp(self):
-        self.create_column_object(NumericColumn)
+        self.create_column_object(BaseNumericColumn)
 
         self.empty_string = ""
         self.valid_input = "1"
         """ If both positive and negative constraints are set, raise an Exception. """
         self.column.positive = True
         self.column.negative = True
-        self.assertRaises(ConstraintError, self.column.validate_constraints)
+        self.assertRaises(ConstraintError, self.column.are_constraints_valid)
 
     def test_set_minimum_greater_than_maximum(self):
         """ Setting minimum > maximum raises a ConstraintError. """
         self.column.minimum = 100
         self.column.maximum = -100
-        self.assertRaises(ConstraintError, self.column.validate_constraints)
+        self.assertRaises(ConstraintError, self.column.are_constraints_valid)
 
     def test_positive_constraint_violation(self):
         """ Values greater than maximum raise a ConstraintError. """
         self.column.minimum = 0
         self.assertRaises(ValueError, self.column.are_constraints_satisfied, -10)
 
-    def test_to_python_invalid_data(self):
-        """ Passing invalid data raises a ValueError """
-        self.assertRaises(ValueError, self.column.to_python, self.invalid_input)
 
-
-class IntegerColumnTests(NumericColumnTests):
+class IntegerColumnTests(NumericColumnTests, RequiredAndDefaultMixin):
     def setUp(self):
         super().setUp()
 
         self.create_column_object(IntegerColumn)
 
+    def test_to_python_invalid_data(self):
+        """ Passing invalid data raises a ValueError """
+        self.assertRaises(ValueError, self.column.to_python, self.invalid_input)
 
-class FloatColumnTests(NumericColumnTests):
+
+class FloatColumnTests(NumericColumnTests, RequiredAndDefaultMixin):
     def setUp(self):
         super().setUp()
 
         self.create_column_object(FloatColumn)
 
+    def test_to_python_invalid_data(self):
+        """ Passing invalid data raises a ValueError """
+        self.assertRaises(ValueError, self.column.to_python, self.invalid_input)
+
     def test_invalid_multiplier(self):
         """ When multiplier is non-numeric raise a ValueError. """
         self.column.multiplier = "asdf"
-        self.assertRaises(ValueError, self.column.validate_constraints)
+        self.assertRaises(ValueError, self.column.are_constraints_valid)
 
     def test_valid_multiplier(self):
         """ When a valid multiplier is provided return the multiplied value."""
         multiplier = self.column.multiplier = 0.001
-        value = 100
-        output = multiplier * value
-        return_value = self.column.to_python(value)
+        output = multiplier * float(self.valid_input)
+        return_value = self.column.to_python(self.valid_input)
         self.assertEqual(return_value, output)