# Source
#
# scrapy-vrk / src / vrk / pipelines.py

# This file is part of scrapy-vrk.
#
# scrapy-vrk is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# scrapy-vrk is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with scrapy-vrk. If not, see <http://www.gnu.org/licenses/>.

from scrapy.contrib.exporter import CsvItemExporter


class VrkPipeline(object):
    """Item pipeline that exports items to CSV files.

    One CSV file is created per (spider, item class) pair, named
    ``<spider.name>_<item class name lower-cased>.csv`` and opened relative
    to the current working directory. Files are opened lazily, the first
    time an item of a given class is seen for a given spider.
    """

    def __init__(self):
        # spider name -> {lower-cased item class name -> (exporter, file)}
        self.files = {}

    def close_spider(self, spider):
        """Finalize the CSV files of *spider*.

        This is the item-pipeline hook name Scrapy calls when a spider
        closes; it delegates to :meth:`spider_closed`, which is kept for
        callers that connect it to the ``spider_closed`` signal themselves.
        """
        self.spider_closed(spider)

    def spider_closed(self, spider):
        """Finish exporting and close every file opened for *spider*.

        Only the files registered under ``spider.name`` are touched. The
        method is safe to call for a spider that never produced an item and
        safe to call more than once for the same spider.
        """
        # pop() with a default keeps this idempotent and avoids a KeyError
        # when the spider yielded no items.
        exporters = self.files.pop(spider.name, {})
        for exporter, f in exporters.values():
            try:
                exporter.finish_exporting()
            finally:
                # Release the file handle even if the exporter fails.
                f.close()

    def _exporter(self, item, spider):
        """Return the CSV exporter for *item*'s class, creating it on first use.

        :param item: the item about to be exported; its class name selects
            the output file.
        :param spider: the spider that produced the item; ``spider.name``
            is used as the file-name prefix.
        :returns: a started ``CsvItemExporter`` bound to the matching file.
        """
        spider_files = self.files.setdefault(spider.name, {})
        name = item.__class__.__name__.lower()
        if name not in spider_files:
            f = open('%s_%s.csv' % (spider.name, name), 'w+b')
            try:
                exporter = CsvItemExporter(f)
                exporter.start_exporting()
            except Exception:
                # Do not leak the handle if the exporter cannot be set up.
                f.close()
                raise
            spider_files[name] = (exporter, f)
        else:
            exporter, f = spider_files[name]
        return exporter

    def process_item(self, item, spider):
        """Write *item* to its CSV file and return it unchanged."""
        self._exporter(item, spider).export_item(item)
        return item