1. okfn
  2. ckanext-importer

Commits

David Read  committed e7ef0a7

[all]: Copied files out of ckan repo. Quite broken - mothballed for the moment.

  • Participants
  • Branches default

Comments (0)

Files changed (12)

File .hgignore

View file
+syntax: glob
+# generic
+*.pyc
+*.swp
+*.swo
+.DS_Store
+ckanext_importer.egg-info/*
+sandbox/*
+*~
+
+# pylons
+development.ini*
+sstore/*
+data/*
+pylons_data/*
+
+# other
+distribute-0.6.10.tar.gz
+migrate/*
+build/*
+tmp/*
+*.ini
+.noseids

File ckanext/__init__.py

View file
+# This is a namespace package: declare it with pkg_resources (setuptools) when
+# that is importable, otherwise fall back to the standard library's pkgutil so
+# that other "ckanext.*" distributions can share this top-level package.
+try:
+    import pkg_resources
+    pkg_resources.declare_namespace(__name__)
+except ImportError:
+    import pkgutil
+    __path__ = pkgutil.extend_path(__path__, __name__)

File ckanext/importer/__init__.py

View file
+# Namespace-package declaration: use pkg_resources (setuptools) when it is
+# importable, otherwise fall back to the standard library's pkgutil.
+# NOTE(review): this module also defines the Importer plugin below, whereas a
+# namespace package's __init__.py is normally expected to contain nothing but
+# this declaration - confirm whether ckanext.importer needs to be a namespace.
+try:
+    import pkg_resources
+    pkg_resources.declare_namespace(__name__)
+except ImportError:
+    import pkgutil
+    __path__ = pkgutil.extend_path(__path__, __name__)
+
+
+import logging
+
+from ckan.plugins.core import SingletonPlugin, implements
+from ckan.plugins.interfaces import IRoutes
+
+log = logging.getLogger(__name__)
+
+
+class Importer(SingletonPlugin):
+    """
+    Add the Importer into the Routing system
+    """
+
+    implements(IRoutes)
+
+    def after_map(self, map):
+        map.connect('/importer', controller='ckanext.importer.controller:ImporterController', action='index')
+
+    def before_map(self, map):
+        return map

File ckanext/importer/controller.py

View file
+import logging
+import os
+import uuid
+
+from pylons import request, response, session, config, tmpl_context as c
+from pylons.controllers.util import abort, redirect_to
+from ckan.lib.package_saver import PackageSaver, ValidationException
+import genshi
+
+from ckan.lib.base import *
+from licenses import LicenseList
+
+# Module-level logger, named after this module.
+log = logging.getLogger(__name__)
+
+# Directory that holds uploaded import files between the preview and import
+# requests. It lives under config['pylons.cache_dir'] and is created when this
+# module is first imported.
+importer_dir = os.path.join(config['pylons.cache_dir'], 'importer')
+if not os.path.exists(importer_dir):
+    os.makedirs(importer_dir) 
+
+def importer_to_fs_dict(pkg_dict):
+    fs_dict = {}
+    prefix = 'Package--'
+    for key, value in pkg_dict.items():
+        fs_key = prefix + key
+        fs_dict[fs_key] = value
+    return fs_dict
+
+class ImporterController(BaseController):
+
+    def get_authorizer(self):
+        if not has_attr(self, '_authorizer'):
+            import ckan.authz
+            self._authorizer = ckan.authz.Authorizer()
+        return self._authorizer
+
+    authorizer = property(get_authorizer)
+
+    def index(self):
+        return render('importer/importer.html')
+
+    def preview(self):
+        if not c.user:
+            abort(401, gettext('Need to login before importing.'))
+        c.import_previews = []
+        import ckan.lib.spreadsheet_importer as importer
+        params = dict(request.params)
+        if not params.has_key('file'):
+            c.error = _('Need to specify a filename.')
+            return render('importer/importer.html')                
+        if not hasattr(params['file'], 'value'):
+            c.error = _('Did not receive file successfully.')
+            return render('importer/importer.html')
+        file_buf = params['file'].value
+        # save as temp file for when you do import
+        self._save_tempfile(file_buf)
+        if not file_buf:
+            c.error = _('File \'%s\' not found.') % params['file'].filename
+            return render('importer/importer.html')
+        try:
+            importer = importer.SpreadsheetPackageImporter(buf=file_buf)
+        except importer.ImportException, e:
+            c.error = _('Error importing file \'%s\' as Excel or CSV format: %s') % (params['file'].filename, e)
+            return render('importer/importer.html')
+        c.import_filename = params['file'].filename.lstrip(os.sep)
+        if params.has_key('log_message'):
+            c.log_message = params['log_message']
+        c.fs_list = []
+        c.import_previews = []
+        count = 0
+        all_errors = []
+        for fs in self._get_fs(importer):
+            count += 1
+            errors, warnings, existing_pkg = self._validate(fs)
+            if errors:
+                all_errors.append(errors)
+            if count < 5 or errors or warnings:
+                c.import_previews.append(self.package_render(fs, errors, warnings))
+            else:
+                c.pkgs_suppressed
+            c.fs_list.append(fs)
+        c.errors = len(all_errors)
+        c.num_pkgs = len(c.fs_list)
+        return render('importer/preview.html')
+
+    def do_import(self):
+        import ckan.lib.spreadsheet_importer as importer
+        file_buf = self._load_tempfile()
+        try:
+            importer = importer.SpreadsheetPackageImporter(buf=file_buf)
+        except importer.ImportException, e:
+            c.error = _('Error importing file \'%s\' as Excel or CSV format: %s') % (params['file'].filename, e)
+            return render('importer/importer.html')
+        if 'log_message' in request.params:
+            log_message = request.params.getone('log_message')
+        else:
+            log_message = ''
+        count = 0
+        for fs in self._get_fs(importer):
+            errors, warnings, existing_pkg = self._validate(fs)
+            if errors:
+                print "Errors: ", errors
+                abort(400, gettext('Errors remain - see preview.'))
+            try:
+                rev = model.repo.new_revision()
+                rev.author = c.user
+                rev.message = log_message
+                fs.sync()
+            except Exception, inst:
+                model.Session.rollback()
+                raise
+            
+            if not existing_pkg:
+                new_pkg = fs.model
+                user = model.User.by_name(c.user)
+                if not user:
+                    abort(401, gettext('Problem with user account.'))
+                admins = [user]
+                model.setup_default_user_roles(new_pkg, admins)
+
+            count += 1
+
+        model.Session.commit()
+        c.message = ungettext('Imported %i package.', 'Imported %i packages.', count) % count
+        return render('importer/result.html')
+
+    def _get_fs(self, importer):
+        for index, pkg_dict in enumerate(importer.pkg_dict()):
+            pkg = model.Package.by_name(pkg_dict['name'])
+            if pkg:
+                existing_dict = self._get_package_dict(pkg)
+                pkg_id = pkg.id
+            else:
+                existing_dict = self._get_package_dict()
+                pkg_id = ''
+                pkg = model.Package
+            fa_dict = self._edit_package_dict(existing_dict, pkg_dict, id=pkg_id)
+            fs = self._get_standard_package_fieldset()
+            fs = fs.bind(pkg, data=fa_dict)
+            model.Session.flush()
+            yield fs
+        
+
+    def _save_tempfile(self, buf):
+        tmp_filename = str(uuid.uuid4())
+        tmp_dir = importer_dir
+        tmp_filepath = os.path.join(tmp_dir, tmp_filename)
+        f_obj = open(tmp_filepath, 'wb')
+        f_obj.write(buf)
+        f_obj.close()
+        session['import_filename'] = tmp_filename
+        session.save()
+
+    def _load_tempfile(self):
+        tmp_filename = session['import_filename']
+        if not tmp_filename:
+            raise ImportException(_('Could not access import file any more.'))
+        tmp_dir = importer_dir
+        tmp_filepath = os.path.join(tmp_dir, tmp_filename)
+        f_obj = open(tmp_filepath, 'rb')
+        buf = f_obj.read()
+        f_obj.close() 
+        return buf
+
+    def _validate(self, fs):
+        errors = []
+        warnings = []
+        if not c.user:
+            abort(302, gettext('User is not logged in'))
+        else:
+            user = model.User.by_name(c.user)
+            if not user:
+                abort(302, gettext('Error with user account. Log out and log in again.'))
+        pkg = model.Package.by_name(fs.name.value)
+        if pkg:
+            warnings.append(_('Package %s already exists in database. Import will edit the fields.') % fs.name.value)
+            am_authz = self.authorizer.am_authorized(c, model.Action.EDIT, pkg)
+            if not am_authz:
+                 errors.append(_('User %r unauthorized to edit existing package %s') % (c.user, fs.name.value))
+        validation = fs.validate()
+        if not validation:
+            for field, err_list in fs.errors.items():
+                errors.append("%s:%s" % (field.name, ";".join(err_list)))
+        errors = ', '.join(errors)
+        warnings = ', '.join(errors)
+        return errors, warnings, pkg
+
+    def package_render(self, fs, errors, warnings):
+        try:
+            PackageSaver().render_preview(fs) # create a new package for now
+            preview = h.literal(render('package/read_core.html'))
+        except ValidationException, error:
+            c.error, fs = error.args
+            preview = h.literal('<li>Errors: %s</li>\n') % c.error
+        return preview
+

File ckanext/importer/tests/__init__.py

Empty file added.

File ckanext/importer/tests/samples/test_importer_bis_example.xls

Binary file added.

File ckanext/importer/tests/samples/test_importer_example.csv

View file
+"name","title","resource-0-url","resource-0-format","resource-0-description","tags"
+"wikipedia","Wikipedia","http://static.wikipedia.org/downloads/2008-06/en/wikipedia-en-html.tar.7z","html","In English","encyclopedia reference"
+"tviv","TV IV","http://tviv.org/Category:Grids","","","tv encyclopedia"

File ckanext/importer/tests/samples/test_importer_example.xls

Binary file added.

File ckanext/importer/tests/samples/test_importer_full.csv

View file
+"name","title","version","url","author","author_email","maintainer","maintainer_email","notes","state","license","tags","groups","ckan_url","relationships","genre","original media","resource-0-url","resource-0-format","resource-0-description","resource-0-hash","resource-1-url","resource-1-format","resource-1-description","resource-1-hash"
+"annakarenina","A Novel By Tolstoy","0.7a","http://www.annakarenina.com","","","","","Some test notes
+
+### A 3rd level heading
+
+**Some bolded text.**
+
+*Some italicized text.*
+
+Foreign characters:
+u with umlaut ü
+66-style quote “
+foreign word: thümb
+ 
+Needs escaping:
+left arrow <
+
+<http://ckan.net/>
+
+","active","OKD Compliant::Other (Open)","russian tolstoy","david roger","http://test.ckan.net/package/annakarenina","","romantic novel","book","http://www.annakarenina.com/download/x=1&y=2","plain text","Full text. Needs escaping: "" Umlaut: ü","abc123","http://www.annakarenina.com/index.json","json","Index of the novel","def456"
+"warandpeace","A Wonderful Story","","","","","","","","active","","russian","david","http://test.ckan.net/package/warandpeace","","","","","","","","","","",""

File ckanext/importer/tests/samples/test_importer_full.xls

Binary file added.

File ckanext/importer/tests/test_controller.py

View file
+import os
+
+from pylons import config
+
+from ckan.tests import *
+import ckan.model as model
+from ckan.tests.functional.base import FunctionalTestCase
+
+# Sample spreadsheets live in the "samples" directory next to this test module.
+TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+SAMPLE_PATH = os.path.join(TEST_DIR, 'samples')
+# Sample file paths without extension; tests append XL_EXTENSION or CSV_EXTENSION.
+EXAMPLE_TESTFILE_FILEPATH = os.path.join(SAMPLE_PATH, 'test_importer_example')
+FULL_TESTFILE_FILEPATH = os.path.join(SAMPLE_PATH, 'test_importer_full')
+XL_EXTENSION = '.xls'
+CSV_EXTENSION = '.csv'
+EXTENSIONS = [CSV_EXTENSION, XL_EXTENSION]
+
+# User name that requests are made as unless a test passes another one.
+DEFAULT_USER = 'annafan'
+
+# Controller reference passed to url_for() in the tests below.
+IMPORTER_CONTROLLER = 'ckanext.importer.controller:ImporterController'
+
+class TestImporter(FunctionalTestCase):
+    @classmethod
+    def setup_class(self):
+        CreateTestData.create()
+        assert model.User.by_name(unicode(DEFAULT_USER))
+
+    @classmethod
+    def teardown_class(self):
+        model.repo.clean_db()
+
+    def teardown(self):
+        model.Session.remove()
+
+    def test_0_index(self):
+        import pdb; pdb.set_trace()
+        offset = url_for(controller=IMPORTER_CONTROLLER)
+        res = self.app.get(offset)
+        assert 'Importer' in res, res
+
+    def test_1_not_logged_in(self):
+        res = self._submit_file(EXAMPLE_TESTFILE_FILEPATH + XL_EXTENSION, username=None, status=302)
+
+    def test_1_not_logged_in_midway(self):
+        res = self._submit_file(EXAMPLE_TESTFILE_FILEPATH + XL_EXTENSION, status=200)
+        res_ = self._strip_res(res)
+        assert 'Import Preview' in res, res_
+        res = self._import(res, 'test', username=None, status=302)
+        pkg = model.Package.by_name(u'wikipedia')
+        assert not pkg
+
+    def test_2_import_example_testfile(self):
+        res = self._submit_file(EXAMPLE_TESTFILE_FILEPATH + XL_EXTENSION, status=200)
+        res_ = self._strip_res(res)
+        assert 'Import Preview' in res, res_
+        assert '2 packages read' in res, res_
+        assert 'wikipedia' in res_, res_
+        assert 'tviv' in res_, res_
+        res = self._import(res, 'test', status=200)
+        assert 'Imported 2 packages' in res, self.main_div(res)
+
+    # TODO get working: overwriting existing package
+    def _test_3_import_full_testfile(self):
+        res = self._submit_file(FULL_TESTFILE_FILEPATH + XL_EXTENSION, status=200)
+        res_ = self._strip_res(res)
+        assert 'Import Preview' in res, res_
+        assert '2 packages read' in res, res_
+        assert 'name: annakarenina' in res_, res_
+        assert 'name: warandpeace' in res_, res_
+        res = self._import(res, 'test', status=200)
+        assert 'Imported 2 packages' in res, self.main_div(res)
+
+    def _submit_file(self, filepath, username=DEFAULT_USER, status=None):
+        assert os.path.exists(filepath)
+        filebuf = open(filepath, 'rb').read()
+        offset = url_for(controller=IMPORTER_CONTROLLER, action='preview')
+        upload_file = ('file', filepath, filebuf)
+        extra_environ = {'REMOTE_USER':username} if username else {}
+        res = self.app.post(offset, upload_files=([upload_file]),
+                            extra_environ=extra_environ,
+                            status=status)
+        return res
+        
+    def _import(self, res, log_message, username=DEFAULT_USER, status=None):
+        form = res.forms['import-preview']
+        form['log_message'] = log_message
+        extra_environ = {'REMOTE_USER':username} if username else {}
+        res = form.submit('import', extra_environ=extra_environ,
+                          status=status)
+        if not status or status == 200:
+            assert 'Import Result' in res, self.main_div(res)
+        return res
+
+    def _strip_res(self, res):
+        return self.main_div(res).replace('<strong>', '').replace('</strong>', '')
+        

File setup.py

View file
+from setuptools import setup, find_packages
+import sys, os
+
+version = '0.1'
+
+setup(
+	name='ckanext-importer',
+	version=version,
+	description="UI for bulk metadata import",
+	long_description="""\
+	""",
+	classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
+	keywords='',
+        author='Open Knowledge Foundation',
+        author_email='info@okfn.org',
+	url='http://ckan.org/',
+	license='AGPL',
+	packages=find_packages(exclude=['ez_setup', 'examples', 'tests']),
+	namespace_packages=['ckanext', 'ckanext.importer'],
+	include_package_data=True,
+	zip_safe=False,
+	install_requires=[
+		# -*- Extra requirements: -*-
+	],
+	entry_points=\
+	"""
+        [ckan.plugins]
+        importer=ckanext.importer:Importer
+	""",
+)