Snippets

Juan Martín Barrios Vargas Convertir GBIF a datos para SNIB

Created by Juan Martín Barrios Vargas last modified

Uso

./convert_gbif.py convert-from-GBIFfile --input-filename ~/Downloads/0068712-160910150852091/occurrence.txt --output-filename salida.csv

#!/usr/bin/env python
import fire
from collections import namedtuple
import csv


def _transform_fossil(dw_item):
    """Return the fossil flag for a GBIF record.

    Looks at ``dw_item.dw_basisOfRecord`` and answers ``'SI'`` for a
    fossil specimen, ``'NO'`` for a preserved specimen or any of the
    observation kinds, and an empty string for every other value.
    """
    non_fossil_kinds = (
        'PRESERVED_SPECIMEN',
        'HUMAN_OBSERVATION',
        'OBSERVATION',
        'MACHINE_OBSERVATION',
    )
    flag_by_basis = dict.fromkeys(non_fossil_kinds, 'NO')
    flag_by_basis['FOSSIL_SPECIMEN'] = 'SI'

    # Values outside the table yield '' rather than an error.
    return flag_by_basis.get(dw_item.dw_basisOfRecord, '')


def _transform_basisOfRecord(dw_item):
    """Translate a GBIF basisOfRecord into the provenance label.

    Reads ``dw_item.dw_basisOfRecord``: a preserved specimen becomes
    ``'Colectado'``, any of the three observation kinds becomes
    ``'Observado'``, and every other value becomes ``'NO DISPONIBLE'``.
    """
    observation_kinds = ('HUMAN_OBSERVATION', 'OBSERVATION', 'MACHINE_OBSERVATION')
    label_by_basis = {kind: 'Observado' for kind in observation_kinds}
    label_by_basis['PRESERVED_SPECIMEN'] = 'Colectado'

    return label_by_basis.get(dw_item.dw_basisOfRecord, 'NO DISPONIBLE')


class convertGBIF():
    """Convert a tab-separated GBIF occurrence file into a geoportal CSV.

    The output layout is driven entirely by ``_GEOPORTAL_SCHEMA``: one
    CSV column per key, in the order the keys are declared.
    """

    # Output column -> source of its value. A string names the attribute
    # of the input record to copy verbatim (input column names carry a
    # 'dw_' prefix, added in convert_from_GBIFfile); a callable receives
    # the whole record and returns the value to write.
    _GEOPORTAL_SCHEMA = {
        'idejemplar': 'dw_gbifID',
        'longitud': 'dw_decimalLongitude',
        'latitud': 'dw_decimalLatitude',
        'paismapa': 'dw_countryCode',
        'estadomapa': 'dw_stateProvince',
        'altitudmapa': 'dw_elevation',
        'reinovalido': 'dw_kingdom',
        'phylumdivisionvalido': 'dw_phylum',
        'clasevalida': 'dw_class',
        'ordenvalido': 'dw_order',
        'familiavalida': 'dw_family',
        'generovalido': 'dw_genus',
        # Binomial name; strip() removes the stray space left when the
        # genus or the epithet is empty.
        'especievalida': lambda x: (' '.join([x.dw_genus, x.dw_specificEpithet])).strip(),
        'categoriainfraespecievalida': 'dw_infraspecificEpithet',
        'especievalidabusqueda': lambda x: (' '.join([x.dw_genus, x.dw_specificEpithet])).strip(),
        'proyecto': 'dw_datasetKey',
        'urlejemplar': lambda x: 'http://www.gbif.org/occurrence/{s.dw_gbifID}'.format(s=x),
        'diacolecta': 'dw_day',
        'mescolecta': 'dw_month',
        'aniocolecta': 'dw_year',
        # Kept as lambdas so the module-level helpers are looked up at
        # call time rather than when the class body is executed.
        'ejemplarfosil': lambda x: _transform_fossil(x),
        'procedenciaejemplar': lambda x: _transform_basisOfRecord(x),
    }

    def _translate_gbif_geoportal(self, dw_item):
        """Build one output row from one input record.

        Args:
            dw_item: Record exposing the input columns as attributes
                named ``dw_<column>``.

        Returns:
            dict mapping every key of ``_GEOPORTAL_SCHEMA`` to its value
            for this record.

        Raises:
            AttributeError: If the record lacks a column the schema needs.
        """
        return {
            column: source(dw_item) if callable(source) else getattr(dw_item, source)
            for column, source in self._GEOPORTAL_SCHEMA.items()
        }

    def convert_from_GBIFfile(self, input_filename, output_filename):
        """Convert to csv from GBIF file.

        The first line of the input is taken as the tab-separated header;
        every following non-empty line is one record and produces exactly
        one output row. Both files are handled as UTF-8.

        Args:
            input_filename: Path of the tab-separated GBIF occurrence file.
            output_filename: Path of the CSV file to create or overwrite.

        Raises:
            TypeError: If a record does not have as many fields as the
                header.
            AttributeError: If the header lacks a column the schema needs.
        """
        # Context managers guarantee both files are closed even when a
        # malformed record aborts the conversion half-way.
        with open(output_filename, 'w', newline='', encoding='utf-8') as of, \
                open(input_filename, 'r', encoding='utf-8') as f:
            geo_writer = csv.DictWriter(of, list(self._GEOPORTAL_SCHEMA), dialect='unix')
            geo_writer.writeheader()

            header = f.readline().rstrip('\n').split('\t')
            # The 'dw_' prefix keeps column names such as 'class' or
            # 'order' usable as namedtuple fields.
            dw_entry = namedtuple('DWCA', ['dw_' + name for name in header])

            for line in f:
                # Use the line handed out by the iteration itself; reading
                # again from the file here would drop every other record.
                line = line.rstrip('\n')
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue

                entry = dw_entry(*line.split('\t'))
                geo_writer.writerow(self._translate_gbif_geoportal(entry))


def main():
    """Hand the converter class to Fire to serve it from the command line."""
    cli_component = convertGBIF
    fire.Fire(cli_component)


# Only start the command-line interface when executed as a script.
if __name__ == "__main__":
    main()

Comments (0)

HTTPS SSH

You can clone a snippet to your computer for local editing. Learn more.