# Created by: Juan Martín Barrios Vargas
# Last modified: (date not recorded)
#
# Usage:
#   ./convert_gbif.py convert-from-GBIFfile --input-filename ~/Downloads/0068712-160910150852091/occurrence.txt --output-filename salida.csv
#!/usr/bin/env python
import fire
from collections import namedtuple
import csv
def _transform_fossil(dw_item):
    """Return the fossil flag for a record, derived from its basis of record.

    Reads ``dw_item.dw_basisOfRecord`` and returns ``'SI'`` for
    ``FOSSIL_SPECIMEN``, ``'NO'`` for preserved specimens and every
    observation type, and an empty string for any other value.
    """
    # Every non-fossil basis shares the same flag, so build those entries
    # in one go and then add the single fossil case.
    flags = dict.fromkeys(
        (
            'PRESERVED_SPECIMEN',
            'HUMAN_OBSERVATION',
            'OBSERVATION',
            'MACHINE_OBSERVATION',
        ),
        'NO',
    )
    flags['FOSSIL_SPECIMEN'] = 'SI'
    return flags.get(dw_item.dw_basisOfRecord, '')
def _transform_basisOfRecord(dw_item):
    """Return the specimen-origin label for a record's basis of record.

    Reads ``dw_item.dw_basisOfRecord`` and returns ``'Colectado'`` for
    ``PRESERVED_SPECIMEN``, ``'Observado'`` for the three observation
    types, and ``'NO DISPONIBLE'`` for any other value.
    """
    observation_kinds = ('HUMAN_OBSERVATION', 'OBSERVATION', 'MACHINE_OBSERVATION')
    labels = {'PRESERVED_SPECIMEN': 'Colectado'}
    labels.update((kind, 'Observado') for kind in observation_kinds)
    return labels.get(dw_item.dw_basisOfRecord, 'NO DISPONIBLE')
class convertGBIF():
    """Convert a tab-separated GBIF occurrence file into a geoportal CSV.

    The public method :meth:`convert_from_GBIFfile` reads the input file,
    translates every record through ``_GEOPORTAL_SCHEMA`` and writes the
    result as CSV.
    """

    # Output column -> source. A string names an attribute of the parsed
    # record (the input column name prefixed with ``dw_``); a callable
    # receives the whole record and returns the derived value.
    _GEOPORTAL_SCHEMA = {
        'idejemplar': 'dw_gbifID',
        'longitud': 'dw_decimalLongitude',
        'latitud': 'dw_decimalLatitude',
        'paismapa': 'dw_countryCode',
        'estadomapa': 'dw_stateProvince',
        'altitudmapa': 'dw_elevation',
        'reinovalido': 'dw_kingdom',
        'phylumdivisionvalido': 'dw_phylum',
        'clasevalida': 'dw_class',
        'ordenvalido': 'dw_order',
        'familiavalida': 'dw_family',
        'generovalido': 'dw_genus',
        'especievalida': lambda x: (' '.join([x.dw_genus, x.dw_specificEpithet])).strip(),
        'categoriainfraespecievalida': 'dw_infraspecificEpithet',
        'especievalidabusqueda': lambda x: (' '.join([x.dw_genus, x.dw_specificEpithet])).strip(),
        'proyecto': 'dw_datasetKey',
        'urlejemplar': lambda x: 'http://www.gbif.org/occurrence/{s.dw_gbifID}'.format(s=x),
        'diacolecta': 'dw_day',
        'mescolecta': 'dw_month',
        'aniocolecta': 'dw_year',
        # Kept as lambdas so the module-level helpers are looked up when a
        # row is translated, not when the class body is evaluated.
        'ejemplarfosil': lambda x: _transform_fossil(x),
        'procedenciaejemplar': lambda x: _transform_basisOfRecord(x)
    }

    def _translate_gbif_geoportal(self, dw_item):
        """Build one output row from a parsed input record.

        Args:
            dw_item: Record exposing the ``dw_``-prefixed attributes that
                ``_GEOPORTAL_SCHEMA`` refers to.

        Returns:
            dict mapping every schema column to its value for this record.

        Raises:
            AttributeError: If the record lacks an attribute the schema needs.
        """
        return {
            column: source(dw_item) if callable(source) else getattr(dw_item, source)
            for column, source in self._GEOPORTAL_SCHEMA.items()
        }

    def convert_from_GBIFfile(self, input_filename, output_filename):
        """ Convert to csv from GBIF file

        The first line of the input is taken as the tab-separated column
        names; each following non-empty line is one record.

        Args:
            input_filename: Path of the tab-separated input file.
            output_filename: Path of the CSV file to write (overwritten).

        Raises:
            TypeError: If a record does not have exactly as many fields as
                the header line.
            AttributeError: If the header lacks a column the schema needs.
        """
        # NOTE(review): both files use the platform default text encoding,
        # as before; confirm whether an explicit encoding should be passed.
        # The output is opened first (as originally) so it is created even
        # when the input cannot be opened; ``with`` closes both on error.
        with open(output_filename, 'w', newline='') as of, \
                open(input_filename, 'r') as f:
            geo_writer = csv.DictWriter(
                of, list(self._GEOPORTAL_SCHEMA), dialect='unix')
            geo_writer.writeheader()

            header = f.readline().strip('\n').split('\t')
            dw_entry = namedtuple('DWCA', ['dw_' + name for name in header])

            for line in f:
                # Use the line the iterator just produced. Calling
                # f.readline() here as well would consume a second line per
                # iteration and silently drop every other record.
                raw = line.rstrip('\n')
                if not raw:
                    # Blank line (e.g. trailing newline at end of file).
                    continue
                entry = dw_entry(*raw.split('\t'))
                geo_writer.writerow(self._translate_gbif_geoportal(entry))
def main():
    """Expose ``convertGBIF`` as a command-line interface through ``fire``."""
    fire.Fire(component=convertGBIF)


# Only start the CLI when this file is executed as a script.
if __name__ == "__main__":
    main()
|