Wiki

Clone wiki

enterobase-web / api_download_assemblies

Top level links:

Downloading Assemblies

#!html
http://enterobase.warwick.ac.uk/api/v2.0/%s/straindata?serotype=Agona&assembly_status=Assembled&limit=%d&only_fields=strain_name,download_fasta_link

Key points to remember:

  • The straindata resource includes information about the Assemblies, Strain metadata and ST information. This allows us to search for assemblies where the strain metadata says the serovar is 'Agona', using the query parameter '?serotype=Agona' as in the URL above.
  • The 'only_fields' parameter restricts the response to the fields you specify, making your queries much faster. Since we only want the link to download the FASTA file and the strain name (to rename our FASTA file), we use '&only_fields=strain_name,download_fasta_link'. Note that the field names are delimited by commas.
  • If you already have Assembly barcodes, you can fetch this easily through the Assemblies endpoint directly.
#!python

import os
import urllib2
import json
import base64
import sys
from urllib2 import HTTPError
import logging

# A valid EnteroBase API token is required. It is read from the
# ENTEROBASE_API_TOKEN environment variable and is None when that is unset.
API_TOKEN = os.environ.get('ENTEROBASE_API_TOKEN')
# Base URL of the server; the API path is appended to it.
SERVER_ADDRESS = 'http://enterobase.warwick.ac.uk'
# Database name placed in the API path (/api/v2.0/<DATABASE>/...).
DATABASE = 'senterica'
# Value sent as the 'serotype' query parameter.
SEROTYPE = 'Agona'

def __create_request(request_str):
    """Return a urllib2.Request for *request_str* carrying HTTP Basic auth.

    The API token is used as the user name and the password is left empty.
    """
    credentials = '%s:%s' % (API_TOKEN, '')
    # b64encode never inserts line breaks, so the value is header-safe as is.
    auth_value = "Basic %s" % base64.b64encode(credentials)
    authorised = urllib2.Request(request_str)
    authorised.add_header("Authorization", auth_value)
    return authorised


# Download the FASTA assembly of up to 40 assembled strains of the configured
# serotype into ./temp, one file per strain named '<strain_name>.fasta'.

# Stop with a clear message when no token is configured, rather than sending
# a request authenticated as the literal string 'None'.
if not API_TOKEN:
    logging.error('No API token found: set the ENTEROBASE_API_TOKEN '
                  'environment variable')
    sys.exit(1)

if not os.path.exists('temp'):
    os.mkdir('temp')
address = SERVER_ADDRESS + '/api/v2.0/%s/straindata?serotype=%s'\
    '&assembly_status=Assembled&limit=%d&only_fields=strain_name,download_fasta_link' \
    %(DATABASE, SEROTYPE, 40)
try:
    response = urllib2.urlopen(__create_request(address))
    try:
        data = json.load(response)
    finally:
        # urllib2 responses are not context managers in Python 2.
        response.close()
    # 'straindata' maps a record key to its fields; only the fields are used.
    for record_values in data['straindata'].values():
        # Keep the file inside 'temp' even if the strain name contains a
        # path separator.
        fasta_name = ('%s.fasta' % record_values['strain_name'])
        fasta_name = fasta_name.replace('/', '_').replace(os.sep, '_')
        download = urllib2.urlopen(
            __create_request(record_values['download_fasta_link']))
        try:
            # Binary mode writes the payload exactly as received (no newline
            # translation on Windows).
            with open(os.path.join('temp', fasta_name), 'wb') as out_ass:
                out_ass.write(download.read())
        finally:
            download.close()
except HTTPError as response_error:
    # Any HTTP failure (listing or a single download) ends the run; the
    # server's response body is logged as the reason.
    logging.error('%d %s. <%s>\n Reason: %s',
                  response_error.code,
                  response_error.msg,
                  response_error.geturl(),
                  response_error.read())

Updated