#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Denes Turei EMBL 2018
# turei.denes@gmail.com
import os
import itertools
from lipyd import sample
# Path components of the archive root; joined with ``os.path.join`` below.
basedir = ('/', 'home', 'denes', 'archive', 'cihan')
# Path components, relative to an experiment directory, of its MGF directory.
mgfpath = ('data', 'mgf')
# First-level directory names under ``basedir`` that are not processed.
skip = {'blanks and standards'}

# Earlier fraction selection. It used to be assigned to ``fractions`` and was
# immediately overwritten by the list below, so it never took effect; it is
# kept under its own name for reference only. Compared to ``fractions``, the
# first group of every pair contains additional labels, while the second
# group is identical.
fractions_wide = [
    (('A8', 'A9', 'A10'), ('A11', 'A12')),
    (('A6', 'A7', 'A8', 'A9', 'A12'), ('A10', 'A11')),
    (('A6', 'A7', 'A8', 'A9', 'A10'), ('A11', 'A12')),
    (('A9', 'A10', 'B2'), ('A11', 'A12', 'B1')),
]

# Active fraction selection: one pair of label tuples per entry. Entry
# ``i // 2`` is unpacked into ``samples.peak_size_filter`` for the i-th
# experiment, i.e. two consecutive experiments share one entry.
# NOTE(review): presumably the labels are fraction (well) IDs and the two
# groups are the arguments ``peak_size_filter`` compares -- confirm their
# meaning against lipyd.
fractions = [
    (('A8', 'A9'), ('A11', 'A12')),
    (('A6', 'A7', 'A8'), ('A10', 'A11')),
    (('A6', 'A7', 'A8', 'A9'), ('A11', 'A12')),
    (('A9',), ('A11', 'A12', 'B1')),
]
# Experiment directories live two levels below the archive root:
# <basedir>/<dir0>/<dir1>.
#
# * Directory listings are sorted because ``os.listdir`` returns entries in
#   arbitrary order, while the processing loop below picks the fraction set
#   by the position of the experiment (``fractions[i // 2]``).
# * Names in ``skip`` are filtered out *before* their contents are listed.
# * Entries that are not directories are ignored at both levels instead of
#   raising ``NotADirectoryError`` or yielding a bogus experiment path.
root = os.path.join(*basedir)
experiments = [
    os.path.join(root, dir0, dir1)
    for dir0 in sorted(os.listdir(root))
    if dir0 not in skip and os.path.isdir(os.path.join(root, dir0))
    for dir1 in sorted(os.listdir(os.path.join(root, dir0)))
    if os.path.isdir(os.path.join(root, dir0, dir1))
]

# Peak table expected in every experiment directory.
peaksfiles = [
    os.path.join(expdir, 'feature.csv')
    for expdir in experiments
]

# Directory holding the MGF files of every experiment.
mgfdirs = [
    os.path.join(expdir, *mgfpath)
    for expdir in experiments
]
# Headgroup names whose presence among the MS2 identifications is reported.
headgroups_of_interest = {'BMP', 'PG'}

# Process every experiment: read the peak table, run the filters, the
# database lookup and the MS2 identification, export the results, then print
# the m/z of each feature having an identification with one of the
# headgroups above.
for exp_idx, (peaks_path, mgf_dir) in enumerate(zip(peaksfiles, mgfdirs)):

    reader = sample.SampleReader(input_type='peaks', fname=peaks_path)

    sampleset_args = {
        'ms2_param': {'mgfdir': mgf_dir},
        'ms2_format': 'mgf',
    }
    samples = reader.get_sampleset(sampleset_args=sampleset_args)

    samples.basic_filters()
    # Two consecutive experiments share one entry of ``fractions``.
    samples.peak_size_filter(*fractions[exp_idx // 2])
    samples.database_lookup()
    samples.ms2_identify()

    samples.export_table(
        fname='%s.results.tsv' % peaks_path,
        variables=['peaksize'],
    )

    for feat_idx in range(len(samples)):

        # Each feature carries an iterable of mappings whose values are
        # iterables of identification records; flatten both levels lazily.
        identity_maps = samples.feattrs.ms2_identities[feat_idx]
        records = itertools.chain.from_iterable(
            itertools.chain.from_iterable(
                mapping.values() for mapping in identity_maps
            )
        )

        if any(rec.hg.main in headgroups_of_interest for rec in records):
            print(peaks_path)
            print(samples.mzs[feat_idx])