Commits

Faheem Mitha committed ad98971

Remove sequencegroup.

  • Participants
  • Parent commits 83cee91

Comments (0)

Files changed (2)

corrmodel/dbschema.py

     def __repr__(self):
         return '<Motifstat %s>'%self.id
 
-class Sequencegroup(object):
-    def __repr__(self):
-        return '<Sequencegroup %s>'%str(self.id)
-
 class Motifgroup(object):
-    def __init__(self, id, subset, model_id, simulated, size=None):
-        self.id = id
+    def __init__(self, subset, model_id, simulated, size=None):
         # the desired subset of the RSS sequence
         self.subset = subset
         self.model_id = model_id
         return '<Data %s>'%self.id
 
 class Simgroup(object):
-    def __init__(self, id, rows, cols, model_id=None):
-        self.id = id
+    def __init__(self, rows, cols, model_id=None):
         self.rows = rows
         self.cols = cols
         self.model_id = model_id
     mapper(Motifstat, motifstat_table)
     return motifstat_table, Motifstat
 
-def make_sequencegroup_table(meta, schema, name='sequencegroup'):
-    sequencegroup_table = Table(
-        name, meta,
-        Column('id', Integer, primary_key=True),
-        schema = schema,
-        )
-    mapper(Sequencegroup, sequencegroup_table)
-    return sequencegroup_table, Sequencegroup
-
 def make_motifgroup_table(meta, schema, name='motifgroup'):
     """Each row of this table corresponds to a group of RSS sequences"""
     motifgroup_table = Table(
         name, meta,
-        Column('id',  Integer, ForeignKey(schema+'.sequencegroup.id', onupdate='CASCADE', ondelete='CASCADE'), index=True, primary_key=True),
+        Column('id',  Integer, index=True, primary_key=True),
         Column('subset', postgresql.ARRAY(Integer, as_tuple=True)),
         Column('model_id', Integer, ForeignKey(schema+'.model.id', onupdate='CASCADE', ondelete='CASCADE'), index=True),
         Column('simulated', Boolean),
     return datasubgroup_table, Datasubgroup
 
 def make_gene_table(meta, schema, name='gene'):
-    """Each row of this table corresponds to a data file"""
+    """Each row of this table corresponds to a gene"""
     gene_table = Table(
         name, meta,
         Column('id', Integer, primary_key=True),
     for a sequence in the context of a crossvalidation process. Here
     it suffices to identify the crossvalidation process and the
     enclosing round of crossvalidation by specifying the
-    'datasubgroup_id'. The 'gene_id' gives the sequence data file
-    from which the sequence comes. The 'sequence' is the actual
-    sequence string. The 'index' is the index/location of the sequence
-    within the data file. The 'seqindex' is the id of the sequence, if
-    it is a RSS, in the 'motif' table. The 'seqindex' is empty if the
-    sequence is not an RSS. 'neglogpp' is the negative log posterior
-    predictive probability of the sequence. 'pvalue' is the posterior
-    predictive pvalue of the sequence. The 'neglogpp' and 'pvalue' are
-    calculated relative to the crossvalidation process specified by
+    'datasubgroup_id'. The 'gene_id' gives the gene from which the
+    sequence comes. The 'sequence' is the actual sequence string. The
+    'index' is the index/location of the sequence within the gene. The
+    'seqindex' is the id of the sequence, if it is an RSS, in the
+    'motif' table. The 'seqindex' is empty if the sequence is not an
+    RSS. 'neglogpp' is the negative log posterior predictive
+    probability of the sequence. 'pvalue' is the posterior predictive
+    pvalue of the sequence. The 'neglogpp' and 'pvalue' are calculated
+    relative to the crossvalidation process specified by
     'datasubgroup_id', specifically, using the 'training dataset'.
     """
     data_table = Table(
     """
     simgroup_table = Table(
         name, meta,
-        Column('id',  Integer, ForeignKey(schema+'.sequencegroup.id', onupdate='CASCADE', ondelete='CASCADE'), index=True, primary_key=True),
+        Column('id',  Integer, index=True, primary_key=True),
         Column('created', TIMESTAMP(), default=now()),
         Column('rows', Integer),
         Column('cols', Integer),

corrmodel/load.py

     from utils import file_not_exist, create_engine_wrapper, get_conf, pg_url
     from dbutils import schema_exists
     from sqlalchemy.orm import mapper, relation, sessionmaker
-    from dbschema import make_sequencegroup_table, make_simgroup_table, make_simdata_table, make_simresult_table, make_model_table
+    from dbschema import make_simgroup_table, make_simdata_table, make_simresult_table, make_model_table
     import cPickle, random, cpplib, getmodel
     conf = get_conf()
     dbuser = conf["dbuser"]
     print "making tables corresponding to schema %s and dbstring %s"%(schema, dbstring)
     from sqlalchemy import MetaData
     meta = MetaData()
-    sequencegroup_table, Sequencegroup = make_sequencegroup_table(meta, schema)
     simgroup_table, Simgroup = make_simgroup_table(meta, schema)
     simdata_table, Simdata = make_simdata_table(meta, schema)
     simresult_table, Simresult = make_simresult_table(meta, schema)
     d['db'] = db
     d['meta'] = meta
     d['Model'] = Model
-    d['Sequencegroup'] = Sequencegroup
     d['Simgroup'] = Simgroup
     d['Simdata'] = Simdata
     d['Simresult'] = Simresult
     from utils import file_not_exist, create_engine_wrapper, get_conf, pg_url
     from dbutils import schema_exists
     from sqlalchemy.orm import mapper, relation, sessionmaker
-    from dbschema import make_crossvalgroup_table, make_crossval_table, make_motifstat_table, make_motifstatgroup_table, make_sequencegroup_table, make_motifgroup_table, make_motifgroup_table_index, make_motif_table, make_model_table, make_datagroup_table, make_datasubgroup_table, make_gene_table, make_data_table
+    from dbschema import make_crossvalgroup_table, make_crossval_table, make_motifstat_table, make_motifstatgroup_table, make_motifgroup_table, make_motifgroup_table_index, make_motif_table, make_model_table, make_datagroup_table, make_datasubgroup_table, make_gene_table, make_data_table
     import cPickle, random, cpplib, getmodel
     confval = create_db()
     dbname = confval["dbname"]
     crossval_table, Crossval = make_crossval_table(meta, schema)
     motifstat_table, Motifstat = make_motifstat_table(meta, schema)
     motifstatgroup_table, Motifstatgroup = make_motifstatgroup_table(meta, schema)
-    sequencegroup_table, Sequencegroup = make_sequencegroup_table(meta, schema)
     motifgroup_table, Motifgroup = make_motifgroup_table(meta, schema)
     make_motifgroup_table_index(motifgroup_table)
     motif_table, Motif = make_motif_table(meta, schema)
     d['Crossval'] = Crossval
     d['Motifstatgroup'] = Motifstatgroup
     d['Motifstat'] = Motifstat
-    d['Sequencegroup'] = Sequencegroup
     d['Motifgroup'] = Motifgroup
     d['Motif'] = Motif
     d['Model'] = Model
     d = create_motif_tables(schema)
     db = d['db']
     Model = d['Model']
-    Sequencegroup = d['Sequencegroup']
     Motifgroup = d['Motifgroup']
     Motif = d['Motif']
     conf = get_conf()
     model, model_id = write_model_to_db(schema, cols, model)
 
     # Add Motifgroup object
-    seqg = Sequencegroup()
-    session.add(seqg)
     session.commit()
     if args.motifsimnum is not None and not simulated:
         sys.exit("--motifsimnum option is only used when dataset is simulated")
         motifsimnum = conf["motifsimnum"]
     else:
         motifsimnum = None
-    mgroup = Motifgroup(seqg.id, subset, model_id, simulated, motifsimnum)
+    mgroup = Motifgroup(subset, model_id, simulated, motifsimnum)
     session.add(mgroup)
     session.commit()
     motifgroup_id = mgroup.id
     db = confval["db"]
     meta = tabledict["meta"]
     Model = tabledict["Model"]
-    Sequencegroup = tabledict["Sequencegroup"]
     Simgroup = tabledict["Simgroup"]
     Simdata = tabledict["Simdata"]
     Session = sessionmaker(bind=db)
     session = Session()
-    seqg = Sequencegroup()
-    session.add(seqg)
     session.commit()
     modelquery = session.query(Model)
     # In this case, the model gets added to the db
     colnum, model, model_id = write_config_model_to_db(schema, colnum, model_id, default_model, empty_model)
 
     mod = cpplib.cpp_model(colnum, model)
-    simg = Simgroup(seqg.id, rownum, colnum, model_id)
+    simg = Simgroup(rownum, colnum, model_id)
     session.add(simg)
     session.commit()
     arr = numpy.array(cpplib.cpp_gendata(rownum, mod))
     db = confval["db"]
     meta = tabledict["meta"]
     Model = tabledict["Model"]
-    Sequencegroup = tabledict["Sequencegroup"]
     Simgroup = tabledict["Simgroup"]
     Simdata = tabledict["Simdata"]
     Simresult = tabledict["Simresult"]