Commits

Mark Howison committed a327c82

sampler is more robust to nan and -inf

Comments (0)

Files changed (2)

 class Sample:
 
     def __init__(self, likelihood, prior, length, ncontigs):
-        self.posterior = likelihood + prior
+        self.odds = likelihood + prior
         self.likelihood = likelihood
         self.prior = prior
         self.length = length
     def odds_ratio(self, current):
         """
         """
-        return (not math.isinf(self.likelihood)) and \
-               ((self.posterior - current.posterior) > math.log(random()))
+        if math.isinf(self.odds):
+            if math.isinf(current.odds):
+                if not (math.isinf(self.likelihood) or math.isinf(current.likelihood)):
+                    return((self.likelihood - current.likelihood) > math.log(random()))
+                else:
+                    return(random() > 0.5)
+            else:
+                return False
+        elif math.isinf(current.odds):
+            return True
+        else:
+            return((self.odds - current.odds) > math.log(random()))
+
+
+class PriorDistribution:
+
+    def __init__(self, scale, length, ncontigs):
+        """
+        Create a prior distribution from the estimated genome size
+        and number of contigs.
+        """
+        self.scale = float(scale)
+        self.length_pdf = scipy.stats.gamma(scale, scale=length/scale).pdf
+        self.ncontigs_pdf = scipy.stats.gamma(scale, scale=ncontigs/scale).pdf
+
+    def log_prob(self, length, ncontigs):
+        p1 = self.length_pdf(length)
+        p2 = self.ncontigs_pdf(ncontigs)
+        if p1 <= 0 or math.isnan(p1) or p2 <= 0 or math.isnan(p2):
+            return float("-inf")
+        else:
+            return math.log(p1) + math.log(p2)
 
 
 class Sampler:
         if flatprior:
             self.prior = (lambda length, ncontigs : 1.0)
         else:
-            # Create a prior distribution from the estimated genome size
-            # and number of contigs.
-            scale = float(scale)
-            length_pdf = scipy.stats.gamma(scale, scale=length/scale).pdf
-            ncontigs_pdf = scipy.stats.gamma(scale, scale=ncontigs/scale).pdf
-            self.prior = (lambda length, ncontigs :
-                math.log(length_pdf(length) * ncontigs_pdf(ncontigs)))
+            self.prior = PriorDistribution(scale, length, ncontigs).log_prob
 
         self.threads = get_resource('threads')
         self.insert_mean = insert_mean
 
 samples_schema = (
     ('id', 'INTEGER PRIMARY KEY NOT NULL'),
-    ('posterior', 'REAL'),
+    ('odds', 'REAL'),
     ('likelihood', 'REAL'),
     ('prior', 'REAL'),
     ('length', 'INTEGER'),
     _db = sqlite3.connect(path, isolation_level=None)
     if not exists:
         _db.executescript(_create_sql(
-            'samples', samples_schema, ['posterior', 'accept', 'runtime']))
+            'samples', samples_schema, ['odds', 'accept', 'runtime']))
         _db.executescript(_create_sql(
             'states', states_schema, ['id', 'edge']))
         return None
     for row in _db.execute(sql):
        yield row[0]
 
+def add_debug(id, sample, perturb, state, accept, runtime):
+    print id, sample.odds, sample.likelihood, sample.prior, sample.length, sample.ncontigs, perturb, accept, runtime
 
-def add(id, sample, perturb, state, accept, runtime):
+def add_db(id, sample, perturb, state, accept, runtime):
     global _inserts
     # Insert with transaction.
     if _inserts % 100 == 0:
         _db.execute("BEGIN")
     _db.execute(
         "INSERT INTO samples VALUES (?,?,?,?,?,?,?,?,?);",
-        (id, sample.posterior, sample.likelihood, sample.prior, sample.length, sample.ncontigs, perturb, accept, runtime))
+        (id, sample.odds, sample.likelihood, sample.prior, sample.length, sample.ncontigs, perturb, accept, runtime))
     for e in state:
         _db.execute("INSERT INTO states VALUES(?,?);", (id, e))
     if _inserts % 100 == 99:
         _db.execute("COMMIT")
     _inserts += 1
 
+add = add_debug
+