Commits

Anonymous committed 887b6c6

add support for retrying jobs

Comments (0)

Files changed (5)

rbuild/buildmany.py

     paramHelp = '[package]*'
     docs = {
         'late-commit': 'wait until all builds are done before committing',
-        'workers': 'number of active jobs (default 30)'
+        'workers': 'number of active jobs (default 30)',
+        'retries': 'number of times to retry a build job (default 0)',
     }
 
     def addLocalParameters(self, argDef):
         argDef['late-commit'] = command.NO_PARAM
         argDef['workers'] = command.ONE_PARAM
+        argDef['retries'] = command.ONE_PARAM
 
     def runCommand(self, handle, argSet, args):
         lateCommit = argSet.pop('late-commit', False)
         workers = int(argSet.pop('workers', 30))
+        retries = int(argSet.pop('retries', 0))
         _, pkgList = self.requireParameters(args, allowExtra=True)
         results = handle.MirrorBall.buildmany(pkgList, lateCommit=lateCommit,
-                workers=workers)
+                workers=workers, retries=retries)
 
         if not results:
             raise errors.PluginError('pacakges failed to build')
 
         log.addRootLogger()
 
-    def buildmany(self, packages, lateCommit=False, workers=None):
+    def buildmany(self, packages, lateCommit=False, workers=None, retries=None):
         pkgs = set([ (x, self.conarycfg.buildLabel.asString(), None)
             for x in packages ])
         return self.builder.buildmany(pkgs, lateCommit=lateCommit,
-                workers=workers)
+                workers=workers, retries=retries)

updatebot/build/build.py

         ret = self._formatOutput(trvMap)
         return ret
 
-    def buildmany(self, troveSpecs, lateCommit=False, workers=None):
+    def buildmany(self, troveSpecs, lateCommit=False, workers=None,
+            retries=None):
         """
         Build many troves in separate jobs.
         @param troveSpecs: list of trove specs
         if not workers:
             workers = 30
 
+        if not retries:
+            retries = 0
+
         if self._cfg.updateMode == 'current':
-            dispatcher = PromoteDispatcher(self, workers)
+            dispatcher = PromoteDispatcher(self, workers, retries=retries)
         elif not lateCommit:
-            dispatcher = Dispatcher(self, workers)
+            dispatcher = Dispatcher(self, workers, retries=retries)
         else:
-            dispatcher = NonCommittalDispatcher(self, workers)
+            dispatcher = NonCommittalDispatcher(self, workers, retries=retries)
         return dispatcher.buildmany(troveSpecs)
 
     def buildsplitarch(self, troveSpecs):

updatebot/build/common.py

 
     workerClass = None
 
-    def __init__(self, threadArgs):
+    def __init__(self, threadArgs, retries=0):
         if type(threadArgs) not in (list, tuple, set):
             threadArgs = (threadArgs, )
         self._threadArgs = threadArgs
         self._workers = {}
         self._errors = []
 
+        self._retries = Retries(retries)
+
     def addJob(self, job):
         """
         Add a job to the worker pool.
             return
 
         self._workers[worker.workerId] = worker
+        self._retries.addJob(worker.workerId)
         worker.daemon = True
         worker.start()
 
             #assert not self._workers[job].isAlive()
             #raise error
             log.error('[%s] FAILED with exception: %s' % (job, error))
-            self._errors.append((job, error))
+
+            workerId = self._workers[job].workerId
+            if self._retries.retry(workerId):
+                log.info('retrying %s' % (job, ))
+                self._workers.pop(job, None)
+                self.addJob(workerId)
+            else:
+                self._errors.append((job, error))
 
         return data
+
+
+class Retries(object):
+    """
+    Track how many times each job has been retried against a fixed limit.
+    """
+
+    def __init__(self, retries):
+        """
+        @param retries: maximum number of retries allowed for each job
+        """
+        self.retries = retries
+        # jobId: number of retries consumed so far
+        self._jobs = {}
+
+    def addJob(self, jobId):
+        """
+        Start tracking a job. A job that is already tracked keeps its
+        current count, so adding it again does not reset its retries.
+        @param jobId: identifier of the job to track
+        """
+        if jobId not in self._jobs:
+            self._jobs[jobId] = 0
+
+    def retry(self, jobId):
+        """
+        Consume one retry for a tracked job.
+        @param jobId: identifier previously passed to addJob
+        @return: True if the job may be retried, False once the retry
+                 limit has been reached
+        """
+        if self._jobs[jobId] + 1 > self.retries:
+            return False
+        self._jobs[jobId] += 1
+        # Callers branch on the result, so success has to be reported
+        # explicitly; falling through would return None and the job
+        # would never actually be retried.
+        return True

updatebot/build/dispatcher.py

 
     _completed = ()
 
-    def __init__(self, builder, maxSlots):
+    def __init__(self, builder, maxSlots, retries=0):
         self._builder = builder
         self._slots = util.BoundedCounter(0, maxSlots, maxSlots)
+        self._retries = retries
 
         # jobId: (trv, status, commitData)
         self._jobs = {}
     _monitorClass = JobMonitor
     _committerClass = JobCommitter
 
-    def __init__(self, builder, maxSlots):
-        AbstractDispatcher.__init__(self, builder, maxSlots)
+    def __init__(self, builder, maxSlots, retries=0):
+        AbstractDispatcher.__init__(self, builder, maxSlots, retries=retries)
 
         self._startSlots = util.BoundedCounter(0, 10, 10)
         #self._commitSlots = util.BoundedCounter(0, 2, 2)
         self._commitSlots = util.BoundedCounter(0, 1, 1)
 
-        self._starter = self._starterClass(self._builder)
-        self._monitor = self._monitorClass(self._builder._helper.client)
-        self._committer = self._committerClass(self._builder)
+        self._starter = self._starterClass((self._builder, ),
+                retries=self._retries)
+        self._monitor = self._monitorClass((self._builder._helper.client, ),
+                retries=self._retries)
+        self._committer = self._committerClass((self._builder, ),
+                retries=self._retries)
 
     def buildmany(self, troveSpecs):
         """
         buildjob.JOB_STATE_BUILT,
     )
 
-    def __init__(self, builder, maxSlots):
-        Dispatcher.__init__(self, builder, maxSlots)
+    def __init__(self, builder, maxSlots, retries=0):
+        Dispatcher.__init__(self, builder, maxSlots, retries=retries)
 
         # Disable commits by removing all commit slots.
         self._commitSlots = util.BoundedCounter(0, 0, 0)
     versions of the same package.
     """
 
-    def __init__(self, builder, maxSlots, waitForAllVersions=False):
-        Dispatcher.__init__(self, builder, maxSlots)
+    def __init__(self, builder, maxSlots, waitForAllVersions=False, retries=0):
+        Dispatcher.__init__(self, builder, maxSlots, retries=retries)
 
         self._waitForAllVersions = waitForAllVersions
 
     _starterClass = JobRebuildStarter
 
     def __init__(self, builder, maxSlots, useLatest=None,
-        additionalResolveTroves=None):
+        additionalResolveTroves=None, retries=0):
         MultiVersionDispatcher.__init__(self, builder, maxSlots,
-            waitForAllVersions=True)
+            waitForAllVersions=True, retries=retries)
 
         self._starter = self._starterClass((builder, useLatest,
             additionalResolveTroves))
 
     _promoterClass = JobPromoter
 
-    def __init__(self, builder, maxSlots):
-        Dispatcher.__init__(self, builder, maxSlots)
+    def __init__(self, builder, maxSlots, retries=0):
+        Dispatcher.__init__(self, builder, maxSlots, retries=retries)
 
         self._promoteSlots = util.BoundedCounter(0, 1, 1)
 
         self._promoter = self._promoterClass((self._builder._conaryhelper,
-            self._builder._cfg.targetLabel))
+            self._builder._cfg.targetLabel), retries=retries)
 
         self._status = {}