Commits

Anonymous committed 43c5419

Issue number: QUARTZ-331
Issues with cluster management refactoring

git-svn-id: http://svn.opensymphony.com/svn/quartz/branches/b_quartz_1-5-x@332 69f7d36a-ea1c-0410-88ea-9fd03e4c9665

  • Parent commits 9ecd20b
  • Branches b_quartz_1-5-x

Files changed (3)

src/java/org/quartz/impl/jdbcjobstore/JobStoreCMT.java

         try {
             conn = getNonManagedTXConnection();
 
-            // checkin, and make sure there is work to be done before we aquire 
-        	// the lock (since that is expensive, and almost never occurs)
-            List failedRecords = clusterCheckIn(conn);
-            if (failedRecords.size() > 0) {
+            // Other than the first time, always check in first to make sure there is
+            // work to be done before we acquire the lock (since that is expensive,
+            // and is almost never necessary)
+            List failedRecords = (firstCheckIn) ? null : clusterCheckIn(conn);
+            
+            if (firstCheckIn || (failedRecords.size() > 0)) {
                 getLockHandler().obtainLock(conn, LOCK_STATE_ACCESS);
                 transStateOwner = true;
                 
                 // Now that we own the lock, make sure we still have work to do. 
-                failedRecords = findFailedInstances(conn);
+                // The first time through, we also need to make sure we update/create our state record
+                failedRecords = (firstCheckIn) ? clusterCheckIn(conn) : findFailedInstances(conn);
     
                 if (failedRecords.size() > 0) {
                     getLockHandler().obtainLock(conn, LOCK_TRIGGER_ACCESS);
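
The hunk above (repeated for JobStoreTX.java below) reorders the clustered check-in so the expensive STATE_ACCESS lock is only taken when there is work to do, except on the very first check-in, where clusterCheckIn() must still run under the lock so this instance's state record gets created or updated. Below is a minimal, self-contained sketch of that flow; the StateStore interface, the lock names, and the point where firstCheckIn is cleared are illustrative assumptions, not the real JobStoreSupport API.

    import java.util.List;

    /**
     * Sketch only: the hypothetical StateStore stands in for the real
     * JobStoreSupport delegate and lock handler.
     */
    public class ClusterCheckinSketch {

        interface StateStore {
            List<String> clusterCheckIn();      // update own record, return failed instance ids
            List<String> findFailedInstances(); // re-read the failed instance ids
            void obtainLock(String lockName);
            void releaseLock(String lockName);
        }

        private boolean firstCheckIn = true;

        void doCheckin(StateStore store) {
            // Other than the first time, check in before taking the (expensive)
            // state lock, so the common "nothing failed" case stays cheap.
            List<String> failed = firstCheckIn ? null : store.clusterCheckIn();

            if (firstCheckIn || !failed.isEmpty()) {
                store.obtainLock("STATE_ACCESS");
                try {
                    // Under the lock, the first pass must still run clusterCheckIn()
                    // so its own state record is created/updated; later passes only
                    // re-verify that the failed instances are still unclaimed.
                    failed = firstCheckIn ? store.clusterCheckIn()
                                          : store.findFailedInstances();
                    if (!failed.isEmpty()) {
                        store.obtainLock("TRIGGER_ACCESS");
                        try {
                            // ... recover the failed instances' jobs and triggers ...
                        } finally {
                            store.releaseLock("TRIGGER_ACCESS");
                        }
                    }
                } finally {
                    store.releaseLock("STATE_ACCESS");
                }
            }
            firstCheckIn = false; // assumption: cleared after a successful first pass
        }
    }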

src/java/org/quartz/impl/jdbcjobstore/JobStoreSupport.java

             throws JobPersistenceException {
     
         List failedInstances = new LinkedList();
-        SchedulerStateRecord myLastState = null;
         boolean selfFailed = false;
         
         long timeNow = System.currentTimeMillis();
         
                 // find own record...
                 if (rec.getSchedulerInstanceId().equals(getInstanceId())) {
-                    myLastState = rec;
-        
-                    // TODO: revisit when handle self-failed-out impled (see TODO in clusterCheckIn() below)
-        //            if (rec.getRecoverer() != null && !firstCheckIn) {
-        //                selfFailed = true;
-        //            }
-
                     if (firstCheckIn) {
-                      if(rec.getRecoverer() == null)
-                    	  failedInstances.add(rec);
-                      // make sure the recoverer hasn't died itself!
-                      SchedulerStateRecord recOrec = (SchedulerStateRecord) 
-                      	  statesById.get(rec.getRecoverer());
-                      long failedIfAfter = timeNow;
-                      if(recOrec != null) {
-                      	failedIfAfter = calcFailedIfAfter(recOrec);
-                      }
-                      // if it has failed, then let's become the recoverer
-                      if( failedIfAfter < timeNow || recOrec == null) {
-                      	failedInstances.add(rec);
-                      }
+                        if (rec.getRecoverer() == null) {
+                            failedInstances.add(rec);
+                        } else {
+                            // make sure the recoverer hasn't died itself!
+                            SchedulerStateRecord recOrec = (SchedulerStateRecord)statesById.get(rec.getRecoverer());
+                            
+                            long failedIfAfter = (recOrec == null) ? timeNow : calcFailedIfAfter(recOrec);
+
+                            // if it has failed, then let's become the recoverer
+                            if (failedIfAfter < timeNow || recOrec == null) {
+                                failedInstances.add(rec);
+                            }
+                        }
                     }
-                    firstCheckIn = false;
+                    // TODO: revisit when handle self-failed-out impled (see TODO in clusterCheckIn() below)
+                    // else if (rec.getRecoverer() != null) {
+                    //     selfFailed = true;
+                    // }
                 } else {
                     // find failed instances...
                     long failedIfAfter = calcFailedIfAfter(rec);
         
-                    if (failedIfAfter < timeNow && rec.getRecoverer() == null) {
-                        failedInstances.add(rec);
-                    }
-                    else if(rec.getRecoverer() != null) {
-                    	// make sure the recoverer hasn't died itself!
-                        SchedulerStateRecord recOrec = (SchedulerStateRecord) 
-                        	statesById.get(rec.getRecoverer());
-                        failedIfAfter = timeNow;
-                        if(recOrec != null) {
-                        	failedIfAfter = calcFailedIfAfter(recOrec);
-                        }
+                    if (rec.getRecoverer() == null) {
+                        if (failedIfAfter < timeNow) {
+                            failedInstances.add(rec);
+                        }
+                    } else {
+                        // make sure the recoverer hasn't died itself!
+                        SchedulerStateRecord recOrec = (SchedulerStateRecord)statesById.get(rec.getRecoverer());
+
+                        failedIfAfter = (recOrec == null) ? timeNow : calcFailedIfAfter(recOrec);
+
                         // if it has failed, then let's become the recoverer
-                        if( failedIfAfter < timeNow || recOrec == null) {
-                        	failedInstances.add(rec);
+                        if (failedIfAfter < timeNow || recOrec == null) {
+                            failedInstances.add(rec);
                         }
                     }
                 }
             }
             
         } catch (Exception e) {
-            lastCheckin = System.currentTimeMillis();
             throw new JobPersistenceException("Failure updating scheduler state when checking-in: "
                     + e.getMessage(), e);
         }
                             rec.getSchedulerInstanceId());
 
                     // update record to show that recovery was handled
-                    String recoverer = getInstanceId();
-                    long checkInTS = rec.getCheckinTimestamp();
                     if (rec.getSchedulerInstanceId().equals(getInstanceId())) {
-                        recoverer = null;
-                        checkInTS = System.currentTimeMillis();
                         getDelegate().insertSchedulerState(conn,
-                                rec.getSchedulerInstanceId(), checkInTS,
-                                rec.getCheckinInterval(), recoverer);
+                                rec.getSchedulerInstanceId(), System.currentTimeMillis(),
+                                rec.getCheckinInterval(), null);
                     }
 
                 }
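
The rewritten findFailedInstances() above folds the recoverer-liveness check into both branches: an unclaimed instance is failed once its check-in window passes, and an instance that already has a recoverer is only reclaimed when that recoverer itself looks dead. A simplified, self-contained sketch of that decision follows; StateRec, the byId map, and the grace period inside calcFailedIfAfter() are illustrative stand-ins for SchedulerStateRecord and the real calculation.

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;

    class FailedInstanceSketch {

        /** Hypothetical stand-in for SchedulerStateRecord. */
        static class StateRec {
            String instanceId;
            String recoverer;        // null if no instance has claimed recovery yet
            long checkinTimestamp;
            long checkinInterval;
        }

        /** A record counts as failed once "now" passes this time (grace period is arbitrary here). */
        static long calcFailedIfAfter(StateRec rec) {
            return rec.checkinTimestamp + rec.checkinInterval + 7500L;
        }

        static List<StateRec> findFailed(List<StateRec> all, Map<String, StateRec> byId,
                                         String ownId, long now) {
            List<StateRec> failed = new ArrayList<>();
            for (StateRec rec : all) {
                if (rec.instanceId.equals(ownId)) {
                    continue; // own record is handled by the firstCheckIn branch above
                }
                if (rec.recoverer == null) {
                    // unclaimed: failed if it has missed its check-in window
                    if (calcFailedIfAfter(rec) < now) {
                        failed.add(rec);
                    }
                } else {
                    // claimed: only take over if the recoverer itself has died
                    StateRec recOrec = byId.get(rec.recoverer);
                    if (recOrec == null || calcFailedIfAfter(recOrec) < now) {
                        failed.add(rec);
                    }
                }
            }
            return failed;
        }
    }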

src/java/org/quartz/impl/jdbcjobstore/JobStoreTX.java

         boolean recovered = false;
 
         try {
-            // checkin, and make sure there is work to be done before we aquire 
-        	// the lock (since that is expensive, and almost never occurs)
-            List failedRecords = clusterCheckIn(conn);
-            if (failedRecords.size() > 0) {
+            // Other than the first time, always check in first to make sure there is
+            // work to be done before we acquire the lock (since that is expensive,
+            // and is almost never necessary)
+            List failedRecords = (firstCheckIn) ? null : clusterCheckIn(conn);
+            
+            if (firstCheckIn || (failedRecords.size() > 0)) {
                 getLockHandler().obtainLock(conn, LOCK_STATE_ACCESS);
                 transStateOwner = true;
     
                 // Now that we own the lock, make sure we still have work to do. 
-                failedRecords = findFailedInstances(conn);
+                // The first time through, we also need to make sure we update/create our state record
+                failedRecords = (firstCheckIn) ? clusterCheckIn(conn) : findFailedInstances(conn);
     
                 if (failedRecords.size() > 0) {
                     getLockHandler().obtainLock(conn, LOCK_TRIGGER_ACCESS);