Commits

Anonymous committed fe7e3e1

Catch ConnectionLossException while fetching ZooKeeper child nodes

Comments (0)

Files changed (2)

     <skipTests>true</skipTests>
   </properties>
 
-<!--
   <distributionManagement>
     <repository>
       <id>cdl-releases</id>
       <url>http://mvn.cdlib.org/content/repositories/cdl-snapshots/</url>
     </snapshotRepository>
   </distributionManagement>
--->
 
   <scm>
     <connection>scm:hg:http://hg.cdlib.org/egh-cdl-zk-queue</connection>

src/main/java/org/cdlib/mrt/queue/DistributedQueue.java

      * @param watcher optional watcher on getChildren() operation.
      * @return map from id to child name for all children
      */
-    public TreeMap<Long,String> orderedChildren(Watcher watcher) throws KeeperException, InterruptedException {
+    public TreeMap<Long,String> orderedChildren(Watcher watcher) throws KeeperException, InterruptedException, ConnectionLossException {
         TreeMap<Long,String> orderedChildren = new TreeMap<Long,String>();
 
         List<String> childNames = null;
-        try{
-            childNames = zookeeper.getChildren(dir, watcher);
-        }catch (KeeperException.NoNodeException e){
-            throw e;
-        }
-
+        int attempts = 0;
+        while (true) {
+            try{
+                childNames = zookeeper.getChildren(dir, watcher);
+            } catch (ConnectionLossException cle) {
+                // did submit fail?
+                if (attempts >= 3) throw new ConnectionLossException();
+                if (childNames == null) {
+                    System.err.println("[error] DistributedQueue.orderedChildren() lost connection, retrying: " + cle.getMessage());
+                    attempts++;
+                } else {
+                    System.out.println("[info] DistributedQueue.orderedChildren() lost connection, but no need to retry.");
+                    break;
+                }
+            } catch (KeeperException.NoNodeException e){
+                throw e;
+            }
+	}
+    
         for(String childName : childNames){
             try{
                 //Check format
 		// did submit fail?
 		if (attempts >= 3) throw new ConnectionLossException();  
 		if (node == null) {  
-                    System.err.println("[error] DistributedQueue.submit lost connection, requeuing: " + cle.getMessage());
+                    System.err.println("[error] DistributedQueue.submit() lost connection, requeuing: " + cle.getMessage());
 		    attempts++;
 		} else {
+                    System.out.println("[info] DistributedQueue.submit () lost connection, but no need to requeue: " + node);
 		    return true;
 		}
             } catch (KeeperException.NoNodeException e){