Commits

petermr committed 9d59d1d

fixed up and download

Comments (0)

Files changed (4)

src/main/java/org/xmlcml/cml/rest/Client.java

-package org.xmlcml.cml.rest;
-
-
-import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.IOException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.List;
-
-import nu.xom.Document;
-import nu.xom.Element;
-import nu.xom.Nodes;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.log4j.Logger;
-import org.restlet.Request;
-import org.restlet.Response;
-import org.restlet.data.MediaType;
-import org.restlet.data.Method;
-import org.restlet.data.Protocol;
-import org.restlet.representation.Representation;
-import org.restlet.representation.StringRepresentation;
-import org.restlet.resource.ClientResource;
-import org.xmlcml.cml.base.CMLConstants;
-import org.xmlcml.cml.base.CMLUtil;
-import org.xmlcml.cml.html.HtmlElement;
-import org.xmlcml.cml.html.HtmlTable;
-import org.xmlcml.cml.html.HtmlTd;
-import org.xmlcml.cml.html.HtmlTr;
-import org.xmlcml.euclid.Util;
-
-public class Client {
-	private static final String PARENT_DIRECTORY = "Parent Directory";
-	private static final String NAME = "Name";
-	private static final Logger LOG = Logger.getLogger(Client.class);
-	private static final String START_REPR = "#";
-	public static final String HTTP_GREENCHAIN_CH_CAM_AC_UK_QUIXOTE = 
-	    	"http://greenchain.ch.cam.ac.uk/patents/quixote/";
-	
-	private static final String DIR = "-dir";
-	private static final String URL = "-url";
-	private static final String UPLOAD = "-upload";
-	private static final String DOWNLOAD = "-download";
-
-	
-	private String baseUrl;
-	private File topDir;
-	private String topUrl;
-
-	public Client() {
-		setBaseUrl(HTTP_GREENCHAIN_CH_CAM_AC_UK_QUIXOTE);
-	}
-
-	/** sets the base URL to upload to
-	 * defaults to "http://greenchain.ch.cam.ac.uk/patents/quixote/"
-	 * don't use this unless you know what you are doing
-	 * @param baseUrl
-	 */
-	public void setBaseUrl(String baseUrl) {
-		this.baseUrl = baseUrl;
-	}
- 
-	/**
-	 * deletes a single file 
-	 * not yet tested
-	 * @param url (absolute: e.g. 
-	 *      "http://greenchain.ch.cam.ac.uk/patents/quixote/a/b/c.txt"
-	 */
-	public void delete(String url) {
-	    ClientResource resource = new ClientResource(url);  
-	    try {
-	    	resource.delete();
-	    } catch (Exception e) {
-		    e.printStackTrace();
-	    }
-	}
-
-	/**
-	 * downloads single url to file
-	 * @param url
-	 * @param file
-	 */
-	public void downloadFile(String url, File file) {
-		try {
-			FileUtils.copyURLToFile(new URL(url), file);
-		} catch (Exception e) {
-			throw new RuntimeException("cannot download URL", e);
-		}
-	}
-
-	/**
-	 * lists all uriFiles under topUrl
-	 * @param topUrl
-	 * @param file
-	 */
-	public List<String> getUrlDirectoryTree(String topUrl) {
-		this.topUrl = topUrl;
-		List<String> urlList = new ArrayList<String>();
-		urlList.add(topUrl);
-		List<String> subTree = getDirectoryTree(topUrl);
-		for (String st : subTree) {
-			urlList.add(st);
-		}
-		return urlList;
-	}
-	
-	/**
-	 * get list of URLs under topUrl which are "files" with content not "directories"
-	 * @param topDirectoryUrl
-	 * @return
-	 */
-	public List<String> getNonDirectories(String topDirectoryUrl) {
-		List<String> urlList = getUrlDirectoryTree(topDirectoryUrl);
-		System.out.println(urlList.size());
-		List<String> nonDirectoryList = new ArrayList<String>();
-		for (String url : urlList) {
-			if (!url.endsWith("/")) {
-				System.out.println("%% "+url);
-				nonDirectoryList.add(url);
-			}
-		}
-		return nonDirectoryList;
-	}
-	
-	/**
-	 * downloads whole directory and files and their content rooted at topUrl
-	 * copied to similar structure unedr topDir (which must exist and be writable) 
-	 * @param topUrl
-	 * @param topDir
-	 */
-	public void downloadDirectory(String topUrl, File topDir) {
-		if (topUrl == null || topDir == null) {
-			throw new RuntimeException("null dir or url");
-		}
-		if (!topDir.exists() || !topDir.isDirectory()) {
-			throw new RuntimeException("topDir must exist: "+topDir);
-		}
-		LOG.info("downloading url "+topUrl+" to directory "+topDir);
-		List<String> urlList = getNonDirectories(topUrl);
-		for (String url : urlList) {
-			String content = getChildString(url);
-			String deltaUrl = getDeltaUrl(url, topUrl);
-			File newFile = new File(topDir, deltaUrl);
-			System.out.println("made "+newFile);
-			newFile.getParentFile().mkdirs();
-			try {
-				FileUtils.writeStringToFile(newFile, content);
-			} catch (IOException e) {
-				throw new RuntimeException("Cannot write file: "+newFile, e);
-			}
-		}
-	}
-
-	/** not needed? */
-	public List<String> getUrlDirectoryTreeContent(String topUrl) {
-		List<String> urlList = getNonDirectories(topUrl);
-		List<String> contentList = new ArrayList<String>(urlList.size());
-		for (String url : urlList) {
-			String content = getChildString(url);
-			contentList.add(content);
-		}
-		return contentList;
-	}
-	
-	
-
-	private List<String> getDirectoryTree(String url) {
-		System.out.println(">> "+url);
-		List<String> urlList = new ArrayList<String>();
-		if (url != null) {
-			List<String> childUrls = getChildren(url);
-			for (String childUrl : childUrls) {
-				List<String> urlSubTree = getDirectoryTree(childUrl);
-				for (String subUrl : urlSubTree) {
-					urlList.add(subUrl);
-				}
-				urlList.add(childUrl);
-			}
-		}
-		return urlList;
-	}
-	
-	private List<String> getChildren(String url) {
-		List<String> childList = new ArrayList<String>();
-		if (url != null) {
-			ClientResource client = new ClientResource(url);
-			String s = null;
-			try {
-				Representation r = client.get(MediaType.TEXT_URI_LIST);
-				if (r != null) {
-					s = r.getText();
-				}
-			} catch (Exception e) {
-				throw new RuntimeException("Cannot get directories: "+url, e);
-			}
-			if (s != null) {
-				if (s.startsWith(START_REPR)) {
-					childList = getChildListFromRepresentation(s);
-				} else if (s.startsWith(CMLConstants.S_LANGLE)){
-					childList = readApacheHtml(url, s);
-				}
-			}
-		}
-		return childList;
-	}
-
-	private List<String> readApacheHtml(String url, String s) {
-		/**
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
- <head>
-  <title>Index of /files/public/datasets/HCO-L-Ala-NH2_GAMESS-US_Caesaraugusta_2008/calcs/homolevel/B3LYP_aug-cc-pVDZ</title>
- </head>
- <body>
-<h1>Index of /files/public/datasets/HCO-L-Ala-NH2_GAMESS-US_Caesaraugusta_2008/calcs/homolevel/B3LYP_aug-cc-pVDZ</h1>
-<table><tr><th><img src="/icons/blank.gif" alt="[ICO]"></th><th><a href="?C=N;O=D">Name</a></th><th><a href="?C=M;O=A">Last modified</a></th><th><a href="?C=S;O=A">Size</a></th><th><a href="?C=D;O=A">Description</a></th></tr><tr><th colspan="5"><hr></th></tr>
-
-<tr><td valign="top"><img src="/icons/back.gif" alt="[DIR]"></td><td><a href="/files/public/datasets/HCO-L-Ala-NH2_GAMESS-US_Caesaraugusta_2008/calcs/homolevel/">Parent Directory</a>       </td><td>&nbsp;</td><td align="right">  - </td><td>&nbsp;</td></tr>
-<tr><td valign="top"><img src="/icons/unknown.gif" alt="[   ]"></td><td><a href="B3LYP_aug-cc-pVDZ.dir">B3LYP_aug-cc-pVDZ.dir</a>  </td><td align="right">08-May-2008 16:48  </td><td align="right">1.0K</td><td>&nbsp;</td></tr>
-<tr><td valign="top"><img src="/icons/unknown.gif" alt="[   ]"></td><td><a href="B3LYP_aug-cc-pVDZ_shells.dir">B3LYP_aug-cc-pVDZ_sh..&gt;</a></td><td align="right">09-May-2008 11:50  </td><td align="right">596 </td><td>&nbsp;</td></tr>
-*/
-		List<String> childList = new ArrayList<String>();
-		Document doc = CMLUtil.parseHtmlWithTagSoup(new ByteArrayInputStream(s.getBytes()));
-		Nodes nodes = doc.query("./*[local-name()='html']/*[local-name()='body']/*[local-name()='table']");
-		if (nodes.size() == 1) {
-			HtmlTable table = (HtmlTable) HtmlElement.create((Element)nodes.get(0));
-			HtmlTr header = table.getSingleLeadingTrThChild();
-			if (NAME.equals(header.getTh(1).getValue())) {
-				List<HtmlTr> rows = table.getTrTdRows();
-				for (HtmlTr row : rows) {
-					HtmlTd td = row.getTd(1);
-					String value = td.getValue();
-					if (PARENT_DIRECTORY.equals(value.trim())) {
-						// ignore
-					} else {
-						String linkValue = td.query("./*[local-name()='a']/@href").get(0).getValue();
-						linkValue = url+"/"+linkValue;
-						System.out.println(linkValue);
-						childList.add(linkValue);
-					}
-				}
-			}
-		}
-		return childList;
-	}
-
-	private String getChildString(String url) {
-		String s = null;
-		if (url != null) {
-			ClientResource client = new ClientResource(url);
-			try {
-				Representation r = client.get(MediaType.TEXT_URI_LIST);
-				if (r != null) {
-					s = r.getText();
-				}
-			} catch (Exception e) {
-				throw new RuntimeException("Cannot get directories: "+url, e);
-			}
-		}
-		return s;
-	}
-
-	/**
-	 * this is a messy string (it should really be an array)
-	 * It has to be split at newlines and lines starting with # discarded
-	 * @param s
-	 * @return
-	 */
-	private List<String> getChildListFromRepresentation(String s) {
-		List<String> childList = new ArrayList<String>();
-		String[] childArray = s.split(CMLConstants.S_NEWLINE);
-		for (int i = 0; i < childArray.length; i++) {
-			String child = childArray[i].trim();
-			if (!child.startsWith(START_REPR)) {
-				childList.add(child);
-			}
-		}
-		return childList;
-	}
-		
-
-	/**
-	 * puts a single file 
-	 * @param url (absolute: e.g. 
-	 *      "http://greenchain.ch.cam.ac.uk/patents/quixote/a/b/c.txt"
-	 * @param content
-	 * @param mediaType
-	 */
-	public void put(String url, String content, MediaType mediaType) {
-        org.restlet.Client client = new org.restlet.Client(Protocol.HTTP);
-        Request request = new Request(Method.PUT, url);
-        mediaType = MediaType.APPLICATION_OCTET_STREAM;
-        request.setEntity(content, mediaType);
-        Response response = client.handle(request);
-        System.out.println("Upload to "+url+" ... status: "+response.getStatus());
-//	    ClientResource resource = new ClientResource(url);
-//	    try {
-//	    	Representation rr = resource.put(new StringRepresentation(content, mediaType));
-//	    	System.out.println("Rep after uploading to "+url+" ... "+rr);
-//	    } catch (Exception e) {
-//		    e.printStackTrace();
-//	    }
-	}
-
-	/** 
-	 * uploads all files in a directory subtree
-	 * assumes all files are MediaType.TEXT_PLAIN
-	 * creates directory structure on server
-	 * overwrites existing files without warning
-	 * example: using defaultUrl and local directory "/some/where/a" with files:
-	 * a/a.txt
-	 * a/b/b.txt
-	 * a/b/c/c.txt
-	 * would upload and create
-	 * 
-	 * http://greenchain.ch.cam.ac.uk/patents/quixote/a/a.txt
-	 * http://greenchain.ch.cam.ac.uk/patents/quixote/a/b/b.txt
-	 * http://greenchain.ch.cam.ac.uk/patents/quixote/a/b/c/c.txt
-	 * 
-	 * note that any local absolute filename is not preserved
-
-	 * @param dir directory on your client filesystem
-	 */
-	public void upload(File dir) {
-		if (!dir.exists() || !dir.isDirectory()) {
-			throw new RuntimeException("File must be existing directory: "+dir);
-		}
-		List<File> files = (List<File>)FileUtils.listFiles(dir, null, true);
-		for (File file : files) {
-			upload(dir, file, MediaType.TEXT_PLAIN);
-		}
-	}
-	
-	/**
-	 * PUTs the content of a file to the server
-	 * @param file
-	 * @param url (absolute, so of form http://greenchain.ch.cam.ac.uk/patents/quixote/foo/bar.txt)
-	 * @
-	 */
-	public void put(File file, String url, MediaType mediaType) {
-		String content = null;
-		try {
-			content = FileUtils.readFileToString(file);
-		} catch (IOException e) {
-			throw new RuntimeException("Cannot read file ", e);
-		}
-		put(url, content, mediaType);
-	}
-
-	private void upload(File dir, File file, MediaType mediaType) {
-		if (baseUrl == null) {
-			throw new RuntimeException("Must set baseUrl");
-		}
-		String relativeFileName = Util.getRelativeFilename(dir, file, "/");
-		String url = baseUrl+relativeFileName;
-		put(file, url, mediaType);
-	}
-
-	/**
-	 * find part of url which is descendant of topUrl
-	 * if topUrl is not ancestor of url, return null
-	 * @param url
-	 * @param topUrl
-	 * @return
-	 */
-	private String getDeltaUrl(String url, String topUrl) {
-		String delta = null;
-		String[] urlBits = url.split("/");
-		String[] topUrlBits = topUrl.split("/");
-		if (topUrlBits.length <= urlBits.length) {
-			for (int i = 0; i < topUrlBits.length; i++) {
-				if (!topUrlBits[i].equals(urlBits[i])) {
-					return delta;
-				}
-			}
-			StringBuilder deltaBuilder = new StringBuilder();
-			for (int i = topUrlBits.length; i < urlBits.length; i++) {
-				deltaBuilder.append(urlBits[i]);
-				deltaBuilder.append("/");
-			}
-			delta = deltaBuilder.toString();
-		}
-		return delta;
-	}
-
-	public static void main(String[] args) {
-		Client client = new Client();
-		System.out.println("args: "+args.length);
-		if (args.length == 0) {
-			usage();
-		} else {
-			client.run(args);
-		}
-	}
-
-	private void run(String[] args) {
-		int i = 0;
-		String url = null;
-		String dir = null;
-		boolean upload = false;
-		boolean download = false;
-		while (i < args.length) {
-			if (URL.equalsIgnoreCase(args[i])) {
-				url = args[++i]; i++;
-			} else if (DIR.equalsIgnoreCase(args[i])) {
-				dir = args[++i]; i++;
-			} else if (UPLOAD.equalsIgnoreCase(args[i])) {
-				upload = true;
-				i++;
-			} else if (DOWNLOAD.equalsIgnoreCase(args[i])) {
-				download = true;
-				i++;
-			} else {
-				throw new RuntimeException("unknown arg: "+args[i++]);
-			}
-		}
-		if (url == null || dir == null) {
-			throw new RuntimeException("Must give url and dir");
-		}
-		if (download == upload) {
-			throw new RuntimeException("Must give either upload or download");
-		}
-		if (upload) {
-			upload(url, dir);
-		} else if (download) {
-			download(url, dir);
-		}
-	}
-
-	private void upload(String url, String dir) {
-		setBaseUrl(url);
-		upload(new File(dir));
-	}
-
-	private void download(String url, String dir) {
-		downloadDirectory(url, new File(dir));
-	}
-
-	private static void usage() {
-		System.out.println(" args (mandatory): [-download|upload] -url <url> -dir <dir>");
-	}
-	
-}

src/main/java/org/xmlcml/cml/rest/UpDownLoader.java

+package org.xmlcml.cml.rest;
+
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.List;
+import java.util.Map;
+
+import nu.xom.Document;
+import nu.xom.Element;
+import nu.xom.Nodes;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.log4j.Logger;
+import org.restlet.Request;
+import org.restlet.Response;
+import org.restlet.data.Form;
+import org.restlet.data.MediaType;
+import org.restlet.data.Method;
+import org.restlet.data.Preference;
+import org.restlet.data.Protocol;
+import org.restlet.data.Range;
+import org.restlet.data.Status;
+import org.restlet.representation.Representation;
+import org.restlet.resource.ClientResource;
+import org.xmlcml.cml.base.CMLConstants;
+import org.xmlcml.cml.base.CMLUtil;
+import org.xmlcml.cml.html.HtmlElement;
+import org.xmlcml.cml.html.HtmlTable;
+import org.xmlcml.cml.html.HtmlTd;
+import org.xmlcml.cml.html.HtmlTr;
+import org.xmlcml.euclid.Util;
+
+public class UpDownLoader {
+	private static final Logger LOG = Logger.getLogger(UpDownLoader.class);
+	
+	/**
+	 * Selects which child URLs a listing keeps.
+	 * A URL counts as a directory when it ends with "/".
+	 */
+	public enum DirectoryType {
+		// keep only URLs ending with "/"
+		DIRECTORIES,
+		// keep only URLs not ending with "/"
+		NONDIRECTORIES,
+		// keep every URL
+		ALL;
+	}
+	// link text of the row that is skipped when reading an Apache index page
+	private static final String PARENT_DIRECTORY = "Parent Directory";
+	// expected value of header cell 1 of an Apache index table
+	private static final String NAME = "Name";
+	// prefix of lines that are discarded from a URI-list representation
+	private static final String START_REPR = "#";
+	// default base URL, set by the constructor
+	public static final String HTTP_GREENCHAIN_CH_CAM_AC_UK_QUIXOTE = 
+	    	"http://greenchain.ch.cam.ac.uk/patents/quixote/";
+	
+	// command-line flags
+	private static final String DIR = "-dir";
+	private static final String URL = "-url";
+	private static final String UPLOAD = "-upload";
+	private static final String DOWNLOAD = "-download";
+	// a URL ending with this is treated as a directory
+	private static final String DIRECTORY_END = "/";
+
+	// prefix prepended to relative file names on upload
+	private String baseUrl;
+	// NOTE(review): topDir and topUrl are not referenced in the visible part of the class
+	private File topDir;
+	private String topUrl;
+	// state captured by the most recent getHeader() call
+	private Response lastHeaderResponse;
+	private Map<String, Object> lastHeaderAttributes;
+	private Form lastHeaderForm;
+	private String lastHeaderLastModified;
+	private String lastHeaderContentType;
+	private String lastHeaderContentLength;
+	private Date lastHeaderResponseDate;
+
+	/**
+	 * Creates a loader whose base URL is the default
+	 * {@link #HTTP_GREENCHAIN_CH_CAM_AC_UK_QUIXOTE}.
+	 */
+	public UpDownLoader() {
+		setBaseUrl(HTTP_GREENCHAIN_CH_CAM_AC_UK_QUIXOTE);
+	}
+
+	/**
+	 * Sets the base URL that uploads are written under.
+	 * Defaults to "http://greenchain.ch.cam.ac.uk/patents/quixote/".
+	 * Don't use this unless you know what you are doing.
+	 * The value is stored as given; relative file names are appended to it
+	 * directly on upload.
+	 * @param baseUrl base URL for uploads
+	 */
+	public void setBaseUrl(String baseUrl) {
+		this.baseUrl = baseUrl;
+	}
+ 
+	/**
+	 * Deletes a single resource with an HTTP DELETE.
+	 * Not yet tested. Best effort: a failure is logged, not propagated.
+	 * @param url absolute URL, e.g.
+	 *      "http://greenchain.ch.cam.ac.uk/patents/quixote/a/b/c.txt"
+	 */
+	public void delete(String url) {
+		ClientResource resource = new ClientResource(url);
+		try {
+			resource.delete();
+		} catch (Exception e) {
+			// report through the class logger (with the URL) rather than stderr
+			LOG.error("Cannot delete: "+url, e);
+		}
+	}
+
+	/**
+	 * As downloadFile(url, file), but takes the target as a file name and
+	 * creates missing directories first.
+	 * If the target does not exist: a name ending with "/" is created as a
+	 * directory, otherwise its parent directories are created.
+	 * @param url URL to download
+	 * @param filename target (if it ends with "/" it behaves as a directory)
+	 * @return the file that was written
+	 * @throws RuntimeException if url or filename is null or the download fails
+	 */
+	public File downloadFile(String url, String filename) {
+		if (filename == null) {
+			throw new RuntimeException("Null filename");
+		}
+		File file = new File(filename);
+		if (!file.exists()) {
+			if (filename.endsWith("/")) {
+				file.mkdirs();
+			} else {
+				// a bare name such as "a.txt" has no parent, so there is nothing to create
+				File parent = file.getParentFile();
+				if (parent != null) {
+					parent.mkdirs();
+				}
+			}
+		}
+		return downloadFile(url, file);
+	}
+
+	/**
+	 * Downloads a single URL to a file.
+	 * If file is an existing directory, the target is a file inside it named
+	 * after the last segment of the URL; otherwise file itself is the target.
+	 * A trailing slash cannot be used to request a directory here because
+	 * java.io.File drops it from the path.
+	 * @param url URL to download
+	 * @param file target file or existing directory
+	 * @return the file that was written
+	 * @throws RuntimeException if url or file is null or the download fails
+	 */
+	public File downloadFile(String url, File file) {
+		if (url == null || file == null) {
+			throw new RuntimeException("Null url or file");
+		}
+		File toFile = file;
+		if (file.isDirectory()) {
+			String urlName = url.substring(url.lastIndexOf("/")+1);
+			toFile = new File(file, urlName);
+		}
+		try {
+			FileUtils.copyURLToFile(new URL(url), toFile);
+		} catch (Exception e) {
+			throw new RuntimeException("cannot download URL: "+url+" to "+toFile, e);
+		}
+		return toFile;
+	}
+
+	/**
+	 * Downloads the whole tree of files rooted at topUrl, with their full content,
+	 * into a similar structure under topDir.
+	 * Delegates to downloadDirectory(topUrl, topDir, 0), which calls mkdirs() on
+	 * topDir when it is not an existing directory.
+	 * @param topUrl root URL of the tree
+	 * @param topDir local root directory
+	 * @return the files written
+	 */
+	public List<File> downloadDirectory(String topUrl, File topDir) {
+		return downloadDirectory(topUrl, topDir, 0);
+	}
+
+	/**
+	 * Downloads the tree of files rooted at topUrl into a similar structure
+	 * under topDir, creating topDir if it is not already a directory.
+	 * Content is read and written as text.
+	 * @param topUrl root URL of the tree
+	 * @param topDir local root directory
+	 * @param size bytes to download per file (if <= 0 download all)
+	 * @return the files written
+	 * @throws RuntimeException if an argument is null, topDir cannot be created,
+	 *         a URL is not under topUrl, or a file cannot be written
+	 */
+	public List<File> downloadDirectory(String topUrl, File topDir, int size) {
+		if (topUrl == null || topDir == null) {
+			throw new RuntimeException("null dir or url");
+		}
+		if (!topDir.isDirectory()) {
+			// mkdirs() returns false when topDir is an ordinary file or cannot be created
+			if (!topDir.mkdirs() && !topDir.isDirectory()) {
+				throw new RuntimeException("cannot make topDir: "+topDir.getAbsolutePath());
+			}
+			LOG.info("made topDir : "+topDir.getAbsolutePath());
+		}
+		List<String> urlList = getDirectoryTree(topUrl, DirectoryType.NONDIRECTORIES);
+		System.out.println("========================="+urlList.size()+"========================");
+		List<File> fileList = new ArrayList<File>(urlList.size());
+		for (String url : urlList) {
+			String deltaUrl = getDeltaUrl(url, topUrl);
+			if (deltaUrl == null) {
+				// fail with a message instead of an NPE from new File(topDir, null)
+				throw new RuntimeException("url "+url+" is not under "+topUrl);
+			}
+			String content = (size <= 0) ? getChildString(url) : readFirstChunk(url, size);
+			File newFile = new File(topDir, deltaUrl);
+			newFile.getParentFile().mkdirs();
+			try {
+				FileUtils.writeStringToFile(newFile, content);
+			} catch (IOException e) {
+				throw new RuntimeException("Cannot write file: "+newFile, e);
+			}
+			fileList.add(newFile);
+		}
+		return fileList;
+	}
+
+	/**
+	 * Requests the first byteCount bytes of a URL with an HTTP range request.
+	 * @param urlString URL to read; null gives null
+	 * @param byteCount size of the requested range, starting at offset 0
+	 * @return the response text for status 200 or 206; null otherwise
+	 *         (the failing status is printed to stderr)
+	 */
+	public String readFirstChunk(String urlString, int byteCount) {
+		if (urlString == null) {
+			return null;
+		}
+		org.restlet.Client chunkClient = new org.restlet.Client(Protocol.HTTP);
+		Request chunkRequest = new Request(Method.GET, urlString);
+		List<Range> firstBytes = Collections.singletonList(new Range(0, byteCount));
+		chunkRequest.setRanges(firstBytes);
+		Response chunkResponse = chunkClient.handle(chunkRequest);
+		boolean succeeded = Status.SUCCESS_OK.equals(chunkResponse.getStatus())
+				|| Status.SUCCESS_PARTIAL_CONTENT.equals(chunkResponse.getStatus());
+		if (!succeeded) {
+			System.err.println("FAILED "+chunkResponse.getStatus());
+			return null;
+		}
+		return processSuccessfulChunkRequest(chunkResponse);
+	}
+	
+	/**
+	 * Recursively lists the URLs below url.
+	 * Every child is read; children ending with "/" are descended into, and a
+	 * directory's descendants are listed before the directory itself.
+	 * @param url root URL; null gives an empty list
+	 * @param directoryType which URLs to keep in the result
+	 * @return the matching URLs
+	 */
+	public List<String> getDirectoryTree(String url, DirectoryType directoryType) {
+		List<String> collected = new ArrayList<String>();
+		if (url == null) {
+			return collected;
+		}
+		for (String child : getChildren(url, DirectoryType.ALL)) {
+			if (isDirectoryUrl(child)) {
+				collected.addAll(getDirectoryTree(child, directoryType));
+			}
+			if (canAddChildToList(directoryType, child)) {
+				collected.add(child);
+			}
+		}
+		return collected;
+	}
+	
+	/**
+	 * Lists the immediate children of a directory URL.
+	 * The listing is requested as text/uri-list. A body starting with "#" is
+	 * parsed as a URI list; a body starting with "<" is parsed as an Apache
+	 * HTML index page; anything else gives an empty list.
+	 * The listing is fetched once: an earlier extra GET, whose response was
+	 * discarded and never released, has been removed.
+	 * @param url directory URL; null gives an empty list
+	 * @param directoryType which children to keep
+	 * @return the matching child URLs
+	 * @throws RuntimeException if the listing cannot be fetched
+	 */
+	private List<String> getChildren(String url, DirectoryType directoryType) {
+		List<String> childList = new ArrayList<String>();
+		if (url == null) {
+			return childList;
+		}
+		ClientResource clientResource = new ClientResource(url);
+		String s = null;
+		try {
+			Representation r = clientResource.get(MediaType.TEXT_URI_LIST);
+			if (r != null) {
+				s = r.getText();
+			}
+		} catch (Exception e) {
+			throw new RuntimeException("Cannot get directories: "+url, e);
+		}
+		if (s != null) {
+			if (s.startsWith(START_REPR)) {
+				childList = getChildListFromRepresentation(s, directoryType);
+			} else if (s.startsWith(CMLConstants.S_LANGLE)){
+				childList = readApacheHtml(url, s, directoryType);
+			}
+		}
+		return childList;
+	}
+
+	/**
+	 * Reads the body of a successful chunk response as text.
+	 * @param response response to read
+	 * @return the body text, or null if the response has no entity
+	 * @throws RuntimeException if the body cannot be read
+	 */
+	private String processSuccessfulChunkRequest(Response response) {
+		Representation entity = response.getEntity();
+		if (entity == null) {
+			// nothing to read; avoid an NPE
+			return null;
+		}
+		try {
+			return entity.getText();
+		} catch (IOException e) {
+			throw new RuntimeException("Cannot download chunk", e);
+		}
+	}
+
+	/**
+	 * Dispatches a successful response by the media type of its entity:
+	 * URI lists go to processUriList, HTML/XHTML to processHtml, anything
+	 * else is ignored.
+	 * @param response response whose entity is inspected
+	 */
+	private void processSuccessfulRequest(Response response) {
+		MediaType received = response.getEntity().getMediaType();
+		if (MediaType.TEXT_URI_LIST.equals(received)) {
+			processUriList(response);
+			return;
+		}
+		boolean isHtml = MediaType.TEXT_HTML.equals(received)
+				|| MediaType.APPLICATION_XHTML.equals(received)
+				|| MediaType.APPLICATION_XHTML_XML.equals(received);
+		if (isHtml) {
+			processHtml(response);
+		}
+	}
+
+	/**
+	 * Repeats a request that was rejected as an unsupported media type, this
+	 * time accepting HTML, XHTML and (with quality 0.1) anything else.
+	 * The rejected response's entity is released first.
+	 * @param restletClient client used to send the retry
+	 * @param request request to resend; its accepted media types are replaced
+	 * @param response the rejected response
+	 */
+	private void retryUnsupportedMediaType(org.restlet.Client restletClient,
+			Request request, Response response) {
+		// the rejected response may carry no entity
+		if (response.getEntity() != null) {
+			response.getEntity().release();
+		}
+		List<Preference<MediaType>> mediaTypes = new ArrayList<Preference<MediaType>>();
+		mediaTypes.add(new Preference<MediaType>(MediaType.TEXT_HTML));
+		mediaTypes.add(new Preference<MediaType>(MediaType.APPLICATION_XHTML));
+		mediaTypes.add(new Preference<MediaType>(MediaType.ALL, 0.1f));
+		request.getClientInfo().setAcceptedMediaTypes(mediaTypes);
+
+		Response retryResponse = restletClient.handle(request);
+		if (Status.SUCCESS_OK.equals(retryResponse.getStatus())) {
+			processHtml(retryResponse);
+		}
+	}
+
+	/**
+	 * Hook for an HTML response; currently does nothing.
+	 * @param response the HTML response
+	 */
+	private void processHtml(Response response) {
+//		System.out.println("html response: "+response);
+	}
+
+	/**
+	 * Hook for a URI-list response; currently does nothing.
+	 * @param response the URI-list response
+	 */
+	private void processUriList(Response response) {
+//		System.out.println("uri list response: "+response);
+	}
+
+	/**
+	 * Extracts child URLs from an Apache-style HTML index page.
+	 * The page must contain exactly one table under html/body whose header
+	 * cell 1 reads "Name"; otherwise the result is empty. The "Parent Directory"
+	 * row and rows without a link are skipped. Each href is appended to url,
+	 * with a single "/" between them.
+	 * @param url URL the page was fetched from
+	 * @param s the HTML text
+	 * @param directoryType which children to keep
+	 * @return the matching child URLs
+	 */
+	private List<String> readApacheHtml(String url, String s, DirectoryType directoryType) {
+		/*
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+ <head>
+  <title>Index of /files/public/datasets/HCO-L-Ala-NH2_GAMESS-US_Caesaraugusta_2008/calcs/homolevel/B3LYP_aug-cc-pVDZ</title>
+ </head>
+ <body>
+<h1>Index of /files/public/datasets/HCO-L-Ala-NH2_GAMESS-US_Caesaraugusta_2008/calcs/homolevel/B3LYP_aug-cc-pVDZ</h1>
+<table><tr><th><img src="/icons/blank.gif" alt="[ICO]"></th><th><a href="?C=N;O=D">Name</a></th><th><a href="?C=M;O=A">Last modified</a></th><th><a href="?C=S;O=A">Size</a></th><th><a href="?C=D;O=A">Description</a></th></tr><tr><th colspan="5"><hr></th></tr>
+
+<tr><td valign="top"><img src="/icons/back.gif" alt="[DIR]"></td><td><a href="/files/public/datasets/HCO-L-Ala-NH2_GAMESS-US_Caesaraugusta_2008/calcs/homolevel/">Parent Directory</a>       </td><td>&nbsp;</td><td align="right">  - </td><td>&nbsp;</td></tr>
+<tr><td valign="top"><img src="/icons/unknown.gif" alt="[   ]"></td><td><a href="B3LYP_aug-cc-pVDZ.dir">B3LYP_aug-cc-pVDZ.dir</a>  </td><td align="right">08-May-2008 16:48  </td><td align="right">1.0K</td><td>&nbsp;</td></tr>
+<tr><td valign="top"><img src="/icons/unknown.gif" alt="[   ]"></td><td><a href="B3LYP_aug-cc-pVDZ_shells.dir">B3LYP_aug-cc-pVDZ_sh..&gt;</a></td><td align="right">09-May-2008 11:50  </td><td align="right">596 </td><td>&nbsp;</td></tr>
+*/
+		List<String> childList = new ArrayList<String>();
+		Document doc = CMLUtil.parseHtmlWithTagSoup(new ByteArrayInputStream(s.getBytes()));
+		Nodes nodes = doc.query("./*[local-name()='html']/*[local-name()='body']/*[local-name()='table']");
+		if (nodes.size() != 1) {
+			return childList;
+		}
+		HtmlTable table = (HtmlTable) HtmlElement.create((Element)nodes.get(0));
+		HtmlTr header = table.getSingleLeadingTrThChild();
+		if (header == null || !NAME.equals(header.getTh(1).getValue())) {
+			return childList;
+		}
+		// directory URLs already end with "/"; do not double the separator
+		String base = url.endsWith(DIRECTORY_END) ? url : url+DIRECTORY_END;
+		for (HtmlTr row : table.getTrTdRows()) {
+			HtmlTd td = row.getTd(1);
+			if (PARENT_DIRECTORY.equals(td.getValue().trim())) {
+				continue;
+			}
+			Nodes hrefs = td.query("./*[local-name()='a']/@href");
+			if (hrefs.size() == 0) {
+				// row without a link: nothing to follow
+				continue;
+			}
+			String linkValue = base+hrefs.get(0).getValue();
+			if (canAddChildToList(directoryType, linkValue)) {
+				childList.add(linkValue);
+			}
+		}
+		return childList;
+	}
+
+	/**
+	 * Fetches the content of a URL as text.
+	 * NOTE(review): the request asks for text/uri-list even though this is
+	 * used for ordinary files - confirm that is intended.
+	 * @param url URL to read; null gives null
+	 * @return the body text, or null if there is no representation
+	 * @throws RuntimeException if the content cannot be fetched
+	 */
+	private String getChildString(String url) {
+		String s = null;
+		if (url != null) {
+			ClientResource client = new ClientResource(url);
+			try {
+				Representation r = client.get(MediaType.TEXT_URI_LIST);
+				if (r != null) {
+					s = r.getText();
+				}
+			} catch (Exception e) {
+				// this reads file content, not a directory listing
+				throw new RuntimeException("Cannot get content: "+url, e);
+			}
+		}
+		return s;
+	}
+
+	/**
+	 * Parses a URI-list representation into child URLs.
+	 * This is a messy string (it should really be an array): it is split at
+	 * newlines, each line is trimmed, and blank lines and lines starting
+	 * with # are discarded.
+	 * @param s the representation text
+	 * @param directoryType which children to keep
+	 * @return the matching child URLs
+	 */
+	private List<String> getChildListFromRepresentation(String s, DirectoryType directoryType) {
+		List<String> childList = new ArrayList<String>();
+		for (String line : s.split(CMLConstants.S_NEWLINE)) {
+			String child = line.trim();
+			// a blank line is not a URL; previously it was added as ""
+			if (child.length() > 0 && canAddChildToList(directoryType, child)) {
+				childList.add(child);
+			}
+		}
+		return childList;
+	}
+
+	/**
+	 * Decides whether a child URL belongs in a listing of the given type.
+	 * Lines starting with "#" are never added.
+	 * @param directoryType the kind of URL wanted
+	 * @param child candidate URL
+	 * @return true if child should be added
+	 */
+	private boolean canAddChildToList(DirectoryType directoryType, String child) {
+		if (child.startsWith(START_REPR)) {
+			return false;
+		}
+		if (DirectoryType.ALL.equals(directoryType)) {
+			return true;
+		}
+		boolean isDirectory = isDirectoryUrl(child);
+		if (DirectoryType.DIRECTORIES.equals(directoryType)) {
+			return isDirectory;
+		}
+		return DirectoryType.NONDIRECTORIES.equals(directoryType) && !isDirectory;
+	}
+
+	/**
+	 * Tests whether a URL denotes a directory, i.e. ends with "/".
+	 * @param child URL to test
+	 * @return true if child ends with "/"
+	 */
+	private boolean isDirectoryUrl(String child) {
+		return child.endsWith(DIRECTORY_END);
+	}
+	
+	/**
+	 * Sends an HTTP HEAD request and caches the response and selected headers
+	 * (Last-Modified, Content-Type, Content-Length) for the getters below.
+	 * The header form is looked up under the "org.restlet.http.headers"
+	 * attribute; if that is absent and the response has exactly one attribute,
+	 * that one is used. A value that is not a Form leaves the headers null.
+	 * @param url URL to query; if null nothing is sent and the previous
+	 *        response (possibly null) is returned
+	 * @return the response of the most recent HEAD request
+	 */
+	public Response getHeader(String url) {
+		if (url != null) {
+			org.restlet.Client restletClient = new org.restlet.Client(Protocol.HTTP);
+			Request request = new Request(Method.HEAD, url);
+			lastHeaderResponse = restletClient.handle(request);
+			lastHeaderResponseDate = lastHeaderResponse.getDate();
+			lastHeaderAttributes = lastHeaderResponse.getAttributes();
+			Object headers = lastHeaderAttributes.get("org.restlet.http.headers");
+			if (headers == null && lastHeaderAttributes.size() == 1) {
+				// previous behaviour: take the sole attribute
+				headers = lastHeaderAttributes.values().iterator().next();
+			}
+			lastHeaderForm = (headers instanceof Form) ? (Form) headers : null;
+			lastHeaderLastModified = lastHeaderForm == null ? null : lastHeaderForm.getFirstValue("Last-Modified");
+			lastHeaderContentType = lastHeaderForm == null ? null : lastHeaderForm.getFirstValue("Content-Type");
+			lastHeaderContentLength = lastHeaderForm == null ? null : lastHeaderForm.getFirstValue("Content-Length");
+		}
+		return lastHeaderResponse;
+	}
+	
+	
+	/** @return Last-Modified header from the last getHeader() call, or null */
+	public String getLastHeaderLastModified() {
+		return lastHeaderLastModified;
+	}
+
+	/** @return Content-Type header from the last getHeader() call, or null */
+	public String getLastHeaderContentType() {
+		return lastHeaderContentType;
+	}
+
+	/** @return Content-Length header from the last getHeader() call, or null */
+	public String getLastHeaderContentLength() {
+		return lastHeaderContentLength;
+	}
+
+	/** @return date of the response from the last getHeader() call, or null */
+	public Date getDate() {
+		return lastHeaderResponseDate;
+	}
+
+	/**
+	 * Parses the cached Last-Modified header with the deprecated
+	 * Date(String) constructor.
+	 * @return the parsed date, or null if no Last-Modified header is cached
+	 * @throws IllegalArgumentException if the header cannot be parsed
+	 */
+	public Date getLastModifiedDate() {
+		return (lastHeaderLastModified == null) ? null : new Date(lastHeaderLastModified);
+	}
+
+	/** @return attribute map of the response from the last getHeader() call, or null */
+	public Map<String, Object> getLastHeaderAttributes() {
+		return lastHeaderAttributes;
+	}
+		
+	// =========================================================== //
+
+	/** @return response from the last getHeader() call, or null */
+	public Response getLastHeaderResponse() {
+		return lastHeaderResponse;
+	}
+
+	/**
+	 * PUTs text content to a single URL and prints the response status.
+	 * NOTE(review): the mediaType argument is overridden - the entity is
+	 * always sent as application/octet-stream. Confirm whether that is
+	 * deliberate.
+	 * @param url absolute URL, e.g.
+	 *      "http://greenchain.ch.cam.ac.uk/patents/quixote/a/b/c.txt"
+	 * @param content text to upload
+	 * @param mediaType currently ignored (see note)
+	 */
+	public void put(String url, String content, MediaType mediaType) {
+		org.restlet.Client restletClient = new org.restlet.Client(Protocol.HTTP);
+		Request putRequest = new Request(Method.PUT, url);
+		MediaType sentType = MediaType.APPLICATION_OCTET_STREAM;
+		putRequest.setEntity(content, sentType);
+		Response putResponse = restletClient.handle(putRequest);
+		System.out.println("Upload to "+url+" ... status: "+putResponse.getStatus());
+	}
+
+	/**
+	 * Uploads all files in a directory subtree.
+	 * Each file is passed on as MediaType.TEXT_PLAIN.
+	 * Creates directory structure on server;
+	 * overwrites existing files without warning.
+	 * Example: using defaultUrl and local directory "/some/where/a" with files:
+	 * a/a.txt
+	 * a/b/b.txt
+	 * a/b/c/c.txt
+	 * would upload and create
+	 *
+	 * http://greenchain.ch.cam.ac.uk/patents/quixote/a/a.txt
+	 * http://greenchain.ch.cam.ac.uk/patents/quixote/a/b/b.txt
+	 * http://greenchain.ch.cam.ac.uk/patents/quixote/a/b/c/c.txt
+	 *
+	 * Note that any local absolute filename is not preserved.
+	 *
+	 * @param dir directory on your client filesystem
+	 * @throws RuntimeException if dir is not an existing directory
+	 */
+	public void upload(File dir) {
+		if (!dir.exists() || !dir.isDirectory()) {
+			throw new RuntimeException("File must be existing directory: "+dir);
+		}
+		// listFiles() only promises a Collection, so do not downcast it to List
+		java.util.Collection<File> files = FileUtils.listFiles(dir, null, true);
+		for (File file : files) {
+			upload(dir, file, MediaType.TEXT_PLAIN);
+		}
+	}
+	
+	/**
+	 * PUTs the content of a file to the server.
+	 * The file is read as text before being sent.
+	 * @param file file to upload
+	 * @param url (absolute, so of form http://greenchain.ch.cam.ac.uk/patents/quixote/foo/bar.txt)
+	 * @param mediaType passed on to put(url, content, mediaType)
+	 * @throws RuntimeException if the file cannot be read
+	 */
+	public void put(File file, String url, MediaType mediaType) {
+		String content = null;
+		try {
+			content = FileUtils.readFileToString(file);
+		} catch (IOException e) {
+			throw new RuntimeException("Cannot read file "+file, e);
+		}
+		put(url, content, mediaType);
+	}
+
+	/**
+	 * Uploads one file to baseUrl + (path of file relative to dir, "/"-separated).
+	 * @param dir root against which the relative name is computed
+	 * @param file file to send
+	 * @param mediaType passed through to put(File, String, MediaType)
+	 * @throws RuntimeException if baseUrl has not been set
+	 */
+	private void upload(File dir, File file, MediaType mediaType) {
+		if (null == baseUrl) {
+			throw new RuntimeException("Must set baseUrl");
+		}
+		String targetUrl = baseUrl+Util.getRelativeFilename(dir, file, "/");
+		put(file, targetUrl, mediaType);
+	}
+
+	// =========================================================== //
+	/**
+	 * Finds the part of url that lies below topUrl.
+	 * Both arguments are split on "/" and compared segment by segment.
+	 * Every remaining segment of url is emitted followed by "/", so the
+	 * result always ends in "/" unless it is empty.
+	 * @param url candidate descendant
+	 * @param topUrl candidate ancestor
+	 * @return the trailing segments of url ("" when there are none), or
+	 *         null if topUrl is not an ancestor of url
+	 */
+	private static String getDeltaUrl(String url, String topUrl) {
+		String[] urlParts = url.split("/");
+		String[] topParts = topUrl.split("/");
+		// an ancestor cannot have more segments than its descendant
+		if (topParts.length > urlParts.length) {
+			return null;
+		}
+		for (int idx = 0; idx < topParts.length; idx++) {
+			if (!topParts[idx].equals(urlParts[idx])) {
+				return null;
+			}
+		}
+		StringBuilder tail = new StringBuilder();
+		for (int idx = topParts.length; idx < urlParts.length; idx++) {
+			tail.append(urlParts[idx]).append("/");
+		}
+		return tail.toString();
+	}
+
+	// =========================================================== //
+/**
+ * Command-line entry point: prints the argument count, then either the
+ * usage text (no arguments) or runs the requested transfer.
+ * @param args command-line arguments, see usage()
+ */
+	public static void main(String[] args) {
+		UpDownLoader loader = new UpDownLoader();
+		System.out.println("args: "+args.length);
+		if (args.length > 0) {
+			loader.run(args);
+			return;
+		}
+		usage();
+	}
+
+	/**
+	 * Parses the command line and performs exactly one upload or download.
+	 * Flags are matched case-insensitively; the URL and DIR flags each
+	 * consume the following argument as their value.
+	 * @param args command-line arguments
+	 * @throws RuntimeException on an unknown flag, a flag without its value,
+	 *         a missing url or dir, or when not exactly one of
+	 *         upload/download is requested
+	 */
+	private void run(String[] args) {
+		String url = null;
+		String dir = null;
+		boolean upload = false;
+		boolean download = false;
+		int i = 0;
+		while (i < args.length) {
+			String arg = args[i];
+			if (URL.equalsIgnoreCase(arg)) {
+				url = argValueAfter(args, i);
+				i += 2;
+			} else if (DIR.equalsIgnoreCase(arg)) {
+				dir = argValueAfter(args, i);
+				i += 2;
+			} else if (UPLOAD.equalsIgnoreCase(arg)) {
+				upload = true;
+				i++;
+			} else if (DOWNLOAD.equalsIgnoreCase(arg)) {
+				download = true;
+				i++;
+			} else {
+				throw new RuntimeException("unknown arg: "+arg);
+			}
+		}
+		if (url == null || dir == null) {
+			throw new RuntimeException("Must give url and dir");
+		}
+		// both set or neither set: the request is ambiguous or empty
+		if (download == upload) {
+			throw new RuntimeException("Must give either upload or download");
+		}
+		if (upload) {
+			upload(url, dir);
+		} else {
+			download(url, dir);
+		}
+	}
+
+	/**
+	 * Returns the argument following the flag at flagIndex.
+	 * @throws RuntimeException if the flag is the last argument
+	 */
+	private static String argValueAfter(String[] args, int flagIndex) {
+		int valueIndex = flagIndex + 1;
+		if (valueIndex >= args.length) {
+			throw new RuntimeException("Missing value after "+args[flagIndex]);
+		}
+		return args[valueIndex];
+	}
+
+	/**
+	 * Sets url as the base URL, then uploads the directory named by dir.
+	 * @param url base URL to upload to
+	 * @param dir name of the local directory
+	 */
+	private void upload(String url, String dir) {
+		setBaseUrl(url);
+		File localDir = new File(dir);
+		upload(localDir);
+	}
+
+	/**
+	 * Downloads from url into the local directory named by dir,
+	 * delegating to downloadDirectory; its result is discarded.
+	 * @param url remote URL to download from
+	 * @param dir name of the local target directory
+	 */
+	private void download(String url, String dir) {
+		File targetDir = new File(dir);
+		downloadDirectory(url, targetDir);
+	}
+
+	/**
+	 * Prints the command-line synopsis to stdout.
+	 * Exactly one of -download / -upload is required, plus -url and -dir.
+	 */
+	private static void usage() {
+		// both flags carry a leading '-'; parentheses mark a required choice
+		System.out.println(" args (mandatory): (-download|-upload) -url <url> -dir <dir>");
+	}
+
+}

src/test/java/org/xmlcml/cml/rest/ClientTest.java

-package org.xmlcml.cml.rest;
-
-
-import java.io.File;
-import java.util.List;
-
-import junit.framework.Assert;
-
-import org.junit.Ignore;
-import org.junit.Test;
-
-/**
- * Tests for Client upload, directory listing and download.
- * NOTE(review): these appear to talk to the live greenchain server -- confirm.
- */
-public class ClientTest {
-	/** uploads the test resource tree using the client's default base URL */
-	@Test
-	public void testUploadDir() {
-		Client client = new Client();
-		File file = new File("src/test/resources/rest");
-		Assert.assertTrue("exists", file.exists());
-		client.upload(file);
-	}
-
-	/** lists the URLs found below topUrl */
-	@Test
-	@Ignore
-	public void testUrlDirectoryTree() {
-		String topUrl = "http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1";
-		Client client = new Client();
-		List<String> urlList = client.getUrlDirectoryTree(topUrl);
-		for (String url : urlList) {
-			System.out.println(">>"+url);
-		}
-		Assert.assertEquals("urls ", 7, urlList.size());
-		Assert.assertEquals("urls 0", "http://greenchain.ch.cam.ac.uk/patents/quixote/a/", urlList.get(0));
-		Assert.assertEquals("urls 6", "http://greenchain.ch.cam.ac.uk/patents/quixote/b/b.txt", urlList.get(6));
-	}
-
-	/** fetches the content of every entry below topUrl */
-	@Test
-	@Ignore
-	public void testGetUrlDirectoryTreeContent() {
-		String topUrl = "http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1";
-		Client client = new Client();
-		List<String> contentList = client.getUrlDirectoryTreeContent(topUrl);
-		for (String content : contentList) {
-			System.out.println("??"+content);
-		}
-		Assert.assertEquals("urls ", 3, contentList.size());
-		// compare list entries with the expected text; the indices are taken
-		// from the assertion labels (TODO confirm expected content)
-		Assert.assertEquals("content0 ", "a", contentList.get(0));
-		Assert.assertEquals("content2 ", "b", contentList.get(2));
-	}
-	
-	/** downloads the tree below topUrl into target/quixote */
-	@Test
-	@Ignore
-	public void testDownload() {
-		Client client = new Client();
-		String topUrl = "http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/";
-		File topDir = new File("target/quixote");
-		topDir.mkdirs();
-		client.downloadDirectory(topUrl, topDir);
-	}
-
-}

src/test/java/org/xmlcml/cml/rest/UpDownLoaderTest.java

+package org.xmlcml.cml.rest;
+
+
+import java.io.File;
+
+import java.util.Date;
+import java.util.List;
+
+import junit.framework.Assert;
+
+import org.apache.commons.io.FileUtils;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.restlet.Response;
+import org.xmlcml.cml.rest.UpDownLoader.DirectoryType;
+
+/**
+ * Tests for UpDownLoader: upload, single-file and directory download,
+ * chunked reads, directory listings and header access.
+ * NOTE(review): the URLs point at external servers, so these look like
+ * integration tests that need network access and unchanged server
+ * content -- confirm.
+ */
+public class UpDownLoaderTest {
+	/**
+	 * Slack subtracted from the reference time used in "is newer" checks.
+	 * File modification times may be stored with a resolution of one or two
+	 * seconds, so a file written right after a millisecond-precise timestamp
+	 * can appear older than that timestamp.
+	 */
+	private static final long TIMESTAMP_SLACK_MS = 2000L;
+
+	/** reference time for freshness checks, shifted back by the slack */
+	private static Date freshnessThreshold() {
+		return new Date(System.currentTimeMillis() - TIMESTAMP_SLACK_MS);
+	}
+
+	/** placeholder test that always passes */
+	@Test
+	public void dummy() {
+		Assert.assertTrue("true", true);
+	}
+
+	/** uploads the test resource tree using the default base URL */
+	@Test
+	public void testUploadDir() {
+		UpDownLoader uploader = new UpDownLoader();
+		File file = new File("src/test/resources/rest");
+		Assert.assertTrue("exists", file.exists());
+		uploader.upload(file);
+	}
+
+	/** downloads one file into a directory given by name */
+	@Test
+	public void testDownloadFile() throws Exception {
+		Date now = freshnessThreshold();
+		UpDownLoader downloader = new UpDownLoader();
+		String topUrl = "http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/aproxcall.log";
+		String topDirName = "target/quixote/files/";
+		File file = downloader.downloadFile(topUrl, topDirName);
+		Assert.assertEquals("name", new File(new File(topDirName), "aproxcall.log").getAbsolutePath(), file.getAbsolutePath());
+		Assert.assertTrue("exists", file.exists());
+		Assert.assertEquals("size", 545405, FileUtils.readFileToByteArray(file).length);
+		Assert.assertTrue("is newer than "+now, FileUtils.isFileNewer(file, now));
+	}
+
+	/** downloads one file into an explicitly named target file */
+	@Test
+	public void testDownloadFile1() throws Exception {
+		Date now = freshnessThreshold();
+		UpDownLoader downloader = new UpDownLoader();
+		String topUrl = "http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/aproxcall.log";
+		File topDir = new File("target/quixote/files");
+		topDir.mkdirs();
+		File file = new File(topDir, "aproxcall-new.log");
+		downloader.downloadFile(topUrl, file);
+		Assert.assertTrue("exists", file.exists());
+		Assert.assertEquals("size", 545405, FileUtils.readFileToByteArray(file).length);
+		Assert.assertTrue("is newer than "+now, FileUtils.isFileNewer(file, now));
+	}
+
+	/** downloads a whole directory tree and checks the first file */
+	@Test
+	public void testDownloadDirectory() throws Exception {
+		Date now = freshnessThreshold();
+		UpDownLoader downloader = new UpDownLoader();
+		String topUrl = "http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/";
+		File topDir = new File("target/quixote");
+		List<File> fileList = downloader.downloadDirectory(topUrl, topDir);
+		Assert.assertEquals("file count", 14, fileList.size());
+		File file0 = fileList.get(0);
+		Assert.assertTrue("exists", file0.exists());
+		Assert.assertEquals("file name", "aproxcall.log", file0.getName());
+		Assert.assertEquals("file size", 545405, FileUtils.readFileToByteArray(file0).length);
+		Assert.assertTrue("is newer than "+now, FileUtils.isFileNewer(file0, now));
+	}
+
+	/** downloads a directory tree with a size limit of 999 per file */
+	@Test
+	public void testDownloadDirectoryChunks() throws Exception {
+		Date now = freshnessThreshold();
+		UpDownLoader downloader = new UpDownLoader();
+		String topUrl = "http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/";
+		File topDir = new File("target/quixote/chunks");
+		List<File> fileList = downloader.downloadDirectory(topUrl, topDir, 999);
+		Assert.assertEquals("file count", 14, fileList.size());
+		File file0 = fileList.get(0);
+		Assert.assertTrue("exists", file0.exists());
+		Assert.assertEquals("file name", "aproxcall.log", file0.getName());
+		Assert.assertEquals("file size", 999, FileUtils.readFileToByteArray(file0).length);
+		Assert.assertTrue("is newer than "+now, FileUtils.isFileNewer(file0, now));
+	}
+
+	/** reads the first 100 characters of a remote file */
+	@Test
+	public void testReadFirstChunk() {
+		UpDownLoader downloader = new UpDownLoader();
+		String topUrl = "http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/aproxcall.log";
+		String text = downloader.readFirstChunk(topUrl, 100);
+		Assert.assertEquals("read text", 
+				" Entering Gaussian System, Link 0=g03\n"+
+				" Input=aproxcall.gjf\n"+
+				" Output=aproxcall.log\n"+
+				" Initial command:\n ",
+				text);
+	}
+
+	/** lists only the directories below topUrl */
+	@Test
+	public void testGetDirectories1() {
+		UpDownLoader downloader = new UpDownLoader();
+		String topUrl = "http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/";
+		List<String> urls = downloader.getDirectoryTree(topUrl, DirectoryType.DIRECTORIES);
+		Assert.assertEquals("urls ", 
+			"[http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/aproxcall/," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/b3lypoptprod/," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/eprfmc8/," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/fmndensidad/," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/freq1reopt3/," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/modelHFHC71prom/," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/optscan/," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/]",
+			urls.toString());
+	}
+
+	/** lists only the non-directory entries below topUrl */
+	@Test
+	public void testGetNonDirectories() {
+		UpDownLoader downloader = new UpDownLoader();
+		String topUrl = "http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/";
+		List<String> urls = downloader.getDirectoryTree(topUrl, DirectoryType.NONDIRECTORIES);
+		Assert.assertEquals("urls ", 
+			"[http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/aproxcall.log," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/b3lypoptprod.qfi," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/aproxcall/archive.cml," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/b3lypoptprod/archive.cml," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/eprfmc8/archive.cml," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/fmndensidad/archive.cml," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/freq1reopt3/archive.cml," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/modelHFHC71prom/archive.cml," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/optscan/archive.cml," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/eprfmc8.qfi," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/fmndensidad.log," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/freq1reopt3.qfi," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/modelHFHC71prom.log," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/optscan.log]",
+			urls.toString());
+	}
+
+	/** lists every entry (files and directories) below topUrl */
+	@Test
+	public void testGetAllFilenames() {
+		UpDownLoader downloader = new UpDownLoader();
+		String topUrl = "http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/";
+		List<String> urls = downloader.getDirectoryTree(topUrl, DirectoryType.ALL);
+		Assert.assertEquals("urls ", 
+			"[http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/aproxcall.log," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/b3lypoptprod.qfi," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/aproxcall/archive.cml," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/aproxcall/," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/b3lypoptprod/archive.cml," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/b3lypoptprod/," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/eprfmc8/archive.cml," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/eprfmc8/," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/fmndensidad/archive.cml," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/fmndensidad/," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/freq1reopt3/archive.cml," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/freq1reopt3/," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/modelHFHC71prom/archive.cml," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/modelHFHC71prom/," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/optscan/archive.cml," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/optscan/," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/cml/," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/eprfmc8.qfi," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/fmndensidad.log," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/freq1reopt3.qfi," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/modelHFHC71prom.log," +
+			" http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/optscan.log]",
+			urls.toString());
+	}
+	
+	/** a header request yields a non-null response */
+	@Test
+	public void testGetHeader() {
+		UpDownLoader downloader = new UpDownLoader();
+		String url = "http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/optscan.log";
+		Response header = downloader.getHeader(url);
+		Assert.assertNotNull(header);
+	}
+	
+	/** last-modified value is available after getHeader */
+	@Test
+	public void testGetLastModifiedDate() {
+		UpDownLoader downloader = new UpDownLoader();
+		String url = "http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/optscan.log";
+		downloader.getHeader(url);
+		String date = downloader.getLastHeaderLastModified();
+		Assert.assertEquals("date", "Tue, 19 Oct 2010 09:10:46 GMT", date);
+	}
+	
+	/** content-length value is available after getHeader */
+	@Test
+	public void testGetLastContentLength() {
+		UpDownLoader downloader = new UpDownLoader();
+		String url = "http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/optscan.log";
+		downloader.getHeader(url);
+		String length = downloader.getLastHeaderContentLength();
+		Assert.assertEquals("length", "573063", length);
+	}
+	
+	/** content-type value is available after getHeader */
+	@Test
+	public void testGetLastContentType() {
+		UpDownLoader downloader = new UpDownLoader();
+		String url = "http://greenchain.ch.cam.ac.uk/patents/quixote/pablo1/optscan.log";
+		downloader.getHeader(url);
+		String type = downloader.getLastHeaderContentType();
+		Assert.assertEquals("type", "application/octet-stream; charset=US-ASCII", type);
+	}
+	
+	/** reads the first 100 characters from the neptuno server */
+	@Test
+	public void testReadFirstNonRestChunk1() {
+		UpDownLoader downloader = new UpDownLoader();
+		String topUrl = "http://neptuno.unizar.es/files/public/datasets/isaias_gaussian/aproxcall.log";
+		String text = downloader.readFirstChunk(topUrl, 100);
+		Assert.assertEquals("read text", 
+				" Entering Gaussian System, Link 0=g03\n"+
+				" Input=aproxcall.gjf\n"+
+				" Output=aproxcall.log\n"+
+				" Initial command:\n ",
+				text);
+	}
+
+	/** as above, for a file with a longer name (text is cut after 100 characters) */
+	@Test
+	public void testReadFirstNonRestChunk2() {
+		UpDownLoader downloader = new UpDownLoader();
+		String topUrl = "http://neptuno.unizar.es/files/public/datasets/isaias_gaussian/ircfoewaproxcall.log";
+		String text = downloader.readFirstChunk(topUrl, 100);
+		Assert.assertEquals("read text", 
+				" Entering Gaussian System, Link 0=g03\n"+
+				" Input=ircfoewaproxcall.gjf\n"+
+				" Output=ircfoewaproxcall.log\n"+
+				" Init",
+				text);
+	}
+	
+	/** size-limited directory download from the neptuno server */
+	@Test
+	public void testDownloadNonRestDirectoryChunks() throws Exception {
+		Date now = freshnessThreshold();
+		UpDownLoader downloader = new UpDownLoader();
+		String topUrl = "http://neptuno.unizar.es/files/public/datasets/isaias_gaussian/";
+		File topDir = new File("target/quixote/nonrestchunks");
+		List<File> fileList = downloader.downloadDirectory(topUrl, topDir, 999);
+		Assert.assertEquals("file count", 8, fileList.size());
+		File file0 = fileList.get(0);
+		Assert.assertTrue("exists", file0.exists());
+		Assert.assertEquals("file name", "aproxcall.log", file0.getName());
+		Assert.assertEquals("file size", 999, FileUtils.readFileToByteArray(file0).length);
+		Assert.assertTrue("is newer than "+now, FileUtils.isFileNewer(file0, now));
+	}
+
+}