tumblr-tools / tumblr.ml

open Http_client.Convenience (* to use this, link 'netclient' from OCAMLPACKS *)
open Xml

(** Raised by [get_tagged_and_attr_child] when the list of children is
    exhausted without finding the requested element. *)
exception Bad_tumblr_xml;;

(** [api_url_base username] is the URL of the read API endpoint on
    [username]'s tumblr.com subdomain, without any query string. *)
let api_url_base = Printf.sprintf "http://%s.tumblr.com/api/read";;

(** [get_tagged_and_attr_child node tagname attrname attrvalue] scans the
    direct children of [node] in order and returns the PCDATA held by the
    first sub-node of the first child whose tag is [tagname] and whose
    [attrname] attribute equals [attrvalue].

    @raise Bad_tumblr_xml if no child matches, or if the matching child has
    no sub-nodes to read the PCDATA from. *)
let get_tagged_and_attr_child node tagname attrname attrvalue =
  let rec pup list =
    match list with
    | [] -> raise Bad_tumblr_xml
    | child :: remain ->
      (* [&&] short-circuits, so the attribute is only looked up on children
         that already carry the right tag. *)
      if Xml.tag child = tagname && Xml.attrib child attrname = attrvalue then
        (match Xml.children child with
         | first :: _ -> Xml.pcdata first
         | [] -> raise Bad_tumblr_xml)
      else pup remain
  in
  pup (Xml.children node);;

(** [save_img id url] downloads [url] and stores the response body in
    [data/<id>/<basename of url>].

    The body is fetched before anything is created on disk, so a failed
    download leaves no empty file behind. Directories that already exist are
    reused, which lets the tool be re-run on a post it has already seen.

    Any exception raised by [http_get] propagates unchanged.
    @raise Unix.Unix_error if a directory cannot be created for a reason
    other than already existing.
    @raise Sys_error if the output file cannot be opened or written. *)
let save_img id url =
  (* Create [dir] unless it is already there. *)
  let ensure_dir dir =
    try Unix.mkdir dir 0o755
    with Unix.Unix_error (Unix.EEXIST, _, _) -> ()
  in
  let bin = http_get url in
  let dir = "data/" ^ id in
  ensure_dir "data";
  ensure_dir dir;
  let path = Printf.sprintf "%s/%s" dir (Filename.basename url) in
  (* A channel handles partial writes itself, so no manual write loop is
     needed; [Open_binary] keeps the image bytes untouched. *)
  let oc =
    open_out_gen [Open_wronly; Open_creat; Open_trunc; Open_binary] 0o644 path
  in
  (* Close the channel on the failure path too, then let the error surface. *)
  (try output_string oc bin
   with e -> close_out_noerr oc; raise e);
  close_out oc;;

(** Fold step over [<post>] elements. Reads the post's ["id"] attribute and
    the content of its ["photo-url"] child whose ["max-width"] is ["1280"],
    saves that image with [save_img], prints a confirmation line, and returns
    [list] with the [(id, url)] pair prepended. *)
let get_imgurl list post =
  let post_id = Xml.attrib post "id" in
  let photo_url =
    get_tagged_and_attr_child post "photo-url" "max-width" "1280"
  in
  let () = save_img post_id photo_url in
  let () = print_endline (post_id ^ " " ^ photo_url ^ " saved.") in
  (post_id, photo_url) :: list;;

(** [query username offset] requests up to 50 photo posts of [username]
    starting at [offset], parses the XML response, and folds [get_imgurl]
    over the children of the root's second child. Because [get_imgurl]
    prepends, the resulting [(id, url)] list is in reverse document order. *)
let query username offset =
  let url =
    api_url_base username ^ Printf.sprintf "?start=%d&num=50&type=photo" offset
  in
  let response = http_get url in
  let root = Xml.parse_string response in
  (* Index 1 selects the second child of the root element. *)
  let posts_xml = List.nth (Xml.children root) 1 in
  List.fold_left get_imgurl [] (Xml.children posts_xml);;

(** [endless list username start] fetches pages of [username]'s photo posts
    from offset [start] onwards, appending each page to [list], and returns
    the accumulated list once a page comes back empty.

    The offset advances by 50 per page, matching the [num=50] that [query]
    puts in its request. *)
let rec endless list username start =
  match query username start with
  | [] -> list (* an empty page means there is nothing left to fetch *)
  | additional -> endless (list @ additional) username (start + 50);;

(** [print_id_urls list] prints every [(id, url)] pair of [list] on its own
    line as ["<id> <url>"], followed by one empty line. *)
let rec print_id_urls list =
  match list with
  | [] -> print_endline ""
  | (id, url) :: remain ->
    print_endline (id ^ " " ^ url);
    print_id_urls remain;;

(* Single-page alternative: print_id_urls (query "kuenishi" 0);; *)
(* Program entry point: download every page and list what was fetched.
   NOTE(review): the start offset is 50, not 0, so the first 50 posts are
   never requested -- confirm this is intended. *)
let () = print_id_urls (endless [] "kuenishi" 50);;

(** [print_urls list] prints each URL of [list] on its own line, in order,
    followed by one empty line. *)
let print_urls list =
  List.iter print_endline list;
  print_endline "";;
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.