(* tumblr-tools / tumblr.ml *)

open Http_client.Convenience (* to use this, link 'netclient' from OCAMLPACKS *)
open Xml

exception Bad_tumblr_xml;;

(* Fetch [url] with [http_get] and write the returned string to standard
   output, then flush stdout so the text shows up immediately.
   Returns unit; anything [http_get] raises propagates to the caller. *)
let get_xml url =
  print_string (http_get url);
  flush stdout;;

(* Build the root URL of the tumblelog owned by [username],
   i.e. http://<username>.tumblr.com.  [username] is inserted verbatim. *)
let get_root_url username =
  "http://" ^ username ^ ".tumblr.com";;

(* Build the base URL of the read API for [username],
   i.e. http://<username>.tumblr.com/api/read, without any query string.
   [username] is inserted verbatim. *)
let api_url_base username =
  "http://" ^ username ^ ".tumblr.com/api/read";;

(* Return the first direct child element of [node] whose tag equals [name].

   Text (PCData) children are skipped by pattern matching on the node
   constructor instead of being handed to [Xml.tag], which raises
   [Xml.Not_element] on them and would abort the search before later
   siblings are examined.

   Raises [Bad_tumblr_xml] when no child element carries that tag.
   [Xml.children] is still applied to [node] itself, so whatever it raises
   for a non-element [node] propagates unchanged. *)
let get_tagged_child node name =
  let rec find_child = function
    | [] -> raise Bad_tumblr_xml
    | (Xml.Element (tag, _, _) as child) :: _ when tag = name -> child
    | (Xml.Element _ | Xml.PCData _) :: rest -> find_child rest
  in
  find_child (Xml.children node);;

(* Return the text of the first direct child element of [node] whose tag is
   [tagname] and whose attribute [attrname] has the value [attrvalue].
   The text is taken from the first child of that element via [Xml.pcdata].

   Children that cannot match are skipped instead of aborting the search:
   - text (PCData) siblings, on which [Xml.tag] would raise;
   - elements with the right tag that lack [attrname], for which
     [Xml.attrib] raises [Xml.No_attribute].

   Raises [Bad_tumblr_xml] when no child matches, or when the matching
   element has no content at all.  [Xml.pcdata] may still raise if the
   first child of the matching element is itself an element. *)
let get_tagged_and_attr_child node tagname attrname attrvalue =
  (* The attribute lookup stays with [Xml.attrib] so name matching follows
     the library rules; only the missing-attribute case is absorbed. *)
  let has_wanted_attr child =
    try Xml.attrib child attrname = attrvalue
    with Xml.No_attribute _ -> false
  in
  let rec search = function
    | [] -> raise Bad_tumblr_xml
    | Xml.PCData _ :: rest -> search rest
    | (Xml.Element (tag, _, content) as child) :: rest ->
        (* [&&] replaces the deprecated [&]; the tag is tested first so the
           attribute is only looked up on elements with the right tag. *)
        if tag = tagname && has_wanted_attr child then
          (match content with
           | first :: _ -> Xml.pcdata first
           | [] -> raise Bad_tumblr_xml)
        else search rest
  in
  search (Xml.children node);;

(* Download [url] with [http_get] and store the body in
   data/<id>/<basename of url>.

   - The download happens before anything is touched on disk, so a failed
     request leaves no empty file behind and never holds an open descriptor.
   - The directories data and data/<id> are created when missing; an
     already existing directory is not an error, so re-runs work.
   - The file is opened with O_CREAT and O_TRUNC: without O_CREAT a new
     image could never be created, and O_TRUNC keeps a shorter re-download
     from leaving stale bytes at the end of an older file.
   - The descriptor is closed even when a write fails.

   Returns unit.  Raises [Unix.Unix_error] on file system failures other
   than an existing directory, and propagates whatever [http_get] raises. *)
let save_img id url =
  (* [Unix.write_substring] may write fewer bytes than requested, so keep
     going until the whole range has been written. *)
  let rec write_all fd data pos len =
    if len > 0 then begin
      let written = Unix.write_substring fd data pos len in
      write_all fd data (pos + written) (len - written)
    end
  in
  let ensure_dir path =
    try Unix.mkdir path 0o755
    with Unix.Unix_error (Unix.EEXIST, _, _) -> ()
  in
  let bin = http_get url in
  ensure_dir "data";
  ensure_dir ("data/" ^ id);
  let path = Printf.sprintf "data/%s/%s" id (Filename.basename url) in
  let fd =
    Unix.openfile path [Unix.O_WRONLY; Unix.O_CREAT; Unix.O_TRUNC] 0o644
  in
  (* Close the descriptor on failure too, then re-raise the original error. *)
  (try write_all fd bin 0 (String.length bin)
   with e -> Unix.close fd; raise e);
  Unix.close fd;;

(* Fold step over post nodes: post XML -> (id, url) pushed onto [list].

   Reads the id attribute of [post], looks up the text of its photo-url
   child whose max-width attribute is 1280, saves that image with
   [save_img], reports it on stdout and returns [list] with the new
   (id, url) pair consed onto the front. *)
let get_imgurl list post =
  let id = Xml.attrib post "id" in
  let url = get_tagged_and_attr_child post "photo-url" "max-width" "1280" in
  let () = save_img id url in
  let () = print_endline (String.concat " " [id; url; "saved."]) in
  (id, url) :: list;;

(* Request one page of up to 50 photo posts of [username], starting at post
   index [offset], and run [get_imgurl] over every child of the posts node.

   Returns the (id, url) pairs accumulated by the fold; since each pair is
   consed onto the accumulator, the list is in reverse document order.

   NOTE(review): the posts container is taken to be the second child of the
   document root (index 1) - confirm against the actual API response. *)
let query username offset =
  let url =
    api_url_base username
    ^ Printf.sprintf "?start=%d&num=50&type=photo" offset
  in
  let root = Xml.parse_string (http_get url) in
  let posts_xml = List.nth (Xml.children root) 1 in
  List.fold_left get_imgurl [] (Xml.children posts_xml);;

(* Fetch successive pages of photo posts of [username], beginning at post
   index [start] and advancing by 50 (the page size [query] requests).

   Returns the pages in fetch order, one (id, url) list per page, and stops
   at the first page that comes back empty.

   The previous definition recursed unconditionally with no base case, so
   under strict evaluation it could never return a value and ended in a
   stack overflow.  The accumulator loop below is tail-recursive and
   terminates as soon as [query] yields no posts. *)
let endless username start =
  let rec collect offset pages =
    match query username offset with
    | [] -> List.rev pages
    | page -> collect (offset + 50) (page :: pages)
  in
  collect start [];;

(* Print every (id, url) pair of [list] on its own line, id and url
   separated by a single space, in list order, followed by one empty line.

   The previous version bound [id] and [url] in its pattern but never used
   them, so the only output was the final empty line. *)
let print_id_urls list =
  List.iter (fun (id, url) -> print_endline (id ^ " " ^ url)) list;
  print_endline "";;

(* Program entry point: process the first page (offset 0) of the kuenishi
   tumblelog and hand the resulting pairs to [print_id_urls]. *)
let () = print_id_urls (query "kuenishi" 0);;


(*
let rec print_urls list = 
  match list with 
    | [] -> print_endline "";
    | url::remain -> 
	print_endline url;
	print_urls remain;;


  print_endline (api_url_base "kuenishi");;
  let posts = List.map (Xml.children posts_xml) in
  print_endline (Xml.tag posts) *)
(* Residue from the code-browser page this file was copied from; it is not
   part of the program and is kept only as a comment:

   Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
   Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
   Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
   Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
   Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
   Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
   Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.
*)