Commits

Jason Moiron committed 1a151c7

huge refactor to use goquery, redo the way the Updater interface works, separate site implementations into their own files


Files changed (7)

 	Numberf float64
 }
 
+func (c Chapter) String() string {
+	return fmt.Sprintf("%#v", c)
+}
+
 type Log struct {
 	Timestamp int64
 	Message   string
 
 }
 
+var createTables = map[string]string{
+	"config":    "create table config (key text primary key, value text)",
+	"watchlist": "create table watchlist (name text primary key, chapter text)",
+	"sites":     "create table sites (name text primary key, url text, priority integer, updated integer default 0)",
+	"series":    "create table series (name text, key text, url text primary key, site text, updated integer default 0)",
+	"chapters":  "create table chapters (name text, number text, url text primary key, series text, site text)",
+	"log":       "create table log (message text, timestamp integer default 0)",
+}
+
 func (c *Config) initDb() {
-	tables := []string{
-		"create table config (key text primary key, value text)",
-		"create table watchlist (name text primary key, chapter text)",
-		"create table sites (name text primary key, url text, priority integer, updated integer default 0)",
-		"create table series (name text, key text, url text primary key, site text, updated integer default 0)",
-		"create table chapters (name text, number text, url text primary key, series text, site text)",
-		"create table log (message text, timestamp integer default 0)",
-	}
 	// start a transaction;  sqlite is slow as hell without them
 	tx, _ := c.db.Beginx()
 	defer tx.Commit()
 
 	// create tables
-	for _, t := range tables {
+	for _, t := range createTables {
 		Execf(tx, t)
 	}
 
-// convenient http wrappers for ukiyo
 package main
 
 import (
+	"bufio"
+	"bytes"
+	"errors"
 	"fmt"
-	"io/ioutil"
+	"io"
 	"log"
 	"net/http"
 	"net/url"
+	"os"
+	"path/filepath"
+	"regexp"
 	"strings"
+	"time"
+
+	"github.com/jmoiron/goquery"
 )
 
-var EnableHttpCache = false
+// This file implements an optional caching layer around the http client in
+// the standard library as well as some convenience functions.  The cache
+// can be implemented as one of many backends, but a filesystem-based one
+// is provided.
+
+var CacheMiss = errors.New("CacheMiss")
+
+type HttpCache interface {
+	Get(key string) (io.Reader, error)
+	Set(key string, data io.Reader) error
+}
+
+// A FilesystemCache implements the HttpCache interface on the filesystem, where
+// keys are files in BaseDir.  This means that keys might be modified on the way
+// in to make them suitable as filenames.
+type FilesystemCache struct {
+	BaseDir string
+	Timeout time.Duration
+}
+
+// defang replaces any non-pathy characters in a string with -
+func defang(key string) string {
+	re := regexp.MustCompile(`[^-\._a-zA-Z0-9]`)
+	return string(re.ReplaceAll([]byte(key), []byte(`-`)))
+}
+
+func NewFilesystemCache(baseDir string, timeout time.Duration) *FilesystemCache {
+	return &FilesystemCache{BaseDir: baseDir, Timeout: timeout}
+}
+
+func (f FilesystemCache) Key(key string) string {
+	return filepath.Join(f.BaseDir, defang(key))
+}
+
+// Get reads a key from the cache, returning CacheMiss if it is not present.
+func (f FilesystemCache) Get(key string) (io.Reader, error) {
+	path := f.Key(key)
+	file, err := os.Open(path)
+	if err != nil && os.IsNotExist(err) {
+		return nil, CacheMiss
+	} else if err != nil {
+		return nil, err
+	}
+	return bufio.NewReader(file), nil
+}
+
+// Set a key from an io.Reader.  Closing the io.Reader is up to the caller.
+func (f FilesystemCache) Set(key string, data io.Reader) error {
+	path := f.Key(key)
+	file, err := os.Create(path)
+	if err != nil {
+		return err
+	}
+	defer file.Close()
+	_, err = io.Copy(file, data)
+	return err
+}
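
A minimal usage sketch of the cache above (illustrative only, not part of the commit; it assumes a writable ./cache directory and uses only packages this file already imports):

    cache := NewFilesystemCache("./cache", 24*time.Hour)
    // Set writes the body to a file named after the defanged key.
    cache.Set("http://example.com/manga/list", strings.NewReader("<html>...</html>"))
    // Get returns CacheMiss for keys that have never been stored.
    if _, err := cache.Get("http://example.com/other"); err == CacheMiss {
        log.Println("not cached yet")
    }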
 
 var httpClient = &http.Client{
 	// keep user-agent:
 	},
 }
 
-// http get which can use an optional cache file to return values from
-func HttpGet(url string, cache ...string) ([]byte, error) {
-	cachefile := ""
-	if len(cache) != 0 {
-		cachefile = cache[0]
+// HttpGetCached gets a url either from upstream or from its cached value.
+// If cache is nil then it will be skipped automatically.
+func HttpGetCached(url string, cache HttpCache) (io.Reader, error) {
+	if cache == nil {
+		return HttpGet(url)
 	}
-
-	var body []byte
-
-	if len(cachefile) > 0 && exists(cachefile) && EnableHttpCache {
-		if opts.Verbose {
-			log.Printf("Reading in cached body for %s (in %s)\n", url, cachefile)
-		}
-		body, _ = ioutil.ReadFile(cachefile)
-	} else {
-		req, err := http.NewRequest("GET", url, nil)
-		if err != nil {
-			return body, err
-		}
-
-		resp, err := httpClient.Do(req)
-		if err != nil {
-			return body, err
-		}
-		defer resp.Body.Close()
-		body, err = ioutil.ReadAll(resp.Body)
-
-		if err != nil {
-			return body, err
-		}
-
-		if len(cachefile) > 0 {
-			err = ioutil.WriteFile(cachefile, body, 0655)
-			if err != nil {
-				fmt.Printf("Error writing out cachefile %s\n", cachefile)
-			}
-		}
+	result, err := cache.Get(url)
+	// If we had an error which wasn't a CacheMiss, log it.
+	if err != nil && err != CacheMiss {
+		log.Printf("Unknown error fetching from cache: %s\n", err)
+	} else if err == nil {
+		// If we had a cache hit, return it.
+		return result, err
 	}
-
-	return body, nil
+	// Cache Miss, fetch over Http.
+	body, err := HttpGet(url)
+	if err != nil {
+		return nil, err
+	}
+	// Read the body up front so the cache can be populated with a complete
+	// copy; tee-ing into the cache from a goroutine would race against the
+	// caller's reads and usually write an empty file.
+	var buf bytes.Buffer
+	if _, err := io.Copy(&buf, body); err != nil {
+		return nil, err
+	}
+	go cache.Set(url, bytes.NewReader(buf.Bytes()))
+	return bytes.NewReader(buf.Bytes()), nil
 }
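
A sketch of the intended call path for the site updaters (illustrative; fetchDocument is a hypothetical name, and the GetDocument helper later in this commit wraps the same flow and also sets document.Url):

    func fetchDocument(url string) (*goquery.Document, error) {
        // Serve from the cache when possible; fall back to HttpGet on a miss.
        body, err := HttpGetCached(url, NewFilesystemCache("./cache", 24*time.Hour))
        if err != nil {
            return nil, err
        }
        return goquery.NewDocumentFromReader(body)
    }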
 
-// Get a document object, pre-parsed, which must be freed by the caller
-func HttpGetDocument(url string, cache ...string) (*Document, error) {
-	document := new(Document)
-	body, err := HttpGet(url, cache...)
+// HttpGet gets a URL, returning the body as a reader on success, or an error
+// if it encountered a problem.  If you want a version that transparently
+// caches, use HttpGetCached.
+func HttpGet(url string) (io.Reader, error) {
+	req, err := http.NewRequest("GET", url, nil)
 	if err != nil {
-		return document, err
+		return nil, err
 	}
-	err = document.FromBytes(body)
+	resp, err := httpClient.Do(req)
 	if err != nil {
-		return document, err
+		return nil, err
 	}
-	return document, nil
+	return resp.Body, nil
 }
 
-// Download a url to a path
+// HttpDownloadTo downloads a url directly to a file.
 func HttpDownloadTo(url, path string) error {
-	body, err := HttpGet(url)
+	reader, err := HttpGet(url)
 	if err != nil {
 		return err
 	}
-	err = ioutil.WriteFile(path, body, 0644)
+	file, err := os.Create(path)
 	if err != nil {
 		return err
 	}
-	return nil
+	defer file.Close()
+	_, err = io.Copy(file, reader)
+	return err
 }
 
 // return the base url for a given url string
 	}
 	return strings.Join(ss, "/")
 }
+
+// BaseJoin joins the query to base if the query does not start with "http",
+// otherwise returns the query as is.
+func BaseJoin(base string, query string) string {
+	if strings.HasPrefix(query, "http") {
+		return query
+	}
+	return UrlJoin(base, query)
+}
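
For illustration (hypothetical URLs), the two branches of BaseJoin behave like this:

    // An absolute query is returned unchanged.
    img := BaseJoin("http://example.com", "http://cdn.example.com/01.jpg")
    // A relative query is joined against the base via UrlJoin.
    page := BaseJoin("http://example.com", "/manga/some-title/c001/2.html")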
+
+func Attr(s *goquery.Selection, attr string) string {
+	val, ok := s.Attr(attr)
+	if !ok {
+		log.Printf("Could not find expected attr %s on %v\n", attr, s)
+	}
+	return val
+}
+package main
+
+import (
+	"log"
+	"strings"
+
+	"github.com/jmoiron/goquery"
+)
+
+type Mangahere struct {
+	*Site
+}
+
+var _ Updater = &Mangahere{}
+
+func (m *Mangahere) SeriesListPath() string { return UrlJoin(m.Url, "/mangalist/") }
+func (m *Mangahere) Cache() HttpCache       { return nil }
+func (m *Mangahere) Name() string           { return m.Site.Name }
+
+func (m *Mangahere) UpdateSeriesList(doc *goquery.Document) []*Series {
+	series := make([]*Series, 0, 10)
+
+	for _, a := range doc.Find("div.list_manga li a").All() {
+		s := &Series{
+			Site: m.Name(),
+			Name: trim(a.Text()),
+			Url:  BaseJoin(m.Url, trim(Attr(a, "href"))),
+		}
+
+		spl := strings.Split(strings.TrimRight(s.Url, "/"), "/")
+		s.Key = spl[len(spl)-1]
+		series = append(series, s)
+	}
+	log.Printf("Found %d series for mangahere\n", len(series))
+	return series
+}
+
+func (m *Mangahere) UpdateChapters(series *Series) []*Chapter {
+	// always print when we do this
+	log.Printf("Updating %s for series %s\n", m.Name(), series.Name)
+
+	document, err := GetDocument(m, series.Url)
+	if err != nil {
+		return []*Chapter{}
+	}
+	base := BaseUrl(series.Url)
+
+	chapters := make([]*Chapter, 0, 10)
+	for _, a := range document.Find(".detail_list li a").All() {
+		c := &Chapter{
+			Site:   series.Site,
+			Series: series.Name,
+			Name:   trim(a.Text()),
+			Url:    BaseJoin(base, trim(Attr(a, "href"))),
+		}
+
+		spl := strings.Split(strings.TrimRight(c.Url, "/"), "/")
+		c.Number = FindNumber(spl[len(spl)-1])
+		if len(c.Number) == 0 {
+			c.Number = FindNumber(c.Name)
+		} else {
+			// remove leading junk
+			c.Number = strings.TrimLeft(c.Number, " \t0")
+		}
+
+		log.Println(c)
+		chapters = append(chapters, c)
+	}
+	vprintf("Found %d chapters on mangahere\n", len(chapters))
+	return chapters
+}
+
+func (m *Mangahere) GetPageUrls(doc *goquery.Document) []string {
+	options := doc.Find(".readpage_top .go_page option").All()[1:]
+	urls := make([]string, 0, len(options))
+	base := BaseUrl(doc.Url.String())
+
+	for _, o := range options {
+		fragment := Attr(o, "value")
+		urls = append(urls, BaseJoin(base, fragment))
+	}
+
+	return urls
+}
+
+func (m *Mangahere) GetImageUrl(doc *goquery.Document) string {
+	return Attr(doc.Find("#image"), "src")
+}
 package main
 
 import (
-	"bytes"
 	"fmt"
 	"log"
 	"regexp"
 	"strings"
 	"time"
-	"unsafe"
-
-	"github.com/moovweb/gokogiri"
-	"github.com/moovweb/gokogiri/css"
-	"github.com/moovweb/gokogiri/html"
-	"github.com/moovweb/gokogiri/xml"
-	"github.com/moovweb/gokogiri/xpath"
 )
 
 var numRegex = regexp.MustCompile("(\\d+(?:\\.\\d+)?)")
 // Find a number in a string
 func FindNumber(str string) string {
 	groups := numRegex.FindStringSubmatch(str)
+	fmt.Println("FindNumber: ", groups)
 	if groups == nil {
 		return ""
 	}
 	if timestamp == 0 {
 		return "never"
 	}
-	t := time.Unix(timestamp, 0)
-	return t.Format(time.UnixDate)
+	return time.Unix(timestamp, 0).Format(time.UnixDate)
 }
 
 func FileExtension(str string) string {
 	return ext
 }
 
-func tick() { fmt.Printf("%s\n", time.Now().String()) }
-
-// Selectable implements a simple interface which allows to get the inner text
-// of some element as well as run a CSS select on it and get a list of nodes
-type Selectable interface {
-	CssSelect(selector string) []Node
-	Text() string
-}
-
-// A node wrapper, in order to provide a similar interface in the future
-// possibly without gokogiri
-type Node struct {
-	doc  *html.HtmlDocument
-	ptr  unsafe.Pointer
-	node xml.Node
-}
-
-// A Document wrapper, which can be Freed and Selected, and exposes
-// the root as a Node object with the Root field
-type Document struct {
-	doc    *html.HtmlDocument
-	docptr unsafe.Pointer
-	Root   Node
-}
-
-// Fill a Node element from a ptr
-func (n *Node) fromPtr(ptr unsafe.Pointer, doc *html.HtmlDocument) {
-	n.ptr = ptr
-	n.doc = doc
-	n.node = xml.NewNode(ptr, doc)
-}
-
-// Fill a Node element from an xml.Node
-func (n *Node) fromNode(node xml.Node, doc *html.HtmlDocument) {
-	n.ptr = node.NodePtr()
-	n.node = node
-	n.doc = doc
-}
-
-func (n *Node) CssSelect(selector string) []Node {
-	xpathexpr := css.Convert(selector, 0)
-	expr := xpath.Compile(xpathexpr)
-	nxp := xpath.NewXPath(n.ptr)
-	defer nxp.Free()
-	nodes, err := nxp.EvaluateAsNodeset(n.ptr, expr)
-	if err != nil {
-		log.Printf("Could not evaluate xpath expr: %s", err)
-		return []Node{}
-	}
-	ret := make([]Node, len(nodes))
-	for i, ptr := range nodes {
-		ret[i].fromPtr(ptr, n.doc)
-	}
-	return ret
-}
-
-func (n *Node) Text() string {
-	return n.node.Content()
-}
-
-func (n *Node) Attr(attr string) string {
-	return n.node.Attr(attr)
-}
-
-func (d *Document) CssSelect(selector string) []Node {
-	return d.Root.CssSelect(selector)
-}
-
-func (d *Document) Text() string {
-	return ""
-}
-
-func (d *Document) FromString(str string) error {
-	buff := bytes.NewBufferString(str)
-	bites := buff.Bytes()
-	return d.FromBytes(bites)
-}
-
-func (d *Document) FromBytes(str []byte) error {
-	doc, err := gokogiri.ParseHtml(str)
-	if err != nil {
-		return err
-	}
-	if doc == nil {
-		return fmt.Errorf("Nil document returned")
-	}
-	d.doc = doc
-	d.Root.fromNode(doc.Root(), doc)
-	d.docptr = doc.DocPtr()
-	return nil
-}
+func tick() { log.Printf("\n") }
 
-func (d *Document) Free() {
-	d.doc.Free()
+func trim(s string) string {
+	return strings.Trim(s, " \t\r\n")
 }
+package main
+
+import (
+	"log"
+	"strings"
+
+	"github.com/jmoiron/goquery"
+)
+
+func init() {
+	// TODO: register the starkana updater
+}
+
+type Starkana struct {
+	*Site
+}
+
+func (s *Starkana) SeriesListPath() string { return UrlJoin(s.Url, "/manga/list/") }
+func (s *Starkana) Cache() HttpCache       { return nil }
+func (s *Starkana) Name() string           { return s.Site.Name }
+
+func (s *Starkana) GetPageUrls(doc *goquery.Document) []string {
+	options := doc.Find("select#page_switch option").All()
+	urls := make([]string, 0, len(options)-1)
+
+	base := BaseUrl(doc.Url.String())
+	for _, o := range options[1:] {
+		u := Attr(o, "value")
+		if len(u) > 0 {
+			urls = append(urls, UrlJoin(base, u))
+		}
+	}
+	return urls
+}
+
+func (s *Starkana) GetImageUrl(doc *goquery.Document) string {
+	return Attr(doc.Find("#pic img"), "src")
+}
+
+func (s *Starkana) UpdateChapters(series *Series) []*Chapter {
+	// always print what we're doing when we do this
+	log.Printf("Updating %s for series %s\n", s.Name(), series.Name)
+
+	document, err := GetDocument(s, series.Url)
+	if err != nil {
+		return []*Chapter{}
+	}
+	base := BaseUrl(series.Url)
+
+	results := make([]*Chapter, 0, 10)
+	for _, a := range document.Find("a.download-link").All() {
+		c := &Chapter{
+			Site:   series.Site,
+			Series: series.Name,
+			Name:   trim(a.Text()),
+			Url:    BaseJoin(base, trim(Attr(a, "href"))),
+		}
+		numElem := a.Find("strong").All()
+		if len(numElem) > 0 {
+			c.Number = numElem[0].Text()
+		}
+		results = append(results, c)
+	}
+	vprintf("Found %d chapters on starkana\n", len(results))
+	return results
+}
+
+func (s *Starkana) UpdateSeriesList(doc *goquery.Document) []*Series {
+	series := make([]*Series, 0, 10)
+	for _, a := range doc.Find("#inner_page >div a").All() {
+		r := &Series{}
+		r.Site = s.Site.Name
+		r.Name = trim(a.Text())
+		r.Url = UrlJoin(s.Site.Url, Attr(a, "href"))
+		spl := strings.Split(r.Url, "/")
+		r.Key = spl[len(spl)-1]
+		r.Url = r.Url + "?mature_confirm=1"
+
+		series = append(series, r)
+	}
+
+	log.Printf("Found %d series for starkana\n", len(series))
+	return series
+}
+
+var _ Updater = new(Starkana)
 
 import (
 	"fmt"
-	"github.com/jmoiron/go-pkg-optarg"
+	"log"
 	"os"
 	"runtime/pprof"
 	"strconv"
 	"strings"
+
+	"github.com/jmoiron/go-pkg-optarg"
 )
 
 const VERSION = "0.1b"
 	Search               bool
 	Show                 bool
 	Log                  bool
+	Clear                bool
 	Filter               *Filter
 }
 
 var opts Options
 
 // print only if verbose is on
-func vPrintf(s string, x ...interface{}) {
+func vprintf(s string, x ...interface{}) {
 	if opts.Verbose {
-		fmt.Printf(s, x...)
+		log.Printf(s, x...)
 	}
 }
 
 			fmt.Println(err)
 			return
 		}
-		vPrintf("Activating Profiling...\n")
+		vprintf("Activating Profiling...\n")
 		pprof.StartCPUProfile(f)
-		vPrintf("Profiling activated.\n")
+		vprintf("Profiling activated.\n")
 		defer pprof.StopCPUProfile()
 	}
 
 		return
 	}
 
+	if opts.Clear {
+		ClearChapters()
+		return
+	}
+
 	if opts.ListSites {
 		ListSites()
 		return
 	}
 }
 
+func ClearChapters() {
+	tx := config.db.MustBegin()
+	Execf(tx, "drop table chapters;")
+	Execf(tx, createTables["chapters"])
+	//Execf(tx, "drop table series;")
+	//Execf(tx, createTables["series"])
+	tx.Commit()
+}
+
 func Show(terms ...string) {
 	UpdateSites()
 	series, err := FindOneSeries(true, terms...)
 	optarg.Add("", "version", "Show version and exit.", false)
 	optarg.Add("v", "verbose", "Show more output.", false)
 	optarg.Add("", "profile", "Profile ukiyo.", false)
+	optarg.Add("", "clear", "Clear cached site data.", false)
 
 	optarg.Header("Downloading")
 	optarg.Add("u", "update", "Update all site & series info.", false)
 			opts.ToggleUseUnderscores = opt.Bool()
 		case "log":
 			opts.Log = opt.Bool()
+		case "clear":
+			opts.Clear = opt.Bool()
 		// sites
 		case "sites":
 			opts.ListSites = opt.Bool()
 
 import (
 	"fmt"
+	"io"
+	"log"
+	"net/url"
 	"os"
 	"path/filepath"
 	"strings"
 	"time"
+
+	"github.com/jmoiron/goquery"
 )
 
 // update once per week unless forced
-var SITE_UPDATE_FREQUENCY = int64(86400) * 7
+var SiteUpdateFrequency = int64(86400) * 7
 
 // update once per day unless forced
-var CHAPTER_UPDATE_FREQUENCY = int64(86400)
-var MAX_CONCURRENT_WORKERS = 3
+var ChapterUpdateFrequency = int64(86400)
+var MaxConcurrentWorkers = 3
+
+type Updater interface {
+	// SeriesListPath returns a url path for the manga series list.  If the site
+	// lacks such a path, it should return the empty string.
+	SeriesListPath() string
+	// Cache returns the active cache for this updater.  If the return is nil,
+	// caching is disabled.
+	Cache() HttpCache
+	// Name returns the name of this updater.
+	Name() string
+	// UpdateChapters returns a list of Chapters for the series being updated.
+	UpdateChapters(series *Series) []*Chapter
+	// UpdateSeriesList takes a Document and returns a list of Series from it.
+	UpdateSeriesList(*goquery.Document) []*Series
+	// GetPageUrls returns a list of the urls (as strings) on the page.
+	GetPageUrls(*goquery.Document) []string
+	// GetImageUrl returns the url of the image on the Document, which is a page.
+	GetImageUrl(*goquery.Document) string
+}
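
As an illustration of the new contract (a hypothetical stub, not one of the sites in this commit), a site implementation embeds *Site and provides these seven methods; the Starkana and Mangahere files follow this shape, including the compile-time assertion:

    type ExampleSite struct {
        *Site
    }

    // compile-time check that ExampleSite satisfies Updater
    var _ Updater = &ExampleSite{}

    func (e *ExampleSite) SeriesListPath() string { return "" }  // search-only site: no series list
    func (e *ExampleSite) Cache() HttpCache       { return nil } // nil disables caching
    func (e *ExampleSite) Name() string           { return e.Site.Name }

    func (e *ExampleSite) UpdateChapters(series *Series) []*Chapter     { return []*Chapter{} }
    func (e *ExampleSite) UpdateSeriesList(*goquery.Document) []*Series { return []*Series{} }
    func (e *ExampleSite) GetPageUrls(*goquery.Document) []string       { return []string{} }
    func (e *ExampleSite) GetImageUrl(*goquery.Document) string         { return "" }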
 
+// UpdateSeries uses an updater to load the local cache of series at that site,
+// if the site has a list of series available.  This allows searching locally in
+// SQL, which is much faster and hits the upstream site less.  Returns an empty
+// list when SeriesListPath() returns the empty string.
 func UpdateSeries(u Updater) []*Series {
-	url := u.SeriesListPath()
-	vPrintf("Updating via %s\n", url)
+	seriesUrl := u.SeriesListPath()
+	// If this updater doesn't have a series list, make a note and continue.
+	if len(seriesUrl) == 0 {
+		log.Printf("Skipping %s series list: search interface only.\n", u.Name())
+		return []*Series{}
+	}
+
+	vprintf("Updating %s via %s\n", u.Name(), seriesUrl)
 
-	cachefile := u.CachePath()
-	document, err := HttpGetDocument(url, cachefile)
+	var err error
+	var body io.Reader
+	cache := u.Cache()
+	if cache != nil {
+		body, err = HttpGetCached(seriesUrl, cache)
+	} else {
+		body, err = HttpGet(seriesUrl)
+	}
+
+	if err != nil {
+		log.Printf("Error fetching %s series list: %s\n", u.Name(), err)
+		return []*Series{}
+	}
+
+	document, err := goquery.NewDocumentFromReader(body)
 	if err != nil {
-		fmt.Printf("Error fetching site %s\n", u.Name())
+		log.Printf("Error parsing %s series list: %s\n", u.Name(), err)
 		return []*Series{}
 	}
-	defer document.Free()
+	document.Url, err = url.Parse(seriesUrl)
+	if err != nil {
+		log.Fatalf("Invalid URL `%s` loaded without error, but parsing failed: %s\n", seriesUrl, err)
+	}
 	return u.UpdateSeriesList(document)
 }
 
-type Updater interface {
-	SeriesListPath() string
-	CachePath() string
-	Name() string
-	UpdateChapters(series *Series) []*Chapter
-	UpdateSeriesList(doc *Document) []*Series
-	GetPageUrls(string, *Document) []string
-	GetImageUrl(*Document) string
+func GetDocument(u Updater, Url string) (*goquery.Document, error) {
+	result, err := HttpGetCached(Url, u.Cache())
+	if err != nil {
+		log.Printf("Error getting %s from %s\n", Url, u.Name())
+		return nil, err
+	}
+
+	document, err := goquery.NewDocumentFromReader(result)
+	if err != nil {
+		log.Printf("Error parsing %s from %s\n", Url, u.Name())
+		return nil, err
+	}
+	document.Url, _ = url.Parse(Url)
+	return document, nil
 }
 
-var Updaters = map[string]Updater{}
+var UpdaterRegistry = map[string]Updater{}
 
 func init() {
 	sites := []*Site{}
 			u = &Mangahere{v}
 		}
 		if u != nil {
-			Updaters[v.Name] = u
+			UpdaterRegistry[v.Name] = u
 		}
 	}
 }
 	"mangafox":    "/manga/",
 }
 
-type Starkana struct{ *Site }
-
-func (s *Starkana) SeriesListPath() string { return UrlJoin(s.Site.Url, "/manga/list/") }
-func (s *Starkana) CachePath() string      { return "./cache/" + s.Site.Name + ".html" }
-func (s *Starkana) Name() string           { return s.Site.Name }
-
-func (s *Starkana) GetPageUrls(url string, doc *Document) []string {
-	options := doc.CssSelect("select#page_switch option")
-	if len(options) < 1 {
-		return []string{}
-	}
-	urls := make([]string, len(options)-1)
-	base := BaseUrl(url)
-
-	for i, o := range options[1:] {
-		urls[i] = UrlJoin(base, o.Attr("value"))
-	}
-	return urls
-}
-
-func (s *Starkana) GetImageUrl(doc *Document) string {
-	img := doc.CssSelect("#pic img")
-	if len(img) == 0 {
-		return ""
-	}
-	return img[0].Attr("src")
-}
-
-func (s *Starkana) UpdateChapters(series *Series) []*Chapter {
-	// always print what we're doing when we do this
-	fmt.Printf("Updating starkana for series %s\n", series.Name)
-
-	cachefile := fmt.Sprintf("./cache/%s-%s.html", series.Name, series.Site)
-	document, err := HttpGetDocument(series.Url, cachefile)
-	if err != nil {
-		fmt.Printf("Error getting %s\n", series.Name)
-		return []*Chapter{}
-	}
-	defer document.Free()
-	base := BaseUrl(series.Url)
-
-	anchors := document.CssSelect("a.download-link")
-	results := make([]*Chapter, len(anchors))
-	for i, anchor := range anchors {
-		c := &Chapter{}
-		c.Site = series.Site
-		c.Series = series.Name
-		c.Name = strings.Trim(anchor.Text(), " \t\r\n")
-		c.Url = strings.Trim(anchor.Attr("href"), " \t\r\n")
-		if !strings.HasPrefix(c.Url, "http") {
-			c.Url = UrlJoin(base, c.Url)
-		}
-		numelem := anchor.CssSelect("strong")
-		if len(numelem) > 0 {
-			c.Number = numelem[0].Text()
-		}
-		results[i] = c
-	}
-	vPrintf("Found %d chapters on starkana\n", len(results))
-	return results
-}
-
-func (s *Starkana) UpdateSeriesList(doc *Document) []*Series {
-	series := doc.CssSelect("#inner_page >div a")
-	results := make([]*Series, len(series))
-	for i, anchor := range series {
-		r := &Series{}
-		r.Site = s.Site.Name
-		r.Name = strings.Trim(anchor.Text(), "\t ")
-		r.Url = UrlJoin(s.Site.Url, anchor.Attr("href"))
-		spl := strings.Split(r.Url, "/")
-		r.Key = spl[len(spl)-1]
-		r.Url = r.Url + "?mature_confirm=1"
-		results[i] = r
-	}
-
-	fmt.Printf("Found %d series for starkana\n", len(results))
-	return results
-}
-
-type Mangahere struct{ *Site }
-
-func (m *Mangahere) SeriesListPath() string { return UrlJoin(m.Site.Url, "/mangalist/") }
-func (m *Mangahere) CachePath() string      { return "./cache/" + m.Site.Name + ".html" }
-func (m *Mangahere) Name() string           { return m.Site.Name }
-
-func (m *Mangahere) UpdateSeriesList(doc *Document) []*Series {
-	series := doc.CssSelect("div.list_manga li a")
-	results := make([]*Series, len(series))
-
-	for i, anchor := range series {
-		r := &Series{}
-		r.Site = m.Site.Name
-		r.Name = strings.Trim(anchor.Text(), " \t")
-		r.Url = strings.Trim(anchor.Attr("href"), " \t")
-		if !strings.HasPrefix(r.Url, "http") {
-			r.Url = UrlJoin(m.Site.Url, r.Url)
-		}
-		url := strings.TrimRight(r.Url, "/")
-		spl := strings.Split(url, "/")
-		r.Key = spl[len(spl)-1]
-		results[i] = r
-	}
-	fmt.Printf("Found %d series for mangahere\n", len(results))
-	return results
-}
-
-func (m *Mangahere) UpdateChapters(series *Series) []*Chapter {
-	// always print when we do this
-	fmt.Printf("Updating mangahere for series %s\n", series.Name)
-
-	cachefile := fmt.Sprintf("./cache/%s-%s.html", series.Name, series.Site)
-	document, err := HttpGetDocument(series.Url, cachefile)
-	if err != nil {
-		fmt.Printf("Error getting %s\n", series.Name)
-		return []*Chapter{}
-	}
-	defer document.Free()
-	base := BaseUrl(series.Url)
-
-	anchors := document.CssSelect(".detail_list li a")
-	results := make([]*Chapter, len(anchors))
-	for i, anchor := range anchors {
-		c := &Chapter{}
-		c.Site = series.Site
-		c.Series = series.Name
-		c.Name = strings.Trim(anchor.Text(), " \t\n\r")
-		c.Url = strings.Trim(anchor.Attr("href"), " \t\n\r")
-
-		spl := strings.Split(strings.TrimRight(c.Url, "/"), "/")
-		c.Number = FindNumber(spl[len(spl)-1])
-		if len(c.Number) == 0 {
-			c.Number = FindNumber(c.Name)
-		} else {
-			// remove leading junk
-			c.Number = strings.TrimLeft(c.Number, " \t0")
-		}
-		if !strings.HasPrefix(c.Url, "http") {
-			c.Url = UrlJoin(base, c.Url)
-		}
-		results[i] = c
-	}
-	vPrintf("Found %d chapters on mangahere\n", len(results))
-	return results
-}
-
-func (m *Mangahere) GetPageUrls(url string, doc *Document) []string {
-	options := doc.CssSelect(".readpage_top .go_page option")
-	if len(options) < 1 {
-		return []string{}
-	}
-	urls := make([]string, 0, len(options))
-	base := BaseUrl(url)
-
-	for _, o := range options[1:] {
-		fragment := o.Attr("value")
-		if strings.HasPrefix(fragment, "http") {
-			urls = append(urls, fragment)
-		} else {
-			urls = append(urls, UrlJoin(base, fragment))
-		}
-	}
-	return urls
-}
-
-func (m *Mangahere) GetImageUrl(doc *Document) string {
-	img := doc.CssSelect("#image")
-	if len(img) == 0 {
-		return ""
-	}
-	return img[0].Attr("src")
-}
-
 func UpdateSites(force ...bool) {
 	Force := DefaultFalse(force...)
 
 	now := time.Now().Unix()
 	after := now
 	if !Force {
-		after -= SITE_UPDATE_FREQUENCY
+		after -= SiteUpdateFrequency
 	}
 
 	q := "select name, url, priority, updated from sites WHERE updated < ? ORDER BY priority"
 	}
 
 	if !Force {
-		vPrintf("Updating %d sites last updated over 1 week ago:\n", len(sites))
+		vprintf("Updating %d sites last updated over 1 week ago:\n", len(sites))
 	} else {
-		vPrintf("Force-updating %d sites:\n", len(sites))
+		vprintf("Force-updating %d sites:\n", len(sites))
 	}
 
-	sem := make(chan bool, MAX_CONCURRENT_WORKERS)
+	sem := make(chan bool, MaxConcurrentWorkers)
 	results := []*Series{}
 
 	for _, s := range sites {
 				}
 				<-sem
 			}()
-			updater, ok := Updaters[site.Name]
+			updater, ok := UpdaterRegistry[site.Name]
 			if !ok {
 				fmt.Printf("Unknown site-name %s, skipping update.\n", site.Name)
 				return
 
 	tx, _ := config.db.Beginx()
 
-	vPrintf("Received %d total results\n", len(results))
+	vprintf("Received %d total results\n", len(results))
 
 	q = `insert or replace into series 
 		(name, key, url, site, updated) values 
 	now := time.Now().Unix()
 	after := now
 	if !Force {
-		after -= CHAPTER_UPDATE_FREQUENCY
+		after -= ChapterUpdateFrequency
 	}
 
 	series := []*Series{}
 	}
 	if len(series) > 0 {
 		if !Force {
-			vPrintf("Updating %d sites for %s last updated over 1 week ago:\n", len(series), name)
+			vprintf("Updating %d sites for %s last updated over 1 week ago:\n", len(series), name)
 		} else {
-			vPrintf("Force-updating %d sites for %s:\n", len(series), name)
+			vprintf("Force-updating %d sites for %s:\n", len(series), name)
 		}
 		for _, s := range series {
-			vPrintf("Updating %s (@%s, %s)\n", s.Name, s.Site, s.Url)
+			vprintf("Updating %s (@%s, %s)\n", s.Name, s.Site, s.Url)
 		}
 
-		sem := make(chan bool, MAX_CONCURRENT_WORKERS)
+		sem := make(chan bool, MaxConcurrentWorkers)
 		results := []*Chapter{}
 
 		for _, s := range series {
 					}
 					<-sem
 				}()
-				updater, ok := Updaters[s.Site]
+				updater, ok := UpdaterRegistry[s.Site]
 				if !ok {
 					fmt.Printf("Unknown site-name %s, skipping update.\n", s.Site)
 					return
 
 		tx, _ := config.db.Beginx()
 
-		vPrintf("Received %d total results\n", len(results))
+		vprintf("Received %d total results\n", len(results))
 
 		for _, s := range series {
 			Execf(tx, "delete from chapters where name=? and site=?", name, s.Site)
 
 func DownloadChapter(chapter *Chapter) error {
 	site, url := SelectUrl(chapter)
-	vPrintf(" %s %s (%s, %s)\n", chapter.Series, chapter.Number, site, url)
-	updater := Updaters[site]
+	vprintf(" %s %s (%s, %s)\n", chapter.Series, chapter.Number, site, url)
+	updater := UpdaterRegistry[site]
 
-	doc, err := HttpGetDocument(url)
+	doc, err := GetDocument(updater, url)
 	if err != nil {
 		fmt.Printf("Error fetching `%s`: %s\n", url, err)
 		return err
 			fmt.Sprintf("%s-c%s.zip", series, chapter.Number))
 	}
 
-	page_urls := updater.GetPageUrls(url, doc)
+	page_urls := updater.GetPageUrls(doc)
 	numwidth := len(fmt.Sprintf("%d", len(page_urls)))
 	numfmt := fmt.Sprintf("%%0%dd", numwidth)
 	// fmt.Printf("Making destination dir: %s", destpath)
 	os.MkdirAll(destpath, 0755)
 	// fmt.Printf("Number format: %s\n", numfmt)
 
-	vPrintf("Page Urls: %v\n", page_urls)
+	vprintf("Page Urls: %v\n", page_urls)
 	update := fmt.Sprintf("Downloading %s %s (from %s): %%d of %%d", chapter.Series, chapter.Number, site)
 
 	type Img struct {
 		url string
 	}
 
-	sem := make(chan bool, MAX_CONCURRENT_WORKERS)
+	sem := make(chan bool, MaxConcurrentWorkers)
 	images := make(chan Img, len(page_urls))
 	completed := make(chan int)
 	// send the first image on the images channel
 		sem <- true
 		go func(num int, url string) {
 			defer func() { <-sem }()
-			doc, err := HttpGetDocument(url)
+			doc, err := GetDocument(updater, url)
 			if err != nil {
 				fmt.Printf("Error fetching page %03d (%s)\n", num, url)
 				return