Commits

Jason Moiron committed 1a151c7

huge refactor to use goquery, redo the way the Updater interface works, separate site implementations into their own files

  • Parent commits f5daa76

Comments (0)

Files changed (7)

 	Numberf float64
 }
 
+func (c Chapter) String() string {
+	return fmt.Sprintf("%#v", c)
+}
+
 type Log struct {
 	Timestamp int64
 	Message   string
 
 }
 
+// createTables maps table names to their create statements so that individual
+// tables can be recreated later (see ClearChapters).
+var createTables = map[string]string{
+	"config":    "create table config (key text primary key, value text)",
+	"watchlist": "create table watchlist (name text primary key, chapter text)",
+	"sites":     "create table sites (name text primary key, url text, priority integer, updated integer default 0)",
+	"series":    "create table series (name text, key text, url text primary key, site text, updated integer default 0)",
+	"chapters":  "create table chapters (name text, number text, url text primary key, series text, site text)",
+	"log":       "create table log (message text, timestamp integer default 0)",
+}
+
 func (c *Config) initDb() {
-	tables := []string{
-		"create table config (key text primary key, value text)",
-		"create table watchlist (name text primary key, chapter text)",
-		"create table sites (name text primary key, url text, priority integer, updated integer default 0)",
-		"create table series (name text, key text, url text primary key, site text, updated integer default 0)",
-		"create table chapters (name text, number text, url text primary key, series text, site text)",
-		"create table log (message text, timestamp integer default 0)",
-	}
 	// start a transaction;  sqlite is slow as hell without them
 	tx, _ := c.db.Beginx()
 	defer tx.Commit()
 
 	// create tables
-	for _, t := range tables {
+	for _, t := range createTables {
 		Execf(tx, t)
 	}
 
-// convenient http wrappers for ukiyo
 package main
 
 import (
+	"bufio"
+	"bytes"
+	"errors"
 	"fmt"
-	"io/ioutil"
+	"io"
 	"log"
 	"net/http"
 	"net/url"
+	"os"
+	"path/filepath"
+	"regexp"
 	"strings"
+	"time"
+
+	"github.com/jmoiron/goquery"
 )
 
-var EnableHttpCache = false
+// This file implements an optional caching layer around the http client in
+// the standard library, as well as some convenience functions.  The cache
+// can be implemented as one of many backends, but a filesystem-based one
+// is provided.
+
+var CacheMiss = errors.New("CacheMiss")
+
+type HttpCache interface {
+	Get(key string) (io.Reader, error)
+	Set(key string, data io.Reader) error
+}
+
+// A FilesystemCache implements the HttpCache on the filesystem where keys are
+// files in the BaseDir.  This means that keys might be modified on the way in
+// to make them suitable for filenames.
+type FilesystemCache struct {
+	BaseDir string
+	Timeout time.Duration
+}
+
+// defang replaces any non-pathy characters in a string with -
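+// (e.g. defang("http://example.com/manga/list") == "http---example.com-manga-list")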
+func defang(key string) string {
+	re := regexp.MustCompile(`[^-\._a-zA-Z0-9]`)
+	return re.ReplaceAllString(key, "-")
+}
+
+func NewFilesystemCache(baseDir string, timeout time.Duration) *FilesystemCache {
+	return &FilesystemCache{BaseDir: baseDir, Timeout: timeout}
+}
+
+func (f FilesystemCache) Key(key string) string {
+	return filepath.Join(f.BaseDir, defang(key))
+}
+
+// Get reads a key from the cache, returning CacheMiss when the key is absent.
+func (f FilesystemCache) Get(key string) (io.Reader, error) {
+	path := f.Key(key)
+	file, err := os.Open(path)
+	if err != nil && os.IsNotExist(err) {
+		return nil, CacheMiss
+	} else if err != nil {
+		return nil, err
+	}
+	return bufio.NewReader(file), nil
+}
+
+// Set a key from an io.Reader.  Closing the io.Reader is up to the caller.
+func (f FilesystemCache) Set(key string, data io.Reader) error {
+	path := f.Key(key)
+	file, err := os.Create(path)
+	if err != nil {
+		return err
+	}
+	defer file.Close()
+	_, err = io.Copy(file, data)
+	return err
+}
 
 var httpClient = &http.Client{
 	// keep user-agent:
 	},
 }
 
-// http get which can use an optional cache file to return values from
-func HttpGet(url string, cache ...string) ([]byte, error) {
-	cachefile := ""
-	if len(cache) != 0 {
-		cachefile = cache[0]
+// HttpGetCached gets a url either from upstream or from its cached value.
+// If cache is nil then it will be skipped automatically.
+func HttpGetCached(url string, cache HttpCache) (io.Reader, error) {
+	if cache == nil {
+		return HttpGet(url)
 	}
-
-	var body []byte
-
-	if len(cachefile) > 0 && exists(cachefile) && EnableHttpCache {
-		if opts.Verbose {
-			log.Printf("Reading in cached body for %s (in %s)\n", url, cachefile)
-		}
-		body, _ = ioutil.ReadFile(cachefile)
-	} else {
-		req, err := http.NewRequest("GET", url, nil)
-		if err != nil {
-			return body, err
-		}
-
-		resp, err := httpClient.Do(req)
-		if err != nil {
-			return body, err
-		}
-		defer resp.Body.Close()
-		body, err = ioutil.ReadAll(resp.Body)
-
-		if err != nil {
-			return body, err
-		}
-
-		if len(cachefile) > 0 {
-			err = ioutil.WriteFile(cachefile, body, 0655)
-			if err != nil {
-				fmt.Printf("Error writing out cachefile %s\n", cachefile)
-			}
-		}
+	result, err := cache.Get(url)
+	// If we had an error which wasn't a CacheMiss, log it.
+	if err != nil && err != CacheMiss {
+		log.Printf("Unknown error fetching from cache: %s\n", err)
+	} else if err == nil {
+		// If we had a cache hit, return it.
+		return result, err
 	}
-
-	return body, nil
+	// Cache Miss, fetch over Http.
+	body, err := HttpGet(url)
+	if err != nil {
+		return nil, err
+	}
+	// Buffer the whole body so it can be written to the cache before it is
+	// returned; the cache write must not race with the caller's reads.
+	var buf bytes.Buffer
+	if _, err := io.Copy(&buf, body); err != nil {
+		return nil, err
+	}
+	if err := cache.Set(url, bytes.NewReader(buf.Bytes())); err != nil {
+		log.Printf("Error writing %s to cache: %s\n", url, err)
+	}
+	return bytes.NewReader(buf.Bytes()), nil
 }
 
-// Get a document object, pre-parsed, which must be freed by the caller
-func HttpGetDocument(url string, cache ...string) (*Document, error) {
-	document := new(Document)
-	body, err := HttpGet(url, cache...)
+// HttpGet gets a URL, returning the body as a reader on success, or an error
+// if it encountered a problem.  If you want a version that transparently caches
+// use HttpGetCached.
+func HttpGet(url string) (io.Reader, error) {
+	req, err := http.NewRequest("GET", url, nil)
 	if err != nil {
-		return document, err
+		return nil, err
 	}
-	err = document.FromBytes(body)
+	resp, err := httpClient.Do(req)
 	if err != nil {
-		return document, err
+		return nil, err
 	}
-	return document, nil
+	return resp.Body, nil
 }
 
-// Download a url to a path
+// HttpDownloadTo downloads a url directly to a file.
 func HttpDownloadTo(url, path string) error {
-	body, err := HttpGet(url)
+	reader, err := HttpGet(url)
 	if err != nil {
 		return err
 	}
-	err = ioutil.WriteFile(path, body, 0644)
+	file, err := os.Create(path)
 	if err != nil {
 		return err
 	}
-	return nil
+	defer file.Close()
+	_, err = io.Copy(file, reader)
+	return err
 }
 
 // return the base url for a given url string
 	}
 	return strings.Join(ss, "/")
 }
+
+// BaseJoin joins the query to base if the query does not start with "http",
+// otherwise returns the query as is.
+func BaseJoin(base string, query string) string {
+	if strings.HasPrefix(query, "http") {
+		return query
+	}
+	return UrlJoin(base, query)
+}
+
+// Attr returns the value of the named attribute on the selection, logging a
+// message when the attribute is missing.
+func Attr(s *goquery.Selection, attr string) string {
+	val, ok := s.Attr(attr)
+	if !ok {
+		log.Printf("Could not find expected attr %s on selection %v\n", attr, s)
+	}
+	return val
+}
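
Illustrative sketch, not part of the commit: wiring the FilesystemCache into HttpGetCached. The cache directory, timeout, and URL below are made up.

	func fetchListExample() {
		// a hypothetical filesystem cache rooted at ./cache with a 24h timeout
		cache := NewFilesystemCache("./cache", 24*time.Hour)
		body, err := HttpGetCached("http://example.com/manga/list/", cache)
		if err != nil {
			log.Printf("fetch failed: %s\n", err)
			return
		}
		var buf bytes.Buffer
		if _, err := io.Copy(&buf, body); err != nil {
			log.Printf("read failed: %s\n", err)
			return
		}
		vprintf("fetched %d bytes\n", buf.Len())
	}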

File mangahere.go

+package main
+
+import (
+	"log"
+	"strings"
+
+	"github.com/jmoiron/goquery"
+)
+
+type Mangahere struct {
+	*Site
+}
+
+var _ Updater = &Mangahere{}
+
+func (m *Mangahere) SeriesListPath() string { return UrlJoin(m.Url, "/mangalist/") }
+func (m *Mangahere) Cache() HttpCache       { return nil }
+func (m *Mangahere) Name() string           { return m.Site.Name }
+
+func (m *Mangahere) UpdateSeriesList(doc *goquery.Document) []*Series {
+	series := make([]*Series, 0, 10)
+
+	for _, a := range doc.Find("div.list_manga li a").All() {
+		s := &Series{
+			Site: m.Name(),
+			Name: trim(a.Text()),
+			Url:  BaseJoin(m.Url, trim(Attr(a, "href"))),
+		}
+
+		spl := strings.Split(strings.TrimRight(s.Url, "/"), "/")
+		s.Key = spl[len(spl)-1]
+		series = append(series, s)
+	}
+	log.Printf("Found %d series for mangahere\n", len(series))
+	return series
+}
+
+func (m *Mangahere) UpdateChapters(series *Series) []*Chapter {
+	// always print when we do this
+	log.Printf("Updating %s for series %s\n", m.Name(), series.Name)
+
+	document, err := GetDocument(m, series.Url)
+	if err != nil {
+		return []*Chapter{}
+	}
+	base := BaseUrl(series.Url)
+
+	chapters := make([]*Chapter, 0, 10)
+	for _, a := range document.Find(".detail_list li a").All() {
+		c := &Chapter{
+			Site:   series.Site,
+			Series: series.Name,
+			Name:   trim(a.Text()),
+			Url:    BaseJoin(base, trim(Attr(a, "href"))),
+		}
+
+		spl := strings.Split(strings.TrimRight(c.Url, "/"), "/")
+		c.Number = FindNumber(spl[len(spl)-1])
+		if len(c.Number) == 0 {
+			c.Number = FindNumber(c.Name)
+		} else {
+			// remove leading junk
+			c.Number = strings.TrimLeft(c.Number, " \t0")
+		}
+
+		log.Println(c)
+		chapters = append(chapters, c)
+	}
+	vprintf("Found %d chapters on mangahere\n", len(chapters))
+	return chapters
+}
+
+func (m *Mangahere) GetPageUrls(doc *goquery.Document) []string {
+	options := doc.Find(".readpage_top .go_page option").All()
+	if len(options) < 1 {
+		return []string{}
+	}
+	options = options[1:]
+	urls := make([]string, 0, len(options))
+	base := BaseUrl(doc.Url.String())
+
+	for _, o := range options {
+		fragment := Attr(o, "value")
+		urls = append(urls, BaseJoin(base, fragment))
+	}
+
+	return urls
+}
+
+func (m *Mangahere) GetImageUrl(doc *goquery.Document) string {
+	return Attr(doc.Find("#image"), "src")
+}
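
Illustrative sketch, not part of the commit: how the Mangahere methods above combine with GetDocument and HttpDownloadTo when fetching a chapter's pages. Treating the chapter url itself as the first page, and the flat .jpg filenames, are assumptions here rather than anything this commit specifies.

	func downloadPagesSketch(m *Mangahere, chapterUrl, destDir string) error {
		doc, err := GetDocument(m, chapterUrl)
		if err != nil {
			return err
		}
		// assumption: the chapter url is page 1 and GetPageUrls returns the rest
		pages := append([]string{chapterUrl}, m.GetPageUrls(doc)...)
		for i, page := range pages {
			pdoc, err := GetDocument(m, page)
			if err != nil {
				return err
			}
			img := m.GetImageUrl(pdoc)
			// extension detection is skipped in this sketch
			dest := filepath.Join(destDir, fmt.Sprintf("%03d.jpg", i+1))
			if err := HttpDownloadTo(img, dest); err != nil {
				return err
			}
		}
		return nil
	}
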
 package main
 
 import (
-	"bytes"
 	"fmt"
 	"log"
 	"regexp"
 	"strings"
 	"time"
-	"unsafe"
-
-	"github.com/moovweb/gokogiri"
-	"github.com/moovweb/gokogiri/css"
-	"github.com/moovweb/gokogiri/html"
-	"github.com/moovweb/gokogiri/xml"
-	"github.com/moovweb/gokogiri/xpath"
 )
 
 var numRegex = regexp.MustCompile("(\\d+(?:\\.\\d+)?)")
 // Find a number in a string
 func FindNumber(str string) string {
 	groups := numRegex.FindStringSubmatch(str)
+	fmt.Println("FindNumber: ", groups)
 	if groups == nil {
 		return ""
 	}
 	if timestamp == 0 {
 		return "never"
 	}
-	t := time.Unix(timestamp, 0)
-	return t.Format(time.UnixDate)
+	return time.Unix(timestamp, 0).Format(time.UnixDate)
 }
 
 func FileExtension(str string) string {
 	return ext
 }
 
-func tick() { fmt.Printf("%s\n", time.Now().String()) }
-
-// Selectable implements a simple interface which allows to get the inner text
-// of some element as well as run a CSS select on it and get a list of nodes
-type Selectable interface {
-	CssSelect(selector string) []Node
-	Text() string
-}
-
-// A node wrapper, in order to provide a similar interface in the future
-// possibly without gokogiri
-type Node struct {
-	doc  *html.HtmlDocument
-	ptr  unsafe.Pointer
-	node xml.Node
-}
-
-// A Document wrapper, which can be Freed and Selected, and exposes
-// the root as a Node object with the Root field
-type Document struct {
-	doc    *html.HtmlDocument
-	docptr unsafe.Pointer
-	Root   Node
-}
-
-// Fill a Node element from a ptr
-func (n *Node) fromPtr(ptr unsafe.Pointer, doc *html.HtmlDocument) {
-	n.ptr = ptr
-	n.doc = doc
-	n.node = xml.NewNode(ptr, doc)
-}
-
-// Fill a Node element from an xml.Node
-func (n *Node) fromNode(node xml.Node, doc *html.HtmlDocument) {
-	n.ptr = node.NodePtr()
-	n.node = node
-	n.doc = doc
-}
-
-func (n *Node) CssSelect(selector string) []Node {
-	xpathexpr := css.Convert(selector, 0)
-	expr := xpath.Compile(xpathexpr)
-	nxp := xpath.NewXPath(n.ptr)
-	defer nxp.Free()
-	nodes, err := nxp.EvaluateAsNodeset(n.ptr, expr)
-	if err != nil {
-		log.Printf("Could not evaluate xpath expr: %s", err)
-		return []Node{}
-	}
-	ret := make([]Node, len(nodes))
-	for i, ptr := range nodes {
-		ret[i].fromPtr(ptr, n.doc)
-	}
-	return ret
-}
-
-func (n *Node) Text() string {
-	return n.node.Content()
-}
-
-func (n *Node) Attr(attr string) string {
-	return n.node.Attr(attr)
-}
-
-func (d *Document) CssSelect(selector string) []Node {
-	return d.Root.CssSelect(selector)
-}
-
-func (d *Document) Text() string {
-	return ""
-}
-
-func (d *Document) FromString(str string) error {
-	buff := bytes.NewBufferString(str)
-	bites := buff.Bytes()
-	return d.FromBytes(bites)
-}
-
-func (d *Document) FromBytes(str []byte) error {
-	doc, err := gokogiri.ParseHtml(str)
-	if err != nil {
-		return err
-	}
-	if doc == nil {
-		return fmt.Errorf("Nil document returned")
-	}
-	d.doc = doc
-	d.Root.fromNode(doc.Root(), doc)
-	d.docptr = doc.DocPtr()
-	return nil
-}
+func tick() { log.Printf("\n") }
 
-func (d *Document) Free() {
-	d.doc.Free()
+func trim(s string) string {
+	return strings.Trim(s, " \t\r\n")
 }
+package main
+
+import (
+	"log"
+	"strings"
+
+	"github.com/jmoiron/goquery"
+)
+
+func init() {
+	// TODO: register the starkana updater
+}
+
+type Starkana struct {
+	*Site
+}
+
+func (s *Starkana) SeriesListPath() string { return UrlJoin(s.Url, "/manga/list/") }
+func (s *Starkana) Cache() HttpCache       { return nil }
+func (s *Starkana) Name() string           { return s.Site.Name }
+
+func (s *Starkana) GetPageUrls(doc *goquery.Document) []string {
+	options := doc.Find("select#page_switch option").All()
+	if len(options) < 1 {
+		return []string{}
+	}
+	urls := make([]string, 0, len(options)-1)
+
+	base := BaseUrl(doc.Url.String())
+	for _, o := range options[1:] {
+		u := Attr(o, "value")
+		if len(u) > 0 {
+			urls = append(urls, UrlJoin(base, u))
+		}
+	}
+	return urls
+}
+
+func (s *Starkana) GetImageUrl(doc *goquery.Document) string {
+	return Attr(doc.Find("#pic img"), "src")
+}
+
+func (s *Starkana) UpdateChapters(series *Series) []*Chapter {
+	// always print what we're doing when we do this
+	log.Printf("Updating %s for series %s\n", s.Name(), series.Name)
+
+	document, err := GetDocument(s, series.Url)
+	if err != nil {
+		return []*Chapter{}
+	}
+	base := BaseUrl(series.Url)
+
+	results := make([]*Chapter, 0, 10)
+	for _, a := range document.Find("a.download-link").All() {
+		c := &Chapter{
+			Site:   series.Site,
+			Series: series.Name,
+			Name:   trim(a.Text()),
+			Url:    BaseJoin(base, trim(Attr(a, "href"))),
+		}
+		numElem := a.Find("strong").All()
+		if len(numElem) > 0 {
+			c.Number = numElem[0].Text()
+		}
+		results = append(results, c)
+	}
+	vprintf("Found %d chapters on starkana\n", len(results))
+	return results
+}
+
+func (s *Starkana) UpdateSeriesList(doc *goquery.Document) []*Series {
+	series := make([]*Series, 0, 10)
+	for _, a := range doc.Find("#inner_page >div a").All() {
+		r := &Series{}
+		r.Site = s.Site.Name
+		r.Name = trim(a.Text())
+		r.Url = UrlJoin(s.Site.Url, Attr(a, "href"))
+		spl := strings.Split(r.Url, "/")
+		r.Key = spl[len(spl)-1]
+		r.Url = r.Url + "?mature_confirm=1"
+
+		series = append(series, r)
+	}
+
+	log.Printf("Found %d series for starkana\n", len(series))
+	return series
+}
+
+var _ Updater = new(Starkana)
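
The init above is still a TODO. A purely hypothetical shape for it, assuming the Site row has already been loaded; in this commit registration actually stays centralized in the sites.go init.

	func registerStarkanaSketch(site *Site) {
		// hypothetical self-registration; mirrors what the sites.go init does centrally
		UpdaterRegistry[site.Name] = &Starkana{site}
	}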
 
 import (
 	"fmt"
-	"github.com/jmoiron/go-pkg-optarg"
+	"log"
 	"os"
 	"runtime/pprof"
 	"strconv"
 	"strings"
+
+	"github.com/jmoiron/go-pkg-optarg"
 )
 
 const VERSION = "0.1b"
 	Search               bool
 	Show                 bool
 	Log                  bool
+	Clear                bool
 	Filter               *Filter
 }
 
 var opts Options
 
 // print only if verbose is on
-func vPrintf(s string, x ...interface{}) {
+func vprintf(s string, x ...interface{}) {
 	if opts.Verbose {
-		fmt.Printf(s, x...)
+		log.Printf(s, x...)
 	}
 }
 
 			fmt.Println(err)
 			return
 		}
-		vPrintf("Activating Profiling...\n")
+		vprintf("Activating Profiling...\n")
 		pprof.StartCPUProfile(f)
-		vPrintf("Profiling activated.\n")
+		vprintf("Profiling activated.\n")
 		defer pprof.StopCPUProfile()
 	}
 
 		return
 	}
 
+	if opts.Clear {
+		ClearChapters()
+		return
+	}
+
 	if opts.ListSites {
 		ListSites()
 		return
 	}
 }
 
+// ClearChapters drops and recreates the chapters table.
+func ClearChapters() {
+	tx := config.db.MustBegin()
+	Execf(tx, "drop table chapters;")
+	Execf(tx, createTables["chapters"])
+	//Execf(tx, "drop table series;")
+	//Execf(tx, createTables["series"])
+	tx.Commit()
+}
+
 func Show(terms ...string) {
 	UpdateSites()
 	series, err := FindOneSeries(true, terms...)
 	optarg.Add("", "version", "Show version and exit.", false)
 	optarg.Add("v", "verbose", "Show more output.", false)
 	optarg.Add("", "profile", "Profile ukiyo.", false)
+	optarg.Add("", "clear", "Clear cached site data.", false)
 
 	optarg.Header("Downloading")
 	optarg.Add("u", "update", "Update all site & series info.", false)
 			opts.ToggleUseUnderscores = opt.Bool()
 		case "log":
 			opts.Log = opt.Bool()
+		case "clear":
+			opts.Clear = opt.Bool()
 		// sites
 		case "sites":
 			opts.ListSites = opt.Bool()
 
 import (
 	"fmt"
+	"io"
+	"log"
+	"net/url"
 	"os"
 	"path/filepath"
 	"strings"
 	"time"
+
+	"github.com/jmoiron/goquery"
 )
 
 // update once per week unless forced
-var SITE_UPDATE_FREQUENCY = int64(86400) * 7
+var SiteUpdateFrequency = int64(86400) * 7
 
 // update once per day unless forced
-var CHAPTER_UPDATE_FREQUENCY = int64(86400)
-var MAX_CONCURRENT_WORKERS = 3
+var ChapterUpdateFrequency = int64(86400)
+var MaxConcurrentWorkers = 3
+
+type Updater interface {
+	// SeriesListPath returns a url path for the manga series list.  If the site
+	// lacks such a path, it should return the empty string.
+	SeriesListPath() string
+	// Cache returns the active cache for this updater.  A nil return value
+	// disables caching.
+	Cache() HttpCache
+	// Name returns the name of this updater.
+	Name() string
+	// UpdateChapters returns a list of Chapters for the series being updated.
+	UpdateChapters(series *Series) []*Chapter
+	// UpdateSeriesList takes a Document and returns a list of Series from it.
+	UpdateSeriesList(*goquery.Document) []*Series
+	// GetPageUrls returns a list of the urls (as strings) on the page.
+	GetPageUrls(*goquery.Document) []string
+	// GetImageUrl returns the url of the image on the Document, which is a page.
+	GetImageUrl(*goquery.Document) string
+}
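
Illustrative stub, not part of the commit, showing the minimal shape a new site implementation must provide; the compile-time assertion mirrors the ones in mangahere.go and starkana.go.

	type nopUpdater struct{}

	func (nopUpdater) SeriesListPath() string                       { return "" } // search interface only
	func (nopUpdater) Cache() HttpCache                             { return nil }
	func (nopUpdater) Name() string                                 { return "nop" }
	func (nopUpdater) UpdateChapters(series *Series) []*Chapter     { return nil }
	func (nopUpdater) UpdateSeriesList(*goquery.Document) []*Series { return nil }
	func (nopUpdater) GetPageUrls(*goquery.Document) []string       { return nil }
	func (nopUpdater) GetImageUrl(*goquery.Document) string         { return "" }

	var _ Updater = nopUpdater{}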
 
+// UpdateSeries uses an Updater to load the local cache of series available at
+// its site, if the site publishes a series list.  This makes searching much
+// faster and hits the upstream site less, since searches can run locally in sql.
+// It returns an empty list when SeriesListPath() returns the empty string.
 func UpdateSeries(u Updater) []*Series {
-	url := u.SeriesListPath()
-	vPrintf("Updating via %s\n", url)
+	seriesUrl := u.SeriesListPath()
+	// If this updater doesn't have a series list, make a note and continue.
+	if len(seriesUrl) == 0 {
+		log.Printf("Skipping %s series list: search interface only.\n", u.Name())
+		return []*Series{}
+	}
+
+	vprintf("Updating %s via %s\n", u.Name(), seriesUrl)
 
-	cachefile := u.CachePath()
-	document, err := HttpGetDocument(url, cachefile)
+	var err error
+	var body io.Reader
+	cache := u.Cache()
+	if cache != nil {
+		body, err = HttpGetCached(seriesUrl, cache)
+	} else {
+		body, err = HttpGet(seriesUrl)
+	}
+
+	if err != nil {
+		log.Printf("Error fetching %s series list: %s\n", u.Name(), err)
+		return []*Series{}
+	}
+
+	document, err := goquery.NewDocumentFromReader(body)
 	if err != nil {
-		fmt.Printf("Error fetching site %s\n", u.Name())
+		log.Printf("Error parsing %s series list: %s\n", u.Name(), err)
 		return []*Series{}
 	}
-	defer document.Free()
+	document.Url, err = url.Parse(seriesUrl)
+	if err != nil {
+		log.Fatalf("Invalid URL `%s` loaded without error, but parsing failed: %s\n", seriesUrl, err)
+	}
 	return u.UpdateSeriesList(document)
 }
 
-type Updater interface {
-	SeriesListPath() string
-	CachePath() string
-	Name() string
-	UpdateChapters(series *Series) []*Chapter
-	UpdateSeriesList(doc *Document) []*Series
-	GetPageUrls(string, *Document) []string
-	GetImageUrl(*Document) string
+// GetDocument fetches a url through the updater's cache (when one is
+// configured) and parses it into a goquery Document with its Url field set.
+func GetDocument(u Updater, pageUrl string) (*goquery.Document, error) {
+	result, err := HttpGetCached(pageUrl, u.Cache())
+	if err != nil {
+		log.Printf("Error getting %s from %s\n", pageUrl, u.Name())
+		return nil, err
+	}
+
+	document, err := goquery.NewDocumentFromReader(result)
+	if err != nil {
+		log.Printf("Error parsing %s from %s\n", pageUrl, u.Name())
+		return nil, err
+	}
+	document.Url, _ = url.Parse(pageUrl)
+	return document, nil
 }
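
Illustrative only, not part of the commit: looking an updater up in the registry and fetching a parsed document through it. The registry key and URL are invented.

	func getDocumentExample() {
		u, ok := UpdaterRegistry["mangahere"] // assumes this key was registered in init()
		if !ok {
			return
		}
		doc, err := GetDocument(u, "http://example.com/some-series/")
		if err != nil {
			return
		}
		vprintf("fetched %s, found %d links\n", doc.Url.String(), len(doc.Find("a").All()))
	}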
 
-var Updaters = map[string]Updater{}
+var UpdaterRegistry = map[string]Updater{}
 
 func init() {
 	sites := []*Site{}
 			u = &Mangahere{v}
 		}
 		if u != nil {
-			Updaters[v.Name] = u
+			UpdaterRegistry[v.Name] = u
 		}
 	}
 }
 	"mangafox":    "/manga/",
 }
 
-type Starkana struct{ *Site }
-
-func (s *Starkana) SeriesListPath() string { return UrlJoin(s.Site.Url, "/manga/list/") }
-func (s *Starkana) CachePath() string      { return "./cache/" + s.Site.Name + ".html" }
-func (s *Starkana) Name() string           { return s.Site.Name }
-
-func (s *Starkana) GetPageUrls(url string, doc *Document) []string {
-	options := doc.CssSelect("select#page_switch option")
-	if len(options) < 1 {
-		return []string{}
-	}
-	urls := make([]string, len(options)-1)
-	base := BaseUrl(url)
-
-	for i, o := range options[1:] {
-		urls[i] = UrlJoin(base, o.Attr("value"))
-	}
-	return urls
-}
-
-func (s *Starkana) GetImageUrl(doc *Document) string {
-	img := doc.CssSelect("#pic img")
-	if len(img) == 0 {
-		return ""
-	}
-	return img[0].Attr("src")
-}
-
-func (s *Starkana) UpdateChapters(series *Series) []*Chapter {
-	// always print what we're doing when we do this
-	fmt.Printf("Updating starkana for series %s\n", series.Name)
-
-	cachefile := fmt.Sprintf("./cache/%s-%s.html", series.Name, series.Site)
-	document, err := HttpGetDocument(series.Url, cachefile)
-	if err != nil {
-		fmt.Printf("Error getting %s\n", series.Name)
-		return []*Chapter{}
-	}
-	defer document.Free()
-	base := BaseUrl(series.Url)
-
-	anchors := document.CssSelect("a.download-link")
-	results := make([]*Chapter, len(anchors))
-	for i, anchor := range anchors {
-		c := &Chapter{}
-		c.Site = series.Site
-		c.Series = series.Name
-		c.Name = strings.Trim(anchor.Text(), " \t\r\n")
-		c.Url = strings.Trim(anchor.Attr("href"), " \t\r\n")
-		if !strings.HasPrefix(c.Url, "http") {
-			c.Url = UrlJoin(base, c.Url)
-		}
-		numelem := anchor.CssSelect("strong")
-		if len(numelem) > 0 {
-			c.Number = numelem[0].Text()
-		}
-		results[i] = c
-	}
-	vPrintf("Found %d chapters on starkana\n", len(results))
-	return results
-}
-
-func (s *Starkana) UpdateSeriesList(doc *Document) []*Series {
-	series := doc.CssSelect("#inner_page >div a")
-	results := make([]*Series, len(series))
-	for i, anchor := range series {
-		r := &Series{}
-		r.Site = s.Site.Name
-		r.Name = strings.Trim(anchor.Text(), "\t ")
-		r.Url = UrlJoin(s.Site.Url, anchor.Attr("href"))
-		spl := strings.Split(r.Url, "/")
-		r.Key = spl[len(spl)-1]
-		r.Url = r.Url + "?mature_confirm=1"
-		results[i] = r
-	}
-
-	fmt.Printf("Found %d series for starkana\n", len(results))
-	return results
-}
-
-type Mangahere struct{ *Site }
-
-func (m *Mangahere) SeriesListPath() string { return UrlJoin(m.Site.Url, "/mangalist/") }
-func (m *Mangahere) CachePath() string      { return "./cache/" + m.Site.Name + ".html" }
-func (m *Mangahere) Name() string           { return m.Site.Name }
-
-func (m *Mangahere) UpdateSeriesList(doc *Document) []*Series {
-	series := doc.CssSelect("div.list_manga li a")
-	results := make([]*Series, len(series))
-
-	for i, anchor := range series {
-		r := &Series{}
-		r.Site = m.Site.Name
-		r.Name = strings.Trim(anchor.Text(), " \t")
-		r.Url = strings.Trim(anchor.Attr("href"), " \t")
-		if !strings.HasPrefix(r.Url, "http") {
-			r.Url = UrlJoin(m.Site.Url, r.Url)
-		}
-		url := strings.TrimRight(r.Url, "/")
-		spl := strings.Split(url, "/")
-		r.Key = spl[len(spl)-1]
-		results[i] = r
-	}
-	fmt.Printf("Found %d series for mangahere\n", len(results))
-	return results
-}
-
-func (m *Mangahere) UpdateChapters(series *Series) []*Chapter {
-	// always print when we do this
-	fmt.Printf("Updating mangahere for series %s\n", series.Name)
-
-	cachefile := fmt.Sprintf("./cache/%s-%s.html", series.Name, series.Site)
-	document, err := HttpGetDocument(series.Url, cachefile)
-	if err != nil {
-		fmt.Printf("Error getting %s\n", series.Name)
-		return []*Chapter{}
-	}
-	defer document.Free()
-	base := BaseUrl(series.Url)
-
-	anchors := document.CssSelect(".detail_list li a")
-	results := make([]*Chapter, len(anchors))
-	for i, anchor := range anchors {
-		c := &Chapter{}
-		c.Site = series.Site
-		c.Series = series.Name
-		c.Name = strings.Trim(anchor.Text(), " \t\n\r")
-		c.Url = strings.Trim(anchor.Attr("href"), " \t\n\r")
-
-		spl := strings.Split(strings.TrimRight(c.Url, "/"), "/")
-		c.Number = FindNumber(spl[len(spl)-1])
-		if len(c.Number) == 0 {
-			c.Number = FindNumber(c.Name)
-		} else {
-			// remove leading junk
-			c.Number = strings.TrimLeft(c.Number, " \t0")
-		}
-		if !strings.HasPrefix(c.Url, "http") {
-			c.Url = UrlJoin(base, c.Url)
-		}
-		results[i] = c
-	}
-	vPrintf("Found %d chapters on mangahere\n", len(results))
-	return results
-}
-
-func (m *Mangahere) GetPageUrls(url string, doc *Document) []string {
-	options := doc.CssSelect(".readpage_top .go_page option")
-	if len(options) < 1 {
-		return []string{}
-	}
-	urls := make([]string, 0, len(options))
-	base := BaseUrl(url)
-
-	for _, o := range options[1:] {
-		fragment := o.Attr("value")
-		if strings.HasPrefix(fragment, "http") {
-			urls = append(urls, fragment)
-		} else {
-			urls = append(urls, UrlJoin(base, fragment))
-		}
-	}
-	return urls
-}
-
-func (m *Mangahere) GetImageUrl(doc *Document) string {
-	img := doc.CssSelect("#image")
-	if len(img) == 0 {
-		return ""
-	}
-	return img[0].Attr("src")
-}
-
 func UpdateSites(force ...bool) {
 	Force := DefaultFalse(force...)
 
 	now := time.Now().Unix()
 	after := now
 	if !Force {
-		after -= SITE_UPDATE_FREQUENCY
+		after -= SiteUpdateFrequency
 	}
 
 	q := "select name, url, priority, updated from sites WHERE updated < ? ORDER BY priority"
 	}
 
 	if !Force {
-		vPrintf("Updating %d sites last updated over 1 week ago:\n", len(sites))
+		vprintf("Updating %d sites last updated over 1 week ago:\n", len(sites))
 	} else {
-		vPrintf("Force-updating %d sites:\n", len(sites))
+		vprintf("Force-updating %d sites:\n", len(sites))
 	}
 
-	sem := make(chan bool, MAX_CONCURRENT_WORKERS)
+	sem := make(chan bool, MaxConcurrentWorkers)
 	results := []*Series{}
 
 	for _, s := range sites {
 				}
 				<-sem
 			}()
-			updater, ok := Updaters[site.Name]
+			updater, ok := UpdaterRegistry[site.Name]
 			if !ok {
 				fmt.Printf("Unknown site-name %s, skipping update.\n", site.Name)
 				return
 
 	tx, _ := config.db.Beginx()
 
-	vPrintf("Received %d total results\n", len(results))
+	vprintf("Received %d total results\n", len(results))
 
 	q = `insert or replace into series 
 		(name, key, url, site, updated) values 
 	now := time.Now().Unix()
 	after := now
 	if !Force {
-		after -= CHAPTER_UPDATE_FREQUENCY
+		after -= ChapterUpdateFrequency
 	}
 
 	series := []*Series{}
 	}
 	if len(series) > 0 {
 		if !Force {
-			vPrintf("Updating %d sites for %s last updated over 1 week ago:\n", len(series), name)
+			vprintf("Updating %d sites for %s last updated over 1 week ago:\n", len(series), name)
 		} else {
-			vPrintf("Force-updating %d sites for %s:\n", len(series), name)
+			vprintf("Force-updating %d sites for %s:\n", len(series), name)
 		}
 		for _, s := range series {
-			vPrintf("Updating %s (@%s, %s)\n", s.Name, s.Site, s.Url)
+			vprintf("Updating %s (@%s, %s)\n", s.Name, s.Site, s.Url)
 		}
 
-		sem := make(chan bool, MAX_CONCURRENT_WORKERS)
+		sem := make(chan bool, MaxConcurrentWorkers)
 		results := []*Chapter{}
 
 		for _, s := range series {
 					}
 					<-sem
 				}()
-				updater, ok := Updaters[s.Site]
+				updater, ok := UpdaterRegistry[s.Site]
 				if !ok {
 					fmt.Printf("Unknown site-name %s, skipping update.\n", s.Site)
 					return
 
 		tx, _ := config.db.Beginx()
 
-		vPrintf("Received %d total results\n", len(results))
+		vprintf("Received %d total results\n", len(results))
 
 		for _, s := range series {
 			Execf(tx, "delete from chapters where name=? and site=?", name, s.Site)
 
 func DownloadChapter(chapter *Chapter) error {
 	site, url := SelectUrl(chapter)
-	vPrintf(" %s %s (%s, %s)\n", chapter.Series, chapter.Number, site, url)
-	updater := Updaters[site]
+	vprintf(" %s %s (%s, %s)\n", chapter.Series, chapter.Number, site, url)
+	updater := UpdaterRegistry[site]
 
-	doc, err := HttpGetDocument(url)
+	doc, err := GetDocument(updater, url)
 	if err != nil {
 		fmt.Printf("Error fetching `%s`: %s\n", url, err)
 		return err
 			fmt.Sprintf("%s-c%s.zip", series, chapter.Number))
 	}
 
-	page_urls := updater.GetPageUrls(url, doc)
+	page_urls := updater.GetPageUrls(doc)
 	numwidth := len(fmt.Sprintf("%d", len(page_urls)))
 	numfmt := fmt.Sprintf("%%0%dd", numwidth)
 	// fmt.Printf("Making destination dir: %s", destpath)
 	os.MkdirAll(destpath, 0755)
 	// fmt.Printf("Number format: %s\n", numfmt)
 
-	vPrintf("Page Urls: %v\n", page_urls)
+	vprintf("Page Urls: %v\n", page_urls)
 	update := fmt.Sprintf("Downloading %s %s (from %s): %%d of %%d", chapter.Series, chapter.Number, site)
 
 	type Img struct {
 		url string
 	}
 
-	sem := make(chan bool, MAX_CONCURRENT_WORKERS)
+	sem := make(chan bool, MaxConcurrentWorkers)
 	images := make(chan Img, len(page_urls))
 	completed := make(chan int)
 	// send the first image on the images channel
 		sem <- true
 		go func(num int, url string) {
 			defer func() { <-sem }()
-			doc, err := HttpGetDocument(url)
+			doc, err := GetDocument(updater, url)
 			if err != nil {
 				fmt.Printf("Error fetching page %03d (%s)\n", num, url)
 				return