1. Chema Cortes
  2. mcu_isbn

Commits

chemacortes  committed d71faf1

Refactorización abandonada

  • Participants
  • Parent commits efac170
  • Branches refact1

Comments (0)

Files changed (3)

File get_isbn.py

View file
 
 def parse_book(book):
 
-    m = PAT_YEAR.search(book.text)
-    year = m.group(1) if m else ""
+    res = PAT_YEAR.search(book.text)
+    year = res.group(1) if res else ""
 
     # extraer tres enlaces (isbn, título, editorial)
     links = book.find_all("a")
     title = title.encode("utf-8") if title else "<SIN TÍTULO>"  # Los hay...
     editorial = editorial.encode("utf-8") if editorial else "<SIN EDITORIAL>"
 
-    m = PAT_IDBOOK.search(links[1]["href"])
-    idbook = m.group(1) if m else "<NOID>"
+    res = PAT_IDBOOK.search(links[1]["href"])
+    idbook = res.group(1) if res else "<NOID>"
 
     # Hay libros con más de una editorial. Sólo se considera la primera.
-    m = PAT_ID_EDITORIAL.search(links[2]["href"])
-    id_editorial = m.group(1) if m else "<NOID>"
+    res = PAT_ID_EDITORIAL.search(links[2]["href"])
+    id_editorial = res.group(1) if res else "<NOID>"
 
     return [isbn, idbook, title, id_editorial, editorial, year]
 
         response = urllib2.urlopen(req2)
         return response.read()
 
-    print(msg)
+    print msg
 
     cookie = get_cookie()
 
     else:
         info = soup.find("div", class_="stTextos")
         if info:
-            m = PAT_RESULTS.search(info.text)
-            total = m.group(1) if m else 0
+            res = PAT_RESULTS.search(info.text)
+            total = res.group(1) if res else 0
             print "  ", info.text
     query.set_total(total)
 

File main.py

View file
 import csv
 import time
 
-from mk_query import QA_IBIC_Editorial, QA_IBIC
+from mk_query import QueryArgs, QA_IBIC_Editorial, QA_IBIC
 from get_isbn import get_books
 
 # Parámetros
 
 
 # Leer parámetros de búsqueda
-editoriales = dict(r[:-1].split("\t", 1) for r in open("editoriales.txt", "rU") if r != "\n")
-materias = dict(r[:-1].split("\t", 1) for r in open("materias.txt", "rU") if r != "\n")
+EDITORIALES = dict(r[:-1].split("\t", 1) for r in open("editoriales.txt", "rU") if r != "\n")
+MATERIAS = dict(r[:-1].split("\t", 1) for r in open("materias.txt", "rU") if r != "\n")
 
 
 def captura():
+    """Obtiene libros a partir de materias.txt y editoriales.txt"""
 
     with open("libros.csv", "wb") as csvfile:
 
-        sr = csv.writer(csvfile, delimiter=';', quotechar='"')
-        sr.writerow(["ISBN", "idbook", "título", "id_editorial", "editorial", "año", "IBIC", "materia"])
+        swr = csv.writer(csvfile, delimiter=';', quotechar='"')
+        swr.writerow(["ISBN", "idbook", "título", "id_editorial", "editorial", "año", "IBIC", "materia"])
 
-        for materia, m_desc in materias.items():
-            for editorial, e_desc in editoriales.items():
+        for materia, m_desc in MATERIAS.items():
+            for editorial, e_desc in EDITORIALES.items():
                 query = QA_IBIC_Editorial(materia, editorial, YEAR1, YEAR2, maxitems=MAXITEMS, publanguage=PUBLANG)
-                msg = "Materia:{} - Editorial:{}".format(materia, editorial)
+                msg = "IBIC:{} - Editorial:{} {}".format(materia, editorial, e_desc)
                 for book in get_books(query, msg):
-                    sr.writerow(book + [materia, m_desc])
+                    swr.writerow(book + [materia, m_desc])
             # print "Esperando 5 segundos..."
             # time.sleep(5)
 
 
 def captura_completa():
+    """Obtiene libros a partir de materias.txt"""
 
     with open("libros_full.csv", "wb") as csvfile:
 
-        sr = csv.writer(csvfile, delimiter=';', quotechar='"')
-        sr.writerow(["ISBN", "idbook", "título", "id_editorial", "editorial", "año", "IBIC", "materia"])
+        swr = csv.writer(csvfile, delimiter=';', quotechar='"')
+        swr.writerow(["ISBN", "idbook", "título", "id_editorial", "editorial", "año", "IBIC", "materia"])
 
-        for materia, m_desc in materias.items():
+        for materia, m_desc in MATERIAS.items():
             for year in range(YEAR1, YEAR2 + 1):
                 query = QA_IBIC(materia, year, year + 1, maxitems=MAXITEMS, publanguage=PUBLANG)
-                msg = "{} Materia:{} - {}".format(year, materia, materias[materia])
+                msg = "{} Materia:{} - {}".format(year, materia, m_desc)
                 for book in get_books(query, msg):
-                    sr.writerow(book + [materia, m_desc])
-            #=======================================================================
+                    swr.writerow(book + [materia, m_desc])
             # print "Esperando 5 segundos..."
             # time.sleep(5)
-            #=======================================================================
+
+def captura3():
+    """Fetch books per subject and year using a generic QueryArgs.
+
+    For every entry in MATERIAS and every year from YEAR1 to YEAR2
+    (inclusive), builds a QueryArgs, passes it to get_books and writes
+    each returned book to "libros_full.csv" with the subject code and
+    its description appended.
+    """
+
+    with open("libros_full.csv", "wb") as csvfile:
+
+        swr = csv.writer(csvfile, delimiter=';', quotechar='"')
+        swr.writerow(["ISBN", "idbook", "título", "id_editorial", "editorial", "año", "IBIC", "materia"])
+
+        for materia, m_desc in MATERIAS.items():
+            for year in range(YEAR1, YEAR2 + 1):
+                query = QueryArgs(maxitems=MAXITEMS)
+                # The subject is not a simple attribute alias: it goes in
+                # search concept 0 as an index with concept id "4" (MATERIA),
+                # the same fields QA_IBIC fills in.
+                query.concepto(0, "4", materia, "")
+                query.year1 = year
+                query.year2 = year + 1
+                # PUBLANG is the language constant the other captures use.
+                query.publanguage = PUBLANG
+                msg = "{} Materia:{} - {}".format(year, materia, m_desc)
+                for book in get_books(query, msg):
+                    swr.writerow(book + [materia, m_desc])
+            # Optional pause between subjects (currently disabled):
+            # time.sleep(5)
+
 
 
 if __name__ == "__main__":
 
-    captura()
+#    captura()
 #    captura_completa()
+
+    captura3()

File mk_query.py

View file
 
 # Parámetros de búsqueda
 
-operator = ("", "AND", "OR", "NOT")
+OPERATOR = ("", "AND", "OR", "NOT")
 
-cdispo = dict(
+CDISPO = dict(
     A="Ambos",
     D="Disponibles",
     N="NoDisponibles"
 )
 
-concepto = (
+CONCEPTO = (
     "",
     "AUTOR",
     "COLECCIÓN",
     "TÍTULO"
 )
 
-idiomas = (
+IDIOMAS = (
     "Afrikaans", "Alemán", "Árabe", "Aragonés", "Aranés", "Asturiano",
     "Búlgaro", "Castellano", "Catalán", "Checo", "Chino", "Croata", "Danés",
     "Eslovaco", "Esloveno", "Euskera", "Finés", "Francés", "Gallego", "Griego",
     "Ruso", "Sánscrito", "Serbio", "Sueco", "Turco", "Valenciano"
 )
 
-registro = {
+REGISTRO = {
     "-1": "",
     "33": "Monografía",
     "34": "Grabación sonora",
     "47": "Material gráfico",
 }
 
-soporte = {
+SOPORTE = {
      "-1": "",
       "1": "Obra Completa",
       "2": "Fascículo",
 
 
 class QueryArgs(object):
-
     """Estructura vacía para peticiones al buscador"""
 
     def __init__(self, maxitems=50):
         self.page = 0
         self.maxitems = maxitems
 
-        self.q = {
+        self.query = {
             "line": "",
             "action": "Buscar",
             # parámetros para control de la paginación
         # Algunos parámetros pueden ser texto libre (en .texto).
         # Otros sólo permite introducir un índice (eg: .indices="4GB" para MATERIA)
         for i in range(12):
-            self.q.update({
-                "params.liConceptosExt[{}].indices".format(i): "",
-                "params.liConceptosExt[{}].operadorFormId".format(i): "",
-                "params.liConceptosExt[{}].concepQueryFormIdExt".format(i): "",
-                "params.liConceptosExt[{}].texto".format(i): "",
-                "params.liConceptosExt[{}].accion".format(i): "Índice",
-            })
+            self.concepto(i, "", "", "")
+
+    def concepto(self, num, concepto, indice, operador,
+                 texto="", accion="Índice"):
+        """Fill in the form fields of search concept number ``num``.
+
+        Writes the five ``params.liConceptosExt[num].*`` keys into
+        ``self.query``:
+
+        - ``concepQueryFormIdExt`` <- ``concepto``
+        - ``indices`` <- ``indice``
+        - ``operadorFormId`` <- ``operador``
+        - ``texto`` <- ``texto``
+        - ``accion`` <- ``accion``
+        """
+        key = "params.liConceptosExt[{}]".format(num)
+        self.query.update({
+            key + ".concepQueryFormIdExt": concepto,
+            key + ".indices": indice,
+            key + ".operadorFormId": operador,
+            key + ".texto": texto,
+            # Honour the caller's value instead of always writing the default.
+            key + ".accion": accion,
+        })
 
     def set_total(self, total):
-        self.q["TOTAL"] = total
+        self.query["TOTAL"] = total
 
     def goto_page(self, page=0):
-        self.q.update(
+        self.query.update(
             action="goToPage",
             POS=str(self.page),
             PAGE=str(page)
         self.page = page
 
     def next_page(self):
-        self.q.update(
+        self.query.update(
             action="Siguiente",
             POS=str(self.page),
         )
         self.page += self.maxitems
 
     def set_max(self):
-        self.q["MAX"] = "5000"
+        self.query["MAX"] = "5000"
         return self
 
+    def __setattr__(self, name, value):
+        """Route aliased attribute assignments into the query dictionary.
+
+        Assigning to one of the alias names below stores ``value`` in
+        ``self.query`` under the corresponding form-field key. Any other
+        name (``query``, ``page``, ``maxitems``, ...) is stored as a
+        normal instance attribute.
+        """
+        aliases = dict(
+            TOTAL="TOTAL",
+            MAX="MAX",
+            PAGE="PAGE",
+            POS="POS",
+            action="action",
+            clase_registro="params.claseRegistroFormId",
+            soporte="params.soporteFormId",
+            order="params.orderByFormId",
+            publanguage="params.reLenguas.lenguaPublicacionFormId",
+            year1="params.reAnnoPublica.annoDesde",
+            year2="params.reAnnoPublica.annoEntre",
+        )
+        if name in aliases:
+            self.query[aliases[name]] = value
+        else:
+            # Without this fallback the assignments made in __init__
+            # (page, maxitems, query) would be silently discarded.
+            super(QueryArgs, self).__setattr__(name, value)
+
     def __str__(self):
         import urllib
-        return urllib.urlencode(self.q)
+        return urllib.urlencode(self.query)
 
 
-class QA_IBIC_Editorial(QueryArgs):
+class QA_IBIC_Editorial(QueryArgs):  # pylint: disable=C0103
 
     def __init__(self, materia, editorial,
                  year1="", year2="",
 
         super(QA_IBIC_Editorial, self).__init__(maxitems)
 
-        self.q.update({
+        self.query.update({
             "params.liConceptosExt[0].indices": materia,
             "params.liConceptosExt[0].concepQueryFormIdExt": "4",  # MATERIA
 
         })
 
 
-class QA_IBIC(QueryArgs):
+class QA_IBIC(QueryArgs):  # pylint: disable=C0103
 
     def __init__(self, materia,
                  year1="", year2="",
 
         super(QA_IBIC, self).__init__(maxitems)
 
-        self.q.update({
+        self.query.update({
             "params.liConceptosExt[0].indices": materia,
             "params.liConceptosExt[0].concepQueryFormIdExt": "4",  # MATERIA
 
             "params.reAnnoPublica.annoDesde": year1,
             "params.reAnnoPublica.annoEntre": year2,
         })
+
+class QA_IBIC_2(QueryArgs):  # pylint: disable=C0103
+
+    def __init__(self,
+                 year1="", year2="",
+                 maxitems=50, publanguage="Castellano"):
+        super(QA_IBIC_2, self).__init__(maxitems)