Jernej Virag avatar Jernej Virag committed e119fd4

Implemented database store of the articles

Comments (0)

Files changed (5)

 import parsers
+from model import store_articles
 
 if __name__ == "__main__":
+    # Run every parser returned by parsers.get_parsers() and persist its output.
     sites = parsers.get_parsers()
     for site in sites:
         content = site.get_data()
-        print content
+        # Hand the parsed content to the database layer instead of printing it.
+        store_articles(content)
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker
+from sqlalchemy import Column, String, Integer, Boolean, create_engine
+import settings
+
+# Declarative base class that the ORM models in this module inherit from.
+Base = declarative_base()
+# Session factory; it is bound to the engine below once the engine exists.
+Session = sessionmaker()
+# echo=True makes the engine log every SQL statement it emits.
+# NOTE(review): probably only wanted while debugging - confirm before deploying.
+engine = create_engine(settings.DB_CONNECTION_STRING, echo=True)
+Session.configure(bind=engine)
+
+class Article(Base):
+    # ORM model for a single scraped article, stored in the "articles" table.
+    __tablename__ = "articles"
+
+    # Integer primary key of the row.
+    id = Column(Integer, primary_key=True)
+    # Name of the site the article came from; store_articles requires it.
+    site = Column(String)
+    # Article title; store_articles stores None when the parser gives none.
+    title = Column(String)
+    # Image URL; indexed and unique, and used by store_articles to detect
+    # articles that are already stored.
+    img_url = Column(String, index=True, unique=True)
+    # Optional extra text; store_articles stores None when it is missing.
+    subtext = Column(String)
+    # store_articles writes False for every new row.
+    # NOTE(review): presumably flips to True once the article has been sent
+    # out somewhere - confirm against the code that consumes this table.
+    dispatched = Column(Boolean)
+
+Base.metadata.create_all(engine)
+
+def store_articles(articles):
+    session = Session()
+
+    # Check existing URLs
+    urls = [article["img_url"] for article in articles]
+    existing_urls = {url[0] for url in session.query(Article.img_url).filter(Article.img_url.in_(urls)).all()}
+
+    for article in articles:
+        if article["img_url"] in existing_urls:
+            continue
+
+        db_article = Article(site=article["site"], title=article.get("title", None), img_url=article["img_url"], subtext=article.get("subtext", None),
+                             dispatched=False)
+        session.add(db_article)
+    session.commit()
+

parsers/icanhaz.py

         for post in post_divs:
             title = post["title"]
             url = post["src"]
-            posts.append({ "title" : title, "img_url" : url})
+            posts.append({ "title" : title, "img_url" : url, "site":"ICanHazCheezburger"})
         return posts
 
 

Binary file modified.

+# SQLAlchemy database URL passed to create_engine. "sqlite:///" followed by a
+# bare file name is a relative path, so articles.db lives in the working directory.
+DB_CONNECTION_STRING = "sqlite:///articles.db"
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.