Commits

Flávio Codeço Coelho committed 801cf9d

first commit

Files changed (24)

File .eric4project/project-apis.db

Binary file added.

File .ropeproject/config.py

+# The default ``config.py``
+
+
+def set_prefs(prefs):
+    """This function is called before opening the project"""
+
+    # Specify which files and folders to ignore in the project.
+    # Changes to ignored resources are not added to the history and
+    # VCSs.  Also they are not returned in `Project.get_files()`.
+    # Note that ``?`` and ``*`` match all characters but slashes.
+    # '*.pyc': matches 'test.pyc' and 'pkg/test.pyc'
+    # 'mod*.pyc': matches 'test/mod1.pyc' but not 'mod/1.pyc'
+    # '.svn': matches 'pkg/.svn' and all of its children
+    # 'build/*.o': matches 'build/lib.o' but not 'build/sub/lib.o'
+    # 'build//*.o': matches 'build/lib.o' and 'build/sub/lib.o'
+    prefs['ignored_resources'] = ['*.pyc', '*~', '.ropeproject',
+                                  '.hg', '.svn', '_svn', '.git', 
+                                  '.eric4project', '_eric4project']
+
+    # Specifies which files should be considered python files.  It is
+    # useful when you have scripts inside your project.  Only files
+    # ending with ``.py`` are considered to be python files by
+    # default.
+    #prefs['python_files'] = ['*.py']
+
+    # Custom source folders:  By default rope searches the project
+    # for finding source folders (folders that should be searched
+    # for finding modules).  You can add paths to that list.  Note
+    # that rope guesses project source folders correctly most of the
+    # time; use this if you have any problems.
+    # The folders should be relative to project root and use '/' for
+    # separating folders regardless of the platform rope is running on.
+    # 'src/my_source_folder' for instance.
+    #prefs.add('source_folders', 'src')
+
+    # You can extend python path for looking up modules
+    #prefs.add('python_path', '~/python/')
+
+    # Should rope save object information or not.
+    prefs['save_objectdb'] = True
+    prefs['compress_objectdb'] = False
+
+    # If `True`, rope analyzes each module when it is being saved.
+    prefs['automatic_soa'] = True
+    # The depth of calls to follow in static object analysis
+    prefs['soa_followed_calls'] = 0
+
+    # If `False` when running modules or unit tests "dynamic object
+    # analysis" is turned off.  This makes them much faster.
+    prefs['perform_doa'] = True
+
+    # Rope can check the validity of its object DB when running.
+    prefs['validate_objectdb'] = True
+
+    # How many undos to hold?
+    prefs['max_history_items'] = 32
+
+    # Shows whether to save history across sessions.
+    prefs['save_history'] = True
+    prefs['compress_history'] = False
+
+    # Set the number spaces used for indenting.  According to
+    # :PEP:`8`, it is best to use 4 spaces.  Since most of rope's
+    # unit-tests use 4 spaces it is more reliable, too.
+    prefs['indent_size'] = 4
+
+    # Builtin and c-extension modules that are allowed to be imported
+    # and inspected by rope.
+    prefs['extension_modules'] = []
+
+    # Add all standard c-extensions to extension_modules list.
+    prefs['import_dynload_stdmods'] = True
+
+    # If `True` modules with syntax errors are considered to be empty.
+    # The default value is `False`; When `False` syntax errors raise
+    # `rope.base.exceptions.ModuleSyntaxError` exception.
+    prefs['ignore_syntax_errors'] = False
+
+    # If `True`, rope ignores unresolvable imports.  Otherwise, they
+    # appear in the importing namespace.
+    prefs['ignore_bad_imports'] = False
+
+
+def project_opened(project):
+    """This function is called after opening the project"""
+    # Do whatever you like here!

File ScholarScrap.e4p

+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE Project SYSTEM "Project-4.6.dtd">
+<!-- eric4 project file for project ScholarScrap -->
+<!-- Saved: 2011-02-10, 07:58:34 -->
+<!-- Copyright (C) 2011 Flávio Codeço Coelho, fccoelho@gmail.com -->
+<Project version="4.6">
+  <Language>en</Language>
+  <ProgLanguage mixed="0">Python</ProgLanguage>
+  <ProjectType>Other</ProjectType>
+  <Description>Spider to scrape results from Google Scholar</Description>
+  <Version>0.1</Version>
+  <Author>Flávio Codeço Coelho</Author>
+  <Email>fccoelho@gmail.com</Email>
+  <Sources>
+    <Source>recipe-523047-1.py</Source>
+    <Source>scholar/scholar/items.py</Source>
+    <Source>scholar/scholar/pipelines.py</Source>
+    <Source>scholar/scholar/spiders/__init__.py</Source>
+    <Source>scholar/scholar/__init__.py</Source>
+    <Source>scholar/scholar/settings.py</Source>
+    <Source>__init__.py</Source>
+  </Sources>
+  <Forms>
+  </Forms>
+  <Translations>
+  </Translations>
+  <Resources>
+  </Resources>
+  <Interfaces>
+  </Interfaces>
+  <Others>
+  </Others>
+  <Vcs>
+    <VcsType>None</VcsType>
+  </Vcs>
+  <FiletypeAssociations>
+    <FiletypeAssociation pattern="*.pyw" type="SOURCES" />
+    <FiletypeAssociation pattern="*.idl" type="INTERFACES" />
+    <FiletypeAssociation pattern="*.py" type="SOURCES" />
+    <FiletypeAssociation pattern="*.ptl" type="SOURCES" />
+  </FiletypeAssociations>
+</Project>

File __init__.py

Empty file added.

File recipe-523047-1.py

+import httplib
+import urllib
+from BeautifulSoup import BeautifulSoup
+import re
+
+class GoogleScholarSearch:
+    """
+    @brief This class searches Google Scholar (http://scholar.google.com)
+
+    Search for articles and publications containing terms of interest.
+    
+    Usage example:\n
+    <tt>
+    > from google_search import *\n
+    > searcher = GoogleScholarSearch()\n
+    > searcher.search(['breast cancer', 'gene'])
+    </tt>
+    """
+    def __init__(self):
+        """
+        @brief Constructor; sets the Google Scholar host and base search URL.
+        """
+        self.SEARCH_HOST = "scholar.google.com"
+        self.SEARCH_BASE_URL = "/scholar"
+
+    def search(self, terms, limit=10):
+        """
+        @brief This function searches Google Scholar using the specified terms.
+        
+        Returns a list of dictionaries. Each
+        dictionary contains the information related to the article:
+            "URL"       : link to the article\n
+            "Title"     : title of the publication\n
+            "Authors"   : authors (example: DF Easton, DT Bishop, D Ford)\n
+            "JournalYear"   : journal name & year (example: Nature, 2001)\n
+            "JournalURL"    : link to the journal main website (example: www.nature.com)\n
+            "Abstract"  : abstract of the publication\n
+            "NumCited"  : number of times the publication is cited\n
+            "Terms"     : list of search terms used in the query\n
+
+        @param terms List of search terms
+        @param limit Maximum number of results to be returned (default=10)
+        @return List of results; an empty list if nothing is found
+        """
+        params = urllib.urlencode({'q': "+".join(terms), 'num': limit})
+        headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
+
+        url = self.SEARCH_BASE_URL+"?"+params
+        conn = httplib.HTTPConnection(self.SEARCH_HOST)
+        conn.request("GET", url, {}, headers)
+    
+        resp = conn.getresponse()      
+        
+        if resp.status==200:
+            html = resp.read()
+            results = []
+            html = html.decode('ascii', 'ignore')
+                        
+            # Screen-scrape the result to obtain the publication information
+            soup = BeautifulSoup(html)
+            citations = 0
+            for record in soup('p', {'class': 'g'}):
+             
+                # Includes error checking
+                topPart = record.first('span', {'class': 'w'})                                
+                
+                pubURL = topPart.a['href']
+                # Clean up the URL, make sure it does not contain '\' but '/' instead
+                pubURL = pubURL.replace('\\', '/')
+
+                pubTitle = ""
+                
+                for part in topPart.a.contents:
+                    pubTitle += str(part)
+                
+                if pubTitle == "":
+                    match1 = re.findall('<b>\[CITATION\]<\/b><\/font>(.*)- <a',str(record))
+                    match2 = re.split('- <a',match1[citations])
+                    pubTitle = re.sub('<\/?(\S)+>',"",match2[0])
+                    citations = citations + 1
+               
+                authorPart = record.first('font', {'color': 'green'}).string
+                if authorPart is None:
+                    authorPart = ''
+                    # Sometimes even BeautifulSoup can fail, fall back to regex
+                    m = re.findall('<font color="green">(.*)</font>', str(record))
+                    if len(m)>0:
+                        authorPart = m[0]
+                num = authorPart.count(" - ")
+                # Assume that the fields are delimited by ' - ', the first entry will be the
+                # list of authors, the last entry is the journal URL, anything in between
+                # should be the journal year
+                idx_start = authorPart.find(' - ')
+                idx_end = authorPart.rfind(' - ')
+                pubAuthors = authorPart[:idx_start]             
+                pubJournalYear = authorPart[idx_start + 3:idx_end]
+                pubJournalURL = authorPart[idx_end + 3:]
+                # If (only one ' - ' is found) and (the end bit contains '\d\d\d\d')
+                # then the last bit is journal year instead of journal URL
+                if pubJournalYear=='' and re.search('\d\d\d\d', pubJournalURL)!=None:
+                    pubJournalYear = pubJournalURL
+                    pubJournalURL = ''
+                               
+                # This can potentially fail if all of the abstract can be contained in the space
+                # provided such that no '...' is found
+                delimiter = soup.firstText("...").parent
+                pubAbstract = ""
+                while delimiter is not None and (str(delimiter)!='<b>...</b>' or pubAbstract==""):
+                    pubAbstract += str(delimiter)
+                    delimiter = delimiter.nextSibling
+                pubAbstract += '<b>...</b>'
+                
+                match = re.search("Cited by ([^<]*)", str(record))
+                pubCitation = ''
+                if match != None:
+                    pubCitation = match.group(1)
+                results.append({
+                    "URL": pubURL,
+                    "Title": pubTitle,
+                    "Authors": pubAuthors,
+                    "JournalYear": pubJournalYear,
+                    "JournalURL": pubJournalURL,
+                    "Abstract": pubAbstract,
+                    "NumCited": pubCitation,
+                    "Terms": terms
+                })
+            return results
+        else:
+            print "ERROR: ",
+            print resp.status, resp.reason
+            return []
+
+if __name__ == '__main__':
+    search = GoogleScholarSearch()
+    pubs = search.search(["dengue", "fever"], 10)
+    for pub in pubs:
+        print pub['Title']
+        print pub['Authors']
+        print pub['JournalYear']
+        print pub['Terms']
+        print "======================================"

File scholar/.directory

+[Dolphin]
+ShowPreview=true
+Timestamp=2011,2,10,8,39,36

File scholar/.scrapy/scrapy.db

Binary file added.

File scholar/scholar.google.com.br

HTML snapshot of the Google Scholar home page (pt-BR interface, ISO-8859-1) added as a local test fixture; raw markup omitted.

File scholar/scholar/__init__.py

Empty file added.

File scholar/scholar/__init__.pyc

Binary file added.

File scholar/scholar/items.py

+# Define here the models for your scraped items
+#
+# See documentation in:
+# http://doc.scrapy.org/topics/items.html
+
+from scrapy.item import Item, Field
+
+class ScholarItem(Item):
+    """
+    Define the fields for each item to be scraped from google scholar
+    """
+    URL = Field()       # link to the article
+    Title = Field()     # title of the publication
+    Authors = Field()   # authors (example: DF Easton, DT Bishop, D Ford)
+    JournalYear = Field()   # journal name & year (example: Nature, 2001)
+    JournalURL = Field()    # link to the journal main website (example: www.nature.com)
+    Abstract = Field()  # abstract of the publication
+    NumCited = Field()  # number of times the publication is cited
+    Terms = Field()     # list of search terms used in the query
+
+class PubmedItem(Item):
+    """
+    Define the fields for each item to be scraped from pubmed
+    """
+    URL = Field()       # link to the article
+    Title = Field()     # title of the publication
+    Authors = Field()   # authors (example: DF Easton, DT Bishop, D Ford)
+    JournalYear = Field()   # journal name & year (example: Nature, 2001)
+    JournalURL = Field()    # link to the journal main website (example: www.nature.com)
+    Abstract = Field()  # abstract of the publication
+    NumCited = Field()  # number of times the publication is cited
+    Terms = Field()     # list of search terms used in the query
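
For reference, Scrapy items behave like dicts restricted to the declared fields. A minimal sketch of how a spider callback might populate a ScholarItem (the sample values below are illustrative, not scraped output):

    from scholar.items import ScholarItem

    item = ScholarItem()
    item['Title'] = 'Dengue'                  # hypothetical sample values
    item['Authors'] = 'SB Halstead'
    item['JournalYear'] = 'The Lancet, 2007'
    item['NumCited'] = '323'
    print item['Title']                       # dict-style access
    # item['Publisher'] = '...'               # undeclared field -> KeyError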

File scholar/scholar/items.pyc

Binary file added.

File scholar/scholar/pipelines.py

+# Define your item pipelines here
+#
+# Don't forget to add your pipeline to the ITEM_PIPELINES setting
+# See: http://doc.scrapy.org/topics/item-pipeline.html
+
+class ScholarPipeline(object):
+    def process_item(self, item, spider):
+        return item
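
The pipeline above is the startproject pass-through stub. A hedged sketch of how it could be extended to discard results that come back without a title (the class name is made up; DropItem is assumed to be importable from scrapy.exceptions, as in the Scrapy releases of this period):

    from scrapy.exceptions import DropItem  # import path assumed; older releases used scrapy.core.exceptions

    class DropUntitledPipeline(object):
        """Sketch: discard scraped items that have no Title field."""
        def process_item(self, item, spider):
            if not item.get('Title'):
                raise DropItem("Missing Title in %s" % item)
            return item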

File scholar/scholar/pipelines.pyc

Binary file added.

File scholar/scholar/scholar.google.com.br

HTML snapshot of a Google Scholar results page for the query "Dengue" (English interface, articles since 1998) added as a local test fixture; raw markup omitted.

File scholar/scholar/settings.py

+# Scrapy settings for scholar project
+#
+# For simplicity, this file contains only the most important settings by
+# default. All the other settings are documented here:
+#
+#     http://doc.scrapy.org/topics/settings.html
+#
+
+BOT_NAME = 'scholar'
+BOT_VERSION = '1.0'
+
+SPIDER_MODULES = ['scholar.spiders']
+NEWSPIDER_MODULE = 'scholar.spiders'
+DEFAULT_ITEM_CLASS = 'scholar.items.ScholarItem'
+ITEM_PIPELINES = ['scholar.pipelines.ScholarPipeline']
+USER_AGENT = '%s/%s' % (BOT_NAME, BOT_VERSION)
+
+# FEED Configuration
+
+FEED_URI = 'file:///tmp/refs.json'
+FEED_FORMAT = 'jsonlines'
+
+
+
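
With the feed export configured above, a crawl (for example, running "scrapy crawl gscholar") should write one JSON object per line to /tmp/refs.json. A quick way to inspect that output, assuming the crawl has already run:

    import json

    # Read back the 'jsonlines' feed written to FEED_URI above.
    with open('/tmp/refs.json') as feed:
        for line in feed:
            record = json.loads(line)
            print record.get('Title'), '-', record.get('JournalYear')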

File scholar/scholar/settings.pyc

Binary file added.

File scholar/scholar/spiders/__init__.py

+# This package will contain the spiders of your Scrapy project
+#
+# To create the first spider for your project use this command:
+#
+#   scrapy genspider myspider myspider-domain.com
+#
+# For more info see:
+# http://doc.scrapy.org/topics/spiders.html

File scholar/scholar/spiders/__init__.pyc

Binary file added.

File scholar/scholar/spiders/pubmed.py

+import re
+
+from scrapy.selector import HtmlXPathSelector
+from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
+from scrapy.contrib.spiders import CrawlSpider, Rule
+from scholar.items import PubmedItem
+
+class PubmedSpider(CrawlSpider):
+    name = 'pubmed'
+    allowed_domains = ['www.ncbi.nlm.nih.gov']
+    start_urls = ['http://www.ncbi.nlm.nih.gov/pmc/']
+
+    rules = (
+        Rule(SgmlLinkExtractor(allow=r'Items/'), callback='parse_item', follow=True),
+    )
+
+    def parse_item(self, response):
+        hxs = HtmlXPathSelector(response)
+        i = PubmedItem()
+        #i['domain_id'] = hxs.select('//input[@id="sid"]/@value').extract()
+        #i['name'] = hxs.select('//div[@id="name"]').extract()
+        #i['description'] = hxs.select('//div[@id="description"]').extract()
+        return i
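
parse_item above is still the genspider stub. A sketch of what a filled-in callback might look like, with hypothetical XPaths that have not been validated against the PMC markup:

    # Sketch only: the selectors below are placeholders.
    def parse_item(self, response):
        hxs = HtmlXPathSelector(response)
        item = PubmedItem()
        item['URL'] = response.url
        item['Title'] = hxs.select('//h1/text()').extract()
        item['Abstract'] = hxs.select('//div[@class="abstr"]//text()').extract()
        return item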

File scholar/scholar/spiders/pubmed.pyc

Binary file added.

File scholar/scholar/spiders/scholar_spyder.py

+from scrapy.spider import BaseSpider
+from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
+from scrapy.contrib.spiders import CrawlSpider, Rule
+from scrapy.http.request.form import FormRequest
+from scholar.items import ScholarItem
+from BeautifulSoup import BeautifulSoup
+import re
+
+class ScholarSpider(CrawlSpider):
+    name = "gscholar"
+    allowed_domains = ["scholar.google.com"]
+    start_urls = [
+        "http://scholar.google.com.br/scholar?hl=en&q=Dengue&as_sdt=1%2C5&as_ylo=1998&as_vis=0"
+    ]
+    rules = (
+             Rule(SgmlLinkExtractor(allow=(re.compile('/scholar.bib?'), ))), 
+             )
+    
+#    def start_requests(self):
+#        return [FormRequest("http://scholar.google.com.br/scholar",
+#                            formdata={'hl': 'en', 'q': 'Dengue', 'as_sdt':'1%2C5','as_ylo':'1998', 'as_vis':'0'})]
+
+        
+    def parse(self, response):
+        filename = response.url.split("/")[-2]
+        open(filename, 'wb').write(response.body)
+        soup = BeautifulSoup(response.body)
+        citations = 0
+        results = []
+        limit = 10
+        terms='Dengue'
+        for record in soup.findAll('div', attrs= {'class':'gs_r'}):
+#            print record.findAll(text="[CITATION]")
+            if record.findAll(text="[CITATION]"):
+                citations += 1
+                continue  # Skip citation items
+            # Includes error checking
+            topPart = record.first('div', {'class': 'gs_rt'})   
+            try: #Some entries don't have URLs
+                pubURL = topPart.a['href']
+            except TypeError:
+                pubURL = ""
+            # Clean up the URL, make sure it does not contain '\' but '/' instead
+            pubURL = pubURL.replace('\\', '/')
+
+            pubTitle = ""
+            try:
+                for part in topPart.a.contents:
+                    pubTitle += str(part)
+            except AttributeError:
+                continue  # citation items don't have a title in <a></a> and are not scraped
+#            print "==> URL: ", pubURL
+#            print "==> Title: ",  pubTitle
+            if pubTitle == "":
+                match1 = re.findall('<b>\[CITATION\]<\/b><\/font>(.*)- <a',str(record))
+                match2 = re.split('- <a',match1[citations])
+                pubTitle = re.sub('<\/?(\S)+>',"",match2[0])
+                citations = citations + 1
+           
+            authorPart = record.findAll('span', {'class': 'gs_a'})[0].string
+            if authorPart is None:
+                authorPart = ''
+                # Sometimes even BeautifulSoup can fail, fall back to regex
+                m = re.findall('<font size="-1">(.*)</font>', str(record))
+                if len(m)>0:
+                    authorPart = m[0]
+            num = authorPart.count(" - ")
+#            print "==> Authors: ", authorPart
+            # Assume that the fields are delimited by ' - ', the first entry will be the
+            # list of authors, the last entry is the journal URL, anything in between
+            # should be the journal year
+            idx_start = authorPart.find(' - ')
+            idx_end = authorPart.rfind(' - ')
+            pubAuthors = authorPart[:idx_start]             
+            pubJournalYear = authorPart[idx_start + 3:idx_end]
+            pubJournalURL = authorPart[idx_end + 3:]
+            # If (only one ' - ' is found) and (the end bit contains '\d\d\d\d')
+            # then the last bit is journal year instead of journal URL
+            if pubJournalYear=='' and re.search('\d\d\d\d', pubJournalURL)!=None:
+                pubJournalYear = pubJournalURL
+                pubJournalURL = ''
+                           
+            pubAbstract = ""
+            match = re.search("Cited by ([^<]*)", str(record))
+            pubCitation = ''
+            if match != None:
+                pubCitation = match.group(1)
+            itemdict = {
+                "URL": pubURL,
+                "Title": pubTitle,
+                "Authors": pubAuthors,
+                "JournalYear": pubJournalYear,
+                "JournalURL": pubJournalURL,
+                "Abstract": pubAbstract,
+                "NumCited": pubCitation,
+                "Terms": terms
+            }
+            it = ScholarItem()
+            for k, v in itemdict.iteritems():
+                it[k]=v 
+            results.append(it)
+#        print results
+        return results
+
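Note that CrawlSpider uses parse internally to drive its rules, so overriding parse as above likely bypasses the Rule defined on the class; renaming the callback (or subclassing BaseSpider instead) would let the rule fire. For the byline handling, this is how the ' - ' splitting behaves on an author line in the style of the saved results page (illustrative values only):

    line = 'SB Halstead - The Lancet, 2007 - Elsevier'
    start, end = line.find(' - '), line.rfind(' - ')
    print line[:start]           # 'SB Halstead'       -> Authors
    print line[start + 3:end]    # 'The Lancet, 2007'  -> JournalYear
    print line[end + 3:]         # 'Elsevier'          -> JournalURL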

File scholar/scholar/spiders/scholar_spyder.pyc

Binary file added.

File scholar/scrapy.cfg

+# Automatically created by: scrapy startproject
+#
+# For more information about the [deploy] section see:
+# http://doc.scrapy.org/topics/scrapyd.html
+
+[settings]
+default = scholar.settings
+
+[deploy]
+#url = http://localhost:6800/
+project = scholar