Commits

leopku committed 2e7de15

first commit.

Comments (0)

Files changed (1)

+#!/usr/bin/env python
+
+__author__ = 'leopku@qq.com'
+__version__ = '%prog 1.0b1'
+__doc__ = '''svn clone is a kit for downloading source code through .svn info.
+Author: %s
+Version: %s
+
+History:
+    2012-04-16 
+        + startup. Inspired by http://rains.im/node/18.
+''' % (__author__, __version__)
+
+import sys
+import os
+import os.path
+import re
+import optparse
+import urllib
+import urllib2
+import urlparse
+import unittest
+
class EntrieParser(object):
    """Parse the body of an svn 1.x ``.svn/entries`` file.

    Records in that format are separated by a form-feed character; the two
    lines following each ``\\f`` are the entry's name and its kind
    (normally ``file`` or ``dir``).
    """

    def __init__(self, content):
        # content: raw text of an ``entries`` file.
        self.content = content
        # \f starts a record; group 1 = entry name, group 2 = entry kind.
        self.pattern = r'\f\n([^\n]+?)\n([^\n]+)\n'
        # Name lists keyed by kind; 'dir' and 'file' are always present so
        # callers can index them without checking first.
        self.childrens = {'dir': [], 'file': []}

    def feed(self):
        """Scan ``self.content`` and return the kind -> names mapping.

        Returns the same dict stored in ``self.childrens``.
        """
        # Loop variable renamed (original shadowed the builtin ``iter``);
        # setdefault keeps an unexpected entry kind from raising KeyError.
        for match in re.finditer(self.pattern, self.content):
            name, kind = match.group(1), match.group(2)
            self.childrens.setdefault(kind, []).append(name)
        return self.childrens
+
class Downloader(object):
    """Recursively mirror source files exposed through a site's .svn metadata.

    For each directory it fetches ``<url>/.svn/entries``, parses the child
    entries, reports every file available under ``.svn/text-base`` and
    recurses into child directories.  The actual retrieval call is left
    commented out, so the tool currently performs a dry run (it only prints
    what it would save).
    """

    def __init__(self):
        # Pose as a regular desktop browser: some servers refuse requests
        # from script-looking user agents.
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}

    def download(self, url, local_base_dir='/tmp'):
        """Walk (dry-run) the tree rooted at *url* into *local_base_dir*.

        :param url: directory URL whose ``.svn/entries`` file is readable.
        :param local_base_dir: local root under which host/path dirs are made.
        :raises IOError: re-raised after logging (HTTPError is swallowed).
        """
        self.url = url
        self.local_base_dir = local_base_dir
        parts = urlparse.urlsplit(self.url)
        # Mirror the remote layout (host + path).  The original used only
        # the netloc, so every recursion level collapsed into one flat
        # directory and files from sub-directories collided.
        local_dir = '%s/%s%s' % (self.local_base_dir, parts.netloc, parts.path)
        if not os.path.exists(local_dir):
            # makedirs (not mkdir): intermediate path components may be missing.
            os.makedirs(local_dir)
        svn_dir_url = '%s/.svn' % self.url
        entries_url = '%s/entries' % svn_dir_url
        text_base_url = '%s/text-base' % svn_dir_url
        # Pre-bind f: the except blocks below report the last file seen, and
        # the original raised NameError when the failure happened before the
        # file loop ever ran.
        f = None
        try:
            self.req = urllib2.Request(entries_url, headers=self.headers)
            self.page = urllib2.urlopen(self.req)
            self.content = self.page.read()
            parser = EntrieParser(self.content)
            self.childrens = parser.feed()
            for f in self.childrens['file']:
                # Dry run: announce the would-be save only.
                print('file %s/%s was saved from: %s/%s.svn-base' % (local_dir, f, text_base_url, f))
                #urllib.urlretrieve('%s/%s' % (text_base_url, f), '%s/%s' % (local_dir, f))
            for d in self.childrens['dir']:
                # Recurse, keeping the caller's base directory (the original
                # silently reset it to the default '/tmp' at every level).
                Downloader().download('%s/%s' % (url, d), local_base_dir)
        except urllib2.HTTPError as e:
            # Best-effort: skip directories the server refuses to serve.
            print('*' * 10)
            print('%s %s' % (self.url, f))
        except IOError as e:
            print('*' * 10)
            print('%s %s' % (self.url, f))
            raise
+
class EntrieParserTestCase(unittest.TestCase):
    """Smoke tests for EntrieParser against a local and a live entries file.

    Both cases are environment-dependent: the first reads a hard-coded
    working-copy path on disk, the second fetches over HTTP.
    """

    def test_local(self):
        # Parse an entries file taken from a checkout on disk.
        with open('/opt/Documents/Source/wenjiagou/1/wenjiagou/.svn/entries', 'r') as handle:
            entries_text = handle.read()
        expected_names = ('manage.py', '__init__.py', 'settings.py', 'urls.py')
        parsed_values = EntrieParser(entries_text).feed().values()
        # NOTE(review): a tuple never compares equal to dict values, so this
        # assertion always passes — presumably a placeholder; confirm intent.
        self.assertNotEqual(expected_names, parsed_values)

    def test_online(self):
        # Fetch a real entries file, posing as a desktop browser.
        target_url = 'http://www.liveport.cn/.svn/entries'
        browser_headers = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
        request = urllib2.Request(target_url, headers=browser_headers)
        entries_text = urllib2.urlopen(request).read()
        parsed_values = EntrieParser(entries_text).feed().values()
        self.assertNotIn(('includes', ), parsed_values)
        #self.assertNotIn(('a', ), ('a', 'b'))
+
if __name__ == '__main__':
    # Command-line entry point: parse the options and kick off the crawl.
    # Usage string fixed: --color is optional, so it belongs in square
    # brackets (the original '<--color>' read as a required argument).
    __usage__ = 'python %prog [--url=URL] [--color]'
    __desc__ = 'svn clone is a kit for downloading source code through .svn info'
    parser = optparse.OptionParser(usage=__usage__, version=__version__, description=__desc__)
    parser.add_option('-c', '--color', action='store_true', help='Export output using colorized console.')
    parser.add_option('-u', '--url', help='The url where you wanna download source code from')
    opts, args = parser.parse_args()

    if opts.url:
        # --url is the only required option; everything is driven from it.
        downloader = Downloader()
        downloader.download(opts.url)
    else:
        # Exit non-zero with an explanatory message on stderr.
        sys.exit('ERROR: You must specify the --url parameter.')