# Chrysoberyl / src / chrysoberyl / localrepos.py

# encoding: UTF-8

import codecs
import os
import re

import yaml
try:
    from yaml import CDumper as Dumper
except ImportError:
    from yaml import Dumper

from chrysoberyl.util import get_it


def bitbucket_repos(data):
    """Generate (key, user, repo) triples for Bitbucket-hosted distributions.

    Considers only nodes of type 'Distribution' that carry a 'bitbucket'
    field of the form 'user/repo'.
    """
    for key, node in data.items():
        if node['type'] != 'Distribution':
            continue
        if 'bitbucket' not in node:
            continue
        user, repo = node['bitbucket'].split('/')
        yield (key, user, repo)


def for_each_repo(data, clone_dir, fun):
    cwd = os.getcwd()
    count = 0
    for (distribution, user, repo) in sorted(bitbucket_repos(data)):
        if user != 'catseye':
            print "#-- non-catseye distribution: %s" % \
              (distribution)
            continue
        os.chdir(os.path.join(clone_dir, repo))
        result = fun(distribution, repo)
        if result != False:
            count += 1
    os.chdir(cwd)
    return count


# Regexes (used with re.match, i.e. anchored at the start of the filename)
# identifying files that count as documentation when trolling a clone.
DOC_PATTERNS = (
    r'^LICENSE$',
    r'^UNLICENSE$',
    r'^README',
    r'^.*?\.html$',
    r'^.*?\.markdown$',
    r'^.*?\.txt$',
    r'^.*?\.lhs$',
)


def troll_docs(data, clone_dir, data_dir):
    docdict = {}

    def troll_repo(distribution, repo):
        if data[distribution].get('skip-docs', False):
            print "#!! Skipping %s" % distribution
            return
        docs = []
        for root, dirnames, filenames in os.walk('.'):
            if root.endswith(".hg"):
                del dirnames[:]
                continue
            for filename in filenames:
                for pattern in DOC_PATTERNS:
                    if re.match(pattern, filename):
                        path = os.path.join(root, filename)[2:]
                        docs.append(path)
                        break
        docdict[distribution] = docs

    count = for_each_repo(data, clone_dir, troll_repo)
    
    docdata = {
        'Documentation Index': {
            'type': 'Metanode',
            'entries': docdict,
        }
    }
    output_filename = os.path.join(data_dir, 'documentation.yaml')
    with codecs.open(output_filename, 'w', 'utf-8') as file:
        file.write('# encoding: UTF-8\n')
        file.write('# AUTOMATICALLY GENERATED BY chrysoberyl.py\n')
        file.write(yaml.dump(docdata, Dumper=Dumper, default_flow_style=False))
    print "Doc lists extracted from %d clones." % count


def survey_repos(data, clone_dir):
    """Print a release-status report for each catseye clone.

    For every repo: captures uncommitted changes (`hg st`), parses
    `hg tags` to find the latest release tag, and diffs that tag against
    tip to decide whether a new release is "due".  Finally prints a
    summary of repos with dirty files, outgoing changesets, or a pending
    release.
    """
    repos = {}

    def survey_repo(distribution, repo):
        print repo
        dirty = get_it("hg st")
        # Outgoing-changeset detection is deliberately disabled here
        # (the `hg out` call is commented out), so `outgoing` is always
        # the empty string and the 'no changes found' check never fires.
        #outgoing = get_it("hg out")
        outgoing = ''
        if 'no changes found' in outgoing:
            outgoing = ''
        tags = {}
        latest_tag = None
        # `hg tags` output lines look like "name    rev:changeset-hash".
        # Tags are listed newest-first, so the first non-'tip' tag seen
        # is taken as the latest release tag.
        for line in get_it("hg tags").split('\n'):
            match = re.match(r'^\s*(\S+)\s+(\d+):(.*?)\s*$', line)
            if match:
                tag = match.group(1)
                tags[tag] = int(match.group(2))
                if tag != 'tip' and latest_tag is None:
                    latest_tag = tag
        due = ''
        diff = ''
        if latest_tag is None:
            due = 'NEVER RELEASED'
        else:
            # Exclude .hgtags so the act of tagging itself doesn't make
            # the repo look changed since the last release.
            diff = get_it('hg diff -r %s -r tip -X .hgtags' % latest_tag)
            if not diff:
                due = ''
            else:
                # NOTE(review): "changesets" here is the revision-number
                # difference between tip and the tag, which may overcount
                # on repos with multiple branches — confirm if precision
                # matters.
                due = "%d changesets (tip=%d, %s=%d)" % \
                    ((tags['tip'] - tags[latest_tag]), tags['tip'], latest_tag, tags[latest_tag])
        repos[repo] = {
            'dirty': dirty,
            'outgoing': outgoing,
            'tags': tags,
            'latest_tag': latest_tag,
            'due': due,
            'diff': diff,
        }

    count = for_each_repo(data, clone_dir, survey_repo)

    # Report only repos that need attention.
    print '-----'
    for repo in sorted(repos.keys()):
        r = repos[repo]
        if r['dirty'] or r['outgoing'] or r['due']:
            print repo
            if r['dirty']:
                print r['dirty']
            if r['outgoing']:
                print r['outgoing']
            #print r['tags']
            #print len(r['diff'])
            if r['due']:
                print "  DUE:", r['due']
            print
    print '-----'
    print "%d repos checked." % count


def test_repos(data, clone_dir):
    """STUB"""
    repos = {}

    def test_repo(distribution, repo):
        print repo
        print get_it("grep -rI falderal .")

    for_each_repo(data, clone_dir, test_repo)


# Files permitted in the root directory of a distribution clone; anything
# else is reported by lint_dists as junk.
OK_ROOT_FILES = (
    'LICENSE', 'UNLICENSE',
    'README.markdown', 'TODO.markdown', 'HISTORY.markdown',
    'test.sh', 'clean.sh',
    'make.sh', 'make-cygwin.sh', 'Makefile',
    '.hgtags', '.hgignore', '.gitignore',
)
# Directories permitted in the root directory of a distribution clone.
OK_ROOT_DIRS = (
    'bin', 'contrib', 'demo', 'dialect',
    'disk', 'doc', 'ebin', 'eg',
    'impl', 'lib', 'priv', 'script',
    'src', 'tests',
    '.hg',
)

def lint_dists(data, clone_dir, host_language):
    problems = {}

    def lint_repo(distribution, repo):
        problems[distribution] = []
        show_it = True
        if host_language is not None:
            show_it = False
            for key in data:
                if data[key]['type'] != 'Implementation':
                    continue
                if distribution in data[key].get('in-distributions', []):
                    if data[key]['host-language'] == host_language:
                        show_it = True
                        break
        if not show_it:
            return False
        # Begin linting
        if not os.path.exists('README.markdown'):
            problems[distribution].append("No README.markdown")
        if not os.path.exists('LICENSE') and not os.path.exists('UNLICENSE'):
            problems[distribution].append("No LICENSE or UNLICENSE")
        if os.path.exists('LICENSE') and os.path.exists('UNLICENSE'):
            problems[distribution].append("Both LICENSE and UNLICENSE")
        for root, dirnames, filenames in os.walk('.'):
            if root.endswith(".hg"):
                del dirnames[:]
                continue
            if root == '.':
                root_files = []
                for filename in filenames:
                    if filename not in OK_ROOT_FILES:
                        root_files.append(filename)
                if root_files:
                    problems[distribution].append("Junk files in root: %s" % root_files)

                root_dirs = []
                for dirname in dirnames:
                    if dirname not in OK_ROOT_DIRS:
                        root_dirs.append(dirname)
                if root_dirs:
                    problems[distribution].append("Junk dirs in root: %s" % root_dirs)

        #~ version, revision = project.get_latest_version_and_revision()
        #~ distname = "%s-%s-%s" % (project.name, version, revision)
        #~ distfile = os.path.join("distfiles", "%s.zip" % distname)
        #~ if not isfile(distfile):
            #~ report.write("XXX No modernly-named distfile\n")
            #~ disturl = project.get_latest_explicit_dist_url()
            #~ match = re.match(r'^http://catseye\.tc/distfiles/(.*?)\.zip$', disturl)
            #~ if not match:
                #~ report.write("XXX Explicit distfile is not on catseye.tc or not a zipfile\n\n")
                #~ continue
            #~ distname = match.group(1)
            #~ distfile = "distfiles/%s.zip" % distname

    count = for_each_repo(data, clone_dir, lint_repo)

    problematic_count = 0
    for d in sorted(problems.keys()):
        if not problems[d]:
            continue        
        print d
        print '-' * len(d)
        print
        for problem in problems[d]:
            print "* %s" % problem
        print
        problematic_count += 1

    print "Linted %d clones, problems in %d of them." % (count, problematic_count)


def get_latest_release_tag(data, repo_name, clone_dir):
    """Return the latest release tag of the named repository's clone.

    Visits the clones via for_each_repo and, in the one matching
    `repo_name`, parses `hg tags` (listed newest-first) for the first
    non-'tip' tag.  Returns None if the repo has no release tags.

    Raises KeyError if `repo_name` is not among the catseye clones.
    """
    result = {}

    def find_it(distribution, repo):
        if repo != repo_name:
            # Not the repo we're after; return False so for_each_repo
            # does not count this visit.
            return False

        latest_tag = None
        # `hg tags` lists tags newest-first, so the first non-'tip'
        # entry is the latest release tag; stop scanning once found.
        for line in get_it("hg tags").split('\n'):
            match = re.match(r'^\s*(\S+)\s+(\d+):(.*?)\s*$', line)
            if match:
                tag = match.group(1)
                if tag != 'tip':
                    latest_tag = tag
                    break

        result[repo] = latest_tag

    for_each_repo(data, clone_dir, find_it)

    if repo_name not in result:
        raise KeyError("no clone found for repository %r" % repo_name)
    return result[repo_name]