# pep376 / pkgutil.py

# -*- coding: utf8 -*-
""" PEP 376
"""
from __future__ import with_statement
import os
from os.path import join, splitext, isdir
from os import listdir
from string import maketrans
import csv
import sys
import re

from distutils.dist  import  DistributionMetadata

#
# distutils.dist.DistributionMetadata new version
#
class _DistributionMetadata(DistributionMetadata):
    """distutils.dist.DistributionMetadata new version

    That can load an existing PKG-INFO file
    """
    def __init__ (self, file=None):
        if file is not None:
            self.read_pkg_file(file)
        else:
            self.name = None
            self.version = None
            self.author = None
            self.author_email = None
            self.maintainer = None
            self.maintainer_email = None
            self.url = None
            self.license = None
            self.description = None
            self.long_description = None
            self.keywords = None
            self.platforms = None
            self.classifiers = None
            self.download_url = None
            # PEP 314
            self.provides = None
            self.requires = None
            self.obsoletes = None

    def read_pkg_file(self, file):
        """Reads from a PKG-INFO file object and initialize the instance.
        """
        if isinstance(file, str):
            file = open(file, 'rU')

        pkg_info = file.read()
        re_options = re.I|re.DOTALL|re.M

        def _extract(fieldname):
            if fieldname == 'Description':
                # crappy, need to be reworked
                pattern = r'^Description: (.*)'
                res = re.findall(pattern, pkg_info , re_options)
                if len(res) == 0:
                    return 'UNKNOWN'
                else:
                    res = res[0].split('\n' + 8*' ')
                    res = [r for r in res if not r.startswith('\n')]
                    return '\n'.join(res) + '\n'

            pattern = r'^%s: (.*?)$' % fieldname
            res = re.findall(pattern, pkg_info , re_options)
            if fieldname in ('Classifier', 'Requires', 'Provides',
                             'Obsolete'):
                return res
            if len(res) == 0:
                return 'UNKNOWN'
            return res[0]

        version = _extract('Metadata-Version')
        self.name = _extract('Name')
        self.version = _extract('Version')
        self.summary = _extract('Summary')
        self.url = _extract('Home-page')
        self.author = _extract('Author')
        self.author_email = _extract('Author-email')
        self.license = _extract('License')
        self.download_url = _extract('Download-URL')
        self.long_description = _extract('Description')
        self.keywords = _extract('Keywords').split(',')
        self.classifiers = _extract('Classifier')
        self.platform = _extract('Platform')

        # PEP 314
        if version == '1.1':
            self.requires = _extract('Requires')
            self.provides = _extract('Provides')
            self.obsoletes = _extract('Obsoletes')
        else:
            self.requires = None
            self.provides = None
            self.obsoletes = None


#
# function used to detect a PEP 376 .egg-info directory
#
def is_egg_info(path):
    """Tells whether `path` is a PEP 376 egg-info directory.

    `path` qualifies when its extension is `.egg-info` (compared
    case-insensitively), it is a directory, and that directory contains
    both a `PKG-INFO` and a `RECORD` entry.  The last check keeps older
    egg-info layouts from being picked up.
    """
    extension = splitext(path)[-1]
    if extension.lower() != '.egg-info':
        return False
    if not isdir(path):
        return False
    entries = os.listdir(path)
    return 'PKG-INFO' in entries and 'RECORD' in entries


# Translation table that maps the '/' separator to the local os.path.sep;
# applied with str.translate() to RECORD locations and relative paths
# before they are joined to an .egg-info directory.
SEP_TRANS = maketrans('/', os.path.sep)

#
# EggInfo class (with DistributionMetadata in it)
#
class EggInfo(object):
    """A PEP 376 `.egg-info` directory and the metadata it contains.

    Attributes set by the constructor:

    - info_path: path of the .egg-info directory
    - pkg_info_path: path of its PKG-INFO file
    - record_path: path of its RECORD file
    - metadata: _DistributionMetadata instance loaded from PKG-INFO
    - name: project name taken from the metadata
    """

    def __init__(self, path):
        self.info_path = path
        self.pkg_info_path = join(path, 'PKG-INFO')
        self.record_path = join(path, 'RECORD')
        self.metadata = _DistributionMetadata(self.pkg_info_path)
        self.name = self.metadata.name
        # RECORD entries, loaded lazily by get_installed_files()
        self._files = None

    def __str__(self):
        return "EggInfo('%s')" % self.name

    def _read_record(self):
        """Reads RECORD and returns a list of (location, md5, size) tuples.

        Empty rows are skipped.  `md5` and `size` are None when the
        column is missing or empty.
        """
        files = []
        # the with block guarantees the RECORD file is closed
        with open(self.record_path) as record:
            for row in csv.reader(record):
                if not row:
                    continue
                location = row[0]
                md5 = (row[1] or None) if len(row) > 1 else None
                size = (row[2] or None) if len(row) > 2 else None
                files.append((location, md5, size))
        return files

    def get_installed_files(self, local=False):
        """Iterates over the RECORD entries as (location, md5, size).

        When `local` is true, each location has its '/' separators
        converted to the local separator and is joined to the .egg-info
        directory path; otherwise it is yielded as written in RECORD.
        """
        # the file is read once and kept in the object memory
        # to avoid spurious I/O accesses
        if self._files is None:
            self._files = self._read_record()

        # returning cross-platform *or* local paths
        for location, md5, size in self._files:
            if local:
                location = location.translate(SEP_TRANS)
                location = join(self.info_path, location)
            yield location, md5, size

    def uses(self, path):
        """Returns True if the path is listed in the RECORD file.

        e.g. if the project uses this file.

        When `path` exists on the filesystem it is compared with the
        local form of the RECORD entries, otherwise with the entries as
        written in RECORD.
        """
        local = os.path.exists(path)
        for location, md5, size in self.get_installed_files(local):
            if location == path:
                return True
        return False

    def get_file(self, path, binary=False):
        """Returns a file instance on the path.

        `path` is a '/'-separated path relative to the .egg-info
        directory.  If binary is True, opens the file in binary mode.
        The caller is responsible for closing the returned file.
        """
        if os.path.sep != '/':
            path = path.translate(SEP_TRANS)
        fullpath = join(self.info_path, path)
        mode = 'rb' if binary else 'r'
        return open(fullpath, mode)

#
# EggInfoDirectory represents a directory that contains .egg-info directories
#
class EggInfoDirectory(list):
    """List of EggInfo objects for the egg-info entries of one directory.

    The directory given as `path` is scanned once, at construction time;
    every entry accepted by `is_egg_info` is wrapped in an EggInfo and
    appended to the list.
    """

    def __init__(self, path):
        self.path = path
        # single up-front scan, to keep I/O to a minimum
        for entry in os.listdir(self.path):
            candidate = join(self.path, entry)
            if not is_egg_info(candidate):
                continue
            self.append(EggInfo(candidate))

    def file_users(self, path):
        """Yields each EggInfo of this directory whose project uses `path`."""
        for project in self:
            if not project.uses(path):
                continue
            yield project

    def owner(self, path):
        """Returns the single EggInfo using `path`.

        Returns None when no project, or more than one project, uses it.
        """
        matches = []
        for project in self:
            if project.uses(path):
                matches.append(project)
        if len(matches) != 1:
            return None
        return matches[0]

#
# EggInfoDirectories is a collection of EggInfoDirectory instances,
# initialized with a list of paths.
#
class EggInfoDirectories(list):
    """List of EggInfoDirectory objects, one per existing directory.

    `paths` is an iterable of directory paths; entries that are not
    directories are ignored.  When `paths` is omitted (or None),
    `sys.path` is used and looked up at call time, so a `sys.path` that
    was rebound after this module was imported is honoured.
    """

    def __init__(self, paths=None):
        super(EggInfoDirectories, self).__init__()
        if paths is None:
            paths = sys.path
        for path in paths:
            if os.path.isdir(path):
                self.append(EggInfoDirectory(path))

    def get_egg_infos(self):
        """Iterates on all .egg-info directories found in the paths.

        Each returned element is an EggInfo instance, taken from the
        EggInfoDirectory objects built by the constructor (the
        directories are not scanned again).
        """
        for directory in self:
            for egg_info in directory:
                yield egg_info

    def get_egg_info(self, project_name):
        """Returns an EggInfo instance for the given project name.

        The first EggInfo whose `name` equals `project_name` is returned.
        If not found, returns None.
        """
        for directory in self:
            for egg_info in directory:
                if egg_info.name == project_name:
                    return egg_info
        return None

    def get_file_users(self, path):
        """Iterates over all projects to find out which project uses the file.

        Yields EggInfo instances, as produced by each directory's
        `file_users(path)`.
        """
        for directory in self:
            for egg_info in directory.file_users(path):
                yield egg_info

#
# high level APIs
#

def get_egg_infos(paths=None):
    """Iterates over the EggInfo instances found in `paths`.

    `paths` is an iterable of directories to scan.  When omitted (or
    None), `sys.path` is used and looked up at call time rather than
    frozen when this module is imported.  The directories are scanned
    on every call.
    """
    if paths is None:
        paths = sys.path
    return EggInfoDirectories(paths).get_egg_infos()

def get_egg_info(project_name, paths=None):
    """Returns the EggInfo instance for `project_name`, or None.

    `paths` is an iterable of directories to scan.  When omitted (or
    None), `sys.path` is used and looked up at call time rather than
    frozen when this module is imported.  The directories are scanned
    on every call.
    """
    if paths is None:
        paths = sys.path
    return EggInfoDirectories(paths).get_egg_info(project_name)

def get_file_users(path, paths=None):
    """Iterates over the EggInfo instances of the projects using `path`.

    `paths` is an iterable of directories to scan.  When omitted (or
    None), `sys.path` is used and looked up at call time rather than
    frozen when this module is imported.  The directories are scanned
    on every call.
    """
    if paths is None:
        paths = sys.path
    return EggInfoDirectories(paths).get_file_users(path)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.