# Source
#
# galaxy-central (ngs) / lib / galaxy / webapps / community / controllers / upload.py

import sys, os, shutil, logging, tarfile, tempfile
from galaxy.web.base.controller import *
from galaxy.model.orm import *
from galaxy.datatypes.checkers import *
from common import *
from mercurial import hg, ui, commands

log = logging.getLogger( __name__ )

# Status values that accompany messages.
SUCCESS = "done"
INFO = "info"
WARNING = "warning"
ERROR = "error"
# 1024 * 1024 = 1048576
CHUNK_SIZE = 1024 * 1024

class UploadError( Exception ):
    """Upload-specific exception type; it adds nothing to Exception."""

class UploadController( BaseController ):
    @web.expose
    @web.require_login( 'upload', use_panels=True, webapp='community' )
    def upload( self, trans, **kwd ):
        """
        Render the repository upload form and process a submitted upload.

        When the request carries 'upload_button', the file in 'file_data' is either
        extracted into the repository (tar archives) or moved to the selected upload
        point and committed (single files, optionally uncompressed first).  When the
        upload succeeds the response is a redirect to the repository controller;
        otherwise the upload form is rendered with the current message and status.
        """
        params = util.Params( kwd )
        message = util.restore_text( params.get( 'message', ''  ) )
        status = params.get( 'status', 'done' )
        commit_message = util.restore_text( params.get( 'commit_message', 'Uploaded'  ) )
        category_ids = util.listify( params.get( 'category_id', '' ) )
        # NOTE(review): categories and selected_categories are not used by this method.
        categories = get_categories( trans )
        repository_id = params.get( 'repository_id', '' )
        repository = get_repository( trans, repository_id )
        repo_dir = repository.repo_path
        repo = hg.repository( get_configured_ui(), repo_dir )
        uncompress_file = util.string_as_bool( params.get( 'uncompress_file', 'true' ) )
        remove_repo_files_not_in_tar = util.string_as_bool( params.get( 'remove_repo_files_not_in_tar', 'true' ) )
        uploaded_file = None
        upload_point = self.__get_upload_point( repository, **kwd )
        # Remember the current repository tip so a change can be detected after the upload.
        tip = repository.tip
        if params.get( 'upload_button', False ):
            current_working_dir = os.getcwd()
            file_data = params.get( 'file_data', '' )
            if file_data in ( '', None ):
                message = 'No files were entered on the upload form.'
                status = 'error'
            else:
                uploaded_file = file_data.file
                uploaded_file_name = uploaded_file.name
                # The file name comes from the client, so keep only its last path component.
                # Otherwise a name such as '../../x' or an absolute path would be joined into
                # the repository path below and could escape the repository directory.
                uploaded_file_filename = os.path.basename( file_data.filename.replace( '\\', '/' ) )
            if uploaded_file:
                # Only inspect the file once we know that one was actually uploaded.
                isempty = os.path.getsize( os.path.abspath( uploaded_file_name ) ) == 0
                isgzip = False
                isbz2 = False
                if uncompress_file:
                    isgzip = is_gzip( uploaded_file_name )
                    if not isgzip:
                        isbz2 = is_bz2( uploaded_file_name )
                ok = True
                if isempty:
                    tar = None
                    istar = False
                else:
                    # Determine what we have - a single file or an archive
                    try:
                        if ( isgzip or isbz2 ) and uncompress_file:
                            # Open for reading with transparent compression.
                            tar = tarfile.open( uploaded_file_name, 'r:*' )
                        else:
                            tar = tarfile.open( uploaded_file_name )
                        istar = True
                    except tarfile.ReadError:
                        tar = None
                        istar = False
                if istar:
                    ok, message, files_to_remove = self.upload_tar( trans,
                                                                    repository,
                                                                    tar,
                                                                    uploaded_file,
                                                                    upload_point,
                                                                    remove_repo_files_not_in_tar,
                                                                    commit_message )
                else:
                    if ( isgzip or isbz2 ) and uncompress_file:
                        uploaded_file_filename = self.uncompress( repository, uploaded_file_name, uploaded_file_filename, isgzip, isbz2 )
                    if upload_point is not None:
                        full_path = os.path.abspath( os.path.join( repo_dir, upload_point, uploaded_file_filename ) )
                    else:
                        full_path = os.path.abspath( os.path.join( repo_dir, uploaded_file_filename ) )
                    # Move the uploaded file to the load_point within the repository hierarchy.
                    shutil.move( uploaded_file_name, full_path )
                    commands.add( repo.ui, repo, full_path )
                    try:
                        commands.commit( repo.ui, repo, full_path, user=trans.user.username, message=commit_message )
                    except Exception:
                        # I never have a problem with commands.commit on a Mac, but in the test/production
                        # tool shed environment, it occasionally throws a "TypeError: array item must be char"
                        # exception.  If this happens, we'll try the following.
                        repo.dirstate.write()
                        repo.commit( user=trans.user.username, text=commit_message )
                    if full_path.endswith( '.loc.sample' ):
                        # Handle the special case where a xxx.loc.sample file is
                        # being uploaded by copying it to ~/tool-data/xxx.loc.
                        copy_sample_loc_file( trans, full_path )
                    handle_email_alerts( trans, repository )
                if ok:
                    # Update the repository files for browsing.
                    update_for_browsing( trans, repository, current_working_dir, commit_message=commit_message )
                    # Compare against the new repository tip.
                    if tip != repository.tip:
                        if ( isgzip or isbz2 ) and uncompress_file:
                            uncompress_str = ' uncompressed and '
                        else:
                            uncompress_str = ' '
                        message = "The file '%s' has been successfully%suploaded to the repository." % ( uploaded_file_filename, uncompress_str )
                        if istar and remove_repo_files_not_in_tar and files_to_remove:
                            if upload_point is not None:
                                message += "  %d files were removed from the repository relative to the selected upload point '%s'." % ( len( files_to_remove ), upload_point )
                            else:
                                message += "  %d files were removed from the repository root." % len( files_to_remove )
                    else:
                        message = 'No changes to repository.'
                    # Set metadata on the repository tip
                    error_message, status = set_repository_metadata( trans, repository_id, repository.tip, **kwd )
                    if error_message:
                        message = '%s<br/>%s' % ( message, error_message )
                        return trans.response.send_redirect( web.url_for( controller='repository',
                                                                          action='manage_repository',
                                                                          id=repository_id,
                                                                          message=message,
                                                                          status=status ) )
                    return trans.response.send_redirect( web.url_for( controller='repository',
                                                                      action='browse_repository',
                                                                      id=repository_id,
                                                                      commit_message='Deleted selected files',
                                                                      message=message,
                                                                      status=status ) )
                else:
                    status = 'error'
        selected_categories = [ trans.security.decode_id( category_id ) for category_id in category_ids ]
        return trans.fill_template( '/webapps/community/repository/upload.mako',
                                    repository=repository,
                                    commit_message=commit_message,
                                    uncompress_file=uncompress_file,
                                    remove_repo_files_not_in_tar=remove_repo_files_not_in_tar,
                                    message=message,
                                    status=status )
    def upload_tar( self, trans, repository, tar, uploaded_file, upload_point, remove_repo_files_not_in_tar, commit_message ):
        """
        Extract an uploaded tar archive into the repository and commit the result.

        The archive is first validated with __check_archive.  It is extracted below
        upload_point (or the repository root when upload_point is None).  When
        remove_repo_files_not_in_tar is set and the repository is not new, files under
        the extraction directory that are not in the archive are removed.

        Both tar and uploaded_file are closed before this method returns.

        Returns a tuple ( ok, message, files_to_remove ).
        """
        repo_dir = repository.repo_path
        repo = hg.repository( get_configured_ui(), repo_dir )
        files_to_remove = []
        try:
            ok, message = self.__check_archive( tar )
            if not ok:
                return ok, message, files_to_remove
            if upload_point is not None:
                full_path = os.path.abspath( os.path.join( repo_dir, upload_point ) )
            else:
                full_path = os.path.abspath( repo_dir )
            # Normalize the member paths so that a member stored as './name' compares equal
            # to the path os.walk() produces below.  Without this, files that were just
            # extracted would be considered missing from the archive and removed.
            filenames_in_archive = [ os.path.normpath( os.path.join( full_path, tarinfo_obj.name ) )
                                     for tarinfo_obj in tar.getmembers() ]
            # Extract the uploaded tar to the load_point within the repository hierarchy.
            tar.extractall( path=full_path )
        finally:
            # Release both handles on every exit path, including a failed extraction.
            tar.close()
            uploaded_file.close()
        if remove_repo_files_not_in_tar and not repository.is_new:
            # We have a repository that is not new (it contains files), so discover
            # those files that are in the repository, but not in the uploaded archive.
            archive_paths = set( filenames_in_archive )
            for root, dirs, files in os.walk( full_path ):
                if '.hg' in root or 'hgrc' in root:
                    continue
                if '.hg' in dirs:
                    # Don't visit .hg directories - should be impossible since we don't
                    # allow uploaded archives that contain .hg dirs, but just in case...
                    dirs.remove( '.hg' )
                if 'hgrc' in files:
                    # Don't include hgrc files in commit.
                    files.remove( 'hgrc' )
                for name in files:
                    full_name = os.path.join( root, name )
                    if full_name not in archive_paths:
                        files_to_remove.append( full_name )
            for repo_file in files_to_remove:
                # Remove files in the repository (relative to the upload point)
                # that are not in the uploaded archive.
                try:
                    commands.remove( repo.ui, repo, repo_file, force=True )
                except Exception:
                    # I never have a problem with commands.remove on a Mac, but in the test/production
                    # tool shed environment, it throws an exception whenever I delete all files from a
                    # repository.  If this happens, we'll try the following.
                    relative_repo_file = repo_file.split( 'repo_%d' % repository.id )[1].lstrip( '/' )
                    repo.dirstate.remove( relative_repo_file )
                    repo.dirstate.write()
                    absolute_repo_file = os.path.abspath( repo_file )
                    if os.path.isdir( absolute_repo_file ):
                        try:
                            os.rmdir( absolute_repo_file )
                        except OSError:
                            # The directory is not empty
                            pass
                    elif os.path.isfile( absolute_repo_file ):
                        os.remove( absolute_repo_file )
                        parent_dir = os.path.split( absolute_repo_file )[0]
                        try:
                            os.rmdir( parent_dir )
                        except OSError:
                            # The directory is not empty
                            pass
        for filename_in_archive in filenames_in_archive:
            commands.add( repo.ui, repo, filename_in_archive )
            if filename_in_archive.endswith( '.loc.sample' ):
                # Handle the special case where a xxx.loc.sample file is
                # being uploaded by copying it to ~/tool-data/xxx.loc.
                copy_sample_loc_file( trans, filename_in_archive )
        try:
            commands.commit( repo.ui, repo, full_path, user=trans.user.username, message=commit_message )
        except Exception:
            # I never have a problem with commands.commit on a Mac, but in the test/production
            # tool shed environment, it occasionally throws a "TypeError: array item must be char"
            # exception.  If this happens, we'll try the following.
            repo.dirstate.write()
            repo.commit( user=trans.user.username, text=commit_message )
        handle_email_alerts( trans, repository )
        return True, '', files_to_remove
    def uncompress( self, repository, uploaded_file_name, uploaded_file_filename, isgzip, isbz2 ):
        if isgzip:
            self.__handle_gzip( repository, uploaded_file_name )
            return uploaded_file_filename.rstrip( '.gz' )
        if isbz2:
            self.__handle_bz2( repository, uploaded_file_name )
            return uploaded_file_filename.rstrip( '.bz2' )
    def __handle_gzip( self, repository, uploaded_file_name ):
        """
        Replace the gzipped file at uploaded_file_name with its uncompressed content.

        The data is uncompressed in CHUNK_SIZE pieces into a temporary file created in
        the same directory, which is then moved over uploaded_file_name.  If reading
        raises IOError, the temporary file is deleted, the problem is logged and the
        uploaded file is left as it was.
        """
        fd, uncompressed = tempfile.mkstemp( prefix='repo_%d_upload_gunzip_' % repository.id, dir=os.path.dirname( uploaded_file_name ), text=False )
        gzipped_file = gzip.GzipFile( uploaded_file_name, 'rb' )
        try:
            try:
                while 1:
                    chunk = gzipped_file.read( CHUNK_SIZE )
                    if not chunk:
                        break
                    os.write( fd, chunk )
            finally:
                # Close both handles on every path so that neither leaks when reading or writing fails.
                os.close( fd )
                gzipped_file.close()
        except IOError:
            # sys.exc_info() is used instead of naming the exception in the except clause
            # because it is spelled the same way in every Python version.
            e = sys.exc_info()[1]
            os.remove( uncompressed )
            log.exception( 'Problem uncompressing gz data "%s": %s' % ( uploaded_file_name, str( e ) ) )
            return
        shutil.move( uncompressed, uploaded_file_name )
    def __handle_bz2( self, repository, uploaded_file_name ):
        """
        Replace the bz2 compressed file at uploaded_file_name with its uncompressed content.

        The data is uncompressed in CHUNK_SIZE pieces into a temporary file created in
        the same directory, which is then moved over uploaded_file_name.  If reading
        raises IOError, the temporary file is deleted, the problem is logged and the
        uploaded file is left as it was.
        """
        fd, uncompressed = tempfile.mkstemp( prefix='repo_%d_upload_bunzip2_' % repository.id, dir=os.path.dirname( uploaded_file_name ), text=False )
        bzipped_file = bz2.BZ2File( uploaded_file_name, 'rb' )
        try:
            try:
                while 1:
                    chunk = bzipped_file.read( CHUNK_SIZE )
                    if not chunk:
                        break
                    os.write( fd, chunk )
            finally:
                # Close both handles on every path so that neither leaks when reading or writing fails.
                os.close( fd )
                bzipped_file.close()
        except IOError:
            # The exception has to be fetched explicitly: the log message below needs it.
            # sys.exc_info() is spelled the same way in every Python version.
            e = sys.exc_info()[1]
            os.remove( uncompressed )
            log.exception( 'Problem uncompressing bz2 data "%s": %s' % ( uploaded_file_name, str( e ) ) )
            return
        shutil.move( uncompressed, uploaded_file_name )
    def __get_upload_point( self, repository, **kwd ):
        upload_point = kwd.get( 'upload_point', None )
        if upload_point is not None:
            # The value of upload_point will be something like: database/community_files/000/repo_12/1.bed
            if os.path.exists( upload_point ):
                if os.path.isfile( upload_point ):
                    # Get the parent directory
                    upload_point, not_needed = os.path.split( upload_point )
                    # Now the value of uplaod_point will be something like: database/community_files/000/repo_12/
                upload_point = upload_point.split( 'repo_%d' % repository.id )[ 1 ]
                if upload_point:
                    upload_point = upload_point.lstrip( '/' )
                    upload_point = upload_point.rstrip( '/' )
                # Now the value of uplaod_point will be something like: /
                if upload_point == '/':
                    upload_point = None
            else:
                # Must have been an error selecting something that didn't exist, so default to repository root
                upload_point = None
        return upload_point
    def __check_archive( self, archive ):
        for member in archive.getmembers():
            # Allow regular files and directories only
            if not ( member.isdir() or member.isfile() ):
                message = "Uploaded archives can only include regular directories and files (no symbolic links, devices, etc)."
                return False, message
            for item in [ '.hg', '..', '/' ]:
                if member.name.startswith( item ):
                    message = "Uploaded archives cannot contain .hg directories, absolute filenames starting with '/', or filenames with two dots '..'."
                    return False, message
            if member.name in [ 'hgrc' ]:
                message = "Uploaded archives cannot contain hgrc files."
                return False, message
        return True, ''