Source

heechee / svndiff.py

Full commit

import zlib


try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

# Instruction opcodes; undiff() takes them from the top two bits of the
# first byte of each instruction.
OP_SOURCE_COPY, OP_TARGET_COPY, OP_NEW_DATA = 0, 1, 2


def read_integer(handle):
    """
    Decode one svndiff variable-length integer from handle.

    The value is stored most-significant group first, seven bits per byte.
    A byte whose top bit is set is followed by another byte; the byte with
    a clear top bit is the last one.

    Raises TypeError (from ord) if the handle runs out of data.
    """
    value = 0
    byte = ord(handle.read(1))
    while byte & 128:
        # Fold in this group's 7 payload bits and make room for the next.
        value = (value | (byte & 127)) << 7
        byte = ord(handle.read(1))
    # The final byte has no continuation bit, so it is pure payload.
    return value | byte


def make_integer(integer):
    """
    Encode a non-negative integer in the svndiff variable-length format.

    The result holds the value most-significant group first, seven bits
    per byte, with the top bit set on every byte except the last.

    Raises ValueError if integer is negative: right-shifting a negative
    number never reaches zero, so it has no finite encoding here.
    """
    if integer < 0:
        raise ValueError("Cannot encode negative integer: %r" % (integer,))

    # Collect groups least-significant first. The lowest group becomes the
    # final byte and is the only one without the continuation bit.
    groups = [chr(integer & 127)]
    integer >>= 7
    while integer:
        groups.append(chr((integer & 127) | 128))
        integer >>= 7
    groups.reverse()
    return "".join(groups)


def read_section(handle, current_length, use_zlib):
    """
    Read one window section from handle, decompressing it if needed.

    handle: file-like object positioned at the start of the section; it
        must support tell() when use_zlib is true.
    current_length: number of bytes the section occupies in the stream.
        When use_zlib is true this includes the original-length prefix.
    use_zlib: whether the section carries an original-length prefix and
        may be zlib-compressed.

    Returns the section's decoded contents.
    """
    if not use_zlib:
        return handle.read(current_length)

    # The section starts with the uncompressed length; measure how many
    # bytes that prefix took so only the remaining payload is read.
    start = handle.tell()
    original_length = read_integer(handle)
    prefix_length = handle.tell() - start
    data = handle.read(current_length - prefix_length)

    # A payload exactly as long as the original was stored raw. Anything
    # else is a zlib stream that begins right after the length prefix, so
    # it must be handed to zlib whole (no leading byte skipped).
    if len(data) != original_length:
        data = zlib.decompress(data)
    return data


def _apply_window(source, target, diff, use_zlib):
    "Decodes one window from diff and appends its output to target."
    # Window header: five variable-length integers.
    source_offset = read_integer(diff)
    read_integer(diff)  # source view length; not needed to apply the window
    read_integer(diff)  # target view length; not needed to apply the window
    instructions_length = read_integer(diff)
    newdata_length = read_integer(diff)

    instructions = StringIO(read_section(diff, instructions_length, use_zlib))
    newdata = StringIO(read_section(diff, newdata_length, use_zlib))

    # Target-copy offsets count from where this window's output begins.
    target.seek(0, 2)
    window_start = target.tell()

    while True:
        first = instructions.read(1)
        if not first:
            # End of the instruction stream.
            break
        first_byte = ord(first)

        op = first_byte >> 6      # Top 2 bits
        length = first_byte & 63  # Lower 6 bits

        # If the inline length is zero, the length is an integer following.
        if length == 0:
            length = read_integer(instructions)

        if op == OP_SOURCE_COPY:
            offset = read_integer(instructions)
            # The offset is relative to this window's source view.
            source.seek(source_offset + offset)
            target.write(source.read(length))
        elif op == OP_TARGET_COPY:
            offset = read_integer(instructions)
            copy_from = window_start + offset
            remaining = length
            # The copied range may run into bytes this same instruction
            # produces, so copy whatever exists so far and repeat.
            while remaining:
                target.seek(copy_from)
                chunk = target.read(remaining)
                if not chunk:
                    raise ValueError(
                        "Target copy starts beyond the data written so far")
                target.seek(0, 2)
                target.write(chunk)
                copy_from += len(chunk)
                remaining -= len(chunk)
        elif op == OP_NEW_DATA:
            target.write(newdata.read(length))
        else:
            raise ValueError("Unknown op")


def undiff(source, diff):
    """
    Apply an svndiff stream to source and return the resulting target.

    source: seekable file-like object holding the base text.
    diff: file-like object positioned at the start of the svndiff data
        (the "SVN" marker followed by a version byte); it must support
        tell() and seek().

    Every window in the stream is applied in order. Returns the target
    contents as a single string.

    Raises ValueError if the header marker or version is not recognised,
    if an instruction has an unknown op, or if a target copy starts beyond
    the data written so far.
    """
    # Check the header with a real exception rather than an assert: under
    # "python -O" an assert disappears, and the read inside it with it.
    magic = diff.read(3)
    if magic != "SVN":
        raise ValueError("Invalid svndiff header: %r" % (magic,))
    version_byte = diff.read(1)
    if version_byte == "\0":
        use_zlib = False
    elif version_byte == "\1":
        use_zlib = True
    else:
        raise ValueError("Invalid svndiff version: %i" % ord(version_byte))

    target = StringIO()

    # The header is followed by zero or more windows back to back; peek
    # one byte to find out whether another window is present.
    while True:
        position = diff.tell()
        if not diff.read(1):
            break
        diff.seek(position)
        _apply_window(source, target, diff, use_zlib)

    return target.getvalue()


def make_cheap_diff(text):
    """
    Build an uncompressed svndiff whose only instruction emits text as-is.

    The result is the "SVN" marker with version byte 0, one window header
    and a single new-data instruction covering the whole of text.
    """
    encoded_length = make_integer(len(text))

    # Op in the top two bits with a zero inline length, so the real length
    # follows as a variable-length integer.
    instructions = chr(OP_NEW_DATA << 6) + encoded_length

    fields = [
        "SVN\0",
        "\0",                             # Source offset
        "\0",                             # Source length
        encoded_length,                   # Target length
        make_integer(len(instructions)),  # Instructions length
        encoded_length,                   # New data length
        instructions,
        text,
    ]
    return "".join(fields)