Source

mycloud / src / mycloud / resource.py

#!/usr/bin/env python

'''Resources used for input and output to mycloud.

Typically these represent files (or parts of files).
'''
import blocked_table
import csv
import os

class ResourceException(Exception):
  '''Error raised by resources in this module.

  For example, SequenceFile.writer() raises it because writing is not
  supported there.
  '''


class Resource(object):
  '''Base class for a resource identified by a filename.'''

  def __init__(self, filename):
    # Name identifying this resource; exists() treats it as a filesystem path.
    self.filename = filename

  def __repr__(self):
    # Rendered as "<ClassName>:<filename>".
    return ':'.join((type(self).__name__, self.filename))

  def exists(self):
    '''Return whether self.filename exists on the filesystem.'''
    path = self.filename
    return os.path.exists(path)

  def move(self, source, dest):
    '''Rename source to dest.

    Operates only on the two paths given; self.filename is neither read
    nor updated.
    '''
    os.rename(source, dest)

class CSV(Resource):
  '''Resource stored as a CSV file with one record per row.'''

  class Writer(object):
    '''Writes (key, value) pairs as two-column CSV rows.

    The target file is opened for writing (truncating it) on construction.
    Call close() when done; garbage collection closes it as a fallback.
    '''
    def __init__(self, file):
      self.file = open(file, 'w')
      self.csvwriter = csv.writer(self.file)

    def __del__(self):
      self.close()

    def close(self):
      '''Flush and close the underlying file. Safe to call repeatedly.'''
      # __del__ also runs when open() raised in __init__, in which case
      # self.file was never assigned -- hence the getattr.
      handle = getattr(self, 'file', None)
      if handle is not None and not handle.closed:
        handle.flush()
        handle.close()

    def add(self, k, v):
      '''Append one row containing k and v.'''
      self.csvwriter.writerow([k, v])

  class Reader(object):
    '''Iterates over a CSV file, yielding (first column, remaining columns).

    The remaining columns are yielded as a list, so a row written by
    Writer.add(k, v) is read back as (k, [v]).
    '''
    def __init__(self, file):
      # Keep the handle so it can be closed once the rows are exhausted.
      self.file = open(file)
      self.csvreader = csv.reader(self.file)

    def __iter__(self):
      # A fully consumed reader yields nothing more, as an exhausted
      # csv.reader would; the check avoids reading from the closed file.
      if self.file.closed:
        return
      for row in self.csvreader:
        yield row[0], row[1:]
      # Close only on natural exhaustion so that a partially consumed
      # reader can still be resumed by a later iteration.
      self.file.close()

  def reader(self):
    '''Return a CSV.Reader over self.filename.'''
    return CSV.Reader(self.filename)

  def writer(self):
    '''Return a CSV.Writer that (re)creates self.filename.'''
    return CSV.Writer(self.filename)


class BlockedTable(Resource):
  '''Resource backed by a blocked_table file and its '.index' companion.'''

  def __init__(self, filename, blocksize=blocked_table.DEFAULT_BLOCK_SIZE):
    super(BlockedTable, self).__init__(filename)
    # Handed to the TableBuilder as max_block_size when writing.
    self.blocksize = blocksize

  def reader(self):
    '''Return an iterator obtained from blocked_table.Table(self.filename).'''
    table = blocked_table.Table(self.filename)
    return table.iterator()

  def writer(self):
    '''Return a blocked_table.TableBuilder for self.filename.'''
    builder = blocked_table.TableBuilder(self.filename,
                                         max_block_size=self.blocksize)
    return builder

  def move(self, source, dest):
    '''Rename the table file, then its '.index' companion file.'''
    os.rename(source, dest)
    suffix = '.index'
    os.rename(source + suffix, dest + suffix)


class SequenceFile(Resource):
  '''Read-only resource that exposes a sequence as (item, item) pairs.

  The constructor argument must support len() and iteration. The
  resource's filename is the descriptive label 'range(<length>)', not a
  real path.
  '''
  class Reader(object):
    '''Yields (i, i) for every item i of the wrapped sequence.'''
    def __init__(self, range):
      self.range = range

    def __iter__(self):
      for i in self.range:
        yield i, i

  def __init__(self, range):
    Resource.__init__(self, 'range(%d)' % len(range))
    self.range = range

  def reader(self):
    '''Return a SequenceFile.Reader over the wrapped sequence.'''
    return SequenceFile.Reader(self.range)

  def writer(self):
    '''Always raises ResourceException: this resource cannot be written.'''
    # Call form of raise: valid on both Python 2 and Python 3, unlike the
    # Python 2-only "raise Class, message" statement.
    raise ResourceException('SequenceFile does not support writing.')


class MemoryFile(Resource):
  '''Resource that keeps its (key, value) records in an in-memory list.'''

  class Writer(object):
    '''Appends (k, v) tuples to the list it was constructed with.'''
    def __init__(self, data):
      self.data = data

    def add(self, k, v):
      record = (k, v)
      self.data.append(record)

  def __init__(self):
    # Record list shared with every writer handed out by writer().
    self.data = []
    self.filename = 'MemoryFile'

  def __repr__(self):
    return 'MemoryFile(data = %s)' % self.data

  def reader(self):
    '''Return an iterator over the records stored so far.'''
    records = self.data
    return iter(records)

  def writer(self):
    '''Return a Writer that appends to this resource's record list.'''
    target = self.data
    return MemoryFile.Writer(target)


class FilteredTableIterator(object):
  '''Restricts a seekable (key, value) iterator to a key range.

  iterator must provide seek(key) and iterate as (key, value) pairs.
  Each iteration first calls iterator.seek(key_start), then yields pairs
  until a key >= key_end is seen; key_end itself is excluded.

  NOTE(review): presumably seek() positions the iterator at the first key
  >= key_start and keys arrive in ascending order -- confirm against the
  iterator implementation.
  '''
  def __init__(self, iterator, key_start, key_end):
    self.iterator = iterator
    self.key_start = key_start
    self.key_end = key_end

  def __iter__(self):
    self.iterator.seek(self.key_start)

    for k, v in self.iterator:
      if k >= self.key_end:
        break
      yield k, v
    # Falling off the end finishes the generator. An explicit
    # "raise StopIteration" here is turned into RuntimeError by PEP 479
    # (Python 3.7+), so it must not be used.