Snippets

DavidC dframe-scatter

Created by David Cuddihy last modified
#!/usr/bin/env python
"""
:Synopsis: Scatter a single dataframe over files with paths containing
    yyyy/mm/dd sections corresponding to the entries.

:Example Usage:

dframe-scatter "/tmp/me.acr/blah/{yyyy}/{mm}/{dd}/lgm/rets/cm" < rets.csv 
"""
import os
import click
import csv
import errno
from retrying import retry
import mint.disk.FolderKit as FK


@click.command(help=__doc__)
@click.option('-i', type=click.File('r'), default='-')
@click.argument('tpath', type=str)
def main(i, tpath):
    """Split a CSV stream into one file per data row.

    The first line read from ``i`` is treated as the header.  For every
    following row the first column is split on ``-`` into ``yyyy``, ``mm``
    and ``dd``; those values are substituted into ``tpath`` and the header
    plus that single row are handed to ``write_row`` for the resulting path.

    :param i: readable file object holding the CSV input (``-`` = stdin).
    :param tpath: ``str.format`` template that may reference ``{yyyy}``,
        ``{mm}`` and ``{dd}``.
    :raises click.ClickException: if the input contains no header line.
    :raises ValueError: if the first column of a row does not split into
        exactly three ``-``-separated parts.
    """
    rdr = csv.reader(i)
    # The builtin next() works on Python 2.6+ and Python 3; the ``.next()``
    # method only exists on Python 2 iterators.
    hdr = next(rdr, None)
    if hdr is None:
        # Still a failure (non-zero exit), but with a readable message
        # instead of a bare StopIteration traceback.
        raise click.ClickException("No header row found in input")
    for row in rdr:
        if not row:
            # csv.reader yields [] for blank lines; there is no date column
            # to scatter on, so skip them rather than die on row[0].
            continue
        yyyy, mm, dd = row[0].split('-')
        path = tpath.format(yyyy=yyyy, mm=mm, dd=dd)
        write_row(path, hdr, row)


def write_row(tgtfile, hdr, row):
    """Write ``hdr`` followed by ``row`` as a two-line CSV file at ``tgtfile``.

    The parent folder of ``tgtfile`` is wrapped with ``FK.make`` and passed
    to ``ensure_flder`` before the file is opened.  The file is opened in
    ``'w'`` mode, so any existing content at ``tgtfile`` is replaced.

    :param tgtfile: path of the CSV file to write.
    :param hdr: sequence of header cells.
    :param row: sequence of data cells.
    """
    parent = os.path.dirname(tgtfile)
    ensure_flder(FK.make(parent))
    with open(tgtfile, 'w') as out:
        writer = csv.writer(out)
        writer.writerow(hdr)
        writer.writerow(row)

@retry(stop_max_attempt_number=5)
def ensure_flder(tmpf):
    """Touch ``tmpf`` and verify it exists, retrying up to five attempts.

    Similar programs may run at the same time, so creating the parent
    folder is subject to race conditions.  Other people have run into
    comparable mkdir/stat issues
    (see https://github.com/substack/node-mkdirp/pull/92); their remedy was
    to retry the mkdir/stat pair, which is what the ``retry`` decorator does
    here by re-running this function whenever it raises.

    :param tmpf: object exposing ``touch()`` and ``exists()``
        (presumably a FolderKit folder handle -- verify against ``FK.make``).
    :raises Exception: ``"Touch failed"`` if ``tmpf`` still does not exist
        after being touched.
    """
    tmpf.touch()
    if tmpf.exists():
        return
    raise Exception("Touch failed")


# Run the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
++ lgm-calc-risk-model

Computers / CPU cores / Max jobs to run
1:local / 7 / 7

Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete
ETA: 2s 0left 0.98avg  local:0/3096/100%/1.0s       
++ lgm-calc-signals

Computers / CPU cores / Max jobs to run
1:local / 7 / 7

Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete
ETA: 7s 0left 2.27avg  local:0/26/100%/2.3s   
++ lgm-calc-simple-fps

Computers / CPU cores / Max jobs to run
1:local / 7 / 7

Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete
local:7/0/100%/0.0s Traceback (most recent call last):
  File "/opt/bbginger/current/bin/dframe-scatter", line 49, in <module>
    main()
  File "/usr/local/lib/python2.7/dist-packages/click/core.py", line 610, in __call__
    return self.main(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/click/core.py", line 590, in main
    rv = self.invoke(ctx)
  File "/usr/local/lib/python2.7/dist-packages/click/core.py", line 782, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/usr/local/lib/python2.7/dist-packages/click/core.py", line 416, in invoke
    return callback(*args, **kwargs)
  File "/opt/bbginger/current/bin/dframe-scatter", line 25, in main
    write_row(path, hdr, row)
  File "/opt/bbginger/current/bin/dframe-scatter", line 39, in write_row
    raise e
OSError: [Errno 2] No such file or directory: '/opt/var/me.acr/ginger/nobody/2014/04/09/lgm/factor-pflos-weights/simple'
ETA: 13s 0left 3.85avg  local:0/26/100%/3.9s   
++ lgm-calc-real-weights
11-18 21:00 dframe-slice: ERROR    Failed.
Traceback (most recent call last):
  File "/opt/bbginger/current/bin/dframe-slice", line 27, in main
    df = pd.DataFrame.from_csv(fp_in)
  File "/usr/local/lib/python2.7/dist-packages/pandas/core/frame.py", line 1027, in from_csv
    infer_datetime_format=infer_datetime_format)
  File "/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.py", line 465, in parser_f
    return _read(filepath_or_buffer, kwds)
  File "/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.py", line 241, in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
  File "/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.py", line 557, in __init__
    self._make_engine(self.engine)
  File "/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.py", line 694, in _make_engine
    self._engine = CParserWrapper(self.f, **self.options)
  File "/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.py", line 1061, in __init__
    self._reader = _parser.TextReader(src, **kwds)
  File "pandas/parser.pyx", line 512, in pandas.parser.TextReader.__cinit__ (pandas/parser.c:4792)
ValueError: No columns to parse from file

Comments (0)

HTTPS SSH

You can clone a snippet to your computer for local editing. Learn more.