Source

yelp-challenge / json2h5.py

Full commit
#!/usr/bin/env python2

import pandas as pd
import json

def load_records(filename):
    """Load a line-delimited JSON file into a pandas DataFrame.

    Every non-blank line of the file is parsed as one JSON document and
    becomes one record of the resulting frame, in file order.

    Parameters
    ----------
    filename : str
        Path of a file containing one JSON document per line.

    Returns
    -------
    pandas.DataFrame
        Frame built with ``DataFrame.from_records`` from the parsed lines.

    Raises
    ------
    IOError / OSError
        If the file cannot be opened.
    ValueError
        If a non-blank line is not valid JSON.
    """
    with open(filename) as fo:
        # Skip whitespace-only lines (e.g. a trailing empty line) rather than
        # letting json.loads fail on them and abort the whole load.
        records = [json.loads(line) for line in fo if line.strip()]
    # The file is fully consumed above, so build the frame after closing it.
    return pd.DataFrame.from_records(records)

def load(name):
    """Load one table of the Yelp Phoenix academic dataset.

    ``name`` is substituted into the dataset's file-name pattern (a path
    relative to the current working directory) and the resulting file is
    read with ``load_records``, whose result is returned unchanged.
    """
    return load_records(
        'yelp_phoenix_academic_dataset/yelp_academic_dataset_{}.json'.format(name)
    )



# Convert each dataset table to a DataFrame and save it under its own key
# in a single HDF5 store named 'yelp.h5'.
store = pd.HDFStore('yelp.h5')
try:
    for name in ('user', 'review', 'business', 'checkin'):
        store[name] = load(name)
finally:
    # Close the store even when loading or writing a table fails, so the
    # HDF5 file handle is not left open.
    store.close()