Source

hn-wh / posts.py

Full commit
# Maps a month key in 'YYYYMM' form to the URL of a Hacker News item for that
# month (presumably the monthly "Who is hiring?" thread, going by the project
# name -- confirm). Entries run from 2011-01 to 2013-11, newest first.
# The URL schemes are a mix of http and https, kept exactly as recorded.
posts = {
    '201311': 'https://news.ycombinator.com/item?id=6653437',
    '201310': 'http://news.ycombinator.com/item?id=6475879',
    '201309': 'https://news.ycombinator.com/item?id=6310234',
    '201308': 'http://news.ycombinator.com/item?id=6139927',
    '201307': 'http://news.ycombinator.com/item?id=5970187',
    '201306': 'http://news.ycombinator.com/item?id=5803764',
    '201305': 'http://news.ycombinator.com/item?id=5637663',
    '201304': 'http://news.ycombinator.com/item?id=5472746',
    '201303': 'https://news.ycombinator.com/item?id=5304169',
    '201302': 'https://news.ycombinator.com/item?id=5150834',
    '201301': 'http://news.ycombinator.com/item?id=4992617',

    '201212': 'http://news.ycombinator.com/item?id=4857714',
    '201211': 'http://news.ycombinator.com/item?id=4727241',
    '201210': 'http://news.ycombinator.com/item?id=4596375',
    '201209': 'https://news.ycombinator.com/item?id=4463689',
    '201208': 'http://news.ycombinator.com/item?id=4323597',
    '201207': 'http://news.ycombinator.com/item?id=4184755',
    '201206': 'http://news.ycombinator.com/item?id=4053076',
    '201205': 'http://news.ycombinator.com/item?id=3913997',
    '201204': 'http://news.ycombinator.com/item?id=3783657',
    '201203': 'http://news.ycombinator.com/item?id=3652041',
    '201202': 'http://news.ycombinator.com/item?id=3537881',
    '201201': 'http://news.ycombinator.com/item?id=3412900',

    '201112': 'http://news.ycombinator.com/item?id=3300290',
    '201111': 'https://news.ycombinator.com/item?id=3181796',
    '201110': 'http://news.ycombinator.com/item?id=3060221',
    '201109': 'http://news.ycombinator.com/item?id=2949787',
    '201108': 'http://news.ycombinator.com/item?id=2831646',
    '201107': 'http://news.ycombinator.com/item?id=2719028',
    '201106': 'http://news.ycombinator.com/item?id=2607052',
    '201105': 'https://news.ycombinator.com/item?id=2503204',
    '201104': 'http://news.ycombinator.com/item?id=2396027',
    # Note: there seem to be 4 posts for March 2011; this is the one with the
    # most comments.
    '201103': 'http://news.ycombinator.com/item?id=2270790',
    '201102': 'http://news.ycombinator.com/item?id=2161360',
    '201101': 'http://news.ycombinator.com/item?id=2057704',
}


def post_file(date):
    """Build the relative path of the cached HTML file for ``date``.

    ``date`` is formatted into the file name as-is, so any value with a
    string representation works (e.g. ``'201311'`` -> ``'posts/201311.html'``).
    """
    path_template = 'posts/{}.html'
    return path_template.format(date)


def get(date):
    """Return the cached HTML page for ``date`` as text.

    The page is read from the file named by ``post_file(date)``, which the
    download script at the bottom of this module creates.

    Raises ``OSError`` (typically ``FileNotFoundError``) if the file cannot be
    opened, and ``UnicodeDecodeError`` if its content is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 to match how the download script writes the
    # file, rather than depending on the platform's default encoding.
    with open(post_file(date), encoding='utf-8') as fo:
        return fo.read()


if __name__ == '__main__':
    # Script mode: download every page listed in ``posts`` that is not cached
    # on disk yet. Existing files are left untouched.
    import requests
    from os import makedirs
    from os.path import dirname, isfile

    for date, url in posts.items():
        # Flush so the month label is visible while the download is running.
        print(date, end=' ', flush=True)
        filename = post_file(date)
        if isfile(filename):
            print('[SKIP]')
            continue

        try:
            # requests never times out on its own; bound the wait so a
            # stalled connection cannot hang the script forever.
            resp = requests.get(url, timeout=30)
        except requests.RequestException as err:
            raise SystemExit('error: {} - {}'.format(date, err))
        if not resp.ok:
            raise SystemExit('error: {} - {}'.format(date, resp.reason))

        # Create the cache directory on first use; without it open() fails.
        folder = dirname(filename)
        if folder:
            makedirs(folder, exist_ok=True)

        # Write as UTF-8 regardless of locale so get() can always read it back.
        with open(filename, 'w', encoding='utf-8') as out:
            out.write(resp.content.decode('utf8'))
        print('[OK]')