Snippets

Hiroaki Nakamura: filter to convert Delicious bookmark HTML to CSV

Created by Hiroaki Nakamura
#!/usr/bin/env python
from datetime import datetime
from html.parser import HTMLParser
import csv
import itertools
import sys

def format_unixtime(t):
    """Format a Unix timestamp as a ``YYYY-MM-DDTHH:MM:SS`` string.

    The timestamp is interpreted by ``datetime.fromtimestamp``, i.e. in the
    local timezone of the machine running the script.

    Args:
        t: Seconds since the epoch, as an int or a string of digits. May be
            ``None`` or ``''`` when the bookmark has no such attribute.

    Returns:
        The formatted timestamp, or ``''`` when ``t`` is ``None`` or empty.

    Raises:
        ValueError: If ``t`` is a non-empty string that is not an integer.
    """
    # Bookmarks are not guaranteed to carry every timestamp attribute;
    # int(None) would raise TypeError and abort the whole export.
    if t is None or t == '':
        return ''
    dt = datetime.fromtimestamp(int(t))
    return dt.strftime('%Y-%m-%dT%H:%M:%S')

class MyHTMLParser(HTMLParser):
    """Write one CSV row per ``<a>`` element found in the fed HTML.

    Each row has the columns: href, link text, private, add_date,
    last_visit, tags. The two date columns are passed through
    ``format_unixtime``; attributes that are absent are written as empty
    fields.

    The link text is buffered and the row is written when the anchor ends,
    so a title delivered in several ``handle_data`` calls (input fed in
    chunks, nested markup) still yields exactly one row. An anchor without
    any text yields a row with an empty title.
    """

    def __init__(self, csv_file):
        """Create a parser that writes its rows to ``csv_file``.

        Args:
            csv_file: A writable text file object handed to ``csv.writer``.
        """
        super().__init__()
        self.in_a = False
        self.attrs = {}
        self.csv_writer = csv.writer(csv_file)
        # Text fragments collected for the anchor currently being parsed.
        self._text_parts = []

    def handle_starttag(self, tag, attrs):
        """Start collecting a new bookmark when an ``<a>`` tag opens."""
        if tag != 'a':
            return
        if self.in_a:
            # The previous anchor was never closed; emit it rather than
            # silently merging it into this one.
            self._write_row()
        self.in_a = True
        # Replace (not update) the attributes so values from an earlier
        # anchor cannot leak into one that lacks them.
        self.attrs = dict(attrs)
        self._text_parts = []

    def handle_endtag(self, tag):
        """Emit the collected bookmark when its ``</a>`` tag is seen."""
        if tag == 'a' and self.in_a:
            self._write_row()
            self.in_a = False

    def handle_data(self, data):
        """Buffer text that appears inside the current anchor."""
        if self.in_a:
            self._text_parts.append(data)

    def close(self):
        """Process remaining buffered input and emit an unclosed anchor."""
        super().close()
        if self.in_a:
            self._write_row()
            self.in_a = False

    @staticmethod
    def _format_time(value):
        """Return ``value`` formatted via ``format_unixtime``, or ``''``."""
        if value is None or value == '':
            return ''
        return format_unixtime(value)

    def _write_row(self):
        """Write the CSV row for the anchor currently being collected."""
        attrs = self.attrs
        self.csv_writer.writerow([
            attrs.get('href'),
            ''.join(self._text_parts),
            attrs.get('private'),
            self._format_time(attrs.get('add_date')),
            self._format_time(attrs.get('last_visit')),
            attrs.get('tags'),
        ])
        self._text_parts = []

def main():
    """Read bookmark HTML from stdin and write the CSV rows to stdout."""
    parser = MyHTMLParser(sys.stdout)
    for chunk in sys.stdin:
        parser.feed(chunk)
    # feed() may hold back trailing input while waiting for more data;
    # close() forces it to be processed as if followed by end-of-file.
    parser.close()


if __name__ == '__main__':
    main()

Comments (0)

HTTPS SSH

You can clone a snippet to your computer for local editing. Learn more.