Snippets

markdiary Hatena::Let で自分の bookmarklet の Raw Code を取得、ファイル保存する

Created by markdiary last modified
#!/usr/bin/env perl
use strict;
use warnings;
use WWW::Mechanize;
use Web::Scraper;
use Encode;
use URI;

# Emit anything printed to STDOUT as UTF-8.
binmode STDOUT, ':utf8';

# Hatena account credentials -- placeholders, replace before running.
my $user = 'USER NAME';
my $pass = '?? YOUR PASSWORD ??';

# Site root of Hatena::Let, and the login page that redirects back to the
# user's own Hatena::Let page once authentication succeeds.
my $baseURL   = 'http://let.hatelabo.jp/';
my $login_url = "https://www.hatelabo.jp/login?backurl=http%3A%2F%2Flet.hatelabo.jp%2F${user}%2F";

# Set up the browser object; autocheck makes failed requests fatal.
my $mech = WWW::Mechanize->new(autocheck => 1);
$mech->agent_alias('Windows Mozilla');

# Fetch the login page and submit the first form with the credentials.
$mech->get($login_url);
my %credentials = (
    key      => $user,
    password => $pass,
);
$mech->submit_form(
    form_number => 1,
    fields      => \%credentials,
);


# Move to the top of the user's page.
$mech->follow_link (url => "http://let.hatelabo.jp/".$user."/");

#
# Page 2 onwards, 20 entries per page. (Do not comment out the user-page
# navigation above: it is the page the login redirects to.)
# $mech->follow_link (url_regex => qr/of=20$/i);
#

# Collect every link on the current page that points below /<user>/let/.
# \Q...\E keeps any regex metacharacters in the account name literal.
my $link = $mech->find_all_links(url_regex => qr#/\Q$user\E/let/#);

# Use the public url() accessor of WWW::Mechanize::Link rather than reaching
# into the object's internal array layout; the URLs are kept exactly as they
# appear in the page (not made absolute).
my @links = map { $_->url } @$link;

# Number of collected URLs; always in step with @links.
my $link_count = scalar(@links);

# Scrape every collected bookmarklet page and save its raw code to a file.
#
# Extracted fields:
#     rawcode  => text of <pre class="code-raw">
#     doctitle => text of the document <title>
#
if ($mech->success()) {
    # The scraper definition does not change between pages, so build it once.
    my $scraper = scraper {
        process 'pre.code-raw', 'rawcode'  => 'TEXT';
        process 'title',        'doctitle' => 'TEXT';
    };

    for my $href (@links) {
        next unless defined $href;

        # Resolve the link against the site root instead of concatenating
        # strings: this handles root-relative and absolute hrefs alike.
        my $uri = URI->new_abs($href, $baseURL);
        my $res = $scraper->scrape($uri);

        # A page without a raw-code block has nothing worth saving.
        my $rawcode = $res->{rawcode};
        unless (defined $rawcode) {
            warn "no raw code found at $uri; skipping\n";
            next;
        }

        outfile($rawcode, $res->{doctitle})
            or die "could not save raw code from $uri";
    }
}


# Save one bookmarklet's raw code as "<title>.txt" in the current directory.
#
# Arguments:
#   $rawcode  - character string holding the bookmarklet source; it is
#               written UTF-8 encoded. An undefined value produces an
#               empty file.
#   $filename - page title used as the file-name stem. The " - Hatena::Let"
#               suffix and one trailing dot are removed, whitespace becomes
#               "_", and path separators / control characters become "_".
#               An undefined or empty stem falls back to "untitled".
#
# Returns 1 on success, 0 if the file could not be opened, written or
# closed (a warning carrying the OS error is emitted in that case).
sub outfile {
    my ($rawcode, $filename) = @_;

    $filename = '' unless defined $filename;
    $filename =~ s/ - Hatena::Let//;
    $filename =~ s/\s/_/g;
    $filename =~ s/\.$//;

    # The title comes from a remote page: neutralise path separators and
    # control characters so the file cannot land outside the current
    # directory.
    $filename =~ s{[/\\\x00-\x1f]}{_}g;
    $filename = 'untitled' if $filename eq '';

    my $path = "$filename.txt";

    # Three-argument open with a lexical handle; the layer does the UTF-8
    # encoding, so the mode can never be influenced by the file name.
    my $fh;
    unless (open $fh, '>:encoding(UTF-8)', $path) {
        warn "cannot open $path for writing: $!\n";
        return 0;
    }

    my $ok = print {$fh} (defined $rawcode ? $rawcode : '');

    # Buffered write errors only surface at close, so check that as well.
    $ok = close($fh) && $ok;
    unless ($ok) {
        warn "cannot write $path: $!\n";
        return 0;
    }

    return 1;
}

__END__

- $user : はてなアカウント
- $pass : ログインパスワード

Comments (0)

HTTPS SSH

You can clone a snippet to your computer for local editing. Learn more.