Created by
markdiary
last modified
| #!/usr/bin/env perl
use strict;
use warnings;
use WWW::Mechanize;
use Web::Scraper;
use Encode;
use URI;
binmode(STDOUT, ":utf8");
my $user = 'USER NAME';              # Hatena account name (fill in)
my $pass = '?? YOUR PASSWORD ??';    # Hatena login password (fill in)
my $baseURL = 'http://let.hatelabo.jp/';
# Login URL; backurl is the percent-encoded Hatena::Let user page, so a
# successful login lands directly on the user's script list.
my $login_url = 'https://www.hatelabo.jp/login?backurl=http%3A%2F%2Flet.hatelabo.jp%2F' . $user . '%2F';
# Initialize Mechanize; autocheck => 1 makes every request die on HTTP
# failure, so the code below can assume each page loaded.
my $mech = WWW::Mechanize->new(autocheck => 1);
$mech->agent_alias("Windows Mozilla");

# Log in through the Hatena login form (first form on the page).
$mech->get($login_url);
$mech->submit_form(
    form_number => 1,
    fields      => {
        key      => $user,
        password => $pass,
    },
);
# Move to the user's top page (do not comment this out: it is the page
# the login's backurl redirects through).
$mech->follow_link(url => "http://let.hatelabo.jp/" . $user . "/");
#
# Pages 2 and later come in batches of 20; uncomment to start from offset 20.
# $mech->follow_link (url_regex => qr/of=20$/i);
#
# Collect every link to one of this user's scripts.  Use the documented
# WWW::Mechanize::Link->url accessor; the old code reached into the
# object's internal array slots ($link->[$i][5]{href}), which is fragile.
my $link = $mech->find_all_links(url_regex => qr#/$user/let/#);
my $link_count = scalar(@$link);
my @links = map { $_->url } @$link;
# Scraping:
#   rawcode  => text of <pre class="code-raw"> (the script source)
#   doctitle => the document <title> (becomes the output file name)
if ($mech->success()) {
    # The scraper is loop-invariant; build it once instead of per page.
    my $scraper = scraper {
        process 'pre.code-raw', 'rawcode'  => 'TEXT';
        process 'title',        'doctitle' => 'TEXT';
    };
    for my $path (@links) {
        # URI->new, not "new URI" (indirect object syntax is error-prone).
        my $uri = URI->new($baseURL . $path);
        my $res = $scraper->scrape($uri);
        my $rawcode  = $res->{rawcode};
        my $filename = $res->{doctitle};
        # Plain call, not &outfile (the & form bypasses prototypes and,
        # without parens, would leak the caller's @_).
        outfile($rawcode, $filename) or die 'error';
    }
}
# Write one scraped script to "<sanitized title>.txt" in the current
# directory, UTF-8 encoded.
#   $rawcode  - script source text (decoded characters)
#   $filename - page title; " - Hatena::Let" suffix is stripped
# Returns 1 on success, 0 on failure (the caller dies on 0).
sub outfile {
    my ($rawcode, $filename) = @_;

    # A page missing the expected elements yields undefs; fail cleanly
    # instead of warning and writing an empty/garbage file.
    return 0 unless defined $rawcode && defined $filename;

    # Derive a safe file name from the (untrusted) page title.
    $filename =~ s/ - Hatena::Let//;    # strip the site suffix
    $filename =~ s/\s/_/g;              # whitespace -> underscores
    $filename =~ s/\.$//;               # drop a trailing dot
    $filename =~ tr{/\\}{__};           # no path separators from scraped data

    # Three-arg open with a lexical handle.  The old code used a bareword
    # handle with 2-arg open (mode-injection risk) and "die return 0",
    # where the return executed first and the die was unreachable.
    open my $fh, '>', "$filename.txt" or return 0;
    print {$fh} encode('utf-8', $rawcode);
    # Buffered write errors only surface at close, so check it too.
    close $fh or return 0;
    return 1;
}
__END__
- $user : Hatena account name
- $pass : login password
|