Commits

Shlomi Fish committed ea732c2

Added the copied skeleton of WWW-Search-AOL - to be adapted from
WWW-Search-MSN.

  • Participants
  • Parent commits 178b357

Comments (0)

Files changed (9)

modules/WWW-Search-AOL/Build.PL

+use strict;
+use warnings;
+use Module::Build;
+
+# Describe the distribution to Module::Build: name, author, version source,
+# prerequisites and clean-up patterns.  The Build script itself is written
+# by the create_build_script() call at the bottom.
+my $builder = Module::Build->new(
+    module_name         => 'WWW::Search::MSN',
+    # The README and the module's POD both state the MIT X11 licence, so the
+    # distribution metadata declares 'mit' to agree with them.
+    license             => 'mit',
+    dist_author         => 'Shlomi Fish <shlomif@iglu.org.il>',
+    dist_version_from   => 'lib/WWW/Search/MSN.pm',
+    requires => {
+        'Test::More' => 0,
+        'WWW::Search' => 2.523,
+        'WWW::Search::Test' => 2.265,
+        'HTML::TreeBuilder' => 0,
+        'Scalar::Util' => 0,
+    },
+    add_to_cleanup      => [ 'WWW-Search-MSN-*' ],
+    # Also generate a Makefile.PL for installers that expect one.
+    create_makefile_pl => 'traditional',
+);
+
+$builder->create_build_script();

modules/WWW-Search-AOL/Changes

+Revision history for WWW-Search-MSN
+
+0.0101  Sun Apr 16 14:40:22 IDT 2006
+        - Fixed the POD from its module-starter leftovers.
+        - Added a note about the Subversion repository to the POD.
+
+0.01    Sat Apr 15 21:55:05 IDT 2006
+        First version, released on an unsuspecting world. Works from what
+        I tested.

modules/WWW-Search-AOL/MANIFEST

+Build.PL
+Changes
+lib/WWW/Search/MSN.pm
+MANIFEST
+META.yml # Will be created by "make dist"
+README
+t/00-load.t
+t/01basic.t
+t/pod-coverage.t
+t/pod.t
+Makefile.PL

modules/WWW-Search-AOL/README

+WWW-Search-MSN
+
+This is a WWW::Search backend for http://search.msn.com/. It was written
+and is maintained by Shlomi Fish and was funded by the following companies:
+
+* deviateMEDIA - http://www.deviatemedia.com/
+* Red Tree Systems - http://www.redtreesystems.com/
+
+INSTALLATION
+
+To install this module, run the following commands:
+
+    perl Build.PL
+    ./Build
+    ./Build test
+    ./Build install
+
+You should probably use your CPAN or CPANPLUS interface like so:
+
+$ perl -MCPAN -e 'install WWW::Search::MSN'
+
+or
+
+$ perl -MCPANPLUS -e 'install WWW::Search::MSN'
+
+COPYRIGHT AND LICENCE
+
+Copyright (C) 2006 Shlomi Fish
+
+This program is released under the MIT X11 license.
+

modules/WWW-Search-AOL/lib/WWW/Search/MSN.pm

+package WWW::Search::MSN;
+
+use warnings;
+use strict;
+
+require WWW::Search;
+
+use WWW::SearchResult;
+use Encode;
+
+use Scalar::Util ();
+
+=head1 NAME
+
+WWW::Search::MSN - backend for searching search.msn.com
+
+=head1 VERSION
+
+Version 0.0101
+
+=cut
+
+our $VERSION = '0.0101';
+
+# This backend is a subclass of WWW::Search.  The package variable is
+# declared with "our" (as $VERSION is) rather than the obsolete "use vars".
+our @ISA = ('WWW::Search');
+
+=head1 SYNOPSIS
+
+This module provides a backend of L<WWW::Search> to search using 
+L<http://search.msn.com/>.
+
+    use WWW::Search;
+
+    my $oSearch = WWW::Search->new("MSN");
+
+=head1 FUNCTIONS
+
+All of these functions are internal to the module and are of no concern
+to the user.
+
+=head2 native_setup_search()
+
+This function sets up the search.
+
+=cut
+
+sub native_setup_search
+{
+    my ($self, $native_query, $opts) = @_;
+
+    # Per-search state: page size, a 'non-robot' user agent, and the index
+    # of the next hit to retrieve.
+    $self->{'_hits_per_page'} = 10;
+    $self->user_agent('non-robot');
+    $self->{'_next_to_retrieve'} = 1;
+
+    # The endpoint pieces keep any true value that is already present.
+    $self->{'search_base_url'}  ||= 'http://search.msn.com';
+    $self->{'search_base_path'} ||= '/results.aspx';
+
+    # Seed the CGI parameters only when the object has none yet.
+    unless (defined $self->{'_options'})
+    {
+        $self->{'_options'} = { 'q' => $native_query, 'FORM' => "PORE" };
+    }
+    my $cgi_params = $self->{'_options'};
+
+    # Every defined option is stored: generic ones on the object itself,
+    # all others among the CGI parameters.  Undefined values are ignored.
+    if (defined $opts)
+    {
+        foreach my $name (keys %$opts)
+        {
+            my $target = WWW::Search::generic_option($name)
+                ? $self
+                : $cgi_params;
+            my $value = $opts->{$name};
+            $target->{$name} = $value if defined $value;
+        }
+    }
+
+    # URL of the first results page: base URL, path, then the query string.
+    $self->{'_next_url'} = join('',
+        $self->{'search_base_url'},
+        $self->{'search_base_path'},
+        '?',
+        $self->hash_to_cgi_string($cgi_params),
+    );
+    $self->{'_MSN_first_retrieve_call'} = 1;
+}
+
+=head2 parse_tree()
+
+This function parses the tree and fetches the results.
+
+=cut
+
+sub parse_tree
+{
+    # Arguments: the search object and the parsed HTML tree of one results
+    # page.  Pushes one WWW::SearchResult per usable hit onto
+    # $self->{'cache'}, records the "Next" page URL in $self->{'_next_url'}
+    # when the page has one, and returns the number of hits pushed.
+    #
+    # Every look_down() result is checked before use, so a page whose layout
+    # lacks an expected element yields fewer (or zero) hits instead of a
+    # fatal method call on undef.
+    my ($self, $tree) = @_;
+
+    # The approximate result count is read from the first page only.
+    if ($self->{'_MSN_first_retrieve_call'})
+    {
+        $self->{'_MSN_first_retrieve_call'} = undef;
+
+        my $header_div = $tree->look_down("_tag", "div", "id", "header");
+        my $h5 = defined($header_div)
+            ? $header_div->look_down("_tag", "h5")
+            : undef;
+
+        if (defined($h5)
+            && $h5->as_text() =~ m{^\s*Page\s*\d+\s*of\s*([\d,]+)\s*results})
+        {
+            my $n = $1;
+            # Strip the thousands separators, e.g. "1,234" -> "1234".
+            $n =~ tr/,//d;
+            $self->approximate_result_count($n);
+        }
+    }
+
+    my $results_div = $tree->look_down("_tag", "div", "id", "results");
+    my $results_ul = defined($results_div)
+        ? $results_div->look_down("_tag", "ul")
+        : undef;
+
+    # content_list() can return plain text strings next to element objects,
+    # so every tag() test below is guarded by blessed().
+    my @items;
+    if (defined($results_ul))
+    {
+        @items = (grep { Scalar::Util::blessed($_) && ($_->tag() eq "li") } $results_ul->content_list());
+    }
+
+    my $hits_found = 0;
+    foreach my $item (@items)
+    {
+        my $h3 = $item->look_down("_tag", "h3");
+        next if !defined($h3);
+
+        my ($a_tag) = (grep { Scalar::Util::blessed($_) && ($_->tag() eq "a") } $h3->content_list());
+        next if !defined($a_tag);
+
+        my $url = $a_tag->attr("href");
+        # A hit without a link target is of no use to the caller.
+        next if !defined($url);
+
+        my ($p_tag) = (grep { Scalar::Util::blessed($_) && ($_->tag() eq "p") } $item->content_list());
+
+        my $hit = WWW::SearchResult->new();
+        $hit->add_url($url);
+        $hit->title($a_tag->as_text());
+        $hit->description(defined($p_tag) ? $p_tag->as_text() : "");
+        push @{$self->{'cache'}}, $hit;
+        $hits_found++;
+    }
+
+    # Get the next URL
+    {
+        my $pagination_div = $tree->look_down("_tag", "div", "id", "pagination_bottom");
+        my @li_tags = defined($pagination_div)
+            ? $pagination_div->look_down("_tag", "li")
+            : ();
+        foreach my $li (@li_tags)
+        {
+            my ($a_tag) = (grep { Scalar::Util::blessed($_) && ($_->tag() eq "a") } $li->content_list());
+            next if !defined($a_tag);
+
+            if ($a_tag->as_text() eq "Next")
+            {
+                my $href = $a_tag->attr('href');
+                # Resolve the link against the URL of the page just parsed.
+                if (defined($href))
+                {
+                    $self->{'_next_url'} =
+                        $self->absurl(
+                            $self->{'_prev_url'},
+                            $href
+                        );
+                }
+                last;
+            }
+        }
+    }
+    return $hits_found;
+}
+
+=head2 preprocess_results_page()
+
+The purpose of this function is to decode the HTML text as returned by
+search.msn.com as UTF-8.
+
+=cut
+
+sub preprocess_results_page
+{
+    # Takes the raw page contents and hands back the same text decoded
+    # from UTF-8.
+    my ($self, $contents) = @_;
+
+    my $decoded = decode('UTF-8', $contents);
+    return $decoded;
+}
+
+=head1 AUTHOR
+
+Shlomi Fish, C<< <shlomif@iglu.org.il> >>
+
+Funded by L<http://www.deviatemedia.com/> and
+L<http://www.redtreesystems.com/>.
+
+=head1 BUGS
+
+Please report any bugs or feature requests to
+C<bug-www-search-msn@rt.cpan.org>, or through the web interface at
+L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=WWW-Search-MSN>.
+I will be notified, and then you'll automatically be notified of progress on
+your bug as I make changes.
+
+=head1 ACKNOWLEDGEMENTS
+
+Funded by L<http://www.deviatemedia.com/> and
+L<http://www.redtreesystems.com/>.
+
+=head1 DEVELOPMENT
+
+Source code is version-controlled in a Subversion repository in Berlios:
+
+L<http://svn.berlios.de/svnroot/repos/web-cpan/WWW-Search/trunk/>
+
+One can find the most up-to-date version there.
+
+=head1 COPYRIGHT & LICENSE
+
+Copyright 2006 Shlomi Fish, all rights reserved.
+
+This program is released under the following license: MIT X11 (a BSD-style
+license).
+
+=cut
+
+1; # End of WWW::Search::MSN

modules/WWW-Search-AOL/t/00-load.t

+#!perl -T
+
+# Smoke test: the module must load cleanly (under taint mode, per the -T
+# switch above).
+use strict;
+use warnings;
+
+use Test::More tests => 1;
+
+BEGIN {
+    # TEST
+    use_ok( 'WWW::Search::MSN' );
+}
+

modules/WWW-Search-AOL/t/01basic.t

+#!/usr/bin/perl
+
+# End-to-end test of the backend: runs one query through
+# WWW::Search::Test::count_results() and checks the hit count, the
+# approximate result count, and the URL and title of every hit.
+
+use strict;
+use warnings;
+
+use Test::More tests => 108;
+
+# TEST
+BEGIN { use_ok('WWW::Search'); };
+# TEST
+BEGIN { use_ok('WWW::Search::Test'); };
+# TEST
+BEGIN { use_ok('WWW::Search::MSN'); };
+
+# Direct method call instead of the indirect-object form "new WWW::Search".
+$WWW::Search::Test::oSearch = WWW::Search->new('MSN');
+# TEST
+isa_ok ($WWW::Search::Test::oSearch, "WWW::Search");
+$WWW::Search::Test::oSearch->env_proxy('yes');
+
+# Both switches are passed straight through to count_results().
+my $debug = 0;
+my $dump  = 0;
+
+my $count =
+    WWW::Search::Test::count_results(
+        'normal',
+        'firs' . 't co' . 'me f' . 'irst se' . 'rved',
+        0,
+        49,
+        $debug,
+        $dump
+    );
+
+# TEST
+ok (($WWW::Search::Test::oSearch->approximate_result_count() =~ /^\d+$/),
+    "approximate_result_count is a number");
+
+# TEST
+ok (($WWW::Search::Test::oSearch->approximate_result_count() > 0),
+    "approximate_result_count is greater than 0");
+
+# TEST
+is ($count, 50, "Checking for count");
+
+my @results = $WWW::Search::Test::oSearch->results();
+
+# TEST
+is (scalar(@results), 50, "Checking for results");
+
+# Two assertions per hit; with the eight above that makes the 108 planned.
+# TEST*2*50
+foreach my $r (@results)
+{
+    like ($r->url(), qr{\Ahttp://},
+        'Result URL is http');
+    ok ((length($r->title()) > 0), "Has a non-empty title");
+}
+

modules/WWW-Search-AOL/t/pod-coverage.t

+#!perl -T
+
+# Checks that every public function has POD; skipped entirely when
+# Test::Pod::Coverage 1.04 cannot be loaded.
+use strict;
+use warnings;
+
+use Test::More;
+
+# String eval so that a missing module sets $@ instead of aborting the script.
+eval "use Test::Pod::Coverage 1.04";
+plan skip_all => "Test::Pod::Coverage 1.04 required for testing POD coverage" if $@;
+all_pod_coverage_ok();

modules/WWW-Search-AOL/t/pod.t

+#!perl -T
+
+# Checks the distribution's POD for syntax errors; skipped entirely when
+# Test::Pod 1.14 cannot be loaded.
+use strict;
+use warnings;
+
+use Test::More;
+
+# String eval so that a missing module sets $@ instead of aborting the script.
+eval "use Test::Pod 1.14";
+plan skip_all => "Test::Pod 1.14 required for testing POD" if $@;
+all_pod_files_ok();