Shlomi Fish avatar Shlomi Fish committed 8b2d32e

Updated to the new search.aol.com and WWW::Search. Bumped the ver to 0.0103.

Comments (0)

Files changed (4)

modules/WWW-Search-AOL/Build.PL

 
 my $builder = Module::Build->new(
     module_name         => 'WWW::Search::AOL',
-    license             => 'bsd',
+    license             => 'mit',
     dist_author         => 'Shlomi Fish <shlomif@iglu.org.il>',
     dist_version_from   => 'lib/WWW/Search/AOL.pm',
     requires => {
         'Test::More' => 0,
-        'WWW::Search' => 2.523,
-        'WWW::Search::Test' => 2.265,
+        'WWW::Search' => 2.565,
+        'WWW::Search::Test' => 2.287,
         'HTML::TreeBuilder' => 0,
         'Scalar::Util' => 0,
     },

modules/WWW-Search-AOL/Changes

 Revision history for WWW-Search-AOL
 
+0.0103 Wed Mar 25 18:03:37 IST 2009
+    - Updated to the recent markup as returned by search.aol.com
+    - Updated to the recent UTF-8 handling of WWW::Search, and now
+    depend on that version in Build.PL.
+
 0.0102 Sat May  6 19:18:31 IDT 2006
     More searches with no results fixes.
 

modules/WWW-Search-AOL/lib/WWW/Search/AOL.pm

 
 =head1 VERSION
 
-Version 0.01
+Version 0.0103
 
 =cut
 
-our $VERSION = '0.0102';
+our $VERSION = '0.0103';
 
 use vars qw(@ISA);
 
     {
         $self->{'_AOL_first_retrieve_call'} = undef;
 
-        my $nohit_div = $tree->look_down("_tag", "div", "id", "nohit");
+        my $nohit_div = $tree->look_down("_tag", "div", "class", "NH");
 
         if (defined($nohit_div))
         {
             }
         }
 
-        my $wr_div = $tree->look_down("_tag", "div", "id", "wr");
+        my $wr_div = $tree->look_down("_tag", "div", "class", "BB");
 
         if (!defined($wr_div))
         {
             return $self->_no_hits();
         }
 
-        if ($wr_div->as_text() =~ m{page 1 of (\d+)})
+        # A word separator that includes whitespace and &nbsp; (\x{a0}).
+        my $word_sep = qr/[\s\x{a0}]+/;
+
+        if (my ($n) = 
+            (
+                $wr_div->as_text() =~ 
+                m/of${word_sep}about${word_sep}([\d,]+)/
+            )
+        )
         {
-            my $n = $1;
-            $self->approximate_result_count($n*10);
+            $n =~ tr/,//d;
+            $self->approximate_result_count($n);
         }
     }
 
 
 =cut
 
-    my $wr_div = $tree->look_down("_tag", "div", "id", "wr");
-    my $r_web_div = $wr_div->look_down("_tag", "div", "class", "r-web");
-    my @results_divs = $r_web_div->look_down("_tag", "div", "id", qr{^r\d+$});
+    my $r_web_div = $tree->look_down("_tag", "ul", "content", "MSL");
+    my @results_divs = $r_web_div->look_down("_tag", "li", "about", qr{^r\d+$});
     my $hits_found = 0;
     foreach my $result (@results_divs)
     {
-        if ($result->attr('id') !~ m/^r(\d+)$/)
+        if ($result->attr('about') !~ m/^r(\d+)$/)
         {
             die "Broken Parsing. Please contact the author to fix it.";
         }
         my $id_num = $1;
-        my $url_tag = $result->look_down("_tag", "b", "id", "ldurl$id_num");
-        my $desc_tag = $result->look_down("_tag", "p", "id", "ldesc$id_num");
-        my $a_tag = $result->look_down("_tag", "a", "id", "lrurl$id_num");
+        my $desc_tag = $result->look_down("_tag", "p", "property", "f:desc");
+        my $a_tag = $result->look_down("_tag", "a", "class", "find");
         my $hit = WWW::SearchResult->new();
-        $hit->add_url($url_tag->as_text());
+        $hit->add_url($a_tag->attr("href"));
         $hit->description($desc_tag->as_text());
         $hit->title($a_tag->as_text());
         push @{$self->{'cache'}}, $hit;
 
     # Get the next URL
     {
-        my $pagination_div = $tree->look_down("_tag", "div", "class", "pagination");
-        my @a_tags = $pagination_div->look_down("_tag", "a");
-        # The reverse() is because it seems the "next" link is at the end.
+        my $span_next_page = $tree->look_down("_tag", "span", "class", "gspPageNext");
+        my @a_tags = $span_next_page->look_down("_tag", "a");
+        # The reverse() is there because it seems the "next" link is at 
+        # the end.
         foreach my $a_tag (reverse(@a_tags))
         {
-            if ($a_tag->as_text() =~ "next")
+            if ($a_tag->as_text() =~ "Next")
             {
                 $self->{'_next_url'} =
                     $self->absurl(
     return $hits_found;
 }
 
+
+=begin Removed
+
 =head2 preprocess_results_page()
 
-The purpose of this function is to decode the HTML text as returned by
-search.aol.com as UTF-8.
-
-=cut
+The purpose of this function was to decode the HTML text as returned by
+search.aol.com as UTF-8. But it seems recent versions of WWW::Search already
+have a similar mechanism.
 
 sub preprocess_results_page
 {
     return decode('UTF-8', $contents);
 }
 
+=end Removed
+
+=cut
+
 =head1 AUTHOR
 
 Shlomi Fish, C<< <shlomif@iglu.org.il> >>

modules/WWW-Search-AOL/t/01basic.t

 # TEST*2*50
 foreach my $r (@results)
 {
-    like ($r->url(), qr{\Ahttp://},
+    like ($r->url(), qr{\A(?:http|https)?://},
         'Result URL is http');
     ok ((length($r->title()) > 0), "Has a non-empty title");
 }
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.