# WWW-Search Backends / modules / WWW-Search-MSN / lib / WWW / Search /

package WWW::Search::MSN;

use warnings;
use strict;

require WWW::Search;

use WWW::SearchResult;
use Encode;

use Scalar::Util ();

=head1 NAME

WWW::Search::MSN - backend for searching MSN Search

=head1 VERSION

Version 0.0102

=cut

our $VERSION = '0.0102';

# Inherit the generic search machinery from WWW::Search, which is loaded by
# the "require WWW::Search;" statement near the top of this file.
our @ISA = ('WWW::Search');

=head1 SYNOPSIS

This module provides a backend of L<WWW::Search> to search using the MSN
search engine.

    use WWW::Search;

    my $oSearch = WWW::Search->new("MSN");

=head1 FUNCTIONS

All of these functions are internal to the module and are of no concern
to the user.

=cut

=head2 native_setup_search()

This function sets up the search.

It receives the native query string and an optional hash reference of
options, and stores the URL of the first results page to fetch in
C<< $self->{'_next_url'} >>.

=cut

sub native_setup_search
{
    my ($self, $native_query, $opts) = @_;

    $self->{'_hits_per_page'} = 10;

    $self->{'_next_to_retrieve'} = 1;

    # NOTE(review): the default base URL is empty, so the generated URL is
    # relative unless the caller supplies 'search_base_url' - confirm.
    $self->{'search_base_url'} ||= '';
    $self->{'search_base_path'} ||= '/results.aspx';

    # Seed the CGI parameters only once; an existing '_options' hash is
    # reused as-is.
    if (!defined($self->{'_options'}))
    {
        $self->{'_options'} = +{
            'q' => $native_query,
            'FORM' => "PORE",
        };
    }
    my $self_options = $self->{'_options'};

    if (defined($opts))
    {
        foreach my $k (keys %$opts)
        {
            # Options with an undefined value are ignored.
            next if !defined($opts->{$k});

            if (WWW::Search::generic_option($k))
            {
                # Generic options are stored on the search object itself.
                $self->{$k} = $opts->{$k};
            }
            else
            {
                # Everything else becomes a CGI parameter of the query URL.
                $self_options->{$k} = $opts->{$k};
            }
        }
    }

    $self->{'_next_url'} = $self->{'search_base_url'} . $self->{'search_base_path'} . '?' . $self->hash_to_cgi_string($self_options);
    $self->{'_MSN_first_retrieve_call'} = 1;
}

=head2 parse_tree()

This function parses the tree and fetches the results.

Every result found is pushed onto C<< $self->{'cache'} >> as a
L<WWW::SearchResult> object, and the number of results found on the page is
returned. If the page has a "Next" link, its URL is stored in
C<< $self->{'_next_url'} >>.

=cut

sub parse_tree
{
    my ($self, $tree) = @_;

    if ($self->{'_MSN_first_retrieve_call'})
    {
        # Only the first page is examined for the total number of results.
        $self->{'_MSN_first_retrieve_call'} = undef;

        my $header_div = $tree->look_down("_tag", "div", "id", "header");
        my $h5 = $header_div ? $header_div->look_down("_tag", "h5") : undef;

        if ($h5 && ($h5->as_text() =~ m{^\s*Page\s*\d+\s*of\s*([\d,]+)\s*results}))
        {
            my $n = $1;
            # Strip the thousands separators, e.g. "1,234" becomes "1234".
            $n =~ tr/,//d;
            $self->approximate_result_count($n);
        }
    }

    my $results_div = $tree->look_down("_tag", "div", "id", "results");
    my $results_ul = $results_div ? $results_div->look_down("_tag", "ul") : undef;

    # The blessed() checks skip children that are not element objects
    # (presumably plain text segments - see the HTML::Element documentation).
    my @items;
    if ($results_ul)
    {
        @items = (grep { Scalar::Util::blessed($_) && ($_->tag() eq "li") } $results_ul->content_list());
    }

    my $hits_found = 0;
    foreach my $item (@items)
    {
        my $h3 = $item->look_down("_tag", "h3");
        next if !$h3;

        my ($a_tag) = (grep { Scalar::Util::blessed($_) && ($_->tag() eq "a") } $h3->content_list());
        next if !$a_tag;

        my ($p_tag) = (grep { Scalar::Util::blessed($_) && ($_->tag() eq "p") } $item->content_list());

        # A result without a link target is of no use to the caller.
        my $url = $a_tag->attr("href");
        next if !defined($url);

        my $hit = WWW::SearchResult->new();
        $hit->add_url($url);
        $hit->title($a_tag->as_text());
        $hit->description(defined($p_tag) ? $p_tag->as_text() : "");
        push @{$self->{'cache'}}, $hit;
        $hits_found++;
    }

    # Get the next URL
    my $pagination_div = $tree->look_down("_tag", "div", "id", "pagination_bottom");
    my @li_tags = $pagination_div ? $pagination_div->look_down("_tag", "li") : ();
    foreach my $li (@li_tags)
    {
        my ($a_tag) = (grep { Scalar::Util::blessed($_) && ($_->tag() eq "a") } $li->content_list());
        if (!$a_tag)
        {
            next;
        }
        if ($a_tag->as_text() eq "Next")
        {
            my $href = $a_tag->attr("href");
            if (defined($href))
            {
                # NOTE(review): assumes WWW::Search keeps the URL of the page
                # that was just fetched in '_prev_url' - confirm.
                $self->{'_next_url'} =
                    $self->absurl($self->{'_prev_url'}, $href);
            }
            last;
        }
    }

    return $hits_found;
}

=head2 preprocess_results_page()

The purpose of this function is to decode the HTML text, as returned by the
search engine, as UTF-8.

It receives the raw page contents and returns the decoded text.

=cut

sub preprocess_results_page
{
    my ($self, $contents) = @_;

    return decode('UTF-8', $contents);
}

=head1 AUTHOR

Shlomi Fish

Funded by L<> and

=head1 BUGS

Please report any bugs or feature requests to
C<bug-www-search-msn@rt.cpan.org>, or through the web interface at
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=WWW-Search-MSN>.
I will be notified, and then you'll automatically be notified of progress on
your bug as I make changes.


Funded by L<> and


Source code is version-controlled in a Subversion repository hosted at
Berlios. One can find the most up-to-date version there.


=head1 COPYRIGHT & LICENSE

Copyright 2006 Shlomi Fish, all rights reserved.

This program is released under the following license: MIT X11 (a BSD-style
license).


=cut

1; # End of WWW::Search::MSN