Commits

Anonymous committed 8a6fb26

Importing the initial version of HTML::Strip::Whitespace from the hard-disk

Comments (0)

Files changed (7)

+Revision history for Perl extension HTML::Strip::Whitespace.
+
+0.01  Fri Nov 12 21:56:34 2004
+	- original version; created by h2xs 1.23 with options
+		-b 5.4.0 -X HTML::Strip::Whitespace
+
+Changes
+Makefile.PL
+MANIFEST
+README
+t/HTML-Strip-Whitespace.t
+lib/HTML/Strip/Whitespace.pm
+META.yml                                 Module meta-data (added by MakeMaker)
+# http://module-build.sourceforge.net/META-spec.html
+#XXXXXXX This is a prototype!!!  It will change in the future!!! XXXXX#
+name:         HTML-Strip-Whitespace
+version:      0.1.0
+version_from: lib/HTML/Strip/Whitespace.pm
+installdirs:  site
+requires:
+
+distribution_type: module
+generated_by: ExtUtils::MakeMaker version 6.17
+use 5.004;
+use ExtUtils::MakeMaker;
+
+# Build script for the HTML-Strip-Whitespace distribution.
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+WriteMakefile(
+    NAME              => 'HTML::Strip::Whitespace',
+    VERSION_FROM      => 'lib/HTML/Strip/Whitespace.pm', # finds $VERSION
+    # lib/HTML/Strip/Whitespace.pm loads HTML::TokeParser::Simple and the test
+    # script loads Test::More, so both have to be declared here; otherwise an
+    # installer has no way of knowing that they must be present first.
+    PREREQ_PM         => {
+        'HTML::TokeParser::Simple' => 0, # any version
+        'Test::More'               => 0, # any version; used by t/*.t
+    },
+    ($] >= 5.005 ?     ## Add these new keywords supported since 5.005
+      (ABSTRACT_FROM  => 'lib/HTML/Strip/Whitespace.pm', # retrieve abstract from module
+       AUTHOR         => 'Shlomi Fish <shlomi@mandrakesoft.com>') : ()),
+);
+HTML-Strip-Whitespace version 0.01
+==================================
+
+The README is used to introduce the module and provide instructions on
+how to install the module, any machine dependencies it may have (for
+example C compilers and installed libraries) and any other information
+that should be provided before the module is installed.
+
+A README file is required for CPAN modules since CPAN extracts the
+README file from a module distribution so that people browsing the
+archive can use it to get an idea of the module's uses. It is usually a
+good idea to provide version information here so that people can
+decide whether fixes for the module are worth downloading.
+
+INSTALLATION
+
+To install this module type the following:
+
+   perl Makefile.PL
+   make
+   make test
+   make install
+
+DEPENDENCIES
+
+This module requires these other modules and libraries:
+
+  HTML::TokeParser::Simple
+
+COPYRIGHT AND LICENCE
+
+Put the correct copyright and licence information here.
+
+Copyright (C) 2004 by Shlomi Fish
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.8.5 or,
+at your option, any later version of Perl 5 you may have available.
+
+

lib/HTML/Strip/Whitespace.pm

+package HTML::Strip::Whitespace;
+
+use 5.004;
+use strict;
+
+use HTML::TokeParser::Simple;
+
+require Exporter;
+use AutoLoader qw(AUTOLOAD);
+use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
+@ISA = ('Exporter');
+
+# Nothing is pushed into the caller's namespace unless it is asked for.
+@EXPORT = ();
+
+# What a caller may request, either by name or through the ':all' tag, e.g.
+#     use HTML::Strip::Whitespace ':all';
+@EXPORT_OK = ('html_strip_whitespace');
+%EXPORT_TAGS = ( 'all' => [ @EXPORT_OK ] );
+
+$VERSION = '0.1.0';
+
+# Preloaded methods go here.
+
+# html_strip_whitespace($source, $out_fh, %args)
+#
+# Parses $source with HTML::TokeParser::Simple and writes every token, as is,
+# to the output target.
+#
+# Parameters:
+#   $source - handed unchanged to HTML::TokeParser::Simple->new().
+#   $out_fh - where the output goes:
+#               CODE ref   - called once per chunk, with the chunk as argument
+#               SCALAR ref - each chunk is appended to the referenced string
+#               GLOB ref   - each chunk is printed to that filehandle
+#               object     - anything that has a print() method
+#   %args   - named options. 'strip_newlines' is read, but no stripping is
+#             implemented yet: tokens are copied through verbatim.
+#
+# Returns 0 on success. Croaks if $out_fh is not one of the targets listed
+# above, or if the parser object could not be created.
+sub html_strip_whitespace
+{
+    my $source = shift;
+    my $out_fh = shift;
+    my %args = (@_);
+    # TODO: not consulted yet - placeholder for the newline-stripping mode.
+    my $strip_newlines = $args{'strip_newlines'} || 0;
+
+    require Carp;
+
+    # Choose the writer once, before any parsing happens. An unsupported
+    # target is reported right away instead of silently swallowing the whole
+    # document, and the type check is not repeated for every token.
+    my $kind = ref($out_fh);
+    my $out;
+    if ($kind eq "CODE")
+    {
+        $out = sub { $out_fh->($_[0]); };
+    }
+    elsif ($kind eq "SCALAR")
+    {
+        $out = sub { $$out_fh .= $_[0]; };
+    }
+    elsif ($kind eq "GLOB")
+    {
+        $out = sub { print {*{$out_fh}} $_[0]; };
+    }
+    elsif ($kind && UNIVERSAL::can($out_fh, "print"))
+    {
+        # Blessed handles report their class name from ref(), not "GLOB".
+        $out = sub { $out_fh->print($_[0]); };
+    }
+    else
+    {
+        my $got = $kind ? "a $kind reference" : "a non-reference value";
+        Carp::croak(
+            "html_strip_whitespace: unsupported output target ($got)"
+        );
+    }
+
+    my $parser = HTML::TokeParser::Simple->new($source);
+    if (!defined($parser))
+    {
+        # Without this check the loop below would die with an unhelpful
+        # "Can't call method on an undefined value" message.
+        Carp::croak(
+            "html_strip_whitespace: could not create a parser for the source: $!"
+        );
+    }
+
+    while (my $token = $parser->get_token)
+    {
+        $out->($token->as_is());
+    }
+
+    # Return 0 on success.
+    return 0;
+}
+
+
+# Autoload methods go after =cut, and are processed by the autosplit program.
+
+1;
+__END__
+# Below is stub documentation for your module. You'd better edit it!
+
+=head1 NAME
+
+HTML::Strip::Whitespace - Perl extension for stripping whitespace from HTML
+
+=head1 SYNOPSIS
+
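+  use HTML::Strip::Whitespace qw(html_strip_whitespace);
+
+  my $html = "<html> <body> <p>Hello world!</p> </body> </html>";
+  my $buffer = "";
+  html_strip_whitespace(\$html, \$buffer, 'strip_newlines' => 0);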
+
+=head1 DESCRIPTION
+
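+This module provides a single function, C<html_strip_whitespace>, which is
+meant to remove extraneous whitespace from an HTML document.
+
+It is called as C<html_strip_whitespace($source, $out, %options)>. C<$source>
+is passed to the constructor of L<HTML::TokeParser::Simple>. C<$out> receives
+the output and may be a reference to a subroutine (called with each chunk of
+output), a reference to a scalar (each chunk is appended to it) or a
+reference to a glob (each chunk is printed to it). The only option is
+C<strip_newlines>. The function returns 0 on success.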
+
+=head2 EXPORT
+
+None by default.
+
+
+
+=head1 SEE ALSO
+
+Mention other useful documentation such as the documentation of
+related modules or operating system documentation (such as man pages
+in UNIX), or any relevant external documentation such as RFCs or
+standards.
+
+If you have a mailing list set up for your module, mention it here.
+
+If you have a web site set up for your module, mention it here.
+
+=head1 AUTHOR
+
+Shlomi Fish, E<lt>shlomi@mandrakesoft.comE<gt>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2004 by Shlomi Fish
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.8.5 or,
+at your option, any later version of Perl 5 you may have available.
+
+
+=cut

t/HTML-Strip-Whitespace.t

+#!/usr/bin/perl -w
+
+use strict;
+
+# The plan below counts the use_ok() check plus two is() checks in each of the
+# three scenario blocks.
+
+use Test::More tests => 7;
+
+BEGIN
+{
+    use_ok('HTML::Strip::Whitespace', "html_strip_whitespace"); # TEST
+}
+
+# Test helper: feeds the HTML string given as the first argument to
+# html_strip_whitespace(), forwards any remaining arguments to it as options,
+# and returns everything that was written to an in-memory buffer.
+sub get_html
+{
+    my ($html, @options) = @_;
+
+    my $collected = "";
+    html_strip_whitespace(\$html, \$collected, @options);
+
+    return $collected;
+}
+
+
+# Scenario: a document with no whitespace between its tokens is expected to
+# come out unchanged in both modes.
+{
+    my $html = "<html><body><p>Hello world!</p></body></html>";
+
+    # TEST
+    is(
+        scalar(get_html($html, 'strip_newlines' => 0)),
+        $html,
+        "Do Nothing - w Newlines"
+    );
+    # TEST
+    is(
+        scalar(get_html($html, 'strip_newlines' => 1)),
+        $html,
+        "Do Nothing - wo Newlines"
+    );
+}
+
+# Scenario: an indented, multi-line document. Both expectations below have the
+# indentation removed; with 'strip_newlines' on, the expected text is also
+# collapsed onto a single (still newline-terminated) line.
+{
+    my $indented = <<"EOF";
+<html>
+    <body>
+        <p>
+        Hello world!
+        </p>
+    </body>
+</html>
+EOF
+
+    my $want_multi_line = <<"EOF";
+<html>
+<body>
+<p>
+Hello world!
+</p>
+</body>
+</html>
+EOF
+
+    my $want_single_line = <<"EOF";
+<html><body><p>Hello world!</p></body></html>
+EOF
+
+    # TEST
+    is(
+        scalar(get_html($indented, 'strip_newlines' => 0)),
+        $want_multi_line,
+        "Simple Test #1 - w Newlines"
+    );
+    # TEST
+    is(
+        scalar(get_html($indented, 'strip_newlines' => 1)),
+        $want_single_line,
+        "Simple Test #1 - wo Newlines"
+    );
+}
+
+
+# Scenario: a multi-line document without any indentation. With newlines kept
+# the expected output is the input itself; with newlines stripped it is the
+# input with every "\n" removed.
+{
+    my $in = <<"EOF";
+<html>
+<body>
+<p>Hello world!</p>
+</body>
+</html>
+EOF
+
+    my $expected_with_newlines = $in;
+
+    my $expected_wo_newlines = $in;
+    $expected_wo_newlines =~ s/\n//g;
+
+    my $result_with_newlines = get_html($in, 'strip_newlines' => 0);
+    my $result_wo_newlines = get_html($in, 'strip_newlines' => 1);
+
+    # Both descriptions carry the same scenario number, distinct from the
+    # other scenarios in this file, so that a failure report identifies this
+    # block unambiguously.
+    # TEST
+    is($result_with_newlines, $expected_with_newlines, "Simple Test #2 - w Newlines");
+    # TEST
+    is($result_wo_newlines, $expected_wo_newlines, "Simple Test #2 - wo Newlines");
+}
+