Commits

Burak Gürsoy committed e23cc2f

Handle embedded parens in a better way.
Set the ua parser name only if it wasn't set inside the actual parser.
Fixes for malformed Maxthon and Android user-agent strings (duplicate OS/MSIE tokens, missing closing paren).
Fix for emacs parser.

Comments (0)

Files changed (11)

 Revision history for Perl extension Parse::HTTP::UserAgent
 
+0.35 Mon May 14 03:53:18 2012
+   => New .NET string support.
+   => Handle embedded parens in a better way.
+   => Set the ua parser name only if it wasn't set inside the actual parser.
+   => Fixes for malformed Maxthon and Android user-agent strings (duplicate OS/MSIE tokens, missing closing paren).
+   => Fix for emacs parser.
+
 0.34 Sun Apr  8 00:08:41 2012
    => "normalize" option.
    => Fix RT#76351 reported by DAMOG.

lib/Parse/HTTP/UserAgent.pm

 use warnings;
 use vars qw( $VERSION );
 
-$VERSION = '0.34';
+$VERSION = '0.35';
 
 use base qw(
     Parse::HTTP::UserAgent::Base::IS
     my $self = shift;
     $self->[IS_MAXTHON] = index(uc $self->[UA_STRING], 'MAXTHON') != NO_IMATCH;
     my $ua = $self->[UA_STRING];
-    my($moz, $thing, $extra, @others) = split RE_SPLIT_PARSE, $ua;
+
+    my @parts;
+    my $i     = 0;
+    my $depth = 0;
+    foreach my $token ( split RE_SPLIT_PARSE, $ua ) {
+        if ( $token eq '(' ) {
+            $i++ if ++$depth == 1;
+            next;
+        }
+        if ( $token eq ')' ) {
+            $i++ if --$depth == 0;
+            next;
+        }
+        push @{ $parts[$i] ||= [] }, $token;
+    }
+
+    # Hopefully the above code was successful and now we can set the actual
+    # tokens to use inside parsers.
+    my($moz)    = join ' ', @{ shift(@parts) || []  };
+    my($thing)  = join ' ', @{ shift(@parts) || []  };
+    my($extra)  = join ' ', @{ shift(@parts) || []  };
+    my(@others) = map { @{ $_ } } @parts;
+
     $thing = $thing ? [ split RE_SC_WS, $thing ] : [];
     $extra = [ split RE_WHITESPACE, $extra ] if $extra;
+
     $self->_debug_pre_parse( $moz, $thing, $extra, @others ) if DEBUG;
     return $moz, $thing, $extra, @others;
 }
         my $method = '_parse_' . $pname;
         my $rvx    = $self->$method( @{ $rv } );
         if ( $rvx ) {
-            $self->[UA_PARSER] = $pname;
+            $self->[UA_PARSER] ||= $pname;
             return $rvx;
         }
     }

lib/Parse/HTTP/UserAgent/Base/IS.pm

     my($self, $thing, $others) = @_;
     my $has_android = grep { index( lc $_, 'android' ) != NO_IMATCH  } @{ $thing  };
     my $has_safari  = grep { index( lc $_, 'safari'  ) != NO_IMATCH  } @{ $others };
-    return $has_android && $has_safari;
+    if ( $has_android && $has_safari ) {
+        return 1;
+    }
+    if (   @{ $others } == 0
+        && @{ $thing  }  > 0
+        && $thing->[-1]
+        && index( $thing->[-1], 'AppleWebKit' ) != NO_IMATCH
+    ) {
+        # More stupidity: ua string is missing a closing paren
+        my($part, @rest) = split m{(AppleWebKit)}xms, $thing->[-1];
+        $thing->[-1] = $part;
+        @{ $others } =  map   { $self->trim( $_ ) }
+                        split m{ (\QKHTML, like Gecko\E) }xms,
+                        join  q{}, @rest;
+        return 1;
+    }
+    return;
 }
 
 sub _is_ff {

lib/Parse/HTTP/UserAgent/Base/Parsers.pm

         }
         if ( $e =~ RE_WINDOWS_OS ) {
             if ( $1 && $1 ne '64' ) {
-                $self->[UA_OS] = $e;
+                # Maxthon stupidity: multiple OS definitions
+                $self->[UA_OS] ||= $e;
                 next;
             }
         }
     my($self, $moz, $thing, $extra, @others) = @_;
     my @omap = grep { $_ } map { split RE_SC_WS_MULTI, $_ } @others;
     my($maxthon, $msie, @buf);
+
     foreach my $e ( @omap, @{$thing} ) { # $extra -> junk
-        if ( index(uc $e, 'MAXTHON') != NO_IMATCH ) { $maxthon = $e; next; }
-        if ( index(uc $e, 'MSIE'   ) != NO_IMATCH ) { $msie    = $e; next; }
+        if ( index(uc $e, 'MAXTHON') != NO_IMATCH ) {
+            $maxthon = $e;
+            next;
+        }
+        if ( index(uc $e, 'MSIE' ) != NO_IMATCH ) {
+            # Maxthon stupidity: multiple MSIE strings
+            $msie ||= $e;
+            next;
+        }
         push @buf, $e;
     }
 
 
     $self->[UA_ORIGINAL_VERSION] = $v;
     $self->[UA_ORIGINAL_NAME]    = 'Maxthon';
+    $self->[UA_PARSER]           = 'maxthon';
     return 1;
 }
 
         }
         push @buf, $e;
     }
-    $self->[UA_EXTRAS] = [ @buf ];
+
+    $self->[UA_EXTRAS] = [
+        map  { $self->trim( $_ ) }
+        grep { $_ !~ m{ \s+ compatible \z }xms }
+        @buf
+    ];
+
     $self->[UA_PARSER] = 'msie';
+
     return 1;
 }
 
 sub _parse_android {
     my($self, $moz, $thing, $extra, @others) = @_;
     (undef, @{$self}[UA_STRENGTH, UA_OS, UA_LANG, UA_DEVICE]) = @{ $thing };
+    if ( ! $extra
+        && $others[0]
+        && index( $others[0], 'AppleWebKit' ) != NO_IMATCH
+    ) {
+        $extra = [ shift @others ];
+        $self->[UA_PARSER] = 'android:paren_fixer';
+    }
     $self->[UA_TOOLKIT] = [ split RE_SLASH, $extra->[0] ] if $extra;
     my(@extras, $is_phone);
 
     $self->[UA_NAME]   = 'Android';
     $self->[UA_MOBILE] = 1;
     $self->[UA_TABLET] = $is_phone ? undef : 1;
-    $self->[UA_EXTRAS] = [ @extras ];
+    $self->[UA_EXTRAS] = [ grep { $_ } @extras ];
 
     return 1;
 }
     $self->[UA_NAME]        = $name;
     $self->[UA_VERSION_RAW] = $version || 0;
     $self->[UA_OS]          = shift @{ $thing };
+    $self->[UA_OS]          = $self->trim( $self->[UA_OS] ) if $self->[UA_OS];
     my @rest = (  @{ $thing }, @moz );
     push @rest, @{ $extra } if $extra && ref $extra eq 'ARRAY';
     push @rest, ( map { split RE_SC_WS, $_ } @others ) if @others;

lib/Parse/HTTP/UserAgent/Constants.pm

 use constant RE_DOTNET           => qr{ \A [.]NET (?: \s+ CLR \s+ )? (.+?) \z    }xms;
 use constant RE_WINDOWS_OS       => qr{ \A Win(dows|NT|[0-9]+)?           }xmsi;
 use constant RE_SLASH            => qr{ /                                 }xms;
-use constant RE_SPLIT_PARSE      => qr{ \s? [()] \s?                      }xms;
+use constant RE_SPLIT_PARSE      => qr{ \s? ([()]) \s?                    }xms;
 use constant RE_OPERA_MINI       => qr{ \A (Opera \s+ Mini) / (.+?) \z    }xms;
 use constant RE_TRIDENT          => qr{ \A (Trident) / (.+?) \z           }xmsi;
 use constant RE_EPIPHANY_GECKO   => qr{ \A (Epiphany) / (.+?) \z          }xmsi;

t/data/desktop/firefox/2x

 'version'          => '2.000000012',
 'name'             => 'Firefox',
 'toolkit'          => ['Gecko','20080129','20080129.000']
+
+[AGENT]
+
+Mozilla/5.0 (X11; U; Linux i686 (x86_64); de; rv:1.8.1.19) Gecko/20081202 Iceweasel/2.0.0.19 (Debian-2.0.0.19-0etch1)
+
+extras          => [ 'X11', 'Debian-2.0.0.19-0etch1' ],
+lang            => 'de',
+mozilla         => [ '1.8.1.19', '1.008001019' ],
+name            => 'Firefox',
+os              => 'Linux i686 x86_64 ',
+parser          => 'firefox',
+strength        => 'U',
+toolkit         => [ 'Gecko', '20081202', '20081202.000' ],
+version         => '2.000000019',
+version_raw     => '2.0.0.19',

t/data/desktop/maxthon/1x

 'original_name'    => 'Maxthon',
 'parser'           => 'maxthon',
 'os'               => 'Windows Vista / Server 2008',
-'extras'           => ['SLCC1','Media Center PC 5.0','InfoPath.2','Mozilla/4.0'],
+'extras'           => ['SV1','SLCC1','Media Center PC 5.0','InfoPath.2'],
 'version_raw'      => '7.0',
 'version'          => '7.000',
 'name'             => 'MSIE',
 'original_name'    => 'Maxthon',
 'parser'           => 'maxthon',
 'os'               => 'Windows XP',
-'extras'           => ['Mozilla/4.0'],
+'extras'           => ['SV1'],
 'version_raw'      => '7.0',
 'version'          => '7.000',
 'name'             => 'MSIE',
 'original_name'    => 'Maxthon',
 'parser'           => 'maxthon',
 'os'               => 'Windows XP',
-'extras'           => ['InfoPath.1','Mozilla/4.0'],
+'extras'           => ['SV1','InfoPath.1'],
 'version_raw'      => '7.0',
 'version'          => '7.000',
 'name'             => 'MSIE',
 'original_name'    => 'Maxthon',
 'parser'           => 'maxthon',
 'os'               => 'Windows 2000',
-'extras'           => ['Mozilla/4.0'],
+'extras'           => ['SV1'],
 'version_raw'      => '6.0',
 'version'          => '6.000',
 'name'             => 'MSIE',
 'original_name'    => 'Maxthon',
 'parser'           => 'maxthon',
 'os'               => 'Windows XP',
-'extras'           => ['SV1','MRA 4.6'],
+'extras'           => ['SV1','MRA 4.6 build 01425'],
 'version_raw'      => '6.0',
 'version'          => '6.000',
 'name'             => 'MSIE',

t/data/desktop/maxthon/2x

 'original_name'    => 'Maxthon',
 'parser'           => 'maxthon',
 'os'               => 'Windows XP',
-'extras'           => [ 'Mozilla/4.0' ],
+'extras'           => [ 'SV1' ],
 'version_raw'      => '8.0',
 'version'          => '8.000',
 'name'             => 'MSIE',
 'original_name'    => 'Maxthon',
 'parser'           => 'maxthon',
 'os'               => 'Windows Vista / Server 2008',
-'extras'           => ['SLCC1','Media Center PC 5.0','Mozilla/4.0'],
+'extras'           => ['SV1','SLCC1','Media Center PC 5.0'],
 'version_raw'      => '7.0',
 'version'          => '7.000',
 'name'             => 'MSIE',

t/data/desktop/msie/7x

 
 'parser'           => 'msie',
 'os'               => 'Windows XP',
-'extras'           => [ 'compatible', 'MSIE 6.0','SV1','InfoPath.1','Zune 2.5','WWTClient2','MS-RTC LM 8'],
+'extras'           => ['MSIE 6.0','SV1','InfoPath.1','Zune 2.5','WWTClient2','MS-RTC LM 8'],
 'version_raw'      => '7.0',
 'version'          => '7.000',
 'name'             => 'MSIE',

t/data/mobile/phone/android/dell

 [AGENT]
 
+# Missing the closing parenthesis before "AppleWebKit"
+
 Mozilla/5.0 (Linux; U; Android 1.6; en-gb; Dell Streak Build/Donut AppleWebKit/528.5+ (KHTML, like Gecko) Version/3.1.2 Mobile Safari/ 525.20.1
 
-'parser'      => 'android',
+'parser'      => 'android:paren_fixer',
 'mobile'      => 1,
 'strength'    => 'U',
 'os'          => 'Android 1.6 (Donut)',
-'extras'      => ['Safari/','525.20.1' ],
+'extras'      => ['KHTML, like Gecko', 'Safari/','525.20.1' ],
 'toolkit'     => [ 'AppleWebKit', '528.5+', '528.500' ],
 'version_raw' => '3.1.2',
 'lang'        => 'en-gb',
 
 Emacs-W3/2.1.105 URL/1.267 ((Unix?) ; TTY ; sparc-sun-solaris2.3)
 
-'parser'           => 'emacs',
-'extras'           => [ 'URL/1.267', 'Unix?', 'TTY', 'sparc-sun-solaris2.3' ],
-'version_raw'      => '2.1.105',
-'version'          => '2.001105',
-'name'             => 'Emacs-W3',
+parser           => 'emacs',
+extras           => [ 'TTY','sparc-sun-solaris2.3','URL/1.267' ],
+version_raw      => '2.1.105',
+version          => '2.001105',
+name             => 'Emacs-W3',
+os               => 'Unix?',
 
 [AGENT]