Commits

Burak Gürsoy  committed e23cc2f

Handle embedded parens in a better way.
Set the ua parser name only if it wasn't set inside the actual parser.
Fixes for Maxthon and Android stupidities.
Fix for emacs parser.

  • Participants
  • Parent commits c83f157

Comments (0)

Files changed (11)

 Revision history for Perl extension Parse::HTTP::UserAgent
 
+0.35 Mon May 14 03:53:18 2012
+   => New .NET string support.
+   => Handle embedded parens in a better way.
+   => Set the ua parser name only if it wasn't set inside the actual parser.
+   => Fixes for Maxthon and Android stupidities.
+   => Fix for emacs parser.
+
 0.34 Sun Apr  8 00:08:41 2012
    => "normalize" option.
    => Fix RT#76351 reported by DAMOG.

File lib/Parse/HTTP/UserAgent.pm

 use warnings;
 use vars qw( $VERSION );
 
-$VERSION = '0.34';
+$VERSION = '0.35';
 
 use base qw(
     Parse::HTTP::UserAgent::Base::IS
     my $self = shift;
     $self->[IS_MAXTHON] = index(uc $self->[UA_STRING], 'MAXTHON') != NO_IMATCH;
     my $ua = $self->[UA_STRING];
-    my($moz, $thing, $extra, @others) = split RE_SPLIT_PARSE, $ua;
+
+    my @parts;
+    my $i     = 0;
+    my $depth = 0;
+    foreach my $token ( split RE_SPLIT_PARSE, $ua ) {
+        if ( $token eq '(' ) {
+            $i++ if ++$depth == 1;
+            next;
+        }
+        if ( $token eq ')' ) {
+            $i++ if --$depth == 0;
+            next;
+        }
+        push @{ $parts[$i] ||= [] }, $token;
+    }
+
+    # Hopefully the above code was successful and now we can set the actual
+    # tokens to use inside parsers.
+    my($moz)    = join ' ', @{ shift(@parts) || []  };
+    my($thing)  = join ' ', @{ shift(@parts) || []  };
+    my($extra)  = join ' ', @{ shift(@parts) || []  };
+    my(@others) = map { @{ $_ } } @parts;
+
     $thing = $thing ? [ split RE_SC_WS, $thing ] : [];
     $extra = [ split RE_WHITESPACE, $extra ] if $extra;
+
     $self->_debug_pre_parse( $moz, $thing, $extra, @others ) if DEBUG;
     return $moz, $thing, $extra, @others;
 }
         my $method = '_parse_' . $pname;
         my $rvx    = $self->$method( @{ $rv } );
         if ( $rvx ) {
-            $self->[UA_PARSER] = $pname;
+            $self->[UA_PARSER] ||= $pname;
             return $rvx;
         }
     }

File lib/Parse/HTTP/UserAgent/Base/IS.pm

     my($self, $thing, $others) = @_;
     my $has_android = grep { index( lc $_, 'android' ) != NO_IMATCH  } @{ $thing  };
     my $has_safari  = grep { index( lc $_, 'safari'  ) != NO_IMATCH  } @{ $others };
-    return $has_android && $has_safari;
+    if ( $has_android && $has_safari ) {
+        return 1;
+    }
+    if (   @{ $others } == 0
+        && @{ $thing  }  > 0
+        && $thing->[-1]
+        && index( $thing->[-1], 'AppleWebKit' ) != NO_IMATCH
+    ) {
+        # More stupidity: ua string is missing a closing paren
+        my($part, @rest) = split m{(AppleWebKit)}xms, $thing->[-1];
+        $thing->[-1] = $part;
+        @{ $others } =  map   { $self->trim( $_ ) }
+                        split m{ (\QKHTML, like Gecko\E) }xms,
+                        join  q{}, @rest;
+        return 1;
+    }
+    return;
 }
 
 sub _is_ff {

File lib/Parse/HTTP/UserAgent/Base/Parsers.pm

         }
         if ( $e =~ RE_WINDOWS_OS ) {
             if ( $1 && $1 ne '64' ) {
-                $self->[UA_OS] = $e;
+                # Maxthon stupidity: multiple OS definitions
+                $self->[UA_OS] ||= $e;
                 next;
             }
         }
     my($self, $moz, $thing, $extra, @others) = @_;
     my @omap = grep { $_ } map { split RE_SC_WS_MULTI, $_ } @others;
     my($maxthon, $msie, @buf);
+
     foreach my $e ( @omap, @{$thing} ) { # $extra -> junk
-        if ( index(uc $e, 'MAXTHON') != NO_IMATCH ) { $maxthon = $e; next; }
-        if ( index(uc $e, 'MSIE'   ) != NO_IMATCH ) { $msie    = $e; next; }
+        if ( index(uc $e, 'MAXTHON') != NO_IMATCH ) {
+            $maxthon = $e;
+            next;
+        }
+        if ( index(uc $e, 'MSIE' ) != NO_IMATCH ) {
+            # Maxthon stupidity: multiple MSIE strings
+            $msie ||= $e;
+            next;
+        }
         push @buf, $e;
     }
 
 
     $self->[UA_ORIGINAL_VERSION] = $v;
     $self->[UA_ORIGINAL_NAME]    = 'Maxthon';
+    $self->[UA_PARSER]           = 'maxthon';
     return 1;
 }
 
         }
         push @buf, $e;
     }
-    $self->[UA_EXTRAS] = [ @buf ];
+
+    $self->[UA_EXTRAS] = [
+        map  { $self->trim( $_ ) }
+        grep { $_ !~ m{ \s+ compatible \z }xms }
+        @buf
+    ];
+
     $self->[UA_PARSER] = 'msie';
+
     return 1;
 }
 
 sub _parse_android {
     my($self, $moz, $thing, $extra, @others) = @_;
     (undef, @{$self}[UA_STRENGTH, UA_OS, UA_LANG, UA_DEVICE]) = @{ $thing };
+    if ( ! $extra
+        && $others[0]
+        && index( $others[0], 'AppleWebKit' ) != NO_IMATCH
+    ) {
+        $extra = [ shift @others ];
+        $self->[UA_PARSER] = 'android:paren_fixer';
+    }
     $self->[UA_TOOLKIT] = [ split RE_SLASH, $extra->[0] ] if $extra;
     my(@extras, $is_phone);
 
     $self->[UA_NAME]   = 'Android';
     $self->[UA_MOBILE] = 1;
     $self->[UA_TABLET] = $is_phone ? undef : 1;
-    $self->[UA_EXTRAS] = [ @extras ];
+    $self->[UA_EXTRAS] = [ grep { $_ } @extras ];
 
     return 1;
 }
     $self->[UA_NAME]        = $name;
     $self->[UA_VERSION_RAW] = $version || 0;
     $self->[UA_OS]          = shift @{ $thing };
+    $self->[UA_OS]          = $self->trim( $self->[UA_OS] ) if $self->[UA_OS];
     my @rest = (  @{ $thing }, @moz );
     push @rest, @{ $extra } if $extra && ref $extra eq 'ARRAY';
     push @rest, ( map { split RE_SC_WS, $_ } @others ) if @others;

File lib/Parse/HTTP/UserAgent/Constants.pm

 use constant RE_DOTNET           => qr{ \A [.]NET (?: \s+ CLR \s+ )? (.+?) \z    }xms;
 use constant RE_WINDOWS_OS       => qr{ \A Win(dows|NT|[0-9]+)?           }xmsi;
 use constant RE_SLASH            => qr{ /                                 }xms;
-use constant RE_SPLIT_PARSE      => qr{ \s? [()] \s?                      }xms;
+use constant RE_SPLIT_PARSE      => qr{ \s? ([()]) \s?                    }xms;
 use constant RE_OPERA_MINI       => qr{ \A (Opera \s+ Mini) / (.+?) \z    }xms;
 use constant RE_TRIDENT          => qr{ \A (Trident) / (.+?) \z           }xmsi;
 use constant RE_EPIPHANY_GECKO   => qr{ \A (Epiphany) / (.+?) \z          }xmsi;

File t/data/desktop/firefox/2x

 'version'          => '2.000000012',
 'name'             => 'Firefox',
 'toolkit'          => ['Gecko','20080129','20080129.000']
+
+[AGENT]
+
+Mozilla/5.0 (X11; U; Linux i686 (x86_64); de; rv:1.8.1.19) Gecko/20081202 Iceweasel/2.0.0.19 (Debian-2.0.0.19-0etch1)
+
+extras          => [ 'X11', 'Debian-2.0.0.19-0etch1' ],
+lang            => 'de',
+mozilla         => [ '1.8.1.19', '1.008001019' ],
+name            => 'Firefox',
+os              => 'Linux i686 x86_64 ',
+parser          => 'firefox',
+strength        => 'U',
+toolkit         => [ 'Gecko', '20081202', '20081202.000' ],
+version         => '2.000000019',
+version_raw     => '2.0.0.19',

File t/data/desktop/maxthon/1x

 'original_name'    => 'Maxthon',
 'parser'           => 'maxthon',
 'os'               => 'Windows Vista / Server 2008',
-'extras'           => ['SLCC1','Media Center PC 5.0','InfoPath.2','Mozilla/4.0'],
+'extras'           => ['SV1','SLCC1','Media Center PC 5.0','InfoPath.2'],
 'version_raw'      => '7.0',
 'version'          => '7.000',
 'name'             => 'MSIE',
 'original_name'    => 'Maxthon',
 'parser'           => 'maxthon',
 'os'               => 'Windows XP',
-'extras'           => ['Mozilla/4.0'],
+'extras'           => ['SV1'],
 'version_raw'      => '7.0',
 'version'          => '7.000',
 'name'             => 'MSIE',
 'original_name'    => 'Maxthon',
 'parser'           => 'maxthon',
 'os'               => 'Windows XP',
-'extras'           => ['InfoPath.1','Mozilla/4.0'],
+'extras'           => ['SV1','InfoPath.1'],
 'version_raw'      => '7.0',
 'version'          => '7.000',
 'name'             => 'MSIE',
 'original_name'    => 'Maxthon',
 'parser'           => 'maxthon',
 'os'               => 'Windows 2000',
-'extras'           => ['Mozilla/4.0'],
+'extras'           => ['SV1'],
 'version_raw'      => '6.0',
 'version'          => '6.000',
 'name'             => 'MSIE',
 'original_name'    => 'Maxthon',
 'parser'           => 'maxthon',
 'os'               => 'Windows XP',
-'extras'           => ['SV1','MRA 4.6'],
+'extras'           => ['SV1','MRA 4.6 build 01425'],
 'version_raw'      => '6.0',
 'version'          => '6.000',
 'name'             => 'MSIE',

File t/data/desktop/maxthon/2x

 'original_name'    => 'Maxthon',
 'parser'           => 'maxthon',
 'os'               => 'Windows XP',
-'extras'           => [ 'Mozilla/4.0' ],
+'extras'           => [ 'SV1' ],
 'version_raw'      => '8.0',
 'version'          => '8.000',
 'name'             => 'MSIE',
 'original_name'    => 'Maxthon',
 'parser'           => 'maxthon',
 'os'               => 'Windows Vista / Server 2008',
-'extras'           => ['SLCC1','Media Center PC 5.0','Mozilla/4.0'],
+'extras'           => ['SV1','SLCC1','Media Center PC 5.0'],
 'version_raw'      => '7.0',
 'version'          => '7.000',
 'name'             => 'MSIE',

File t/data/desktop/msie/7x

 
 'parser'           => 'msie',
 'os'               => 'Windows XP',
-'extras'           => [ 'compatible', 'MSIE 6.0','SV1','InfoPath.1','Zune 2.5','WWTClient2','MS-RTC LM 8'],
+'extras'           => ['MSIE 6.0','SV1','InfoPath.1','Zune 2.5','WWTClient2','MS-RTC LM 8'],
 'version_raw'      => '7.0',
 'version'          => '7.000',
 'name'             => 'MSIE',

File t/data/mobile/phone/android/dell

 [AGENT]
 
+# Missing the closing parenthesis before "AppleWebKit"
+
 Mozilla/5.0 (Linux; U; Android 1.6; en-gb; Dell Streak Build/Donut AppleWebKit/528.5+ (KHTML, like Gecko) Version/3.1.2 Mobile Safari/ 525.20.1
 
-'parser'      => 'android',
+'parser'      => 'android:paren_fixer',
 'mobile'      => 1,
 'strength'    => 'U',
 'os'          => 'Android 1.6 (Donut)',
-'extras'      => ['Safari/','525.20.1' ],
+'extras'      => ['KHTML, like Gecko', 'Safari/','525.20.1' ],
 'toolkit'     => [ 'AppleWebKit', '528.5+', '528.500' ],
 'version_raw' => '3.1.2',
 'lang'        => 'en-gb',

File t/data/various/1

 
 Emacs-W3/2.1.105 URL/1.267 ((Unix?) ; TTY ; sparc-sun-solaris2.3)
 
-'parser'           => 'emacs',
-'extras'           => [ 'URL/1.267', 'Unix?', 'TTY', 'sparc-sun-solaris2.3' ],
-'version_raw'      => '2.1.105',
-'version'          => '2.001105',
-'name'             => 'Emacs-W3',
+parser           => 'emacs',
+extras           => [ 'TTY','sparc-sun-solaris2.3','URL/1.267' ],
+version_raw      => '2.1.105',
+version          => '2.001105',
+name             => 'Emacs-W3',
+os               => 'Unix?',
 
 [AGENT]