Source

pyga / pyga / entities.py

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
# -*- coding: utf-8 -*-

from datetime import datetime
from operator import itemgetter
from urlparse import urlparse
from urllib import unquote_plus
from pyga import utils
from pyga import exceptions

__author__ = "Arun KR (kra3) <the1.arun@gmail.com>"
__license__ = "Simplified BSD"


class Campaign(object):
    '''
    A representation of Campaign

    Properties:
    _type -- See TYPE_* constants, will be mapped to "__utmz" parameter.
    creation_time --  Time of the creation of this campaign, will be mapped to "__utmz" parameter.
    response_count -- Response Count, will be mapped to "__utmz" parameter.
        Is also used to determine whether the campaign is new or repeated,
        which will be mapped to "utmcn" and "utmcr" parameters.
    id -- Campaign ID, a.k.a. "utm_id" query parameter for ga.js
           Will be mapped to "__utmz" parameter.
    source -- Source, a.k.a. "utm_source" query parameter for ga.js.
              Will be mapped to "utmcsr" key in "__utmz" parameter.
    g_click_id -- Google AdWords Click ID, a.k.a. "gclid" query parameter for ga.js.
                  Will be mapped to "utmgclid" key in "__utmz" parameter.
    d_click_id -- DoubleClick (?) Click ID. Will be mapped to "utmdclid" key in "__utmz" parameter.
    name --  Name, a.k.a. "utm_campaign" query parameter for ga.js.
             Will be mapped to "utmccn" key in "__utmz" parameter.
    medium -- Medium, a.k.a. "utm_medium" query parameter for ga.js.
              Will be mapped to "utmcmd" key in "__utmz" parameter.
    term -- Terms/Keywords, a.k.a. "utm_term" query parameter for ga.js.
            Will be mapped to "utmctr" key in "__utmz" parameter.
    content -- Ad Content Description, a.k.a. "utm_content" query parameter for ga.js.
               Will be mapped to "utmcct" key in "__utmz" parameter.

    '''

    TYPE_DIRECT = 'direct'
    TYPE_ORGANIC = 'organic'
    TYPE_REFERRAL = 'referral'

    CAMPAIGN_DELIMITER = '|'

    UTMZ_PARAM_MAP = {
        'utmcid': 'id',
        'utmcsr': 'source',
        'utmgclid': 'g_click_id',
        'utmdclid': 'd_click_id',
        'utmccn': 'name',
        'utmcmd': 'medium',
        'utmctr': 'term',
        'utmcct': 'content',
    }

    def __init__(self, typ):
        self._type = None
        self.creation_time = None
        self.response_count = 0
        self.id = None
        self.source = None
        self.g_click_id = None
        self.d_click_id = None
        self.name = None
        self.medium = None
        self.term = None
        self.content = None

        if typ:
            if typ not in ('direct', 'organic', 'referral'):
                raise ValueError('Campaign type has to be one of the Campaign::TYPE_* constant values.')

            self._type = typ
            if typ == Campaign.TYPE_DIRECT:
                self.name = '(direct)'
                self.source = '(direct)'
                self.medium = '(none)'
            elif typ == Campaign.TYPE_REFERRAL:
                self.name = '(referral)'
                self.medium = 'referral'
            elif typ == Campaign.TYPE_ORGANIC:
                self.name = '(organic)'
                self.medium = 'organic'
            else:
                self._type = None

        self.creation_time = datetime.utcnow()

    def validate(self):
        if not self.source:
            raise exceptions.ValidationError('Campaigns need to have at least the "source" attribute defined.')

    def create_from_referrer(self, url):
        obj = Campaign(Campaign.TYPE_REFERRAL)
        parse_rslt = urlparse(url)
        obj.source = parse_rslt.netloc
        obj.content = parse_rslt.path
        return obj

    def extract_from_utmz(self, utmz):
        parts = utmz.split('.', 4)

        if len(parts) != 5:
            raise ValueError('The given "__utmz" cookie value is invalid.')

        self.creation_time = utils.convert_ga_timestamp(parts[1])
        self.response_count = int(parts[3])
        params = parts[4].split(Campaign.CAMPAIGN_DELIMITER)

        for param in params:
            key, val = param.split('=')

            try:
                setattr(self, self.UTMZ_PARAM_MAP[key], unquote_plus(val))
            except KeyError:
                pass

        return self


class CustomVariable(object):
    '''
    Represent a Custom Variable

    Properties:
    index -- Is the slot, you have 5 slots
    name -- Name given to custom variable
    value -- Value for the variable
    scope -- Scope can be any one of 1, 2 or 3.

    WATCH OUT: It's a known issue that GA will not decode URL-encoded
    characters in custom variable names and values properly, so spaces
    will show up as "%20" in the interface etc. (applicable to name & value)
    http://www.google.com/support/forum/p/Google%20Analytics/thread?tid=2cdb3ec0be32e078

    '''

    SCOPE_VISITOR = 1
    SCOPE_SESSION = 2
    SCOPE_PAGE = 3

    def __init__(self, index=None, name=None, value=None, scope=3):
        self.index = index
        self.name = name
        self.value = value
        self.scope = CustomVariable.SCOPE_PAGE
        if scope:
            self.scope = scope

    def __setattr__(self, name, value):
        if name == 'scope':
            if value and value not in range(1, 4):
                raise ValueError('Custom Variable scope has to be one of the 1,2 or 3')

        if name == 'index':
            # Custom Variables are limited to five slots officially, but there seems to be a
            # trick to allow for more of them which we could investigate at a later time (see
            # http://analyticsimpact.com/2010/05/24/get-more-than-5-custom-variables-in-google-analytics/
            if value and (value < 0 or value > 5):
                raise ValueError('Custom Variable index has to be between 1 and 5.')

        object.__setattr__(self, name, value)

    def validate(self):
        '''
        According to the GA documentation, there is a limit to the combined size of
        name and value of 64 bytes after URL encoding,
        see http://code.google.com/apis/analytics/docs/tracking/gaTrackingCustomVariables.html#varTypes
        and http://xahlee.org/js/google_analytics_tracker_2010-07-01_expanded.js line 563
        This limit was increased to 128 bytes BEFORE encoding with the 2012-01 release of ga.js however,
        see http://code.google.com/apis/analytics/community/gajs_changelog.html
        '''
        if len('%s%s' % (self.name, self.value)) > 128:
            raise exceptions.ValidationError('Custom Variable combined name and value length must not be larger than 128 bytes.')


class Event(object):
    '''
    Represents an Event
    http://code.google.com/apis/analytics/docs/tracking/eventTrackerOverview.html

    Properties:
    category -- The general event category
    action -- The action for the event
    label -- An optional descriptor for the event
    value -- An optional value associated with the event. You can see your
             event values in the Overview, Categories, and Actions reports,
             where they are listed by event or aggregated across events,
             depending upon your report view.
    noninteraction -- By default, event hits will impact a visitor's bounce rate.
                      By setting this parameter to true, this event hit
                      will not be used in bounce rate calculations.
                      (default False)
    '''

    def __init__(self, category=None, action=None, label=None, value=None, noninteraction=False):
        self.category = category
        self.action = action
        self.label = label
        self.value = value
        self.noninteraction = bool(noninteraction)

        if self.noninteraction and not self.value:
            self.value = 0

    def validate(self):
        if not(self.category and self.action):
            raise exceptions.ValidationError('Events, at least need to have a category and action defined.')


class Item(object):
    '''
    Represents an Item in Transaction

    Properties:
    order_id -- Order ID, will be mapped to "utmtid" parameter
    sku -- Product Code. This is the sku code for a given product, will be mapped to "utmipc" parameter
    name -- Product Name, will be mapped to "utmipn" parameter
    variation -- Variations on an item, will be mapped to "utmiva" parameter
    price -- Unit Price. Value is set to numbers only, will be mapped to "utmipr" parameter
    quantity -- Unit Quantity, will be mapped to "utmiqt" parameter

    '''

    def __init__(self):
        self.order_id = None
        self.sku = None
        self.name = None
        self.variation = None
        self.price = None
        self.quantity = 1

    def validate(self):
        if not self.sku:
            raise exceptions.ValidationError('sku/product is a required parameter')


class Page(object):
    '''
    Contains all parameters needed for tracking a page

    Properties:
    path -- Page request URI, will be mapped to "utmp" parameter
    title -- Page title, will be mapped to "utmdt" parameter
    charset -- Charset encoding, will be mapped to "utmcs" parameter
    referrer -- Referer URL, will be mapped to "utmr" parameter
    load_time -- Page load time in milliseconds, will be encoded into "utme" parameter.

    '''
    REFERRER_INTERNAL = '0'

    def __init__(self, path):
        self.path = None
        self.title = None
        self.charset = None
        self.referrer = None
        self.load_time = None

        if path:
            self.path = path

    def __setattr__(self, name, value):
        if name == 'path':
            if value and value != '':
                if value[0] != '/':
                    raise ValueError('The page path should always start with a slash ("/").')
        elif name == 'load_time':
            if value and not isinstance(value, int):
                raise ValueError('Page load time must be specified in integer milliseconds.')

        object.__setattr__(self, name, value)


class Session(object):
    '''
    You should serialize this object and store it in the user session to keep it
    persistent between requests (similar to the "__umtb" cookie of the GA Javascript client).

    Properties:
    session_id -- A unique per-session ID, will be mapped to "utmhid" parameter
    track_count -- The amount of pageviews that were tracked within this session so far,
                   will be part of the "__utmb" cookie parameter.
                   Will get incremented automatically upon each request
    start_time -- Timestamp of the start of this new session, will be part of the "__utmb" cookie parameter

    '''
    def __init__(self):
        self.session_id = utils.get_32bit_random_num()
        self.track_count = 0
        self.start_time = datetime.utcnow()

    @staticmethod
    def generate_session_id():
        return utils.get_32bit_random_num()

    def extract_from_utmb(self, utmb):
        '''
        Will extract information for the "trackCount" and "startTime"
        properties from the given "__utmb" cookie value.
        '''
        parts = utmb.split('.')
        if len(parts) != 4:
            raise ValueError('The given "__utmb" cookie value is invalid.')

        self.track_count = int(parts[1])
        self.start_time = utils.convert_ga_timestamp(parts[3])

        return self


class SocialInteraction(object):
    '''

    Properties:
    action -- Required. A string representing the social action being tracked,
              will be mapped to "utmsa" parameter
    network -- Required. A string representing the social network being tracked,
               will be mapped to "utmsn" parameter
    target -- Optional. A string representing the URL (or resource) which receives the action.

    '''

    def __init__(self, action=None, network=None, target=None):
        self.action = action
        self.network = network
        self.target = target

    def validate(self):
        if not(self.action and self.network):
            raise exceptions.ValidationError('Social interactions need to have at least the "network" and "action" attributes defined.')


class Transaction(object):
    '''
    Represents parameters for a Transaction call

    Properties:
    order_id -- Order ID, will be mapped to "utmtid" parameter
    affiliation -- Affiliation, Will be mapped to "utmtst" parameter
    total -- Total Cost, will be mapped to "utmtto" parameter
    tax -- Tax Cost, will be mapped to "utmttx" parameter
    shipping -- Shipping Cost, values as for unit and price, will be mapped to "utmtsp" parameter
    city -- Billing City, will be mapped to "utmtci" parameter
    state -- Billing Region, will be mapped to "utmtrg" parameter
    country -- Billing Country, will be mapped to "utmtco" parameter
    items -- @entity.Items in a transaction

    '''
    def __init__(self):
        self.items = []
        self.order_id = None
        self.affiliation = None
        self.total = None
        self.tax = None
        self.shipping = None
        self.city = None
        self.state = None
        self.country = None

    def __setattr__(self, name, value):
        if name == 'order_id':
            for itm in self.items:
                itm.order_id = value
        object.__setattr__(self, name, value)

    def validate(self):
        if len(self.items) == 0:
            raise exceptions.ValidationError('Transaction need to consist of at least one item')

    def add_item(self, item):
        ''' item of type entities.Item '''
        if isinstance(item, Item):
            item.order_id = self.order_id
            self.items.append(item)


class Visitor(object):
    '''
    You should serialize this object and store it in the user database to keep it
    persistent for the same user permanently (similar to the "__umta" cookie of
    the GA Javascript client).

    Properties:
    unique_id -- Unique user ID, will be part of the "__utma" cookie parameter
    first_visit_time -- Time of the very first visit of this user, will be part of the "__utma" cookie parameter
    previous_visit_time -- Time of the previous visit of this user, will be part of the "__utma" cookie parameter
    current_visit_time -- Time of the current visit of this user, will be part of the "__utma" cookie parameter
    visit_count -- Amount of total visits by this user, will be part of the "__utma" cookie parameter
    ip_address -- IP Address of the end user, will be mapped to "utmip" parameter and "X-Forwarded-For" request header
    user_agent -- User agent string of the end user, will be mapped to "User-Agent" request header
    locale -- Locale string (country part optional) will be mapped to "utmul" parameter
    flash_version -- Visitor's Flash version, will be maped to "utmfl" parameter
    java_enabled -- Visitor's Java support, will be mapped to "utmje" parameter
    screen_colour_depth -- Visitor's screen color depth, will be mapped to "utmsc" parameter
    screen_resolution -- Visitor's screen resolution, will be mapped to "utmsr" parameter
    '''
    def __init__(self):
        now = datetime.utcnow()

        self.unique_id = None
        self.first_visit_time = now
        self.previous_visit_time = now
        self.current_visit_time = now
        self.visit_count = 1
        self.ip_address = None
        self.user_agent = None
        self.locale = None
        self.flash_version = None
        self.java_enabled = None
        self.screen_colour_depth = None
        self.screen_resolution = None

    def __setattr__(self, name, value):
        if name == 'unique_id':
            if value and value < 0 or value > 0x7fffffff:
                raise ValueError('Visitor unique ID has to be a 32-bit integer between 0 and 0x7fffffff')
        object.__setattr__(self, name, value)

    def __getattribute__(self, name):
        if name == 'unique_id':
            tmp = object.__getattribute__(self, name)
            if tmp == None:
                self.unique_id = self.generate_unique_id()
        return object.__getattribute__(self, name)

    def __getstate__(self):
        state = self.__dict__
        if state.get('user_agent') is None:
            state['unique_id'] = self.generate_unique_id()

        return state

    def extract_from_utma(self, utma):
        '''
        Will extract information for the "unique_id", "first_visit_time", "previous_visit_time",
        "current_visit_time" and "visit_count" properties from the given "__utma" cookie value.
        '''
        parts = utma.split('.')
        if len(parts) != 6:
            raise ValueError('The given "__utma" cookie value is invalid.')

        self.unique_id = int(parts[1])
        self.first_visit_time = utils.convert_ga_timestamp(parts[2])
        self.previous_visit_time = utils.convert_ga_timestamp(parts[3])
        self.current_visit_time = utils.convert_ga_timestamp(parts[4])
        self.visit_count = int(parts[5])

        return self

    def extract_from_server_meta(self, meta):
        '''
        Will extract information for the "ip_address", "user_agent" and "locale"
        properties from the given WSGI REQUEST META variable or equivalent.
        '''
        if 'REMOTE_ADDR' in meta and meta['REMOTE_ADDR']:
            ip = None
            for key in ('HTTP_X_FORWARDED_FOR', 'REMOTE_ADDR'):
                if key in meta and not ip:
                    ips = meta.get(key, '').split(',')
                    ip = ips[len(ips) - 1].strip()
                    if not utils.is_valid_ip(ip):
                        ip = ''
                    if utils.is_private_ip(ip):
                        ip = ''
            if ip:
                self.ip_address = ip

        if 'HTTP_USER_AGENT' in meta and meta['HTTP_USER_AGENT']:
            self.user_agent = meta['HTTP_USER_AGENT']

        if 'HTTP_ACCEPT_LANGUAGE' in meta and meta['HTTP_ACCEPT_LANGUAGE']:
            user_locals = []
            matched_locales = utils.validate_locale(meta['HTTP_ACCEPT_LANGUAGE'])
            if matched_locales:
                lang_lst = map((lambda x: x.replace('-', '_')), (i[1] for i in matched_locales))
                quality_lst = map((lambda x: x and x or 1), (float(i[4] and i[4] or '0') for i in matched_locales))
                lang_quality_map = map((lambda x, y: (x, y)), lang_lst, quality_lst)
                user_locals = [x[0] for x in sorted(lang_quality_map, key=itemgetter(1), reverse=True)]

            if user_locals:
                self.locale = user_locals[0]

        return self

    def generate_hash(self):
        '''Generates a hashed value from user-specific properties.'''
        tmpstr = "%s%s%s" % (self.user_agent, self.screen_resolution, self.screen_colour_depth)
        return utils.generate_hash(tmpstr)

    def generate_unique_id(self):
        '''Generates a unique user ID from the current user-specific properties.'''
        return ((utils.get_32bit_random_num() ^ self.generate_hash()) & 0x7fffffff)

    def add_session(self, session):
        '''
        Updates the "previousVisitTime", "currentVisitTime" and "visitCount"
        fields based on the given session object.
        '''
        start_time = session.start_time
        if start_time != self.current_visit_time:
            self.previous_visit_time = self.current_visit_time
            self.current_visit_time = start_time
            self.visit_count = self.visit_count + 1
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.