Source

hbwebui / hbwebui / queries.py

Full commit
from django.db import connections

class QueryBase(object):
    def __init__(self):
        self.map = {}
        self.list = []
        self.cursor = None
    
    def part_init(self, part):
        self.map[part] = []
        self.map[part].append(set()) # [0]: set of subpart names, to filter duplicates, trigger insertion
        self.map[part].append('') # [1]: query fragment text
        self.map[part].append([]) # [2]: query params
        self.list.append(part)
    
    def part_append(self, part, subpart, sep, text, params):
        if subpart in self.map[part][0]: # filter duplicate subparts
            return False
        else:
            if len(self.map[part][0]):
                self.map[part][1] += sep + text
            else:
                self.map[part][1] += text
            self.map[part][2].extend(params)
            self.map[part][0].add(subpart)
            return True
    
    def part_append2(self, part, text):
        self.map[part][1] += text
    
    def part_init2(self, part, text):
        self.part_init(part)
        self.part_append2(part, text)
        
    def where_init(self):
        self.part_init2('where_clause', 'WHERE ')
    
    def order_by_init(self):
        self.part_init2('order_by_clause', 'ORDER BY ')
    
    def with_init(self):
        self.part_init2('with_clause', 'WITH ')
        self.part_init2('with_tables', ', ')
    
    def limit_init(self):
        self.part_init2('limit_clause', 'LIMIT ')
    
    def offset_init(self):
        self.part_init2('offset_clause', 'OFFSET ')
    
    def where_append(self, subpart, text, params):
        return self.part_append('where_clause', subpart, ' AND ', text, params)
    
    def order_by_append(self, subpart, text, desc=False):
        return self.part_append('order_by_clause', subpart, ', ', text + (' DESC' if desc else ''), [])
    
    def with_append(self, subpart, query, table, params):
        b = self.part_append('with_clause', subpart, ', ', query, params)
        if b:
            self.part_append('with_tables', subpart, ', ', table, [])
            return True
        else:
            return False
    
    def limit(self, limit):
        return self.part_append('limit_clause', 'limit_clause', '', '%s', [limit])
    
    def offset(self, offset):
        return self.part_append('offset_clause', 'offset_clause', '', '%s', [offset])
    
    def build(self):
        query = ''
        params = []
        format_params = {}
        
        for part in self.list:
            if len(self.map[part][0]): # got subparts
                format_params[part] = self.map[part][1]
                params.extend(self.map[part][2])
            else:
                format_params[part] = ''
        
        query = self.tpl % format_params
        return [query, params]
    
    def execute(self):
        self.cursor = connections[self.db].cursor()
        query, params = self.build()
        
        self.cursor.execute(query, params)
        return self.cursor


class Stats_Binaries(QueryBase):
    db = 'hbstats'
    tpl = \
        """SELECT
                md5, sum(agg_main.n_count)::bigint as n_count,
                count(DISTINCT ident_id) AS n_ident, count(DISTINCT source_ip) AS n_source_ip, count(DISTINCT target_port) AS n_target_port,
                max(agg_main.ts_last) as ts_last, min(agg_main.ts_first) as ts_first
            FROM
                agg_main
            JOIN
                dim_ident ON agg_main.ident_id = dim_ident.id
            JOIN
                dim_binary ON agg_main.binary_id = dim_binary.id
            %(where_clause)s
            GROUP BY md5
            %(order_by_clause)s"""
    
    def __init__(self):
        super(Stats_Binaries, self).__init__()
        self.where_init()
        self.order_by_init()
    
    def filter_ident(self, idents):
        self.where_append('filter_ident', 'ident_id IN (SELECT id from dim_ident WHERE ident IN %s)', [tuple(idents)])
    
    def filter_source_ip(self, source_ips):
        self.where_append('filter_source_ip', 'source_ip IN %s', [tuple(source_ips)])
    
    def order_n_count(self, desc=False):
        self.order_by_append('order_n_count', 'n_count', desc)


class Stats_Binaries_TimeRange(QueryBase):
    db = 'hbstats'
    tpl = \
        """%(with_clause)s
        
        SELECT
            md5, sum(t.n_count)::bigint as n_count,
            count(DISTINCT ident_id) AS n_ident, count(DISTINCT source_ip) AS n_source_ip, count(DISTINCT target_port) AS n_target_port,
            max(t.ts_last) AS ts_last, min(t.ts_first) AS ts_first
        FROM
            (SELECT
                agg_id, sum(n_count) as n_count, max(ts_min) as ts_last, min(ts_min) as ts_first
                FROM ts_main_min
                WHERE agg_id IN (SELECT agg_id FROM agg_main %(where_clause)s)
                GROUP BY agg_id
            ) as t  %(with_tables)s
        JOIN
            agg_main ON t.agg_id = agg_main.agg_id
        JOIN
            dim_ident ON agg_main.ident_id = dim_ident.id
        JOIN
            dim_binary ON agg_main.binary_id = dim_binary.id
        GROUP BY md5
        %(order_by_clause)s"""
    
    def __init__(self):
        super(Stats_Binaries_TimeRange, self).__init__()
        self.with_init()
        self.where_init()
        self.order_by_init()
        
    def filter_time_range_recent(self, hours):
        b = self.where_append('filter_time_range', 'ts_min <= tr_end AND ts_min > tr_begin', [begin, end])
        if b:
            self.with_append('filter_time_range',
                "tr AS SELECT max(ts_min) AS tr_end, max(ts_min) - interval '%s hour' AS tr_begin from ts_main_min)",
                'tr',
                [hours]
            )
    
    def filter_time_range(self, begin, end):
        self.where_append('filter_time_range', 'ts_min >= %s AND ts_min <= %s', [begin, end])
    
    def filter_ident(self, idents):
        self.where_append('filter_ident', 'ident_id IN (SELECT id from dim_ident WHERE ident IN %s)', [tuple(idents)])
    
    def filter_source_ip(self, source_ips):
        self.where_append('filter_source_ip', 'source_ip IN %s', [tuple(source_ips)])
    
    def order_n_count(self, desc=False):
        self.order_by_append('order_n_count', 'n_count', desc)


class Stats_Binaries_Summary(QueryBase):
    db = 'hbstats'
    tpl = \
        """SELECT
                count(DISTINCT binary_id) as n_count,
                count(DISTINCT ident_id) AS n_ident, count(DISTINCT source_ip) AS n_source_ip, count(DISTINCT target_port) AS n_target_port,
                max(agg_main.ts_last) as ts_last, min(agg_main.ts_first) as ts_first,
                sum(n_count)::bigint as n_total
            FROM
                agg_main
            %(where_clause)s"""
    
    def __init__(self):
        super(Stats_Binaries_Summary, self).__init__()
        self.where_init()
    
    def filter_ident(self, idents):
        self.where_append('filter_ident', 'ident_id IN (SELECT id from dim_ident WHERE ident IN %s)', [tuple(idents)])
    
    def filter_source_ip(self, source_ips):
        self.where_append('filter_source_ip', 'source_ip IN %s', [tuple(source_ips)])
        


class Stats_Source_IPs(QueryBase):
    db = 'hbstats'
    tpl = \
        """SELECT
                source_ip, cc, asn,
                sum(agg_main.n_count)::bigint as n_count,
                count(DISTINCT ident_id) AS n_ident, count(DISTINCT binary_id) AS n_binary, count(DISTINCT target_port) AS n_target_port,
                max(agg_main.ts_last) as ts_last, min(agg_main.ts_first) as ts_first
            FROM
                agg_main
            JOIN
                dim_ident ON agg_main.ident_id = dim_ident.id
            LEFT JOIN
                att_asn_shadowserver ON agg_main.source_ip = att_asn_shadowserver.ip
            %(where_clause)s
            GROUP BY source_ip, cc, asn
            %(order_by_clause)s"""
    
    def __init__(self):
        super(Stats_Source_IPs, self).__init__()
        self.where_init()
        self.order_by_init()
    
    def filter_ident(self, idents):
        self.where_append('filter_ident', 'ident_id IN (SELECT id from dim_ident WHERE ident IN %s)', [tuple(idents)])
        
    def order_n_count(self, desc=False):
        self.order_by_append('order_n_count', 'n_count', desc)


class Stats_Source_IPs_Summary(QueryBase):
    db = 'hbstats'
    tpl = \
        """SELECT
                count(DISTINCT source_ip) as n_count,
                count(DISTINCT cc) as n_cc, count(DISTINCT asn) as n_asn,
                count(DISTINCT ident_id) AS n_ident, count(DISTINCT binary_id) AS n_binary, count(DISTINCT target_port) AS n_target_port,
                max(agg_main.ts_last) as ts_last, min(agg_main.ts_first) as ts_first,
                sum(n_count)::bigint as n_total
            FROM
                agg_main
            LEFT JOIN
                att_asn_shadowserver ON agg_main.source_ip = att_asn_shadowserver.ip
            %(where_clause)s"""
    
    def __init__(self):
        super(Stats_Source_IPs_Summary, self).__init__()
        self.where_init()
    
    def filter_ident(self, idents):
        self.where_append('filter_ident', 'ident_id IN (SELECT id from dim_ident WHERE ident IN %s)', [tuple(idents)])


class Stats_ASNs(QueryBase):
    db = 'hbstats'
    tpl = \
        """SELECT
                as_name, asn, cc,
                sum(agg_main.n_count)::bigint as n_count,
                count(DISTINCT ident_id) AS n_ident, count(DISTINCT binary_id) AS n_binary, count(DISTINCT source_ip) AS n_source_ip, count(DISTINCT target_port) AS n_target_port,
                max(agg_main.ts_last) as ts_last, min(agg_main.ts_first) as ts_first
            FROM
                agg_main
            JOIN
                dim_ident ON agg_main.ident_id = dim_ident.id
            LEFT JOIN
                att_asn_shadowserver ON agg_main.source_ip = att_asn_shadowserver.ip
            %(where_clause)s
            GROUP BY as_name, asn, cc
            %(order_by_clause)s"""
    
    def __init__(self):
        super(Stats_ASNs, self).__init__()
        self.where_init()
        self.order_by_init()
    
    def filter_ident(self, idents):
        self.where_append('filter_ident', 'ident_id IN (SELECT id from dim_ident WHERE ident IN %s)', [tuple(idents)])
        
    def order_n_count(self, desc=False):
        self.order_by_append('order_n_count', 'n_count', desc)


class Stats_ASNs_Summary(QueryBase):
    db = 'hbstats'
    tpl = \
        """SELECT
                count(DISTINCT asn) as n_count,
                count(DISTINCT cc) as n_cc, count(DISTINCT source_ip) as n_source_ip,
                count(DISTINCT ident_id) AS n_ident, count(DISTINCT binary_id) AS n_binary, count(DISTINCT target_port) AS n_target_port,
                max(agg_main.ts_last) as ts_last, min(agg_main.ts_first) as ts_first,
                sum(n_count)::bigint as n_total
            FROM
                agg_main
            LEFT JOIN
                att_asn_shadowserver ON agg_main.source_ip = att_asn_shadowserver.ip
            %(where_clause)s"""
    
    def __init__(self):
        super(Stats_ASNs_Summary, self).__init__()
        self.where_init()
    
    def filter_ident(self, idents):
        self.where_append('filter_ident', 'ident_id IN (SELECT id from dim_ident WHERE ident IN %s)', [tuple(idents)])


class Stats_Countries(QueryBase):
    db = 'hbstats'
    tpl = \
        """SELECT
                cc,
                sum(agg_main.n_count)::bigint as n_count,
                count(DISTINCT ident_id) AS n_ident, count(DISTINCT binary_id) AS n_binary, count(DISTINCT asn) AS n_asn, count(DISTINCT source_ip) AS n_source_ip, count(DISTINCT target_port) AS n_target_port,
                max(agg_main.ts_last) as ts_last, min(agg_main.ts_first) as ts_first
            FROM
                agg_main
            JOIN
                dim_ident ON agg_main.ident_id = dim_ident.id
            LEFT JOIN
                att_asn_shadowserver ON agg_main.source_ip = att_asn_shadowserver.ip
            %(where_clause)s
            GROUP BY cc
            %(order_by_clause)s"""
    
    def __init__(self):
        super(Stats_Countries, self).__init__()
        self.where_init()
        self.order_by_init()
    
    def filter_ident(self, idents):
        self.where_append('filter_ident', 'ident_id IN (SELECT id from dim_ident WHERE ident IN %s)', [tuple(idents)])
        
    def order_n_count(self, desc=False):
        self.order_by_append('order_n_count', 'n_count', desc)


class Stats_Countries_Summary(QueryBase):
    db = 'hbstats'
    tpl = \
        """SELECT
                count(DISTINCT cc) as n_count,
                count(DISTINCT asn) as n_asn, count(DISTINCT source_ip) as n_source_ip,
                count(DISTINCT ident_id) AS n_ident, count(DISTINCT binary_id) AS n_binary, count(DISTINCT target_port) AS n_target_port,
                max(agg_main.ts_last) as ts_last, min(agg_main.ts_first) as ts_first,
                sum(n_count)::bigint as n_total
            FROM
                agg_main
            LEFT JOIN
                att_asn_shadowserver ON agg_main.source_ip = att_asn_shadowserver.ip
            %(where_clause)s"""
    
    def __init__(self):
        super(Stats_Countries_Summary, self).__init__()
        self.where_init()
    
    def filter_ident(self, idents):
        self.where_append('filter_ident', 'ident_id IN (SELECT id from dim_ident WHERE ident IN %s)', [tuple(idents)])