# Alexander Hanel ObfStrReplacer
#
# Created by Alexander Hanel (last-modified date missing from this copy).
__author__ = "Alexander Hanel"
__version__ = "1.0"
__contact__ = "alexander<dot>hanel<at>gmail<dot>com"
__date__ = "2015/06"


"""
Command line example

python obf_str_replace.py -r "[)|1|I|l|9|\+|]{10,}" -g "*as"

-r is the regex pattern.
    Please place the regex in quotes. The regex pattern "[)|1|I|l|9|\+|]{10,}" would match the random-looking
    strings seen below.
        public var Illl1III111I11:String;
        public var lIll1III111I11:String;
        public var llll1III111I11:String;
        public var ))ll1III111I11:String;
    The output is saved to a file named after the original file with ".d" appended as an extension. For example,
    the output for bad.as would be bad.as.d

-t use this option to test the regex pattern and only print the found strings. Example output:
    Match on "s 1I111IIIlllIl1 e" at 98:116 in 1I111IIIlllIl1.as
    Match on "r Il111IIIlllIl1:O" at 157:175 in 1I111IIIlllIl1.as
    Match on " _lllIII111I11:C" at 201:217 in 1I111IIIlllIl1.as
    Match on "r 1Ill1III111I11:I" at 241:259 in 1I111IIIlllIl1.as
    Match on "1:IIll1III111I11;" at 256:274 in 1I111IIIlllIl1.as
    Match on "r lI111IIIlllIl19:u" at 292:311 in 1I111IIIlllIl1.as
    Match on "r ll111IIIlllIl1:u" at 338:356 in 1I111IIIlllIl1.as
-g is the glob file pattern. It is optional. If omitted, all files in the current working directory are
    searched. In the example above it will only match files that end with "as"

"""


import sys
import re
import glob
import os
import argparse

class ObfStrReplacer(object):
    """
    A module that can be used to de-obfuscate code by searching
    for strings that match a regular expression pattern and replacing
    them with more readable generated names.

    Typical use is through run(): it parses the command line, collects the
    files selected by the glob pattern and then either prints the regex
    matches (test mode) or writes a de-obfuscated copy of every file next
    to the original, using the original name plus a ".d" suffix.

    Fatal problems print an " * ERROR: ..." line and end the process with
    exit status 1 via sys.exit(), so buffered output is flushed first.
    """

    def __init__(self):
        # raw pattern text given with -r/--regex
        self.regex_pattern = None
        # compiled form of regex_pattern, set by compile_regex()
        self.compiled_regex = None
        # glob pattern given with -g/--glob ("*" when omitted)
        self.file_glob_pattern = None
        # True when -t/--test was given: only print matches
        self.test_regex = False
        # program name reported by argparse; excluded from the file list
        self.script_name = None
        # paths selected by get_files()
        self.globbed_files = None
        # pool of readable words the replacement names are built from
        self.word_list = [
                "abacus",  "iota",  "nu", "baryon", "ceres", "dean", "zipf",
                "mu", "epsilon", "lune", "fermat", "gamma", "carat", "gaudi",
                "ides", "alpha", "iris", "julia", "tare", "omicron", "pascal",
                "kappa", "aeon", "umbra", "secant", "lambda", "beta", "lemma",
                "eta", "mars", "nocebo", "occam", "chaos", "arc", "omega",
                "xenon", "pareto", "locus", "psi", "rho", "delta", "sigma",
                "pi", "simson", "tau", "gnomen", "theta", "atlas", "upsilon",
                "phi", "venus", "ogive", "surd", "xi", "zeta", "sabot", "chi",
                "kite"]
        # distinct (right-stripped, non-empty) strings matched by the regex
        self.match_set = set()
        # generated replacement names, one per entry of match_set
        self.names = []
        # matched string -> replacement name
        self.name_mapping = {}

    def get_args(self):
        """
        gets the command line arguments and stores them on the instance.

        Sets regex_pattern, file_glob_pattern (defaults to "*"),
        test_regex and script_name.
        """
        parser = argparse.ArgumentParser(
            description='Replaces strings matched by a regular expression with more '
                        'distinguishable text/strings.')
        parser.add_argument('-r', '--regex', type=str, required=True, help="regex pattern for search and replace")
        parser.add_argument('-g', '--glob', type=str, help="glob file pattern to search in")
        parser.add_argument('-t', '--test', action='store_true', help="only print found strings that match regex")
        args = parser.parse_args()
        self.regex_pattern = args.regex
        # default is all files in the current working directory
        self.file_glob_pattern = "*" if args.glob is None else args.glob
        self.test_regex = args.test
        self.script_name = parser.prog

    def get_files(self):
        """
        uses glob to get paths & names of files. Removes directories and
        this script from the list.

        Exits with status 1 when globbing fails or nothing is left to
        process.
        """
        try:
            found = glob.glob(self.file_glob_pattern)
        except Exception:
            print(" * ERROR: Glob failed, aborting.")
            sys.exit(1)
        # directories match patterns such as "*" but cannot be read as files
        self.globbed_files = [path for path in found if not os.path.isdir(path)]
        # remove script from list
        if self.script_name in self.globbed_files:
            self.globbed_files.remove(self.script_name)
        # verify glob results
        if not self.globbed_files:
            print(" * ERROR: No matches for glob file pattern, aborting.")
            sys.exit(1)

    def compile_regex(self):
        """
        verifies the regex pattern can be compiled and stores the compiled
        pattern in compiled_regex. Exits with status 1 on failure.
        """
        try:
            self.compiled_regex = re.compile(self.regex_pattern)
        except Exception:
            # any compile failure (bad syntax, wrong type, ...) is fatal
            print(" * ERROR: regex pattern compiling failed, aborting.")
            sys.exit(1)

    def get_matches(self):
        """
        get all matches: adds every distinct regex match found in the
        globbed files to match_set, with trailing whitespace stripped.
        """
        for file_name in self.globbed_files:
            with open(file_name, 'r') as f:
                file_data = f.read()
            for match in self.compiled_regex.finditer(file_data):
                found = match.group(0).rstrip()
                # an empty key would make str.replace() insert the new name
                # between every character of the file
                if found:
                    self.match_set.add(found)

    def print_regex_matches(self):
        """
        prints all matches together with up to two characters of context
        on each side and the offsets of the printed slice.
        """
        for file_name in self.globbed_files:
            with open(file_name, 'r') as f:
                file_data = f.read()
            data_len = len(file_data)
            for match in self.compiled_regex.finditer(file_data):
                # clamp the context window; a negative start would wrap
                # around to the end of the file when slicing
                s = max(0, match.start() - 2)
                e = min(data_len, match.end() + 2)
                print('Match on "%s" at %d:%d in %s' % (file_data[s:e].rstrip(), s, e, file_name))

    def create_str(self):
        """
        constructs unique strings: one replacement name per entry of
        match_set, stored in names.

        Names are "_<word>" while the word list lasts; after that the
        running index is appended ("_<word>_<index>") to keep them unique.
        Exits with status 1 when no name was created (match_set is empty).
        """
        word_count = len(self.word_list)
        created = []
        used = set()  # same content as `created`, for O(1) membership tests
        for index in range(len(self.match_set)):
            candidate = "_" + self.word_list[index % word_count]
            suffix = index
            while candidate in used:
                candidate = "_" + self.word_list[suffix % word_count] + "_" + str(suffix)
                suffix += 1
            created.append(candidate)
            used.add(candidate)
        self.names = created
        # only happens when the regex matched nothing
        if not self.names:
            print(" * ERROR: no names created, aborting.")
            sys.exit(1)

    def string_to_name(self):
        """
        map string to regex match: fills name_mapping with
        matched string -> generated name.

        The matches are paired with the names in sorted order so the same
        input always produces the same mapping. Exits with status 1 when
        the number of names and matches differ.
        """
        if len(self.names) != len(self.match_set):
            print(" * ERROR: logic bug. length of name and matches incorrect, aborting.")
            sys.exit(1)
        for matched, new_name in zip(sorted(self.match_set), self.names):
            self.name_mapping[matched] = new_name

    def replace_str(self):
        """
        replace regex match with generated string and write the result of
        every file to "<file name>.d".
        """
        # the longest match is replaced first. If not, sub-strings of a
        # longer match would be replaced inside it and break it up.
        longest_first = sorted(self.match_set, key=lambda item: (-len(item), item))
        for file_name in self.globbed_files:
            with open(file_name, 'r') as f:
                file_data = f.read()
            for key in longest_first:
                file_data = file_data.replace(key, self.name_mapping[key])
            with open(file_name + ".d", 'w') as ff:
                ff.write(file_data)

    def run(self):
        """
        command line entry point: parse arguments, collect files, then
        either print the matches (-t) or write the de-obfuscated copies.
        """
        self.get_args()
        self.get_files()
        self.compile_regex()
        if self.test_regex:
            self.print_regex_matches()
            return
        self.get_matches()
        self.create_str()
        self.string_to_name()
        self.replace_str()

# Script entry point: build the replacer and let it drive the whole
# command-line workflow.
if __name__ == "__main__":
    replacer = ObfStrReplacer()
    replacer.run()

# Comments (0)