+__author__ = "Alexander Hanel"
+__contact__ = "alexander<dot>hanel<at>gmail<dot>com"
+python obf_str_replace.py -r "[)|1|I|l|9|\+|]{10,}" -g "*as"
+-r is the regex pattern.
+ Please place the regex in quotes. The regex pattern "[)|1|I|l|9|\+|]{10,}" would match the random-looking
+ public var Illl1III111I11:String;
+ public var lIll1III111I11:String;
+ public var llll1III111I11:String;
+ public var ))ll1III111I11:String;
+-t use this option to test the regex pattern and print the found strings. Output
+ Match on "s 1I111IIIlllIl1 e" at 98:116 in 1I111IIIlllIl1.as
+ Match on "r Il111IIIlllIl1:O" at 157:175 in 1I111IIIlllIl1.as
+ Match on " _lllIII111I11:C" at 201:217 in 1I111IIIlllIl1.as
+ Match on "r 1Ill1III111I11:I" at 241:259 in 1I111IIIlllIl1.as
+ Match on "1:IIll1III111I11;" at 256:274 in 1I111IIIlllIl1.as
+ Match on "r lI111IIIlllIl19:u" at 292:311 in 1I111IIIlllIl1.as
+ Match on "r ll111IIIlllIl1:u" at 338:356 in 1I111IIIlllIl1.as
+-g is the glob file pattern. It is optional. In the example above it will only match files that end with "as".
+ A module that can be used to de-obfuscate code by searching
+ for strings that match a regular expression pattern and replace
+ them with more readable characters.
+ self.regex_pattern = None
+ self.compiled_regex = None
+ self.file_glob_pattern = None
+ self.test_regex = False
+ self.script_name = None
+ self.globbed_files = None
+ "abacus", "iota", "nu", "baryon", "ceres", "dean", "zipf",
+ "mu", "epsilon", "lune", "fermat", "gamma", "carat", "gaudi",
+ "ides", "alpha", "iris", "julia", "tare", "omicron", "pascal",
+ "kappa", "aeon", "umbra", "secant", "lambda", "beta", "lemma",
+ "eta", "mars", "nocebo", "occam", "chaos", "arc", "omega",
+ "xenon", "pareto", "locus", "psi", "rho", "delta", "sigma",
+ "pi", "simson", "tau", "gnomen", "theta", "atlas", "upsilon",
+ "phi", "venus", "ogive", "surd", "xi", "zeta", "sabot", "chi",
+ self.match_set = set([])
+ gets the command line arguments.
+ parser = argparse.ArgumentParser(
+ description='Replaces strings matched by a regular expression with more \
+ distinguishable text/strings.')
+ parser.add_argument('-r', '--regex', type=str, required=True, help="regex pattern for search and replace")
+ parser.add_argument('-g', '--glob', type=str, help="glob file pattern to search in")
+ parser.add_argument('-t', '--test', action='store_true', help="only print found strings that match regex")
+ args = parser.parse_args()
+ self.regex_pattern = args.regex
+ # default is all files in the current working directory
+ self.file_glob_pattern = "*"
+ self.file_glob_pattern = args.glob
+ self.test_regex = args.test
+ self.script_name = parser.prog
+ uses glob to get paths & names of files. Removes script from the list
+ self.globbed_files = glob.glob(self.file_glob_pattern)
+ print " * ERROR: Glob failed, aborting."
+ # remove script from list
+ if self.script_name in self.globbed_files:
+ self.globbed_files.remove(self.script_name)
+ if not self.globbed_files:
+ print " * ERROR: No matches for glob file pattern, aborting."
+ def compile_regex(self):
+ verifies the regex pattern can be compiled
+ self.compiled_regex = re.compile(self.regex_pattern)
+ print " * ERROR: regex pattern compiling failed, aborting."
+ for file_name in self.globbed_files:
+ with open(file_name, 'r') as f:
+ for match in re.finditer(self.compiled_regex, file_data):
+ self.match_set.add(file_data[match.start():match.end()].rstrip())
+ def print_regex_matches(self):
+ for file_name in self.globbed_files:
+ with open(file_name, 'r') as f:
+ for match in re.finditer(self.compiled_regex, file_data):
+ print 'Match on "%s" at %d:%d in %s' % (file_data[s:e].rstrip(), s, e, file_name)
+ constructs unique strings
+ len_set = len(self.word_list)
+ for count,name in enumerate(self.match_set):
+ temp_str = "_" + self.word_list[count % len_set]
+ if temp_str in temp_name_list:
+ temp_str = "_" + self.word_list[count % len_set] + "_" + str(count)
+ if temp_str not in temp_name_list:
+ temp_name_list.append(temp_str)
+ self.names = temp_name_list
+ print " * ERROR: no names created, aborting."
+ def string_to_name(self):
+ map string to regex match
+ if len(self.names) == len(self.match_set):
+ for count, name in enumerate(self.match_set):
+ self.name_mapping[name] = self.names[count]
+ print " * ERROR: logic bug. length of name and matches incorrect, aborting."
+ replace regex match with generated string
+ # names sorted the largest match is done first. If not sub-strings
+ names_sorted = sorted(list(self.match_set), key=len)[::-1]
+ for file_name in self.globbed_files:
+ with open(file_name, 'r') as f:
+ for key in names_sorted:
+ file_data = file_data.replace(key, self.name_mapping[key])
+ with open(file_name + ".d", 'w') as ff:
+ self.print_regex_matches()
+if __name__ == "__main__":