""" reScan - v0.05 2008-06-06 Philippe Lagadec to find IP addresses and other interesting patterns in cleartext in a file. This simple tool gives a lot of false positives: its only purpose is to save time when looking for patterns, not to provide accurate detection. ;-) For more info and updates: http://www.decalage.info/rescan usage: reScan """ # LICENSE: CeCILL v2 (GPL-compatible) # see http://www.cecill.info/licences/Licence_CeCILL_V2-en.html # CHANGELOG: # - 2007-07-11 v0.01 PL: - 1st version # - 2007-07-30 v0.02 PL: - added list of patterns # - 2007-07-31 v0.03 PL: - added patterns # - added hexadecimal dump # - 2007-08-09 v0.04 PL: - improved some regexs, added Petite detection # - 2008-06-06 v0.05 PL: - escape non-printable characters with '\xNN' when # displaying matches # - optional custom pattern list in reScan_custom.py # - optional call to magic.py to guess filetype # TODO: # + improve patterns to avoid some false positives: maybe use pefile or magic.py ? # + improve regex list with http://regexlib.com (add domain names, e-mail addresses, ...) # - extract list of common strings found in EXE files # - add headers from other filetypes (Office, ...) # - add regex for e-mail addresses, URLS, ... # - HTML report with color highlighting # - GUI ? import sys, re, os, os.path # try to import magic.py - see http://www.jsnp.net/code/magic.py try: import magic MAGIC = True except: MAGIC = False try: f = file(sys.argv[1], 'rb') except: sys.exit(__doc__) data = f.read() f.close() # list of regular expressions for patterns FIND_REGEX = { # NOTE: '(?i)' makes a regex case-insensitive "IP addresses": r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", "EXE MZ headers": r"MZ|ZM", "EXE PE headers": r"PE", "EXE PE DOS message": r"(?i)This program cannot be run in DOS mode", #TODO: quicker way to have a list of extensions (bat, cmd, vbs, js, ...) ? ".EXE/.COM/.VBS/.JS/.BAT/.CMD/.DLL filename": r"(?i)\.EXE|\.COM|\.VBS|\.JS|\.VBE|\.JSE|\.BAT|\.CMD|\.DLL", "EXE: UPX header": r"(?i)UPX", "EXE: .text/.data/.rdata section": r"(?i)\.text|\.data|\.rdata", "EXE: packed with Petite": r"(?i)\.petite", "EXE: interesting Win32 function names": r"(?i)WriteFile|IsDebuggerPresent|RegSetValue|CreateRemoteThread", "EXE: interesting WinSock function names": r"(?i)WS2_32\.dll|WSASocket|WSASend|WSARecv", "EXE: possibly compiled with Microsoft Visual C++": r"(?i)Microsoft Visual C\+\+", "Interesting registry keys": r"(?i)CurrentVersion\\Run|UserInit", "Interesting file names": r"(?i)\\drivers\\etc\\hosts|cmd\.exe|\\Start Menu\\Programs\\Startup", "Interesting keywords": r"(?i)password|administrator|smtp|pop|http|ftp|ssh|icq|backdoor|vmware", "NOP instructions (possible shellcode)": r"\x90{4,}", # this regex matches 4 NOPs or more "Possible OLE2 header (D0CF)": r"\xD0\xCF\x11\xE0", "VBA macros": r"(?i)VBA", } # try to import reScan_custom.py to add custom FIND_REGEX (optional): try: import reScan_custom for pattern in reScan_custom.FIND_REGEX: FIND_REGEX[pattern] = reScan_custom.FIND_REGEX[pattern] except: pass #------------------------------------------------------------------------------ # HEXDUMP from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/142812 FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)]) def hexdump(src, length=8): N=0; result='' while src: s,src = src[:length],src[length:] hexa = ' '.join(["%02X"%ord(x) for x in s]) s = s.translate(FILTER) result += "%04X %-*s %s\n" % (N, length*3, hexa, s) N+=length return result def hexdump2(src, length=8): result=[] for i in xrange(0, len(src), length): s = src[i:i+length] hexa = ' '.join(["%02X"%ord(x) for x in s]) printable = s.translate(FILTER) result.append("%04X %-*s %s\n" % (i, length*3, hexa, printable)) return ''.join(result) # my improved hexdump, to add a start index: def hexdump3(src, length=8, startindex=0): """ Returns a hexadecimal dump of a binary string. length: number of bytes per row. startindex: index of 1st byte. """ result=[] for i in xrange(0, len(src), length): s = src[i:i+length] hexa = ' '.join(["%02X"%ord(x) for x in s]) printable = s.translate(FILTER) result.append("%04X %-*s %s\n" % (i+startindex, length*3, hexa, printable)) return ''.join(result) #------------------------------------------------------------------------------ if MAGIC: print "Filetype according to magic: %s\n" % magic.whatis(data) for item in FIND_REGEX: r = re.compile(FIND_REGEX[item]) matches = [] for m in r.finditer(data): matches.append(m) if len(matches)>0: print "-"*79 print "%s:" % item for m in matches: print "at %08X: %s" % (m.start(), repr(m.group())) # 5 lines of hexadecimal dump around the pattern: 2 lines = 32 bytes start = max(m.start()-32, 0) & 0xFFFFFFF0 end = min(m.end()+32+15, len(data)) & 0xFFFFFFF0 length = end-start #print start, end, length print hexdump3(data[start:end], length=16, startindex=start) print "" ## if item == "EXE MZ headers" and MAGIC: ## # Check if it's really a EXE header ## print "Magic: %s\n" % magic.whatis(data[m.start():])