#!/usr/bin/env python
##WingHeader v1
###############################################################################
## File : liveUnPYC.py
## Description: Superclass of UnPyc functionality to allow the decompilation
## : of raw code objects in memory rather than pyc's on disk
## Created_On : Tue Aug 3 20:28:36 2010
## Created_By : Rich Smith
## Modified_On: Mon Dec 6 12:13:39 2010
## Modified_By: Rich Smith
## License : GPLv3 (Docs/LICENSE.txt)
##
## (c) Copyright 2010, Rich Smith all rights reserved.
###############################################################################
##This is wrapper functionality around the core decompiler UnPyc which is written
## by Dmitri Kornev and available from http://unpyc.sourceforge.net
## At the time of writing the latest version was 0.81
#Refs
#http://docs.python.org/reference/datamodel.html
try:
##Current runtime's modules
import sys ##builtin
import marshal ##c module
##Module from the projects lib - corresponding to the projects version number
import inspect
import traceback
import types
except ImportError, err:
print "[-] Problem importing required module: %s"
##From UnPyc
from Decompilers.unpyc import parse
from Decompilers.unpyc import disasm
from Decompilers.unpyc import decompile
[docs]class CoParser:
"""
An equivilent to the Parser class in the parse module where rather than
parsing marshalled code objects we parse a raw code object.
The result being the creation of a PyCode object that UnPYC can
then do it's magic on
"""
[docs] def r_tuple(self, obj):
"""
Construct a PyTuple object from an existing tuple with each member
being set as a valid Py* type as well, so e.g. a PyTuple of
PyStrings
"""
py_tuple = []
for item in obj:
##For each item in the tuple find it's type and create the
## corresponding Py* type
py_tuple.append( self.r_object(item) )
return parse.pyTuple( 0, tuple(py_tuple) )
def r_code(self, obj):
#print "[*] argcount"
argcount = parse.pyLong(0, long(obj.co_argcount), "NA")
#print "[*] nlocals"
nlocals = parse.pyLong(0, long(obj.co_nlocals), "NA")
#print "[*] stacksize"
stacksize = parse.pyLong(0, long(obj.co_stacksize), "NA")
#print "[*] flags"
flags = parse.pyLong(0, long(obj.co_flags), "NA")
#print "[*] code"
code = parse.pyString(0, obj.co_code, "NA")
#print "[*] consts"
consts = self.r_object(obj.co_consts)
#print "[*] names"
names = self.r_object(obj.co_names)
#print "[*] varnames"
varnames = self.r_object(obj.co_varnames)
#print "[*] freevars"
freevars = self.r_object(obj.co_freevars)
#print "[*] cellvars"
cellvars = self.r_object(obj.co_cellvars)
#print "[*] filename"
filename = parse.pyString(0, obj.co_filename, "NA")
#print "[*] name"
name = parse.pyString(0, obj.co_name, "NA")
#print "[*] firstlineno"
firstlineno = parse.pyLong(0, long(obj.co_firstlineno), "NA")
#print "[*] lnotab"
lnotab = parse.pyString(0, obj.co_lnotab, "NA")
return parse.pyCode(0, argcount, nlocals,
stacksize, flags, code,
consts, names,
varnames, freevars,
cellvars, filename,
name, firstlineno,
lnotab, self.verboseDisasm,
False)
def r_object(self, obj):
if type(obj) == types.StringType:
return parse.pyString(0, obj, "NA")
elif type(obj) == types.UnicodeType:
return parse.pyUnicode(0, obj, "NA")
elif type(obj) == types.NoneType:
return parse.pyNone(0)
elif type(obj) == types.IntType:
return parse.pyLong(0, long(obj), "NA")
elif type(obj) == types.FloatType:
return parse.pyFloat(0, float(obj), "NA")
elif type(obj) == types.TupleType:
return self.r_tuple(obj)
elif type(obj) == types.CodeType:
return self.r_code(obj)
#return parse.pyString(0, obj.co_code, "NA")
else:
print "UNKNOWN DATA TYPE: %s"%(type(obj))
def __init__(self, code_obj, verboseDisasm=False):
"""
Pass in an unmarshalled code object for decompilation
"""
self.verboseDisasm = verboseDisasm
#TODO - generalise, make the functions below find the code obect
#self.co = self.r_code(obj.func_code)
self.co = self.r_code(code_obj)
self.verbose = False
#TODO - generators, decorators
[docs]class liveUnPYC(object):
"""
Traverse an object either in memory or from filesystem
and decompile back to source through accessing code objects
rather than .pyc files
Relies on the UnPyc decompiler: http://unpyc.sourceforge.net/
top_level_object - the object that will be treated as parent, anything
not parented by this module will not be traversed
stops you diving down rabbit holes with
'from foo import *'
Leave as None is you want to recurse
"""
def __init__(self, pyretic, top_level_module = None, verbose = True, debug = True):
##The pyREtic instance
self.pyretic = pyretic
##The name of the module we are decompiling - stops us going into
## objects imported to our top level name space e.g. from foo import *
#TODO #__package__ ?
self.top_level_module = top_level_module
#TODO
##Source code derived from our objects
self.source_code = ""
#TODO - do proper deindent
##Current indentation level - increased / decreased by code below
self.indent = 0
self.verbose = verbose
self.debug = debug
def set_top_level_module(self, mod_name):
self.top_level_module = mod_name
[docs] def fs_decompile(self, mod_name):
"""
Take an obfuscated .pyc file and decompile by grabbing the bytecode
and unmarshaling with the obfuscating Python runtimes own marshalling code
The opcode remapping must already have taken place
If you have access to to files/filesystem and the runtime allows you
access to it's marshaller module use this, you will get the best output
"""
##Open obfusated file read in binary data
try:
mod_f = open(mod_name,"rb")
except Exception, err:
print "[-] Problem opening module '%s' : %s"%(mod_name, err)
return ""
try:
obf_bc = mod_f.read()
except Exception, err:
mod_f.close()
print "[-] Problem reading module '%s' : %s"%(mod_name, err)
return ""
##Skip magic & time stampe (first 8 bytes) & unmarshel the series of
## code objects
##IF THE MARSHALLER HAS CHANGED YOU MUST USE THAT MARSHALER
try:
co=marshal.loads(obf_bc[8:])
except Exception, err:
mod_f.close()
print "[-] Problem unmarshaling module '%s' : %s"%(mod_name, err)
return ""
#if self.debug:
# raw_input("Decompiling: %s, Any key to proceed.... "%mod_name)
self.source_code = self._decompile(co, identity = "%s-%s"%(co.co_filename, co.co_name))
return self.source_code
[docs] def mem_decompile(self, obj):
"""
Take an object & interogate it, do both decompilation of code objects
and source code reconstruction from live interactive querying
obj - Python object to interogate for decompilation back to source
"""
try:
self.source_code = self.get_py(obj)
except:
##Top level unexpected exception handler .... decompiler not perfect!
import traceback
return self.source_code
#TODO - ordering of: Imports, constants, classes, functions,
[docs] def get_py(self, obj, indent = 0):
"""
Determine the type of python object that has been passed and
as appropriate call a sub function to access the code object
and decompile that back to source
Called recursively
"""
print "[+] Object: ",obj,type(obj)
exclude_list = ["__builtins__","__class__", "__objclass__"] #__objclass__
##The source code generated from this depth
source_str = ""
##if the object passed in is not from the module we have set as top
## level then skip it - stops us recursing off into from x imports
if hasattr(obj, "__module__") and self.top_level_module:
if obj.__module__ != self.top_level_module:
# TODO detect the from foo import bar 'as' blah
# TODO from imports on functions ---- don't think this is possible
print "[-] NOT recursing into %s module"%(obj.__module__)
return ""
##Now find what type of objects we have & traverse to the code objects
##First look at top level instances - not really decompilation, more
## reconstruction from artefacts and analysis at run time
#TODO seperate import analsysis from this
source_str += self.get_instances(obj, indent)
##Find and decompile all class objects (and their members)
for name, class_obj in inspect.getmembers(obj, inspect.isclass):
if name in exclude_list:
continue
print "[+] Class %s found...."%(name)
print "from file %s"%(class_obj.__module__)
##Get documentation
doc = self.get_doc(class_obj, indent+1)
if self.top_level_module and class_obj.__module__ != self.top_level_module:
print "[-] NOT recursing into %s module"%(class_obj.__module__)
##Must ? be a from foo import bar construct ?
source_str += "from %s import %s\n"%(class_obj.__module__,
class_obj.__name__)
continue
##Get members of the class recursively
content = self.get_class(class_obj, indent + 1)
if not content.strip():
content = "\n%spass\n"%((indent+1)*"\t")
superclasses = class_obj.__bases__
if not superclasses:
source_str += "class %s:\n"%(name)
else:
source_str += "class %s("%(name)
for x in superclasses:
source_str += "%s, "%(x.__name__)
##remove final comma
source_str = source_str[:-2]
source_str += "):\n"
source_str += doc
##Get top class level attributes/instances
source_str += self.get_instances(class_obj, indent+1)
source_str += content + "\n\n"
##Find and decompile all method objects
for name, method in inspect.getmembers(obj, inspect.ismethod):
if name in exclude_list:
continue
print "[+] Method %s found...."%(name)
##Get the code from the method & its arguments
content = self.get_method(method, indent + 1)
source_str += "%sdef %s(%s):\n"%("\t"*indent, name, self.get_args(method))
##Get documentation
source_str += self.get_doc(method, indent+1)
source_str += content + "\n\n"
##Find and decompile all function objects
for name, function in inspect.getmembers(obj, inspect.isfunction):
if name in exclude_list:
continue
print "[+] Function %s found...."%(name)
##Get the code from the function
content = self.get_func(function, indent + 1)
source_str += "%sdef %s(%s):\n"%("\t"*indent, name, self.get_args(function))
##Get documentation
source_str += self.get_doc(function, indent+1)
source_str += content + "\n\n"
return source_str
[docs] def get_instances(self, obj, indent):
"""
Get all top level instances into a usable string form, do not show builtins etc
http://docs.python.org/reference/datamodel.html
"""
exclude_list = ["__builtins__", "__name__", "__file__", "__class__",
"__package__", "__doc__", "__module__"]
#TODO - how to get invocation args ?
# instances from other modules ?
# import 'from' and 'as' - must move to top of file (return as sep)
#
# decorators
# nested functions ?
# De-indent ?
# Lambdas
#globals ?
instances = ""
pad = "\t"*indent
#TODO compensate for __slots__
if not hasattr(obj, "__dict__"):
print "[-] %s has no __dict__"%(obj)
return ""
#http://mypythonnotes.wordpress.com/2008/09/04/__slots__/
for inst, val in obj.__dict__.items():
##Exclude types that we decompile elsewhere or that are things
## that don't show up in the source & reflect the underlying objects
if inst in exclude_list or\
inspect.isfunction(val) or\
inspect.ismethod(val) or\
inspect.isclass(val) or\
inspect.isbuiltin(val) or\
inspect.ismethoddescriptor(val) or\
inspect.isgetsetdescriptor(val) or\
inspect.isdatadescriptor(val) or\
inspect.ismemberdescriptor(val):
continue
##If it's a generator find out which one - only available in > 2.6
elif sys.version_info[0] == 2 and sys.version_info[1] >5 and inspect.isgenerator(val):
instances += "%s%s = %s()\n"%(pad, inst, val.__name__)
elif inspect.ismodule(val):
#MUST be a better way than this ?
module_name = str(val).split(" ")[1].replace("'","")
#
instances += "import %s\n"%(module_name)
##Things that eval true as new/old class here but false for isclass
## above are class invocations
elif self._is_new_style_class(val) :
#MUST be a better way than this ?
instance_of_name = str( type(val) ).split(" ")[1].replace(">","").replace("'","").replace("<","")
parent_names = instance_of_name.split(".")
if obj.__name__ == parent_names[0]:
instance_of_name = '.'.join(parent_names[1:])
instances += "%s%s = %s()\n"%(pad, inst, instance_of_name)
print inst,val,type(val), dir(val)
print "new",self._is_new_style_class(val)
#TODO - put the repr i.e. str(val) in the comments after ?
#raw_input("+=+=+=+= %s%s = %s() #!ARGS UNKNOWN!\n"%(pad, inst, instance_of_name))
elif self._is_old_style_class(val) or type(val) == types.InstanceType:
instance_of_name = str( str(val) ).split(" ")[0].replace(">","").replace("<","")
parent_names = instance_of_name.split(".")
if obj.__name__ == parent_names[0]:
instance_of_name = '.'.join(parent_names[1:])
instances += "%s%s = %s()\n"%(pad, inst, instance_of_name)
print inst,val,type(val), dir(val)
print "old",self._is_old_style_class(val)
#TODO - put the repr i.e. str(val) in the comments after ?
#raw_input("+=+=+=+= %s%s = %s() #!ARGS UNKNOWN!\n"%(pad, inst, instance_of_name))
else:
##Variable a=1 or whatever
print "++++++",val,type(val)
if type(val) == types.StringType:
if "\n" in val or "\r" in val:
val = '"""%s"""'%(val)
elif "'" in val:
val = '"%s"'%(val)
else :
val = "'%s'"%(val)
instances += "%s%s = %s\n"%(pad, inst, val)
return instances+"\n"
[docs] def get_doc(self, obj, indent):
"""
My get doc function - wraps the inspect modules getdoc
but adds in indentation and triple quotes so we can drop into
a code listing
"""
docstring = inspect.getdoc(obj)
if docstring:
pad = '\n%s'%(indent*"\t")
idoc = indent*"\t" + pad.join( docstring.strip().split("\n") )
return '%s"""\n%s\n%s"""\n'%(indent*"\t", idoc, indent*"\t")
else:
return ""
[docs] def get_args(self, obj):
"""
get the argument spec for a function / method
"""
print "GETTING ARGS FOR",obj,type(obj)
arg_spec = inspect.getargspec(obj)
if arg_spec[3]:
args_with_defaults = arg_spec[0][-len(arg_spec[3]):]
default_pairs = zip(args_with_defaults, arg_spec[3])
else:
default_pairs = []
arg_str = ""
if len(default_pairs) > 0:
arg_str += ', '.join(arg_spec[0][:len(default_pairs)-1])
#TODO respec to a .join logic
for def_pair in default_pairs:
arg_str += ", %s=%s"%(def_pair[0], def_pair[1])
else:
arg_str += ', '.join(arg_spec[0])
if arg_spec[1]:
print "2",arg_spec[1]
arg_str += ", *%s"%(arg_spec[1])
if arg_spec[2]:
print "3",arg_spec[2]
arg_str += ", **%s"%(arg_spec[2])
return arg_str
[docs] def get_class(self, obj, indent):
"""
Breaks a class into it's consituents: variables, functions etc
"""
#TODO get SUPERCLASSES, decorators ?
##Call back into get_py to get the components of the class object,
## classes in this sense are just a non top level container
source = self.get_py(obj, indent)
print "CLASS",obj.__name__
#raw_input()
return source
[docs] def get_method(self, obj, indent):
"""
Get access to the function object in a method
"""
m_identity = "%s.%s"%(obj.im_class, obj.__name__)
print "METHOD",m_identity
source = self.get_func(obj.im_func, indent, m_identity)
if self.verbose:
print "[+]Method code:%s"%(source)
return source
[docs] def get_func(self, obj, indent, identity = ""):
"""
Decompiles an instantiated Python function object from
"""
if not identity:
identity = obj.__name__
print "FUNCTION:",identity
source_code = self._decompile(obj.func_code, identity)
if source_code:
source_code = source_code.strip()
source_code = source_code.strip("\n")
source_code = source_code.strip("\r")
pad = '\n%s'%(indent*"\t")
return indent*"\t" + pad.join( source_code.split("\n") )
else:
return ""
[docs] def get_generator(self, obj, indent):
"""
Decompiles a generator object
"""
source_code = self._decompile(obj.gi_code)
if source_code:
source_code = source_code.strip()
source_code = source_code.strip("\n")
source_code = source_code.strip("\r")
pad = '\n%s'%(indent*"\t")
return indent*"\t" + pad.join( source_code.split("\n") )
else:
return ""
def _decompile(self, code_obj, identity, verbose = False):
"""
Do the in memory decompilation
"""
print "[=] Decompiling %s"%(identity)
try:
#parser = parse.Parser(f_code, raw=True)
print "[+] Parsing code object of %s"%(code_obj)
parser = CoParser(code_obj, verboseDisasm=verbose)
print "[+] Disassembling.... "
optimizingDisassembler = disasm.Disassembler(parser.co,
optimizeJumps=True)
print "[+] Decompiling.... "
decompiler = decompile.Decompiler(optimizingDisassembler)
#TODO - try and get code that was decompiled before error
except (parse.ParseErrorException,
parse.IOErrorException,
parse.BadFirstObjectException), err:
print err
return ""
except:
print '>>> Unexpected exception:'
traceback.print_exc()
return ""
sc = decompiler.decompile()
return sc
def _is_new_style_class(self, cls):
"""
Check to see if this is a new style class
"""
return hasattr(cls, '__class__') \
and ('__dict__' in dir(cls) \
or hasattr(cls, '__slots__'))
def _is_old_style_class(self, cls):
"""
Check to see if this is a old style class
"""
return hasattr(cls, '__class__') \
and type(cls) == types.InstanceType