miasm/core/objc.py
"""
C helper for Miasm:
* raw C to Miasm expression
* Miasm expression to raw C
* Miasm expression to C type
"""
from builtins import zip
from builtins import int as int_types
import warnings
from pycparser import c_parser, c_ast
from functools import total_ordering
from miasm.core.utils import cmp_elts
from miasm.expression.expression_reduce import ExprReducer
from miasm.expression.expression import ExprInt, ExprId, ExprOp, ExprMem
from miasm.arch.x86.arch import is_op_segm
from miasm.core.ctypesmngr import CTypeUnion, CTypeStruct, CTypeId, CTypePtr,\
CTypeArray, CTypeOp, CTypeSizeof, CTypeEnum, CTypeFunc, CTypeEllipsis
PADDING_TYPE_NAME = "___padding___"
def missing_definition(objtype):
warnings.warn("Null size type: Missing definition? %r" % objtype)
"""
Display C type
source: "The C Programming Language - 2nd Edition - Ritchie Kernighan.pdf"
p. 124
"""
def objc_to_str(objc, result=None):
if result is None:
result = ""
while True:
if isinstance(objc, ObjCArray):
result += "[%d]" % objc.elems
objc = objc.objtype
elif isinstance(objc, ObjCPtr):
if not result and isinstance(objc.objtype, ObjCFunc):
result = objc.objtype.name
if isinstance(objc.objtype, (ObjCPtr, ObjCDecl, ObjCStruct, ObjCUnion)):
result = "*%s" % result
else:
result = "(*%s)" % result
objc = objc.objtype
elif isinstance(objc, (ObjCDecl, ObjCStruct, ObjCUnion)):
if result:
result = "%s %s" % (objc, result)
else:
result = str(objc)
break
elif isinstance(objc, ObjCFunc):
args_str = []
for name, arg in objc.args:
args_str.append(objc_to_str(arg, name))
args = ", ".join(args_str)
result += "(%s)" % args
objc = objc.type_ret
elif isinstance(objc, ObjCInt):
return "int"
elif isinstance(objc, ObjCEllipsis):
return "..."
else:
raise TypeError("Unknown c type")
return result
@total_ordering
class ObjC(object):
"""Generic ObjC"""
def __init__(self, align, size):
self._align = align
self._size = size
@property
def align(self):
"""Alignment (in bytes) of the C object"""
return self._align
@property
def size(self):
"""Size (in bytes) of the C object"""
return self._size
def cmp_base(self, other):
assert self.__class__ in OBJC_PRIO
assert other.__class__ in OBJC_PRIO
if OBJC_PRIO[self.__class__] != OBJC_PRIO[other.__class__]:
return cmp_elts(
OBJC_PRIO[self.__class__],
OBJC_PRIO[other.__class__]
)
if self.align != other.align:
return cmp_elts(self.align, other.align)
return cmp_elts(self.size, other.size)
def __hash__(self):
return hash((self.__class__, self._align, self._size))
def __str__(self):
return objc_to_str(self)
def __eq__(self, other):
return self.cmp_base(other) == 0
def __ne__(self, other):
# required Python 2.7.14
return not self == other
def __lt__(self, other):
return self.cmp_base(other) < 0
@total_ordering
class ObjCDecl(ObjC):
"""C Declaration identified"""
def __init__(self, name, align, size):
super(ObjCDecl, self).__init__(align, size)
self._name = name
name = property(lambda self: self._name)
def __hash__(self):
return hash((super(ObjCDecl, self).__hash__(), self._name))
def __repr__(self):
return '<%s %s>' % (self.__class__.__name__, self.name)
def __str__(self):
return str(self.name)
def __eq__(self, other):
ret = self.cmp_base(other)
if ret:
return False
return self.name == other.name
def __lt__(self, other):
ret = self.cmp_base(other)
if ret:
if ret < 0:
return True
return False
return self.name < other.name
class ObjCInt(ObjC):
"""C integer"""
def __init__(self):
super(ObjCInt, self).__init__(None, 0)
def __str__(self):
return 'int'
@total_ordering
class ObjCPtr(ObjC):
"""C Pointer"""
def __init__(self, objtype, void_p_align, void_p_size):
"""Init ObjCPtr
@objtype: pointer target ObjC
@void_p_align: pointer alignment (in bytes)
@void_p_size: pointer size (in bytes)
"""
super(ObjCPtr, self).__init__(void_p_align, void_p_size)
self._lock = False
self.objtype = objtype
if objtype is None:
self._lock = False
def get_objtype(self):
assert self._lock is True
return self._objtype
def set_objtype(self, objtype):
assert self._lock is False
self._lock = True
self._objtype = objtype
objtype = property(get_objtype, set_objtype)
def __hash__(self):
# Don't try to hash on an unlocked Ptr (still mutable)
assert self._lock
return hash((super(ObjCPtr, self).__hash__(), hash(self._objtype)))
def __repr__(self):
return '<%s %r>' % (
self.__class__.__name__,
self.objtype.__class__
)
def __eq__(self, other):
ret = self.cmp_base(other)
if ret:
return False
return self.objtype == other.objtype
def __lt__(self, other):
ret = self.cmp_base(other)
if ret:
if ret < 0:
return True
return False
return self.objtype < other.objtype
@total_ordering
class ObjCArray(ObjC):
"""C array (test[XX])"""
def __init__(self, objtype, elems):
"""Init ObjCArray
@objtype: pointer target ObjC
@elems: number of elements in the array
"""
super(ObjCArray, self).__init__(objtype.align, elems * objtype.size)
self._elems = elems
self._objtype = objtype
objtype = property(lambda self: self._objtype)
elems = property(lambda self: self._elems)
def __hash__(self):
return hash((super(ObjCArray, self).__hash__(), self._elems, hash(self._objtype)))
def __repr__(self):
return '<%r[%d]>' % (self.objtype, self.elems)
def __eq__(self, other):
ret = self.cmp_base(other)
if ret:
return False
if self.objtype != other.objtype:
return False
return self.elems == other.elems
def __lt__(self, other):
ret = self.cmp_base(other)
if ret > 0:
return False
if self.objtype > other.objtype:
return False
return self.elems < other.elems
@total_ordering
class ObjCStruct(ObjC):
"""C object for structures"""
def __init__(self, name, align, size, fields):
super(ObjCStruct, self).__init__(align, size)
self._name = name
self._fields = tuple(fields)
name = property(lambda self: self._name)
fields = property(lambda self: self._fields)
def __hash__(self):
return hash((super(ObjCStruct, self).__hash__(), self._name))
def __repr__(self):
out = []
out.append("Struct %s: (align: %d)" % (self.name, self.align))
out.append(" off sz name")
for name, objtype, offset, size in self.fields:
out.append(" 0x%-3x %-3d %-10s %r" %
(offset, size, name, objtype.__class__.__name__))
return '\n'.join(out)
def __str__(self):
return 'struct %s' % (self.name)
def __eq__(self, other):
ret = self.cmp_base(other)
if ret:
return False
return self.name == other.name
def __lt__(self, other):
ret = self.cmp_base(other)
if ret:
if ret < 0:
return True
return False
return self.name < other.name
@total_ordering
class ObjCUnion(ObjC):
"""C object for unions"""
def __init__(self, name, align, size, fields):
super(ObjCUnion, self).__init__(align, size)
self._name = name
self._fields = tuple(fields)
name = property(lambda self: self._name)
fields = property(lambda self: self._fields)
def __hash__(self):
return hash((super(ObjCUnion, self).__hash__(), self._name))
def __repr__(self):
out = []
out.append("Union %s: (align: %d)" % (self.name, self.align))
out.append(" off sz name")
for name, objtype, offset, size in self.fields:
out.append(" 0x%-3x %-3d %-10s %r" %
(offset, size, name, objtype))
return '\n'.join(out)
def __str__(self):
return 'union %s' % (self.name)
def __eq__(self, other):
ret = self.cmp_base(other)
if ret:
return False
return self.name == other.name
def __lt__(self, other):
ret = self.cmp_base(other)
if ret:
if ret < 0:
return True
return False
return self.name < other.name
class ObjCEllipsis(ObjC):
"""C integer"""
def __init__(self):
super(ObjCEllipsis, self).__init__(None, None)
align = property(lambda self: self._align)
size = property(lambda self: self._size)
@total_ordering
class ObjCFunc(ObjC):
"""C object for Functions"""
def __init__(self, name, abi, type_ret, args, void_p_align, void_p_size):
super(ObjCFunc, self).__init__(void_p_align, void_p_size)
self._name = name
self._abi = abi
self._type_ret = type_ret
self._args = tuple(args)
args = property(lambda self: self._args)
type_ret = property(lambda self: self._type_ret)
abi = property(lambda self: self._abi)
name = property(lambda self: self._name)
def __hash__(self):
return hash((super(ObjCFunc, self).__hash__(), hash(self._args), self._name))
def __repr__(self):
return "<%s %s>" % (
self.__class__.__name__,
self.name
)
def __str__(self):
out = []
out.append("Function (%s) %s: (align: %d)" % (self.abi, self.name, self.align))
out.append(" ret: %s" % (str(self.type_ret)))
out.append(" Args:")
for name, arg in self.args:
out.append(" %s %s" % (name, arg))
return '\n'.join(out)
def __eq__(self, other):
ret = self.cmp_base(other)
if ret:
return False
return self.name == other.name
def __lt__(self, other):
ret = self.cmp_base(other)
if ret:
if ret < 0:
return True
return False
return self.name < other.name
OBJC_PRIO = {
ObjC: 0,
ObjCDecl:1,
ObjCInt:2,
ObjCPtr:3,
ObjCArray:4,
ObjCStruct:5,
ObjCUnion:6,
ObjCEllipsis:7,
ObjCFunc:8,
}
def access_simplifier(expr):
"""Expression visitor to simplify a C access represented in Miasm
@expr: Miasm expression representing the C access
Example:
IN: (In c: ['*(&((&((*(ptr_Test)).a))[0]))'])
[ExprOp('deref', ExprOp('addr', ExprOp('[]', ExprOp('addr',
ExprOp('field', ExprOp('deref', ExprId('ptr_Test', 64)),
ExprId('a', 64))), ExprInt(0x0, 64))))]
OUT: (In c: ['(ptr_Test)->a'])
[ExprOp('->', ExprId('ptr_Test', 64), ExprId('a', 64))]
"""
if (expr.is_op("addr") and
expr.args[0].is_op("[]") and
expr.args[0].args[1] == ExprInt(0, 64)):
return expr.args[0].args[0]
elif (expr.is_op("[]") and
expr.args[0].is_op("addr") and
expr.args[1] == ExprInt(0, 64)):
return expr.args[0].args[0]
elif (expr.is_op("addr") and
expr.args[0].is_op("deref")):
return expr.args[0].args[0]
elif (expr.is_op("deref") and
expr.args[0].is_op("addr")):
return expr.args[0].args[0]
elif (expr.is_op("field") and
expr.args[0].is_op("deref")):
return ExprOp("->", expr.args[0].args[0], expr.args[1])
return expr
def access_str(expr):
"""Return the C string of a C access represented in Miasm
@expr: Miasm expression representing the C access
In:
ExprOp('->', ExprId('ptr_Test', 64), ExprId('a', 64))
OUT:
'(ptr_Test)->a'
"""
if isinstance(expr, ExprId):
out = str(expr)
elif isinstance(expr, ExprInt):
out = str(int(expr))
elif expr.is_op("addr"):
out = "&(%s)" % access_str(expr.args[0])
elif expr.is_op("deref"):
out = "*(%s)" % access_str(expr.args[0])
elif expr.is_op("field"):
out = "(%s).%s" % (access_str(expr.args[0]), access_str(expr.args[1]))
elif expr.is_op("->"):
out = "(%s)->%s" % (access_str(expr.args[0]), access_str(expr.args[1]))
elif expr.is_op("[]"):
out = "(%s)[%s]" % (access_str(expr.args[0]), access_str(expr.args[1]))
else:
raise RuntimeError("unknown op")
return out
class CGen(object):
"""Generic object to represent a C expression"""
default_size = 64
def __init__(self, ctype):
self._ctype = ctype
@property
def ctype(self):
"""Type (ObjC instance) of the current object"""
return self._ctype
def __hash__(self):
return hash(self.__class__)
def __eq__(self, other):
return (self.__class__ == other.__class__ and
self._ctype == other.ctype)
def __ne__(self, other):
return not self.__eq__(other)
def to_c(self):
"""Generate corresponding C"""
raise NotImplementedError("Virtual")
def to_expr(self):
"""Generate Miasm expression representing the C access"""
raise NotImplementedError("Virtual")
class CGenInt(CGen):
"""Int C object"""
def __init__(self, integer):
assert isinstance(integer, int_types)
self._integer = integer
super(CGenInt, self).__init__(ObjCInt())
@property
def integer(self):
"""Value of the object"""
return self._integer
def __hash__(self):
return hash((super(CGenInt, self).__hash__(), self._integer))
def __eq__(self, other):
return (super(CGenInt, self).__eq__(other) and
self._integer == other.integer)
def __ne__(self, other):
return not self.__eq__(other)
def to_c(self):
"""Generate corresponding C"""
return "0x%X" % self.integer
def __repr__(self):
return "<%s %s>" % (self.__class__.__name__,
self.integer)
def to_expr(self):
"""Generate Miasm expression representing the C access"""
return ExprInt(self.integer, self.default_size)
class CGenId(CGen):
"""ID of a C object"""
def __init__(self, ctype, name):
self._name = name
assert isinstance(name, str)
super(CGenId, self).__init__(ctype)
@property
def name(self):
"""Name of the Id"""
return self._name
def __hash__(self):
return hash((super(CGenId, self).__hash__(), self._name))
def __eq__(self, other):
return (super(CGenId, self).__eq__(other) and
self._name == other.name)
def __repr__(self):
return "<%s %s>" % (self.__class__.__name__,
self.name)
def to_c(self):
"""Generate corresponding C"""
return "%s" % (self.name)
def to_expr(self):
"""Generate Miasm expression representing the C access"""
return ExprId(self.name, self.default_size)
class CGenField(CGen):
"""
Field of a C struct/union
IN:
- struct (not ptr struct)
- field name
OUT:
- input type of the field => output type
- X[] => X[]
- X => X*
"""
def __init__(self, struct, field, fieldtype, void_p_align, void_p_size):
self._struct = struct
self._field = field
assert isinstance(field, str)
if isinstance(fieldtype, ObjCArray):
ctype = fieldtype
else:
ctype = ObjCPtr(fieldtype, void_p_align, void_p_size)
super(CGenField, self).__init__(ctype)
@property
def struct(self):
"""Structure containing the field"""
return self._struct
@property
def field(self):
"""Field name"""
return self._field
def __hash__(self):
return hash((super(CGenField, self).__hash__(), self._struct, self._field))
def __eq__(self, other):
return (super(CGenField, self).__eq__(other) and
self._struct == other.struct and
self._field == other.field)
def to_c(self):
"""Generate corresponding C"""
if isinstance(self.ctype, ObjCArray):
return "(%s).%s" % (self.struct.to_c(), self.field)
elif isinstance(self.ctype, ObjCPtr):
return "&((%s).%s)" % (self.struct.to_c(), self.field)
else:
raise RuntimeError("Strange case")
def __repr__(self):
return "<%s %s %s>" % (self.__class__.__name__,
self.struct,
self.field)
def to_expr(self):
"""Generate Miasm expression representing the C access"""
if isinstance(self.ctype, ObjCArray):
return ExprOp("field",
self.struct.to_expr(),
ExprId(self.field, self.default_size))
elif isinstance(self.ctype, ObjCPtr):
return ExprOp("addr",
ExprOp("field",
self.struct.to_expr(),
ExprId(self.field, self.default_size)))
else:
raise RuntimeError("Strange case")
class CGenArray(CGen):
"""
C Array
This object does *not* deref the source, it only do object casting.
IN:
- obj
OUT:
- X* => X*
- ..[][] => ..[]
- X[] => X*
"""
def __init__(self, base, elems, void_p_align, void_p_size):
ctype = base.ctype
if isinstance(ctype, ObjCPtr):
pass
elif isinstance(ctype, ObjCArray) and isinstance(ctype.objtype, ObjCArray):
ctype = ctype.objtype
elif isinstance(ctype, ObjCArray):
ctype = ObjCPtr(ctype.objtype, void_p_align, void_p_size)
else:
raise TypeError("Strange case")
self._base = base
self._elems = elems
super(CGenArray, self).__init__(ctype)
@property
def base(self):
"""Base object supporting the array"""
return self._base
@property
def elems(self):
"""Number of elements in the array"""
return self._elems
def __hash__(self):
return hash((super(CGenArray, self).__hash__(), self._base, self._elems))
def __eq__(self, other):
return (super(CGenField, self).__eq__(other) and
self._base == other.base and
self._elems == other.elems)
def __repr__(self):
return "<%s %s>" % (self.__class__.__name__,
self.base)
def to_c(self):
"""Generate corresponding C"""
if isinstance(self.ctype, ObjCPtr):
out_str = "&((%s)[%d])" % (self.base.to_c(), self.elems)
elif isinstance(self.ctype, ObjCArray):
out_str = "(%s)[%d]" % (self.base.to_c(), self.elems)
else:
raise RuntimeError("Strange case")
return out_str
def to_expr(self):
"""Generate Miasm expression representing the C access"""
if isinstance(self.ctype, ObjCPtr):
return ExprOp("addr",
ExprOp("[]",
self.base.to_expr(),
ExprInt(self.elems, self.default_size)))
elif isinstance(self.ctype, ObjCArray):
return ExprOp("[]",
self.base.to_expr(),
ExprInt(self.elems, self.default_size))
else:
raise RuntimeError("Strange case")
class CGenDeref(CGen):
"""
C dereference
IN:
- ptr
OUT:
- X* => X
"""
def __init__(self, ptr):
assert isinstance(ptr.ctype, ObjCPtr)
self._ptr = ptr
super(CGenDeref, self).__init__(ptr.ctype.objtype)
@property
def ptr(self):
"""Pointer object"""
return self._ptr
def __hash__(self):
return hash((super(CGenDeref, self).__hash__(), self._ptr))
def __eq__(self, other):
return (super(CGenField, self).__eq__(other) and
self._ptr == other.ptr)
def __repr__(self):
return "<%s %s>" % (self.__class__.__name__,
self.ptr)
def to_c(self):
"""Generate corresponding C"""
if not isinstance(self.ptr.ctype, ObjCPtr):
raise RuntimeError()
return "*(%s)" % (self.ptr.to_c())
def to_expr(self):
"""Generate Miasm expression representing the C access"""
if not isinstance(self.ptr.ctype, ObjCPtr):
raise RuntimeError()
return ExprOp("deref", self.ptr.to_expr())
def ast_get_c_access_expr(ast, expr_types, lvl=0):
"""Transform C ast object into a C Miasm expression
@ast: parsed pycparser.c_ast object
@expr_types: a dictionary linking ID names to their types
@lvl: actual recursion level
Example:
IN:
StructRef: ->
ID: ptr_Test
ID: a
OUT:
ExprOp('->', ExprId('ptr_Test', 64), ExprId('a', 64))
"""
if isinstance(ast, c_ast.Constant):
obj = ExprInt(int(ast.value), 64)
elif isinstance(ast, c_ast.StructRef):
name, field = ast.name, ast.field.name
name = ast_get_c_access_expr(name, expr_types)
if ast.type == "->":
s_name = name
s_field = ExprId(field, 64)
obj = ExprOp('->', s_name, s_field)
elif ast.type == ".":
s_name = name
s_field = ExprId(field, 64)
obj = ExprOp("field", s_name, s_field)
else:
raise RuntimeError("Unknown struct access")
elif isinstance(ast, c_ast.UnaryOp) and ast.op == "&":
tmp = ast_get_c_access_expr(ast.expr, expr_types, lvl + 1)
obj = ExprOp("addr", tmp)
elif isinstance(ast, c_ast.ArrayRef):
tmp = ast_get_c_access_expr(ast.name, expr_types, lvl + 1)
index = ast_get_c_access_expr(ast.subscript, expr_types, lvl + 1)
obj = ExprOp("[]", tmp, index)
elif isinstance(ast, c_ast.ID):
assert ast.name in expr_types
obj = ExprId(ast.name, 64)
elif isinstance(ast, c_ast.UnaryOp) and ast.op == "*":
tmp = ast_get_c_access_expr(ast.expr, expr_types, lvl + 1)
obj = ExprOp("deref", tmp)
else:
raise NotImplementedError("Unknown type")
return obj
def parse_access(c_access):
"""Parse C access
@c_access: C access string
"""
main = '''
int main() {
%s;
}
''' % c_access
parser = c_parser.CParser()
node = parser.parse(main, filename='<stdin>')
access = node.ext[-1].body.block_items[0]
return access
class ExprToAccessC(ExprReducer):
"""
Generate the C access object(s) for a given native Miasm expression
Example:
IN:
@32[ptr_Test]
OUT:
[<CGenDeref <CGenArray <CGenField <CGenDeref <CGenId ptr_Test>> a>>>]
An expression may be represented by multiple accessor (due to unions).
"""
def __init__(self, expr_types, types_mngr, enforce_strict_access=True):
"""Init GenCAccess
@expr_types: a dictionary linking ID names to their types
@types_mngr: types manager
@enforce_strict_access: If false, generate access even on expression
pointing to a middle of an object. If true, raise exception if such a
pointer is encountered
"""
self.expr_types = expr_types
self.types_mngr = types_mngr
self.enforce_strict_access = enforce_strict_access
def updt_expr_types(self, expr_types):
"""Update expr_types
@expr_types: Dictionary associating name to type
"""
self.expr_types = expr_types
def cgen_access(self, cgenobj, base_type, offset, deref, lvl=0):
"""Return the access(es) which lead to the element at @offset of an
object of type @base_type
In case of no @deref, stops recursion as soon as we reached the base of
an object.
In other cases, we need to go down to the final dereferenced object
@cgenobj: current object access
@base_type: type of main object
@offset: offset (in bytes) of the target sub object
@deref: get type for a pointer or a deref
@lvl: actual recursion level
IN:
- base_type: struct Toto{
int a
int b
}
- base_name: var
- 4
OUT:
- CGenField(var, b)
IN:
- base_type: int a
- 0
OUT:
- CGenAddr(a)
IN:
- base_type: X = int* a
- 0
OUT:
- CGenAddr(X)
IN:
- X = int* a
- 8
OUT:
- ASSERT
IN:
- struct toto{
int a
int b[10]
}
- 8
OUT:
- CGenArray(CGenField(toto, b), 1)
"""
if base_type.size == 0:
missing_definition(base_type)
return set()
void_type = self.types_mngr.void_ptr
if isinstance(base_type, ObjCStruct):
if not 0 <= offset < base_type.size:
return set()
if offset == 0 and not deref:
# In this case, return the struct*
return set([cgenobj])
for fieldname, subtype, field_offset, size in base_type.fields:
if not field_offset <= offset < field_offset + size:
continue
fieldptr = CGenField(CGenDeref(cgenobj), fieldname, subtype,
void_type.align, void_type.size)
new_type = self.cgen_access(fieldptr, subtype,
offset - field_offset,
deref, lvl + 1)
break
else:
return set()
elif isinstance(base_type, ObjCArray):
if base_type.objtype.size == 0:
missing_definition(base_type.objtype)
return set()
element_num = offset // (base_type.objtype.size)
field_offset = offset % base_type.objtype.size
if element_num >= base_type.elems:
return set()
if offset == 0 and not deref:
# In this case, return the array
return set([cgenobj])
curobj = CGenArray(cgenobj, element_num,
void_type.align,
void_type.size)
if field_offset == 0:
# We point to the start of the sub object,
# return it directly
return set([curobj])
new_type = self.cgen_access(curobj, base_type.objtype,
field_offset, deref, lvl + 1)
elif isinstance(base_type, ObjCDecl):
if self.enforce_strict_access and offset % base_type.size != 0:
return set()
elem_num = offset // base_type.size
nobj = CGenArray(cgenobj, elem_num,
void_type.align, void_type.size)
new_type = set([nobj])
elif isinstance(base_type, ObjCUnion):
if offset == 0 and not deref:
# In this case, return the struct*
return set([cgenobj])
out = set()
for fieldname, objtype, field_offset, size in base_type.fields:
if not field_offset <= offset < field_offset + size:
continue
field = CGenField(CGenDeref(cgenobj), fieldname, objtype,
void_type.align, void_type.size)
out.update(self.cgen_access(field, objtype,
offset - field_offset,
deref, lvl + 1))
new_type = out
elif isinstance(base_type, ObjCPtr):
elem_num = offset // base_type.size
if self.enforce_strict_access and offset % base_type.size != 0:
return set()
nobj = CGenArray(cgenobj, elem_num,
void_type.align, void_type.size)
new_type = set([nobj])
else:
raise NotImplementedError("deref type %r" % base_type)
return new_type
def reduce_known_expr(self, node, ctxt, **kwargs):
"""Generate access for known expr"""
if node.expr in ctxt:
objcs = ctxt[node.expr]
return set(CGenId(objc, str(node.expr)) for objc in objcs)
return None
def reduce_int(self, node, **kwargs):
"""Generate access for ExprInt"""
if not isinstance(node.expr, ExprInt):
return None
return set([CGenInt(int(node.expr))])
def get_solo_type(self, node):
"""Return the type of the @node if it has only one possible type,
different from not None. In other cases, return None.
"""
if node.info is None or len(node.info) != 1:
return None
return type(list(node.info)[0].ctype)
def reduce_op(self, node, lvl=0, **kwargs):
"""Generate access for ExprOp"""
if not (node.expr.is_op("+") or is_op_segm(node.expr)) \
or len(node.args) != 2:
return None
type_arg1 = self.get_solo_type(node.args[1])
if type_arg1 != ObjCInt:
return None
arg0, arg1 = node.args
if arg0.info is None:
return None
void_type = self.types_mngr.void_ptr
out = set()
if not arg1.expr.is_int():
return None
ptr_offset = int(arg1.expr)
for info in arg0.info:
if isinstance(info.ctype, ObjCArray):
field_type = info.ctype
elif isinstance(info.ctype, ObjCPtr):
field_type = info.ctype.objtype
else:
continue
target_type = info.ctype.objtype
# Array-like: int* ptr; ptr[1] = X
out.update(self.cgen_access(info, field_type, ptr_offset, False, lvl))
return out
def reduce_mem(self, node, lvl=0, **kwargs):
"""Generate access for ExprMem:
* @NN[ptr<elem>] -> elem (type)
* @64[ptr<ptr<elem>>] -> ptr<elem>
* @32[ptr<struct>] -> struct.00
"""
if not isinstance(node.expr, ExprMem):
return None
if node.ptr.info is None:
return None
assert isinstance(node.ptr.info, set)
void_type = self.types_mngr.void_ptr
found = set()
for subcgenobj in node.ptr.info:
if isinstance(subcgenobj.ctype, ObjCArray):
nobj = CGenArray(subcgenobj, 0,
void_type.align,
void_type.size)
target = nobj.ctype.objtype
for finalcgenobj in self.cgen_access(nobj, target, 0, True, lvl):
assert isinstance(finalcgenobj.ctype, ObjCPtr)
if self.enforce_strict_access and finalcgenobj.ctype.objtype.size != node.expr.size // 8:
continue
found.add(CGenDeref(finalcgenobj))
elif isinstance(subcgenobj.ctype, ObjCPtr):
target = subcgenobj.ctype.objtype
# target : type(elem)
if isinstance(target, (ObjCStruct, ObjCUnion)):
for finalcgenobj in self.cgen_access(subcgenobj, target, 0, True, lvl):
target = finalcgenobj.ctype.objtype
if self.enforce_strict_access and target.size != node.expr.size // 8:
continue
found.add(CGenDeref(finalcgenobj))
elif isinstance(target, ObjCArray):
if self.enforce_strict_access and subcgenobj.ctype.size != node.expr.size // 8:
continue
found.update(self.cgen_access(CGenDeref(subcgenobj), target,
0, False, lvl))
else:
if self.enforce_strict_access and target.size != node.expr.size // 8:
continue
found.add(CGenDeref(subcgenobj))
if not found:
return None
return found
reduction_rules = [reduce_known_expr,
reduce_int,
reduce_op,
reduce_mem,
]
def get_accesses(self, expr, expr_context=None):
"""Generate C access(es) for the native Miasm expression @expr
@expr: native Miasm expression
@expr_context: a dictionary linking known expressions to their
types. An expression is linked to a tuple of types.
"""
if expr_context is None:
expr_context = self.expr_types
ret = self.reduce(expr, ctxt=expr_context)
if ret.info is None:
return set()
return ret.info
class ExprCToExpr(ExprReducer):
"""Translate a Miasm expression (representing a C access) into a native
Miasm expression and its C type:
Example:
IN: ((ptr_struct -> f_mini) field x)
OUT: @32[ptr_struct + 0x80], int
Tricky cases:
Struct S0 {
int x;
int y[0x10];
}
Struct S1 {
int a;
S0 toto;
}
S1* ptr;
Case 1:
ptr->toto => ptr + 0x4
&(ptr->toto) => ptr + 0x4
Case 2:
(ptr->toto).x => @32[ptr + 0x4]
&((ptr->toto).x) => ptr + 0x4
Case 3:
(ptr->toto).y => ptr + 0x8
&((ptr->toto).y) => ptr + 0x8
Case 4:
(ptr->toto).y[1] => @32[ptr + 0x8 + 0x4]
&((ptr->toto).y[1]) => ptr + 0x8 + 0x4
"""
def __init__(self, expr_types, types_mngr):
"""Init ExprCAccess
@expr_types: a dictionary linking ID names to their types
@types_mngr: types manager
"""
self.expr_types = expr_types
self.types_mngr = types_mngr
def updt_expr_types(self, expr_types):
"""Update expr_types
@expr_types: Dictionary associating name to type
"""
self.expr_types = expr_types
CST = "CST"
def reduce_known_expr(self, node, ctxt, **kwargs):
"""Reduce known expressions"""
if str(node.expr) in ctxt:
objc = ctxt[str(node.expr)]
out = (node.expr, objc)
elif node.expr.is_id():
out = (node.expr, None)
else:
out = None
return out
def reduce_int(self, node, **kwargs):
"""Reduce ExprInt"""
if not isinstance(node.expr, ExprInt):
return None
return self.CST
def reduce_op_memberof(self, node, **kwargs):
"""Reduce -> operator"""
if not node.expr.is_op('->'):
return None
assert len(node.args) == 2
out = []
assert isinstance(node.args[1].expr, ExprId)
field = node.args[1].expr.name
src, src_type = node.args[0].info
if src_type is None:
return None
assert isinstance(src_type, (ObjCPtr, ObjCArray))
struct_dst = src_type.objtype
assert isinstance(struct_dst, ObjCStruct)
found = False
for name, objtype, offset, _ in struct_dst.fields:
if name != field:
continue
expr = src + ExprInt(offset, src.size)
if isinstance(objtype, (ObjCArray, ObjCStruct, ObjCUnion)):
pass
else:
expr = ExprMem(expr, objtype.size * 8)
assert not found
found = True
out = (expr, objtype)
assert found
return out
def reduce_op_field(self, node, **kwargs):
"""Reduce field operator (Struct or Union)"""
if not node.expr.is_op('field'):
return None
assert len(node.args) == 2
out = []
assert isinstance(node.args[1].expr, ExprId)
field = node.args[1].expr.name
src, src_type = node.args[0].info
struct_dst = src_type
if isinstance(struct_dst, ObjCStruct):
found = False
for name, objtype, offset, _ in struct_dst.fields:
if name != field:
continue
expr = src + ExprInt(offset, src.size)
if isinstance(objtype, ObjCArray):
# Case 4
pass
elif isinstance(objtype, (ObjCStruct, ObjCUnion)):
# Case 1
pass
else:
# Case 2
expr = ExprMem(expr, objtype.size * 8)
assert not found
found = True
out = (expr, objtype)
elif isinstance(struct_dst, ObjCUnion):
found = False
for name, objtype, offset, _ in struct_dst.fields:
if name != field:
continue
expr = src + ExprInt(offset, src.size)
if isinstance(objtype, ObjCArray):
# Case 4
pass
elif isinstance(objtype, (ObjCStruct, ObjCUnion)):
# Case 1
pass
else:
# Case 2
expr = ExprMem(expr, objtype.size * 8)
assert not found
found = True
out = (expr, objtype)
else:
raise NotImplementedError("unknown ObjC")
assert found
return out
def reduce_op_array(self, node, **kwargs):
"""Reduce array operator"""
if not node.expr.is_op('[]'):
return None
assert len(node.args) == 2
out = []
assert isinstance(node.args[1].expr, ExprInt)
cst = node.args[1].expr
src, src_type = node.args[0].info
objtype = src_type.objtype
expr = src + cst * ExprInt(objtype.size, cst.size)
if isinstance(src_type, ObjCPtr):
if isinstance(objtype, ObjCArray):
final = objtype.objtype
expr = src + cst * ExprInt(final.size, cst.size)
objtype = final
expr = ExprMem(expr, final.size * 8)
found = True
else:
expr = ExprMem(expr, objtype.size * 8)
found = True
elif isinstance(src_type, ObjCArray):
if isinstance(objtype, ObjCArray):
final = objtype
found = True
elif isinstance(objtype, ObjCStruct):
found = True
else:
expr = ExprMem(expr, objtype.size * 8)
found = True
else:
raise NotImplementedError("Unknown access" % node.expr)
assert found
out = (expr, objtype)
return out
def reduce_op_addr(self, node, **kwargs):
"""Reduce addr operator"""
if not node.expr.is_op('addr'):
return None
assert len(node.args) == 1
out = []
src, src_type = node.args[0].info
void_type = self.types_mngr.void_ptr
if isinstance(src_type, ObjCArray):
out = (src.arg, ObjCPtr(src_type.objtype,
void_type.align, void_type.size))
elif isinstance(src, ExprMem):
out = (src.ptr, ObjCPtr(src_type,
void_type.align, void_type.size))
elif isinstance(src_type, ObjCStruct):
out = (src, ObjCPtr(src_type,
void_type.align, void_type.size))
elif isinstance(src_type, ObjCUnion):
out = (src, ObjCPtr(src_type,
void_type.align, void_type.size))
else:
raise NotImplementedError("unk type")
return out
def reduce_op_deref(self, node, **kwargs):
"""Reduce deref operator"""
if not node.expr.is_op('deref'):
return None
out = []
src, src_type = node.args[0].info
assert isinstance(src_type, (ObjCPtr, ObjCArray))
void_type = self.types_mngr.void_ptr
if isinstance(src_type, ObjCPtr):
if isinstance(src_type.objtype, ObjCArray):
size = void_type.size*8
else:
size = src_type.objtype.size * 8
out = (ExprMem(src, size), (src_type.objtype))
else:
size = src_type.objtype.size * 8
out = (ExprMem(src, size), (src_type.objtype))
return out
reduction_rules = [reduce_known_expr,
reduce_int,
reduce_op_memberof,
reduce_op_field,
reduce_op_array,
reduce_op_addr,
reduce_op_deref,
]
def get_expr(self, expr, c_context):
"""Translate a Miasm expression @expr (representing a C access) into a
tuple composed of a native Miasm expression and its C type.
@expr: Miasm expression (representing a C access)
@c_context: a dictionary linking known tokens (strings) to their
types. A token is linked to only one type.
"""
ret = self.reduce(expr, ctxt=c_context)
if ret.info is None:
return (None, None)
return ret.info
class CTypesManager(object):
"""Represent a C object, without any layout information"""
def __init__(self, types_ast, leaf_types):
self.types_ast = types_ast
self.leaf_types = leaf_types
@property
def void_ptr(self):
"""Retrieve a void* objc"""
return self.leaf_types.types.get(CTypePtr(CTypeId('void')))
@property
def padding(self):
"""Retrieve a padding ctype"""
return CTypeId(PADDING_TYPE_NAME)
def _get_objc(self, type_id, resolved=None, to_fix=None, lvl=0):
if resolved is None:
resolved = {}
if to_fix is None:
to_fix = []
if type_id in resolved:
return resolved[type_id]
type_id = self.types_ast.get_type(type_id)
fixed = True
if isinstance(type_id, CTypeId):
out = self.leaf_types.types.get(type_id, None)
assert out is not None
elif isinstance(type_id, CTypeUnion):
args = []
align_max, size_max = 0, 0
for name, field in type_id.fields:
objc = self._get_objc(field, resolved, to_fix, lvl + 1)
resolved[field] = objc
align_max = max(align_max, objc.align)
size_max = max(size_max, objc.size)
args.append((name, objc, 0, objc.size))
align, size = self.union_compute_align_size(align_max, size_max)
out = ObjCUnion(type_id.name, align, size, args)
elif isinstance(type_id, CTypeStruct):
align_max, size_max = 0, 0
args = []
offset, align_max = 0, 1
pad_index = 0
for name, field in type_id.fields:
objc = self._get_objc(field, resolved, to_fix, lvl + 1)
resolved[field] = objc
align_max = max(align_max, objc.align)
new_offset = self.struct_compute_field_offset(objc, offset)
if new_offset - offset:
pad_name = "__PAD__%d__" % pad_index
pad_index += 1
size = new_offset - offset
pad_objc = self._get_objc(CTypeArray(self.padding, size), resolved, to_fix, lvl + 1)
args.append((pad_name, pad_objc, offset, pad_objc.size))
offset = new_offset
args.append((name, objc, offset, objc.size))
offset += objc.size
align, size = self.struct_compute_align_size(align_max, offset)
out = ObjCStruct(type_id.name, align, size, args)
elif isinstance(type_id, CTypePtr):
target = type_id.target
out = ObjCPtr(None, self.void_ptr.align, self.void_ptr.size)
fixed = False
elif isinstance(type_id, CTypeArray):
target = type_id.target
objc = self._get_objc(target, resolved, to_fix, lvl + 1)
resolved[target] = objc
if type_id.size is None:
# case: toto[]
# return ObjCPtr
out = ObjCPtr(objc, self.void_ptr.align, self.void_ptr.size)
else:
size = self.size_to_int(type_id.size)
if size is None:
raise RuntimeError('Enable to compute objc size')
else:
out = ObjCArray(objc, size)
assert out.size is not None and out.align is not None
elif isinstance(type_id, CTypeEnum):
# Enum are integer
return self.leaf_types.types.get(CTypeId('int'))
elif isinstance(type_id, CTypeFunc):
type_ret = self._get_objc(
type_id.type_ret, resolved, to_fix, lvl + 1)
resolved[type_id.type_ret] = type_ret
args = []
for name, arg in type_id.args:
objc = self._get_objc(arg, resolved, to_fix, lvl + 1)
resolved[arg] = objc
args.append((name, objc))
out = ObjCFunc(type_id.name, type_id.abi, type_ret, args,
self.void_ptr.align, self.void_ptr.size)
elif isinstance(type_id, CTypeEllipsis):
out = ObjCEllipsis()
else:
raise TypeError("Unknown type %r" % type_id.__class__)
if not isinstance(out, ObjCEllipsis):
assert out.align is not None and out.size is not None
if fixed:
resolved[type_id] = out
else:
to_fix.append((type_id, out))
return out
def get_objc(self, type_id):
"""Get the ObjC corresponding to the CType @type_id
@type_id: CTypeBase instance"""
resolved = {}
to_fix = []
out = self._get_objc(type_id, resolved, to_fix)
# Fix sub objects
while to_fix:
type_id, objc_to_fix = to_fix.pop()
objc = self._get_objc(type_id.target, resolved, to_fix)
objc_to_fix.objtype = objc
self.check_objc(out)
return out
def check_objc(self, objc, done=None):
"""Ensure each sub ObjC is resolved
@objc: ObjC instance"""
if done is None:
done = set()
if objc in done:
return True
done.add(objc)
if isinstance(objc, (ObjCDecl, ObjCInt, ObjCEllipsis)):
return True
elif isinstance(objc, (ObjCPtr, ObjCArray)):
assert self.check_objc(objc.objtype, done)
return True
elif isinstance(objc, (ObjCStruct, ObjCUnion)):
for _, field, _, _ in objc.fields:
assert self.check_objc(field, done)
return True
elif isinstance(objc, ObjCFunc):
assert self.check_objc(objc.type_ret, done)
for name, arg in objc.args:
assert self.check_objc(arg, done)
return True
else:
assert False
def size_to_int(self, size):
"""Resolve an array size
@size: CTypeOp or integer"""
if isinstance(size, CTypeOp):
assert len(size.args) == 2
arg0, arg1 = [self.size_to_int(arg) for arg in size.args]
if size.operator == "+":
return arg0 + arg1
elif size.operator == "-":
return arg0 - arg1
elif size.operator == "*":
return arg0 * arg1
elif size.operator == "/":
return arg0 // arg1
elif size.operator == "<<":
return arg0 << arg1
elif size.operator == ">>":
return arg0 >> arg1
else:
raise ValueError("Unknown operator %s" % size.operator)
elif isinstance(size, int_types):
return size
elif isinstance(size, CTypeSizeof):
obj = self._get_objc(size.target)
return obj.size
else:
raise TypeError("Unknown size type")
def struct_compute_field_offset(self, obj, offset):
"""Compute the offset of the field @obj in the current structure"""
raise NotImplementedError("Abstract method")
def struct_compute_align_size(self, align_max, size):
"""Compute the alignment and size of the current structure"""
raise NotImplementedError("Abstract method")
def union_compute_align_size(self, align_max, size):
"""Compute the alignment and size of the current union"""
raise NotImplementedError("Abstract method")
class CTypesManagerNotPacked(CTypesManager):
"""Store defined C types (not packed)"""
def struct_compute_field_offset(self, obj, offset):
"""Compute the offset of the field @obj in the current structure
(not packed)"""
if obj.align > 1:
offset = (offset + obj.align - 1) & ~(obj.align - 1)
return offset
def struct_compute_align_size(self, align_max, size):
"""Compute the alignment and size of the current structure
(not packed)"""
if align_max > 1:
size = (size + align_max - 1) & ~(align_max - 1)
return align_max, size
def union_compute_align_size(self, align_max, size):
"""Compute the alignment and size of the current union
(not packed)"""
return align_max, size
class CTypesManagerPacked(CTypesManager):
"""Store defined C types (packed form)"""
def struct_compute_field_offset(self, _, offset):
"""Compute the offset of the field @obj in the current structure
(packed form)"""
return offset
def struct_compute_align_size(self, _, size):
"""Compute the alignment and size of the current structure
(packed form)"""
return 1, size
def union_compute_align_size(self, align_max, size):
"""Compute the alignment and size of the current union
(packed form)"""
return 1, size
class CHandler(object):
"""
C manipulator for Miasm
Miasm expr <-> C
"""
exprCToExpr_cls = ExprCToExpr
exprToAccessC_cls = ExprToAccessC
def __init__(self, types_mngr, expr_types=None,
C_types=None,
simplify_c=access_simplifier,
enforce_strict_access=True):
self.exprc2expr = self.exprCToExpr_cls(expr_types, types_mngr)
self.access_c_gen = self.exprToAccessC_cls(expr_types,
types_mngr,
enforce_strict_access)
self.types_mngr = types_mngr
self.simplify_c = simplify_c
if expr_types is None:
expr_types = {}
self.expr_types = expr_types
if C_types is None:
C_types = {}
self.C_types = C_types
def updt_expr_types(self, expr_types):
"""Update expr_types
@expr_types: Dictionary associating name to type
"""
self.expr_types = expr_types
self.exprc2expr.updt_expr_types(expr_types)
self.access_c_gen.updt_expr_types(expr_types)
def expr_to_c_access(self, expr, expr_context=None):
"""Generate the C access object(s) for a given native Miasm expression.
@expr: Miasm expression
@expr_context: a dictionary linking known expressions to a set of types
"""
if expr_context is None:
expr_context = self.expr_types
return self.access_c_gen.get_accesses(expr, expr_context)
def expr_to_c_and_types(self, expr, expr_context=None):
"""Generate the C access string and corresponding type for a given
native Miasm expression.
@expr_context: a dictionary linking known expressions to a set of types
"""
accesses = set()
for access in self.expr_to_c_access(expr, expr_context):
c_str = access_str(access.to_expr().visit(self.simplify_c))
accesses.add((c_str, access.ctype))
return accesses
def expr_to_c(self, expr, expr_context=None):
"""Convert a Miasm @expr into it's C equivalent string
@expr_context: a dictionary linking known expressions to a set of types
"""
return set(access[0]
for access in self.expr_to_c_and_types(expr, expr_context))
def expr_to_types(self, expr, expr_context=None):
"""Get the possible types of the Miasm @expr
@expr_context: a dictionary linking known expressions to a set of types
"""
return set(access.ctype
for access in self.expr_to_c_access(expr, expr_context))
def c_to_expr_and_type(self, c_str, c_context=None):
"""Convert a C string expression to a Miasm expression and it's
corresponding c type
@c_str: C string
@c_context: (optional) dictionary linking known tokens (strings) to its
type.
"""
ast = parse_access(c_str)
if c_context is None:
c_context = self.C_types
access_c = ast_get_c_access_expr(ast, c_context)
return self.exprc2expr.get_expr(access_c, c_context)
def c_to_expr(self, c_str, c_context=None):
"""Convert a C string expression to a Miasm expression
@c_str: C string
@c_context: (optional) dictionary linking known tokens (strings) to its
type.
"""
if c_context is None:
c_context = self.C_types
expr, _ = self.c_to_expr_and_type(c_str, c_context)
return expr
def c_to_type(self, c_str, c_context=None):
"""Get the type of a C string expression
@expr: Miasm expression
@c_context: (optional) dictionary linking known tokens (strings) to its
type.
"""
if c_context is None:
c_context = self.C_types
_, ctype = self.c_to_expr_and_type(c_str, c_context)
return ctype
class CLeafTypes(object):
"""Define C types sizes/alignment for a given architecture"""
pass