cea-sec/miasm

View on GitHub
example/expression/export_llvm.py

Summary

Maintainability
A
0 mins
Test Coverage
from future.utils import viewitems, viewvalues

from argparse import ArgumentParser
from miasm.analysis.binary import Container
from miasm.analysis.machine import Machine
from miasm.jitter.llvmconvert import LLVMType, LLVMContext_IRCompilation, LLVMFunction_IRCompilation
from llvmlite import ir as llvm_ir
from miasm.expression.simplifications import expr_simp_high_to_explicit
from miasm.core.locationdb import LocationDB

# Command line: the binary to analyse, the address of the code to export and
# an optional architecture override.
# The title is passed as `description`: ArgumentParser's first positional
# parameter is `prog`, which would replace the script name in the usage line.
parser = ArgumentParser(description="LLVM export example")
parser.add_argument("target", help="Target binary")
parser.add_argument("addr", help="Target address")
parser.add_argument("--architecture", "-a", help="Force architecture")
args = parser.parse_args()
loc_db = LocationDB()
# This part focuses on obtaining an IRCFG to transform #
# Open the target inside a `with` block so the file handle is closed
# deterministically instead of being left to the garbage collector.
# NOTE(review): this relies on the container having consumed the stream by
# the time from_stream() returns -- confirm.
with open(args.target, 'rb') as fdesc:
    cont = Container.from_stream(fdesc, loc_db)
# Use the architecture forced on the command line, else the container's one
machine = Machine(args.architecture or cont.arch)
lifter = machine.lifter(loc_db)
dis = machine.dis_engine(cont.bin_stream, loc_db=loc_db)
# Base 0 lets int() accept decimal as well as prefixed (0x, 0o, 0b) addresses
asmcfg = dis.dis_multiblock(int(args.addr, 0))
ircfg = lifter.new_ircfg_from_asmcfg(asmcfg)
ircfg.simplify(expr_simp_high_to_explicit)
######################################################

# Instantiate a context and the function to fill
# The lifter built above is attached to the context before the function is
# created from that context.
context = LLVMContext_IRCompilation()
context.lifter = lifter

# The exported function is named "test" and is declared as returning void,
# which matches the explicit `ret_void()` emitted at the end of this script.
func = LLVMFunction_IRCompilation(context, name="test")
func.ret_type = llvm_ir.VoidType()
# NOTE(review): init_fc() presumably sets up the function and its builder
# (func.builder is used right after this point) -- confirm in llvmconvert.
func.init_fc()

# Here, as an example, we arbitrarily represent registers with global
# variables. Local allocas are used for the computation during the function,
# and their values are finally saved in the aforementioned global variables.

# In other words, for each register:
# entry:
#     ...
#     %reg_val_in = load i32 @REG
#     %REG = alloca i32
#     store i32 %reg_val_in, i32* %REG
#     ...
# exit:
#     ...
#     %reg_val_out = load i32 %REG
#     store i32 %reg_val_out, i32* @REG
#     ...

# Gather every identifier expression that is read or written anywhere in the
# IR CFG (memory reads included when computing what is read).
all_regs = set()
for irblock in viewvalues(ircfg.blocks):
    for assignblk in irblock.assignblks:
        for written, read in viewitems(assignblk.get_rw(mem_read=True)):
            # Consider the destination together with everything it reads
            touched = read.union({written})
            for expr in touched:
                if expr.is_id():
                    all_regs.add(expr)

# Map each collected register to the LLVM global variable that backs it, and
# seed a local alloca with the global's value.
reg2glob = {}
for reg in all_regs:
    glob_name = str(reg)
    glob_type = LLVMType.IntType(reg.size)

    # Reuse the module's global of that name, or declare it if missing
    glob = context.mod.globals.get(glob_name, None)
    if glob is None:
        glob = llvm_ir.GlobalVariable(context.mod, glob_type, name=glob_name)
    # The global is zero-initialized whether it was just created or not
    glob.initializer = glob_type(0)

    # alloca reg = global reg
    initial = func.builder.load(glob)
    local_ptr = func.builder.alloca(llvm_ir.IntType(reg.size), name=reg.name)
    func.local_vars_pointers[reg.name] = local_ptr
    func.builder.store(initial, local_ptr)

    reg2glob[reg] = glob

# Import the IRCFG; the final "ret void" is deliberately not appended yet,
# because the registers still have to be written back first.
func.from_ircfg(ircfg, append_ret=False)

# Write-back: copy each local (temporary) register value to its global
for saved_reg, saved_glob in viewitems(reg2glob):
    local_ptr = func.local_vars_pointers[saved_reg.name]
    final = func.builder.load(local_ptr)
    func.builder.store(final, saved_glob)

# Terminate the function now that every register has been saved
func.builder.ret_void()

# Get it back: dump the string form of the function to "out.ll".
# The context manager guarantees the data is flushed and the file is closed,
# rather than relying on the garbage collector to do it.
with open("out.ll", "w") as out_file:
    out_file.write(str(func))
# The optimized CFG can be seen with:
# $ opt -O2 -dot-cfg -S out.ll && xdot cfg.test.dot