cea-sec/miasm

View on GitHub
example/jitter/example_types.py

Summary

Maintainability
A
0 mins
Test Coverage
#! /usr/bin/env python2
"""This script is just a short example of common usages for miasm.core.types.
For a more complete view of what is possible, test/core/types.py covers
most of the module possibilities, and the module doc gives useful information
as well.
"""
from __future__ import print_function

from miasm.core.utils import iterbytes
from miasm.analysis.machine import Machine
from miasm.core.types import MemStruct, Self, Void, Str, Array, Ptr, \
                              Num, Array, set_allocator
from miasm.os_dep.common import heap
from miasm.core.locationdb import LocationDB

# Location database; it is handed to the jitter built later in this script
loc_db = LocationDB()

# Instantiate a heap
my_heap = heap()
# And set it as the default memory allocator, to avoid manual allocation and
# explicit address passing to the MemType subclasses (like MemStruct)
# constructor. From now on, `SomeMemStruct(vm)` (no address given) gets its
# storage from my_heap.vm_alloc.
set_allocator(my_heap.vm_alloc)

# Let's reimplement a simple C generic linked list mapped on a VmMngr.

# All the structures and methods will use the python objects but all the data
# is in fact stored in the VmMngr

class ListNode(MemStruct):
    """One cell of the linked list: a pointer to the following cell and an
    untyped pointer to the payload.
    """
    fields = [
        # Pointers are stored with the struct-like format "<I", i.e. as a
        # Little Endian 32 bits unsigned int.
        # A ListNode has to reference its own type: the special marker Self()
        # stands for "the structure being declared".
        # An alternative is to generate the fields with ListNode.gen_field
        # once the class is declared, so that ListNode already exists when
        # the fields are generated.
        ("next", Ptr("<I", Self())),
        # Void() is a kind of "empty type", so Ptr(_, Void()) plays the role
        # of a C void*
        ("data", Ptr("<I", Void())),
    ]

    def get_next(self):
        """Return the following ListNode, or None if `next` is a null
        pointer.
        """
        next_ptr = self.next
        return None if next_ptr.val == 0 else next_ptr.deref

    def get_data(self, data_type=None):
        """Return the pointed data, cast to @data_type when one is given."""
        payload = self.data.deref
        if data_type is None:
            return payload
        return payload.cast(data_type)


class LinkedList(MemStruct):
    """Header of a singly linked list of ListNode: head and tail pointers
    plus the number of elements. New elements are inserted at the head.
    """
    fields = [
        # For convenience, either a Type instance (like Self() or Num("I")) or
        # a MemStruct subclass can be passed to the Ptr constructor.
        ("head", Ptr("<I", ListNode)),
        ("tail", Ptr("<I", ListNode)),
        # Num can take any one-field struct-like format, including floats and
        # doubles
        ("size", Num("<I")),
    ]

    def get_head(self):
        """Returns the head ListNode instance, or None if the list is empty"""
        if self.head == 0:
            return None
        return self.head.deref

    def get_tail(self):
        """Returns the tail ListNode instance, or None if the list is empty"""
        if self.tail == 0:
            return None
        return self.tail.deref

    def push(self, data):
        """Push a data (MemType instance) to the linked list.

        A new ListNode is allocated, pointed at @data and inserted at the
        head of the list.
        """
        # Allocate a new node
        node = ListNode(self._vm)

        # Set the data pointer
        node.data = data.get_addr()

        # re-link
        if self.head != 0:
            # get the head ListNode
            head = self.get_head()
            node.next = head.get_addr()
        else:
            # First element: terminate the chain explicitly. The freshly
            # allocated node is not guaranteed to be zero-filled, and
            # get_next() / __iter__ rely on a null `next` to stop.
            node.next = 0

        # pointer to head assigned to the new node address
        self.head = node.get_addr()

        # Do not forget the tail :)
        if self.tail == 0:
            self.tail = node.get_addr()

        self.size += 1

    def pop(self, data_type=None):
        """Pop one data from the LinkedList.

        The head node is unlinked and its data is returned, cast to
        @data_type when one is given. Returns None if the list is empty.
        """
        # Nothing to pop
        if self.head == 0:
            return None

        node = self.get_head()
        self.head = node.next

        # empty
        if self.head == 0:
            self.tail = 0

        self.size -= 1

        return node.get_data(data_type)

    def empty(self):
        """True if the list is empty."""
        return self.head == 0

    def __iter__(self):
        """Yield the (uncast) data of each node, from head to tail."""
        if not self.empty():
            cur = self.get_head()
            while cur is not None:
                yield cur.data.deref
                cur = cur.get_next()


# Some data types to put in the LinkedList and play with:

class DataArray(MemStruct):
    # Two single-byte values, a pointer to a 16-byte array and an inline
    # 16-byte array.
    fields = [
        ("val1", Num("B")),
        ("val2", Num("B")),
        # Ptr can also be instantiated with a Type instance as an argument, the
        # corresponding Memtype will be returned when dereferencing
        # Here, data_array.arrayptr.deref will allow to access an Array
        ("arrayptr", Ptr("<I", Array(Num("B"), 16))),
        # Array of 16 uint8
        ("array", Array(Num("B"), 16)),
    ]

class DataStr(MemStruct):
    # A 16 bits value followed by a pointer to a string. The script later
    # casts a DataArray to this type to show two views of the same memory.
    fields = [
        # Little Endian 16 bits unsigned int
        ("valshort", Num("<H")),
        # Pointer to an utf16 null terminated string
        ("data", Ptr("<I", Str("utf16"))),
    ]


# Banner announcing the two parts of the demo
print("This script demonstrates a LinkedList implementation using the types ")
print("module in the first part, and how to play with some casts in the second.")
print()

# Any jitter would do: only its memory manager (jitter.vm) is used below.
# You can also use miasm.jitter.VmMngr.Vm(), but it does not happen in real
# life scripts, so here is the usual way:
jitter = Machine("x86_32").jitter(loc_db, "python")
# Every structure of this script is instantiated on this vm
vm = jitter.vm

# Auto-allocated by my_heap. If you allocate memory at `addr`,
# `link = LinkedList(vm, addr)` will use this allocation. If you just want
# to read/modify existing struct, you may want to use the (vm, addr) syntax.
link = LinkedList(vm)
# memset the struct (with '\x00' by default), so that head and tail are null
# pointers and size is 0 before the first push
link.memset()

# Push three uninitialized structures
link.push(DataArray(vm))
link.push(DataArray(vm))
link.push(DataArray(vm))

# Size has been updated
assert link.size == 3
# If you get it directly from the VM, it is updated as well: read the raw
# bytes of the "size" field, whose address and byte length are both obtained
# from the structure itself. The field is declared as Num("<I"), hence the
# four Little Endian bytes compared below.
raw_size = vm.get_mem(link.get_addr("size"), link.get_type()
                                                 .get_field_type("size").size)
assert raw_size == b'\x03\x00\x00\x00'

print("The linked list just built:")
print(repr(link), '\n')

print("Its uninitialized data elements:")
for data in link:
    # __iter__ returns MemVoids here, just cast them to the real data type
    real_data = data.cast(DataArray)
    print(repr(real_data))
print()

# Now let's play with one data
# pop() unlinks the head node, i.e. the last element pushed, and returns its
# data cast to the requested type
data = link.pop(DataArray)
assert link.size == 2
# Make the Array Ptr point to the data's array field
# Note: this is equivalent to data.arrayptr.val = ...
data.arrayptr = data.get_addr("array")
# Now the pointer dereference is equal to the array field's value
assert data.arrayptr.deref == data.array

# Let's say that it is a DataStr:
# (cast gives another typed view of the same address, nothing is copied)
datastr = data.cast(DataStr)

print("First element casted to DataStr:")
print(repr(datastr))
print()

# data and datastr really share the same memory:
# valshort is Little Endian, so val1 is its low byte and val2 its high byte
data.val1 = 0x34
data.val2 = 0x12
assert datastr.valshort == 0x1234
datastr.valshort = 0x1122
assert data.val1 == 0x22 and data.val2 == 0x11

# Let's play with strings
# datastr.data is the DataStr view of the pointer set above through
# data.arrayptr, so dereferencing it lands on data.array (checked by the
# assertions below)
memstr = datastr.data.deref
# Note that memstr is Str("utf16")
memstr.val = 'Miams'

print("Cast data.array to MemStr and set the string value:")
print(repr(memstr))
print()

# If you followed, memstr and data.array point to the same object, so:
# the array starts with the utf16 encoding of the string and its 2-byte
# null terminator
raw_miams = 'Miams'.encode('utf-16le') + b'\x00'*2
raw_miams_array = [ord(c) for c in iterbytes(raw_miams)]
assert list(data.array)[:len(raw_miams_array)] == raw_miams_array
assert data.array.cast(Str("utf16")) == memstr
# Default is "ansi"
assert data.array.cast(Str()) != memstr
assert data.array.cast(Str("utf16")).val == memstr.val

print("See that the original array has been modified:")
print(repr(data))
print()

# Some type manipulation examples, for example let's construct an argv for
# a program:
# Let's say that we have two arguments, +1 for the program name and +1 for the
# final null ptr in argv, the array has 4 elements:
argv_t = Array(Ptr("<I", Str()), 4)
print("3 arguments argv type:", argv_t)

# alloc argv somewhere
# (`lval` is the memory-mapped class associated with the type; no address is
# given, so the allocator set with set_allocator provides one)
argv = argv_t.lval(vm)

# Auto alloc with the MemStr.from_str helper
MemStrAnsi = Str().lval
argv[0].val = MemStrAnsi.from_str(vm, "./my-program").get_addr()
argv[1].val = MemStrAnsi.from_str(vm, "arg1").get_addr()
argv[2].val = MemStrAnsi.from_str(vm, "27").get_addr()
# Terminating null pointer
argv[3].val = 0

# If you changed your mind on the second arg, you could do:
argv[2].deref.val = "42"

print("An argv instance:", repr(argv))
# argv[:-1] leaves out the final null pointer, which must not be dereferenced
print("argv values:", repr([val.deref.val for val in argv[:-1]]))
print()

print("See test/core/types.py and the miasm.core.types module doc for ")
print("more information.")