
/*
diStorm3 C Library Interface
diStorm3 - Powerful disassembler for X86/AMD64
distorm at gmail dot com
Copyright (C) 2003-2012 Gil Dabah

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>
*/

#include "../distorm.h"
#include "config.h"
#include "decoder.h"
#include "x86defs.h"
#include "textdefs.h"
#include "wstring.h"
#include "../mnemonics.h"

    _DLLEXPORT_ _DecodeResult distorm_decompose64(_CodeInfo* ci, _DInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount)
    _DLLEXPORT_ _DecodeResult distorm_decompose32(_CodeInfo* ci, _DInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount)
    if (usedInstructionsCount == NULL) {
        return DECRES_SUCCESS;

    /* DECRES_SUCCESS still may indicate we may have something in the result, so zero it first thing. */
    *usedInstructionsCount = 0;

    if ((ci == NULL) ||
        (ci->codeLen < 0) ||
        ((ci->dt != Decode16Bits) && (ci->dt != Decode32Bits) && (ci->dt != Decode64Bits)) ||
        (ci->code == NULL) ||
        (result == NULL) ||
        ((ci->features & (DF_MAXIMUM_ADDR16 | DF_MAXIMUM_ADDR32)) == (DF_MAXIMUM_ADDR16 | DF_MAXIMUM_ADDR32)))
        return DECRES_INPUTERR;

    /* Assume length=0 is success. */
    if (ci->codeLen == 0) {
        return DECRES_SUCCESS;

    return decode_internal(ci, FALSE, result, maxInstructions, usedInstructionsCount);


#ifndef DISTORM_LIGHT

/* Helper function that appends an explicit operand size when it cannot be inferred from the operands. */
/*
 * Append an explicit size keyword ("BYTE ", "WORD ", ...) for operand opNum of di to str,
 * but only when the size cannot be deduced from the other operands.
 *
 * str   - destination string, appended to.
 * di    - instruction whose operand types, sizes and opcode are inspected.
 * opNum - index of the operand being formatted.
 */
static void distorm_format_size(_WString* str, const _DInst* di, int opNum)
{
    /*
     * We only have to output the size explicitly if it's not clear from the operands.
     * For example:
     * mov al, [0x1234] -> The size is 8, we know it from the AL register operand.
     * mov [0x1234], 0x11 -> Now we don't know the size.
     * If given operand number is higher than 2, then output the size anyways.
     */
    if (((opNum >= 2) || ((di->ops[0].type != O_REG) && (di->ops[1].type != O_REG))) ||
        /*
         * INS/OUTS are exception, because DX is a port specifier and not a real src/dst register.
         * MOVZX is also an exception, because the source operand can be a memory indirection:
         * MOVZX EBX, [ECX], but which size from ECX was read?
         */
        ((di->opcode == I_INS) || (di->opcode == I_OUTS) || (di->opcode == I_MOVZX))) {
        switch (di->ops[opNum].size)
        {
            case 0: break; /* OT_MEM's unknown size. */
            case 8: strcat_WSN(str, "BYTE "); break;
            case 16: strcat_WSN(str, "WORD "); break;
            case 32: strcat_WSN(str, "DWORD "); break;
            case 64: strcat_WSN(str, "QWORD "); break;
            case 80: strcat_WSN(str, "TBYTE "); break;
            case 128: strcat_WSN(str, "DQWORD "); break;
            case 256: strcat_WSN(str, "YWORD "); break;
            default: /* Unexpected size: emit nothing. */ break;
        }
    }
}

/*
 * Append the displacement of di to str as a sign character followed by its magnitude in hex.
 * Nothing is appended when the instruction has no displacement (dispSize is zero).
 *
 * str      - destination string, appended to.
 * di       - instruction whose disp and dispSize fields are read.
 * addrMask - mask applied to the magnitude before it is printed.
 */
static void distorm_format_signed_disp(_WString* str, const _DInst* di, uint64_t addrMask)
{
    uint64_t magnitude;
    int isNegative;

    if (!di->dispSize) {
        return;
    }

    isNegative = ((int64_t)di->disp < 0);
    chrcat_WS(str, isNegative ? MINUS_DISP_CHR : PLUS_DISP_CHR);

    /*
     * Negate in unsigned arithmetic: negating the most negative int64_t as a signed value
     * is undefined behavior, while the unsigned form yields the same bit pattern for
     * every other input and a well-defined result for that one.
     */
    magnitude = (uint64_t)di->disp;
    if (isNegative) {
        magnitude = (uint64_t)0 - magnitude;
    }
    magnitude &= addrMask;
    str_code_hqw(str, (uint8_t*)&magnitude);
}

    /*
     * Render one decomposed instruction as text.
     *
     * ci     - code buffer the instruction came from; supplies the raw bytes for the hex dump,
     *          the decoding mode (dt) and the address-limit features used to mask addresses.
     * di     - decomposed instruction to format.
     * result - receives size, offset, instructionHex, mnemonic and operands.
     *
     * The three output strings are emptied before anything is appended to them, because the
     * memory behind result is not guaranteed to hold valid strings on entry.
     * NOTE(review): the strings are cleared through their length/p fields, the same fields the
     * mnemonic copy below uses; if wstring.h offers a dedicated clear helper, prefer it.
     * NOTE(review): SUPPORT_64BIT_OFFSET is assumed to be the build switch that selects the
     * 64-bit entry-point name - confirm against the public header.
     */
#ifdef SUPPORT_64BIT_OFFSET
    _DLLEXPORT_ void distorm_format64(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result)
#else
    _DLLEXPORT_ void distorm_format32(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result)
#endif
{
    _WString* str;
    unsigned int i, isDefault;
    int64_t tmpDisp64;
    uint64_t addrMask = (uint64_t)-1;
    uint8_t segment;
    const _WMnemonic* mnemonic;

    /* Set address mask, when default is for 64bits addresses. */
    if (ci->features & DF_MAXIMUM_ADDR32) addrMask = 0xffffffff;
    else if (ci->features & DF_MAXIMUM_ADDR16) addrMask = 0xffff;

    /* Copy other fields. */
    result->size = di->size;
    result->offset = di->addr & addrMask;

    /* Start the operand and hex strings empty; everything below appends to them. */
    result->operands.length = 0;
    result->operands.p[0] = 0;
    result->instructionHex.length = 0;
    result->instructionHex.p[0] = 0;

    /* An undecodable byte is emitted as a data byte: "DB <byte>". */
    if (di->flags == FLAG_NOT_DECODABLE) {
        str = &result->mnemonic;
        strcpy_WSN(str, "DB ");
        str_code_hb(str, di->imm.byte);
        str_hex_b(&result->instructionHex, di->imm.byte);
        return; /* Skip to next instruction. */
    }

    /* Hex dump of the instruction bytes, located relative to the start of the code buffer. */
    str = &result->instructionHex;
    for (i = 0; i < di->size; i++) {
        str_hex_b(str, ci->code[(unsigned int)(di->addr - ci->codeOffset + i)]);
    }

    /* The mnemonic string starts with the prefix text, if any. */
    str = &result->mnemonic;
    switch (FLAG_GET_PREFIX(di->flags))
    {
        case FLAG_LOCK:
            strcpy_WSN(str, "LOCK ");
            break;
        case FLAG_REP:
            strcpy_WSN(str, "REP ");
            break;
        case FLAG_REPNZ:
            strcpy_WSN(str, "REPNZ ");
            break;
        default:
            /* Init mnemonic string, cause next touch is concatenation. */
            str->length = 0;
            str->p[0] = 0;
            break;
    }

    /* Append the mnemonic text, including its terminator, after the prefix. */
    mnemonic = (const _WMnemonic*)&_MNEMONICS[di->opcode];
    memcpy((int8_t*)&str->p[str->length], mnemonic->p, mnemonic->length + 1);
    str->length += mnemonic->length;

    /* Format operands: */
    str = &result->operands;

    /* Special treatment for String instructions. */
    if ((META_GET_ISC(di->meta) == ISC_INTEGER) &&
        ((di->opcode == I_MOVS) ||
         (di->opcode == I_CMPS) ||
         (di->opcode == I_STOS) ||
         (di->opcode == I_LODS) ||
         (di->opcode == I_SCAS)))
    {
        /*
         * No operands are needed if the address size is the default one,
         * and no segment is overridden, so add the suffix letter,
         * to indicate size of operation and continue to next instruction.
         */
        if ((FLAG_GET_ADDRSIZE(di->flags) == ci->dt) && (SEGMENT_IS_DEFAULT(di->segment))) {
            str = &result->mnemonic;
            switch (di->ops[0].size)
            {
                case 8: chrcat_WS(str, 'B'); break;
                case 16: chrcat_WS(str, 'W'); break;
                case 32: chrcat_WS(str, 'D'); break;
                case 64: chrcat_WS(str, 'Q'); break;
                default: break;
            }
            return;
        }
    }

    for (i = 0; ((i < OPERANDS_NO) && (di->ops[i].type != O_NONE)); i++) {
        if (i > 0) strcat_WSN(str, ", ");
        switch (di->ops[i].type)
        {
            case O_REG:
                strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]);
                break;

            case O_IMM:
                /* If the instruction is 'push', show explicit size (except byte imm). */
                if (di->opcode == I_PUSH && di->ops[i].size != 8) distorm_format_size(str, di, i);
                /* Special fix for negative sign extended immediates. */
                if ((di->flags & FLAG_IMM_SIGNED) && (di->ops[i].size == 8)) {
                    if (di->imm.sbyte < 0) {
                        chrcat_WS(str, MINUS_DISP_CHR);
                        str_code_hb(str, -di->imm.sbyte);
                        /* Already printed as "-value"; do not print the immediate again. */
                        break;
                    }
                }
                if (di->ops[i].size == 64) str_code_hqw(str, (uint8_t*)&di->imm.qword);
                else str_code_hdw(str, di->imm.dword);
                break;

            case O_IMM1:
                str_code_hdw(str, di->imm.ex.i1);
                break;

            case O_IMM2:
                str_code_hdw(str, di->imm.ex.i2);
                break;

            case O_DISP:
                /* Absolute memory reference: [seg:disp]. */
                distorm_format_size(str, di, i);
                chrcat_WS(str, OPEN_CHR);
                if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) {
                    strcat_WS(str, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]);
                    chrcat_WS(str, SEG_OFF_CHR);
                }
                tmpDisp64 = di->disp & addrMask;
                str_code_hqw(str, (uint8_t*)&tmpDisp64);
                chrcat_WS(str, CLOSE_CHR);
                break;

            case O_SMEM:
                /* Single-register memory reference: [seg:reg+disp]. */
                distorm_format_size(str, di, i);
                chrcat_WS(str, OPEN_CHR);

                /*
                 * This is where we need to take special care for String instructions.
                 * If we got here, it means we need to explicitly show their operands.
                 * The problem with CMPS and MOVS is that they have two(!) memory operands.
                 * So we have to complete it ourselves, since the structure supplies only the segment that can be overridden.
                 * And make the rest of the String operations explicit.
                 */
                segment = SEGMENT_GET(di->segment);
                isDefault = SEGMENT_IS_DEFAULT(di->segment);
                switch (di->opcode)
                {
                    case I_MOVS:
                        isDefault = FALSE;
                        if (i == 0) segment = R_ES;
                        break;
                    case I_CMPS:
                        isDefault = FALSE;
                        if (i == 1) segment = R_ES;
                        break;
                    case I_INS:
                    case I_LODS:
                    case I_STOS:
                    case I_SCAS:
                        isDefault = FALSE;
                        break;
                    default:
                        break;
                }
                if (!isDefault && (segment != R_NONE)) {
                    strcat_WS(str, (const _WString*)&_REGISTERS[segment]);
                    chrcat_WS(str, SEG_OFF_CHR);
                }

                strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]);

                distorm_format_signed_disp(str, di, addrMask);
                chrcat_WS(str, CLOSE_CHR);
                break;

            case O_MEM:
                /* Full memory reference: [seg:base+index*scale+disp]. */
                distorm_format_size(str, di, i);
                chrcat_WS(str, OPEN_CHR);
                if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) {
                    strcat_WS(str, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]);
                    chrcat_WS(str, SEG_OFF_CHR);
                }
                if (di->base != R_NONE) {
                    strcat_WS(str, (const _WString*)&_REGISTERS[di->base]);
                    chrcat_WS(str, PLUS_DISP_CHR);
                }
                strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]);
                if (di->scale != 0) {
                    chrcat_WS(str, '*');
                    if (di->scale == 2) chrcat_WS(str, '2');
                    else if (di->scale == 4) chrcat_WS(str, '4');
                    else /* if (di->scale == 8) */ chrcat_WS(str, '8');
                }

                distorm_format_signed_disp(str, di, addrMask);
                chrcat_WS(str, CLOSE_CHR);
                break;

            case O_PC:
                /* Relative branch target: immediate + address of the next instruction. */
#ifdef SUPPORT_64BIT_OFFSET
                str_off64(str, (di->imm.sqword + di->addr + di->size) & addrMask);
#else
                str_code_hdw(str, ((_OffsetType)di->imm.sdword + di->addr + di->size) & (uint32_t)addrMask);
#endif
                break;

            case O_PTR:
                /* Far pointer: seg:off. */
                str_code_hdw(str, di->imm.ptr.seg);
                chrcat_WS(str, SEG_OFF_CHR);
                str_code_hdw(str, di->imm.ptr.off);
                break;

            default:
                break;
        }
    }

    /* Branch hints are appended after the operands. */
    if (di->flags & FLAG_HINT_TAKEN) strcat_WSN(str, " ;TAKEN");
    else if (di->flags & FLAG_HINT_NOT_TAKEN) strcat_WSN(str, " ;NOT TAKEN");
}

    _DLLEXPORT_ _DecodeResult distorm_decode64(_OffsetType codeOffset, const unsigned char* code, int codeLen, _DecodeType dt, _DecodedInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount)
    _DLLEXPORT_ _DecodeResult distorm_decode32(_OffsetType codeOffset, const unsigned char* code, int codeLen, _DecodeType dt, _DecodedInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount)
    _DecodeResult res;
    _DInst di;
    _CodeInfo ci;
    unsigned int instsCount = 0, i;

    *usedInstructionsCount = 0;

    /* I use codeLen as a signed variable in order to ease detection of underflow... and besides - */
    if (codeLen < 0) {
        return DECRES_INPUTERR;

    if ((dt != Decode16Bits) && (dt != Decode32Bits) && (dt != Decode64Bits)) {
        return DECRES_INPUTERR;

    if (code == NULL || result == NULL) {
        return DECRES_INPUTERR;

    /* Assume length=0 is success. */
    if (codeLen == 0) {
        return DECRES_SUCCESS;

     * We have to format the result into text. But the interal decoder works with the new structure of _DInst.
     * Therefore, we will pass the result array(!) from the caller and the interal decoder will fill it in with _DInst's.
     * Then we will copy each result to a temporary structure, and use it to reformat that specific result.
     * This is all done to save memory allocation and to work on the same result array in-place!!!
     * It's a bit ugly, I have to admit, but worth it.

    ci.codeOffset = codeOffset;
    ci.code = code;
    ci.codeLen = codeLen;
    ci.dt = dt;
    ci.features = DF_NONE;
    if (dt == Decode16Bits) ci.features = DF_MAXIMUM_ADDR16;
    else if (dt == Decode32Bits) ci.features = DF_MAXIMUM_ADDR32;

    res = decode_internal(&ci, TRUE, (_DInst*)result, maxInstructions, &instsCount);
    for (i = 0; i < instsCount; i++) {
        if ((*usedInstructionsCount + i) >= maxInstructions) return DECRES_MEMORYERR;

        /* Copy the current decomposed result to a temp structure, so we can override the result with text. */
        memcpy(&di, (char*)result + (i * sizeof(_DecodedInst)), sizeof(_DInst));
        distorm_format64(&ci, &di, &result[i]);
        distorm_format32(&ci, &di, &result[i]);

    *usedInstructionsCount = instsCount;
    return res;

#endif /* DISTORM_LIGHT */

_DLLEXPORT_ unsigned int distorm_version()
    return __DISTORMV__;