REhints/HexRaysCodeXplorer

View on GitHub
src/HexRaysCodeXplorer/GCCObjectFormatParser.cpp

Summary

Maintainability
Test Coverage
/*    Copyright (c) 2013-2020
    REhints <info@rehints.com>
    All rights reserved.

    ==============================================================================

    This file is part of HexRaysCodeXplorer

    HexRaysCodeXplorer is free software: you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.

    ==============================================================================
*/




#include "GCCObjectFormatParser.h"
#include "Common.h"
#include "entry.hpp"
#include "Debug.h"
#include "demangle.hpp"
#include "name.hpp"
#include "offset.hpp"
#include "nalt.hpp"
#include "bytes.hpp"
#include "Utility.h"
#include "stddef.h"
#include "GCCVtableInfo.h"
#include "GCCTypeInfo.h"
#include "struct.hpp"
#include "Debug.h"
#include "ReconstructableType.h"
#include <cstring>
#include <stack>

// Mangled symbol names of the vtables of the three __cxxabiv1 type_info
// classes. The code in this file matches them as prefixes of import, entry and
// name-list names (comparison length is sizeof(name) - 1).
#define vmi_class_type_info_name "_ZTVN10__cxxabiv121__vmi_class_type_infoE"
#define class_type_info_name "_ZTVN10__cxxabiv117__class_type_infoE"
#define si_class_type_info_name "_ZTVN10__cxxabiv120__si_class_type_infoE"

// Lookup tables for already parsed vtables and type infos, keyed by address
// and by name. scanSeg4Vftables() consults the address-keyed tables to skip
// known objects; the code that fills them is not in this file section
// (presumably the GCCVtableInfo/GCCTypeInfo parsers — TODO confirm).
std::unordered_map<ea_t, GCCVtableInfo *>g_KnownVtables;
std::unordered_map<ea_t, GCCTypeInfo *>g_KnownTypes;
std::unordered_map<std::string, GCCVtableInfo *>g_KnownVtableNames;
std::unordered_map<std::string, GCCTypeInfo *>g_KnownTypeNames;

// Addresses recorded for the three type_info vtables (symbol address plus
// sizeof(GCC_RTTI::__vtable_info)). -1 means "not found yet"; this file
// compares these values against both -1 and BADADDR interchangeably.
ea_t class_type_info_vtbl = -1;
ea_t si_class_type_info_vtbl = -1;
ea_t vmi_class_type_info_vtbl = -1;

// Defined at the end of this file; called from get_rtti_info().
static void buildReconstructableTypes();

// The constructor performs no explicit initialization.
GCCObjectFormatParser::GCCObjectFormatParser() = default;

// The destructor performs no explicit cleanup.
GCCObjectFormatParser::~GCCObjectFormatParser() = default;


/// Callback passed to enum_import_names(): records (and names in the IDB) the
/// three __cxxabiv1 type_info vtables when they show up among the imports.
///
/// @param ea    address of the imported symbol
/// @param name  name of the imported symbol; a null name is ignored
/// @param ord   import ordinal (unused)
/// @param param user data given to enum_import_names() (unused)
/// @return always 1, i.e. the callback never asks to stop the enumeration
static int  import_enum_cb(ea_t ea, const char *name, uval_t ord, void *param) {
    if (name == nullptr)
        return 1;

    struct Target {
        const char *mangled;   // mangled vtable symbol to look for
        size_t      length;    // number of characters that must match
        ea_t       *slot;      // global that receives the address
        const char *label;     // name applied to the address in the IDB
    };
    const Target targets[] = {
        { class_type_info_name, sizeof(class_type_info_name) - 1,
          &class_type_info_vtbl, "__cxxabiv1::__class_type_info::vtable" },
        { si_class_type_info_name, sizeof(si_class_type_info_name) - 1,
          &si_class_type_info_vtbl, "__cxxabiv1::__si_class_type_info::vtable" },
        { vmi_class_type_info_name, sizeof(vmi_class_type_info_name) - 1,
          &vmi_class_type_info_vtbl, "__cxxabiv1::__vmi_class_type_info::vtable" },
    };

    // The import address is shifted by the size of the vtable header before
    // it is stored (the original comment calls this an IDA import quirk).
    ea += sizeof(GCC_RTTI::__vtable_info);

    for (const Target &target : targets) {
        // strncmp() stops at the terminating NUL, so an import name shorter
        // than the mangled symbol is never read past its end (memcmp() with a
        // fixed length would be).
        if (*target.slot == BADADDR && strncmp(name, target.mangled, target.length) == 0) {
            *target.slot = ea;
            set_name(ea, target.label, SN_NOWARN);
        }
    }

    return 1;
}

static int __get_ea_of_name(size_t index, ea_t *value) {
    ea_t ea = get_nlist_ea(index);
    if (ea == BADADDR)
        return -1;
    *value = ea;
    return 0;
}

static int find_vtbls_by_names(bool force) {
    size_t cnt = get_nlist_size();
    unsigned int found_vtbls = 0;
    ea_t ea;

    if (force)
        class_type_info_vtbl = si_class_type_info_vtbl = vmi_class_type_info_vtbl = BADADDR;
    else {
        if (class_type_info_vtbl != BADADDR)
            ++found_vtbls;
        if (si_class_type_info_vtbl != BADADDR)
            ++found_vtbls;
        if (vmi_class_type_info_vtbl != BADADDR)
            ++found_vtbls;
    }
    for (size_t i = 0; i < cnt && found_vtbls < 3; ++i) {
        const char *name = get_nlist_name(i);
        if (name && memcmp(name, "_ZTVN10__cxxabiv", sizeof("_ZTVN10__cxxabiv")-1) == 0) {
            if (class_type_info_vtbl == BADADDR)
                if (memcmp(name, class_type_info_name, sizeof(class_type_info_name)-1) == 0)
                    if (!__get_ea_of_name(i, &ea))
                    {
                        ea += sizeof(GCC_RTTI::__vtable_info);
                        class_type_info_vtbl = ea;
                        ++found_vtbls;
                        continue;
                    }
            if (si_class_type_info_vtbl == BADADDR)
                if (memcmp(name, si_class_type_info_name, sizeof(si_class_type_info_name)-1) == 0)
                    if (!__get_ea_of_name(i, &ea))
                    {
                        ea += sizeof(GCC_RTTI::__vtable_info);
                        si_class_type_info_vtbl = ea;
                        ++found_vtbls;
                        continue;
                    }
            if (vmi_class_type_info_vtbl == BADADDR)
                if (memcmp(name, vmi_class_type_info_name, sizeof(vmi_class_type_info_name)-1) == 0)
                    if (!__get_ea_of_name(i, &ea))
                    {
                        ea += sizeof(GCC_RTTI::__vtable_info);
                        vmi_class_type_info_vtbl = ea;
                        ++found_vtbls;
                        continue;
                    }
        }
    }
    return 0;
}

int GCCObjectFormatParser::collect_info_vtbls(bool force) {
    size_t count = get_entry_qty();
    qstring buffer;

    /* We already know some values, so lets omit the search. */
    if (!force && (class_type_info_vtbl != -1 ||
        si_class_type_info_vtbl != -1 ||
        vmi_class_type_info_vtbl != -1))
        return 0;
    
    if (force) {
        class_type_info_vtbl = si_class_type_info_vtbl = vmi_class_type_info_vtbl = BADADDR;
    }
    for (int i = 0; i < count; ++i) {
        uval_t ordinal = get_entry_ordinal(i);
        get_entry_name(&buffer, ordinal);
        ea_t ea = get_entry(ordinal);
        ea += sizeof(GCC_RTTI::__vtable_info);

        if (class_type_info_vtbl == BADADDR && !memcmp(class_type_info_name, buffer.c_str(), sizeof(class_type_info_name) - 1))
        {
            class_type_info_vtbl = ea;
            set_name(ea, "__cxxabiv1::__class_type_info::vtable", SN_NOWARN);
        }

        if (si_class_type_info_vtbl == BADADDR && !memcmp(si_class_type_info_name, buffer.c_str(), sizeof(si_class_type_info_name) - 1))
        {
            si_class_type_info_vtbl = ea;
            set_name(ea, "__cxxabiv1::__si_class_type_info::vtable", SN_NOWARN);
        }

        if (vmi_class_type_info_vtbl == BADADDR && !memcmp(vmi_class_type_info_name, buffer.c_str(), sizeof(vmi_class_type_info_name) - 1))
        {
            vmi_class_type_info_vtbl = ea;
            set_name(ea, "__cxxabiv1::__vmi_class_type_info::vtable", SN_NOWARN);
        }
    }

    count = get_import_module_qty();
    for (uint index = 0; index < count; ++index)
        enum_import_names(index, &import_enum_cb, this);
    
    find_vtbls_by_names(false);


    if (class_type_info_vtbl == -1 &&
        si_class_type_info_vtbl == -1 &&
        vmi_class_type_info_vtbl == -1)
        return -1;
    return 0;
}

void GCCObjectFormatParser::get_rtti_info()
{
    collect_info_vtbls();
    if (class_type_info_vtbl == -1 &&
        si_class_type_info_vtbl == -1 &&
        vmi_class_type_info_vtbl == -1)
        return;
        // if no any rtti vtables, we cant read it.
    // now we can scan  segments for vtables.
    int segCount = get_segm_qty();
    for (int i = 0; i < segCount; i++)
    {
        if (segment_t *seg = getnseg(i))
        {
            if (seg->type == SEG_DATA)
            {
                scanSeg4Vftables(seg);
            }
        }
    }

    buildReconstructableTypes();
}

/// Scans one segment in ea_t-sized steps for GCC vtables and type_info
/// objects. Every vtable that GCCVtableInfo::parseVtableInfo() accepts is
/// recorded in rtti_vftables, keyed by the address that follows its
/// GCC_RTTI::__vtable_info header.
///
/// @param seg  segment to scan; must not be null
void GCCObjectFormatParser::scanSeg4Vftables(segment_t *seg)
{
    const size_t size = (std::max)(sizeof(GCC_RTTI::__vtable_info), sizeof(GCC_RTTI::type_info));
    unsigned char buffer[(std::max)(sizeof(GCC_RTTI::__vtable_info), sizeof(GCC_RTTI::type_info))];

    // NOTE(review): for an already aligned start_ea this expression skips the
    // first ea_t-sized word of the segment; a plain round-up would add
    // sizeof(ea_t) - 1 instead. Left unchanged — confirm whether intended.
    const ea_t startEA = ((seg->start_ea + sizeof(ea_t)) & ~((ea_t)(sizeof(ea_t) - 1)));
    const ea_t endEA = (seg->end_ea - sizeof(ea_t));
    ea_t ea = startEA;
    while (ea < endEA)
    {
        // Jump over objects that have already been parsed.
        auto knownType = g_KnownTypes.find(ea);
        if (knownType != g_KnownTypes.end()) {
            ea += knownType->second->size;
            continue;
        }

        auto knownVtbl = g_KnownVtables.find(ea);
        if (knownVtbl != g_KnownVtables.end()) {
            ea = knownVtbl->second->ea_end;
            continue;
        }

        // get_bytes() reports the number of bytes read (or -1). Anything but
        // a complete read would leave part of the buffer uninitialized, so
        // treat it as "nothing here" instead of parsing garbage.
        const auto bytesRead = get_bytes(buffer, size, ea);
        if (bytesRead < 0 || static_cast<size_t>(bytesRead) != size) {
            ea += sizeof(ea_t);
            continue;
        }

        GCC_RTTI::__vtable_info *vt = (GCC_RTTI::__vtable_info *)buffer;
        // do some sanity checks  if it looks like a Virtual Table
        if (vt->ptrdiff == 0) {
            GCCTypeInfo *type = GCCTypeInfo::parseTypeInfo(vt->type_info);
            if (type != 0) {
                GCCVtableInfo * info = GCCVtableInfo::parseVtableInfo(ea);
                if (info)
                {
                    VTBL_info_t vtbl_info;
                    vtbl_info.ea_begin = info->ea_start + sizeof(GCC_RTTI::__vtable_info);
                    vtbl_info.ea_end = info->ea_end;
                    vtbl_info.vtbl_name = info->typeName.c_str();
                    vtbl_info.methods = info->vtables[0].methodsCount;
                    rtti_vftables[ea + sizeof(GCC_RTTI::__vtable_info)] = vtbl_info;
                    ea = info->ea_end;
                    continue;
                }

            }
        }

        // Not a vtable: try to parse a type_info object at this address.
        GCCTypeInfo *typeInfo = GCCTypeInfo::parseTypeInfo(ea);
        if (typeInfo)
            ea += typeInfo->size;
        else
            ea += sizeof(ea_t);
    }
}

/// Empties the four global lookup tables of known types and vtables.
/// Only the map entries are removed; the objects the stored raw pointers
/// refer to are not deleted here.
/// NOTE(review): the trailing assert(false) fires on every call in builds
/// with assertions enabled; it marks the open question below.
void GCCObjectFormatParser::clear_info()
{
    // Type tables.
    g_KnownTypeNames.clear();
    g_KnownTypes.clear();
    // Vtable tables.
    g_KnownVtableNames.clear();
    g_KnownVtables.clear();

    assert(false); // reasonable question what to do with ReconstructableTypes.
}

/// Builds the ReconstructableType for `type` (after doing the same for all of
/// its parents), together with one reconstructable vtable structure per
/// vtable of the type, and applies the primary vtable's name and type to the
/// IDB.
///
/// @param type          RTTI description of the class to reconstruct
/// @param visitedTypes  types that have already been handled; `type` is added
///                      to it before the function returns
void buildReconstructableTypesRecursive(GCCTypeInfo *type,  std::set <GCCTypeInfo *> &visitedTypes) {
    if (visitedTypes.count(type))
        return;
    // Handle parents first
    for (unsigned long i = 0; i < type->parentsCount; ++i) {
        GCCParentType * parent = type->parentsTypes[i];
        buildReconstructableTypesRecursive(parent->info, visitedTypes);
    }

    if (g_ReconstractedTypes.count(type->typeName)) {
        // Already reconstructed under this name: remember it so that it is
        // not examined again.
        visitedTypes.emplace(type);
        return;
    }
    ReconstructableType *reType = ReconstructableType::getReconstructableType(type->typeName);
    reType->SyncTypeInfo();

    if (type->vtable) // type has vtable;
    {
        GCCVtableInfo *vtblInfo = type->vtable;
        std::string vtbl_class_name = type->typeName + VTBL_CLSNAME_POSTFIX;
        if (g_ReconstractedTypes.count(vtbl_class_name)) {
            assert(false); // one more assert for the future
        }
        char buffer[256];

        for (unsigned int i = 0; i < vtblInfo->vtablesCount; ++i) {
            unsigned int offset = (unsigned int)(-(signed int)vtblInfo->vtables[i].ptrDiff);
            std::string parentName = vtblInfo->vtables[i].name.c_str();
            std::string vtblName = vtbl_class_name;
            if (i != 0) {
                // Secondary vtables get a name derived from offset and parent.
                snprintf(buffer, sizeof(buffer), "%s_%x_of_%s", vtbl_class_name.c_str(), offset, parentName.c_str());
                vtblName = buffer;
            }
            ReconstructableType * reVtbl = ReconstructableTypeVtable::get_reconstructable_type_vtable(vtblName, vtblInfo->ea_start);
            // The vtable index is used as a parent index here, so it has to be
            // checked against parentsCount before parentsTypes[] is touched.
            if (i != 0 && i < type->parentsCount) {
                auto parentIt = g_ReconstractedTypes.find(type->parentsTypes[i]->info->typeName);
                if (parentIt != g_ReconstractedTypes.end())
                {
                    ReconstructableType *parent = parentIt->second;
                    if (parent->getSize() < sizeof(uval_t)) {
                        // The parent has no room for a vtable pointer yet:
                        // give it a vtable structure padded with placeholder
                        // entries and a "vtable" member pointing to it.
                        ReconstructableType *parentVtbl = ReconstructableType::getReconstructableType(parent->name + VTBL_CLSNAME_POSTFIX);
                        if (parentVtbl->getSize() < vtblInfo->vtables[i].methodsCount) {
                            for (unsigned int methodIndx = parentVtbl->getSize(); methodIndx < vtblInfo->vtables[i].methodsCount; ++methodIndx)
                            {
                                ReconstructableMember *pmethod = new ReconstructableMember();
                                pmethod->name = "purecall";
                                pmethod->name += std::to_string(methodIndx);
                                pmethod->offset = methodIndx * sizeof(uval_t);
                                tinfo_t info = dummy_ptrtype(sizeof(uval_t), 0);
                                pmethod->memberType = new MemberTypeIDATypeInfoGate(info);
                                parentVtbl->AddMember(pmethod);
                            }
                        }
                        ReconstructableMember *pmember = new ReconstructableMember();
                        pmember->name = "vtable";
                        pmember->offset = 0;
                        pmember->memberType = new MemberTypePointer(parentVtbl->name);
                        parent->AddMember(pmember);
                    }
                }
            }

            // One pointer-sized member per virtual method slot.
            for (unsigned int j = 0; j < vtblInfo->vtables[i].methodsCount; ++j) {
                ReconstructableMember *member = new ReconstructableMember();
                member->offset = sizeof(uval_t)*j;
                ea_t funcPtr = getEa(vtblInfo->vtables[i].ea + sizeof(uval_t)*j + sizeof(GCC_RTTI::__vtable_info));
                if (funcPtr == 0) {
                    member->name = "purecall";
                    member->name += std::to_string(j);
                }
                else {
                    if (ph.id == PLFM_ARM)
                        funcPtr &= (ea_t)-2;
                    qstring method_name;
                    get_ea_name(&method_name, funcPtr);
                    if (method_name.find("sub_", 0) == 0 || method_name.length() == 0) {
                        // we can rename it.
                        // ea_t may be 64 bits wide, so it is printed with an
                        // explicit 64-bit format instead of plain %x.
                        qstring newName;
                        newName.sprnt("%s::refunc_%" FMT_64 "x", type->typeName.c_str(), (uint64)funcPtr);
                        if (set_name(funcPtr, newName.c_str(), SN_NOWARN)) {
                            method_name = newName;
                        }
                    }
                    if (method_name.length() == 0)
                        method_name.sprnt("___refunc_%" FMT_64 "x", (uint64)funcPtr);
                    member->name = method_name.c_str();
                }

                tinfo_t info = dummy_ptrtype(sizeof(uval_t), 0);
                member->memberType = new  MemberTypeIDATypeInfoGate(info);
                reVtbl->AddMember(member);
            }
            if (i == 0) {
                // add vtable info
                ReconstructableMember *member = new ReconstructableMember();
                member->name = "vtable";
                member->offset = 0;
                member->memberType = new MemberTypePointer(vtbl_class_name);
                reType->AddMember(member);
            }
            if (i < type->parentsCount) {
                // Derive this vtable structure from the parent's one, if the
                // parent has a reconstructed vtable.
                auto parentVtblIt = g_ReconstractedTypes.find(type->parentsTypes[i]->info->typeName + VTBL_CLSNAME_POSTFIX);
                if (parentVtblIt != g_ReconstractedTypes.end()) {
                    ReconstructableType *parentVtbl = parentVtblIt->second;

                    ReconstructableMember *dmember = new ReconstructableMember();
                    dmember->name = parentVtbl->name;
                    dmember->offset = 0;
                    dmember->memberType = new ReconstructedMemberReType(parentVtbl);
                    reVtbl->AddDerivedMember(dmember);
                }
            }
            reVtbl->SyncTypeInfo();
            // we have vtable, we have it as structure, lets apply its name and type to IDB
            if (i == 0) {
                std::string idb_name = type->typeName + "::_vftable";
                ea_t ea = vtblInfo->vtables[0].ea + 2 * sizeof(uval_t);
                setUnknown(ea, vtblInfo->vtables[0].methodsCount * sizeof(uval_t));
                MakeName(ea, idb_name.c_str());
                tinfo_t tinfo;
                if (tinfo.get_named_type(get_idati(), reVtbl->name.c_str())) {
                    apply_tinfo(ea, tinfo, TINFO_DEFINITE);
                }
            }
        }
    }

    // Embed every parent as a derived member at the offset given by the RTTI.
    for (unsigned int i = 0; i < type->parentsCount; ++i) {
        GCCParentType *parentInfo = type->parentsTypes[i];
        auto parentIt = g_ReconstractedTypes.find(parentInfo->info->typeName);
        assert(parentIt != g_ReconstractedTypes.end());
        if (parentIt == g_ReconstractedTypes.end())
            continue;  // with assertions disabled: skip rather than insert a null entry
        ReconstructableMember* member = new ReconstructableMember();
        member->offset = parentInfo->offset;
        member->name = parentInfo->info->typeName;
        member->memberType = new ReconstructedMemberReType(parentIt->second);
        reType->AddDerivedMember(member);
    }
    visitedTypes.emplace(type);
}


/// Renumbers all reconstructed types: each entry of g_ReconstractedTypes gets
/// an id equal to its position in the container's iteration order, starting
/// at 0.
void fixupRecounstructableTypesId() {
    unsigned long nextId = 0;
    for (auto &entry : g_ReconstractedTypes) {
        entry.second->id = nextId;
        ++nextId;
    }
}

static void buildReconstructableTypes() {
    std::set <GCCTypeInfo *> visitedTypes;
    SyncTypeInfoMethod curMethod = syncTypeInfoMethod;
    syncTypeInfoMethod = SyncTypeInfo_Names;
    std::unordered_map<ea_t, GCCTypeInfo *>::iterator typesIterator;
    for (typesIterator = g_KnownTypes.begin(); typesIterator != g_KnownTypes.end(); ++typesIterator) {
        GCCTypeInfo *curType = typesIterator->second;
        if (visitedTypes.count(curType))
            continue; // already parsed
        buildReconstructableTypesRecursive(curType, visitedTypes);
    }
    fixupRecounstructableTypesId();
    syncTypeInfoMethod = curMethod;
    for (auto typeIt = g_ReconstractedTypes.begin(); typeIt != g_ReconstractedTypes.end(); ++typeIt) {
        typeIt->second->SyncTypeInfo();
    }

    return;
}