REhints/HexRaysCodeXplorer

View on GitHub
src/HexRaysCodeXplorer/CtreeExtractor.cpp

Summary

Maintainability
Test Coverage
/*    Copyright (c) 2013-2020
    REhints <info@rehints.com>
    All rights reserved.
    
    ==============================================================================
    
    This file is part of HexRaysCodeXplorer

     HexRaysCodeXplorer is free software: you can redistribute it and/or modify it
     under the terms of the GNU General Public License as published by
     the Free Software Foundation, either version 3 of the License, or
     (at your option) any later version.

     This program is distributed in the hope that it will be useful, but
     WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     General Public License for more details.

     You should have received a copy of the GNU General Public License
     along with this program.  If not, see
     <http://www.gnu.org/licenses/>.

    ==============================================================================
*/

#include "Common.h"
#include "TypeReconstructor.h"
#include "CtreeExtractor.h"
#include "Utility.h"
#include "Debug.h"

#if defined (__LINUX__) || defined (__MAC__)
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif

// Functions whose byte size exceeds this value are flagged as "heuristic" in dump_funcs_ctree()
#define MIN_HEURISTIC_FUNC_SIZE_DUMP 0x160
// Functions smaller than this many bytes are skipped by dump_funcs_ctree() unless their name has the crypto prefix
#define MIN_FUNC_SIZE_DUMP 0x60

// Upper bounds used by dump_funcs_ctree() when deciding whether a function is still decompiled and dumped:
// general counter, heuristic-function counter and crypto-function counter respectively
#define N_FUNCS_TO_DUMP 40
#define N_HEUR_FUNCS_TO_DUMP 60
#define N_CRYPTO_FUNCS_TO_DUMP 30

// NOTE(review): not referenced in the visible part of this file
#define MAX_FUNC_DEPTH 100


// Decide whether a ctree item is excluded from the hash input.
//
// Returns true when the item must be skipped by process_for_hash():
//   - statements: only cit_expr is skipped;
//   - expressions: casts, helpers, the cot_postinc..cot_predec range and
//     the cot_idx..cot_last range are skipped.
// Returns false for every other item.
bool idaapi ctree_dumper_t::filter_citem(citem_t *item) {
    const auto op = item->op;

    if (!item->is_expr())
        return op == cit_expr;

    if (op == cot_cast || op == cot_helper)
        return true;

    if (op >= cot_postinc && op <= cot_predec)
        return true;

    return op >= cot_idx && op <= cot_last;
}

// Append the item's ctype name followed by ':' to ctree_for_hash,
// unless filter_citem() says the item is to be left out.
void ctree_dumper_t::process_for_hash(citem_t *item)
{
    if (filter_citem(item))
        return;

    ctree_for_hash.cat_sprnt("%s:", get_ctype_name(item->op));
}

// Handle one ctree item: append its textual form to ctree_dump
// and feed it to the hash input. Always returns 0.
int ctree_dumper_t::process(citem_t *item)
{
    // Items with more than one parent are wrapped in parentheses;
    // the matching ')' is appended by process_leave()
    if (parents.size() > 1)
        ctree_dump += "(";

    qstring item_text;
    parse_ctree_item(item, item_text);
    ctree_dump += item_text;

    process_for_hash(item);
    return 0;
}

// Close the parenthesis opened by process() for items that have
// more than one parent. The item itself is not inspected. Always returns 0.
int ctree_dumper_t::process_leave(citem_t *item)
{
    const bool nested = parents.size() > 1;
    if (nested)
        ctree_dump += ")";

    return 0;
}

// Build the textual description of a single ctree item.
//
// item - the ctree item to describe
// rv   - output; cleared first, then filled with up to three ';'-separated parts:
//          1. the ctype name plus op-specific details (callee name, access size,
//             member offset, printed value, label number, asm block info);
//          2. "ea->" followed by the item address;
//          3. for expressions with a non-empty type, the printed type
//             (or '?' when it cannot be printed).
void ctree_dumper_t::parse_ctree_item(citem_t *item, qstring& rv) const
{
    rv.clear();
    // Each node will have the element type at the first line
    if (const auto v = get_ctype_name(item->op))
        rv = v;

    const auto e = static_cast<const cexpr_t*>(item);
    const auto i = static_cast<const cinsn_t*>(item);

    // For some item types, display additional information
    switch (item->op)
    {
    case cot_call:
        if (e->x->op == cot_obj) {
            qstring func_name;
            // Treat any non-positive result (or an empty name) as "no name available",
            // the same convention func_name_has_prefix() uses
            if (get_func_name(&func_name, e->x->obj_ea) <= 0 || func_name.empty())
                rv.cat_sprnt(" sub_%a", e->x->obj_ea);
            else
                rv.cat_sprnt(" %s", func_name.c_str());
        }
        break;
    case cot_ptr: // *x
    case cot_memptr: // x->m
        // Display access size for pointers
        rv.cat_sprnt(".%d", e->ptrsize);
        if (item->op == cot_ptr)
            break;
        // fall through: cot_memptr also shows the member offset
    case cot_memref: // x.m
        // Display member offset for structure fields
        rv.cat_sprnt(" (m=%d)", e->m);
        break;
    case cot_obj: // v
    case cot_var: // l
        // Display object size for local variables and global data
        rv.cat_sprnt(".%d", e->refwidth);
        // fall through: the printed value is appended as well
    case cot_num: // n
    case cot_helper: // arbitrary name
    case cot_str: // string constant
        // Display helper names and number values
        rv.append(' ');
        {
            qstring qbuf;
            print1wrapper(e, &qbuf, nullptr);
            tag_remove(&qbuf);
            rv += qbuf;
        }
        break;
    case cit_goto:
        // Display target label number for gotos
        rv.cat_sprnt(" LABEL_%d", i->cgoto->label_num);
        break;
    case cit_asm:
        // Display instruction block address and size for asm-statements
        rv.cat_sprnt(" %a.%" FMT_Z, *i->casm->begin(), i->casm->size());
        break;
    default:
        break;
    }

    // The second line of the node contains the item address
    rv.cat_sprnt(";ea->%a", item->ea);

    if ( item->is_expr() && !e->type.empty() )
    {
        // For typed expressions, the third line will have
        // the expression type in human readable form
        rv.append(';');
        qstring out;
        if (e->type.print(&out))
        {
            rv += out;
        }
        else
        {    // could not print the type?
            rv.append('?');
        }
    }
}

// Per-function record collected by dump_funcs_ctree() and written out by dump_ctrees_in_file().
struct ctree_dump_line {
    // Start addresses of the functions that contain code references to this function
    qvector<ea_t> referres;
    // Hash input string produced by ctree_dumper_t (ctype names of the non-filtered items)
    qstring ctree_for_hash;
    // Full textual ctree dump produced by ctree_dumper_t
    qstring ctree_dump;
    // Function name; left empty when no name could be obtained
    qstring func_name;
    // Written to the dump as a decimal; dump_funcs_ctree() always sets it to -1
    int func_depth{};
    // Function start and end addresses
    ea_t func_start{};
    ea_t func_end{};
    // True when the function size exceeds MIN_HEURISTIC_FUNC_SIZE_DUMP
    bool heuristic_flag{};
};

// Empty extension of ctree_dump_line; adds no members.
// NOTE(review): not used in the visible part of this file.
struct ctree_dump_line_impl : ctree_dump_line
{
};


int create_open_file(const char* file_name) {
    auto file_id = qopen(file_name, O_BINARY | O_TRUNC | O_CREAT);
    if (file_id == BADADDR)
        file_id = qcreate(file_name, 511);

    return file_id;
}

int get_hash_of_string(const qstring &string_to_hash, qstring &hash) {
    SHA1Context sha;
    uint8_t message_digest[SHA1HashSize];

    auto err = SHA1Reset(&sha);
    if (err == shaSuccess) {
        err = SHA1Input(&sha, (uint8_t *)string_to_hash.c_str(), static_cast<unsigned>(string_to_hash.length()));
        if (err == shaSuccess) {
            err = SHA1Result(&sha, message_digest);
            if (err == shaSuccess) {
                char digest_hex[SHA1HashSize * 2 + 1];
                memset(digest_hex, 0x00, sizeof(digest_hex));
                SHA1MessageDigestToString(message_digest, digest_hex);

                hash = digest_hex;
            }
        }
    }

    return err;
}

void dump_ctrees_in_file(std::map<ea_t, ctree_dump_line> &data_to_dump, const qstring &crypto_prefix) {
    const auto file_id = create_open_file("ctrees.txt");
    if (file_id == -1)
    {
        logmsg(ERROR, "Failed to open file for dumping ctress\r\n");
        return;
    }

    size_t crypt_prefix_len = crypto_prefix.length();

    for (auto ctrees_iter = data_to_dump.begin(); ctrees_iter != data_to_dump.end(); ++ctrees_iter) {
        const auto& cdl = ctrees_iter->second;

        qstring sha_hash;
        auto err = get_hash_of_string(cdl.ctree_for_hash, sha_hash);
        if (err != shaSuccess) {
            logmsg(ERROR, "Error in computing SHA1 hash\r\n");
            continue;
        }

        auto dump_line = sha_hash + ";";
        err = get_hash_of_string(cdl.ctree_dump, sha_hash);
        if (err != shaSuccess) {
            logmsg(ERROR, "Error in computing SHA1 hash\r\n");
            continue;
        }
        dump_line += sha_hash + ";";
        dump_line += cdl.ctree_dump;
        dump_line.cat_sprnt(";%d", cdl.func_depth);
        dump_line.cat_sprnt(";%08X", cdl.func_start);
        dump_line.cat_sprnt(";%08X", cdl.func_end);
        if ((cdl.func_name.length() > crypt_prefix_len) && (crypt_prefix_len > 0) && (cdl.func_name.find(crypto_prefix) == 0))
            dump_line.cat_sprnt(";E");
        else
            dump_line.cat_sprnt(";N");

        if ((cdl.heuristic_flag))
            dump_line.cat_sprnt(";H");
        else
            dump_line.cat_sprnt(";N");

        dump_line += "\n";

        qwrite(file_id, dump_line.c_str(), dump_line.length());
    }

    qclose(file_id);
}


inline bool func_name_has_prefix(const qstring &prefix, const ea_t start_ea) {
    if (prefix.length() <= 0)
        return false;

    qstring func_name;
    if (get_func_name(&func_name, start_ea) <= 0)
        return false;

    if (func_name.empty())
        return false;

    return func_name.find(prefix.c_str(), 0) == 0;
}

// Decompile a selection of the database functions, collect their ctree dumps
// and write them out through dump_ctrees_in_file().
//
// ud            - unused
// crypto_prefix - name prefix that marks a function as crypto
//
// Selection rules, applied per function in getn_func() order:
//   - library functions (FUNC_LIB) are skipped unless they carry the crypto prefix;
//   - functions smaller than MIN_FUNC_SIZE_DUMP bytes are skipped unless crypto;
//   - functions larger than MIN_HEURISTIC_FUNC_SIZE_DUMP bytes are flagged heuristic;
//   - a function is decompiled while fewer than N_FUNCS_TO_DUMP functions have been
//     dumped, or it is crypto and fewer than N_CRYPTO_FUNCS_TO_DUMP crypto functions
//     have been dumped, or it is heuristic and fewer than N_HEUR_FUNCS_TO_DUMP
//     heuristic functions have been dumped.
// Functions that fail to decompile are not counted and not dumped.
//
// Always returns true.
bool idaapi dump_funcs_ctree(void *ud, const qstring &crypto_prefix)
{
    logmsg(DEBUG, "dump_funcs_ctree entered\n");

    std::map<ea_t, ctree_dump_line> data_to_dump;

    size_t count = 0;
    size_t heur_count = 0;
    size_t crypto_count = 0;

    const size_t total_func_qty = get_func_qty();
    for (size_t i = 0; i < total_func_qty; ++i) {
        func_t *function = getn_func(i);
        if (function == nullptr)
            continue;

        const bool crypto_flag = func_name_has_prefix(crypto_prefix, function->start_ea);

        // skip libs that are not marked as crypto
        if (((function->flags & FUNC_LIB) != 0) && !crypto_flag)
            continue;

        // From this point on, we have a function outside of lib or a crypto one

        const auto func_size = function->end_ea - function->start_ea;

        // Ignore functions less than MIN_FUNC_SIZE_DUMP bytes
        if ((func_size < MIN_FUNC_SIZE_DUMP) && !crypto_flag)
            continue;

        // If function is bigger than MIN_HEURISTIC_FUNC_SIZE_DUMP, mark as being triggered by the heuristic
        const bool heuristic_flag = func_size > MIN_HEURISTIC_FUNC_SIZE_DUMP;

        const bool wanted = (count < N_FUNCS_TO_DUMP)
            || (crypto_flag && (crypto_count < N_CRYPTO_FUNCS_TO_DUMP))
            || (heuristic_flag && (heur_count < N_HEUR_FUNCS_TO_DUMP));
        if (!wanted)
            continue;

        hexrays_failure_t hf;
        cfuncptr_t cfunc = decompile(function, &hf);

        logmsg(DEBUG, "\nafter decompile()\n");
        if (cfunc == nullptr)
            continue;

        ctree_dumper_t ctree_dumper;
        ctree_dumper.apply_to(&cfunc->body, nullptr);

        // Fill the record in place inside the map to avoid copying the dump strings twice
        ctree_dump_line &func_dump = data_to_dump[function->start_ea];
        func_dump.ctree_dump = ctree_dumper.ctree_dump;
        func_dump.ctree_for_hash = ctree_dumper.ctree_for_hash;

        func_dump.func_depth = -1;

        func_dump.func_start = function->start_ea;
        func_dump.func_end = function->end_ea;

        qstring func_name;
        if (get_func_name(&func_name, function->start_ea) > 0 && !func_name.empty())
            func_dump.func_name = func_name;

        // Record the start address of every function holding a code reference to this one
        for (ea_t addr = get_first_cref_to(function->start_ea); addr != BADADDR; addr = get_next_cref_to(function->start_ea, addr)) {
            const func_t *referer = get_func(addr);
            if (referer != nullptr)
                func_dump.referres.push_back(referer->start_ea);
        }

        func_dump.heuristic_flag = heuristic_flag;
        if (heuristic_flag)
            heur_count++;

        if (crypto_flag)
            crypto_count++;

        count++;
    }

    dump_ctrees_in_file(data_to_dump, crypto_prefix);

    return true;
}

// Ask the user for the crypto function name prefix and dump the ctrees.
//
// ud - unused
//
// Returns false when the prompt is cancelled, true otherwise. An empty
// prefix only produces a warning; nothing is dumped in that case.
bool idaapi extract_all_ctrees(void *ud)
{
    // default prefix to display in the dialog
    static const qstring kDefaultPrefix = "crypto_";

    // The prompt has no format arguments, so none are passed.
    // (Calling va_end on a va_list that was never started is undefined behavior.)
    auto crypto_prefix = kDefaultPrefix;
    if (!ask_str(&crypto_prefix, 0, "Enter prefix of crypto function names"))
        return false;

    if (crypto_prefix.empty()) {
        warning("Incorrect prefix!!");
        return true;
    }

    dump_funcs_ctree(nullptr, crypto_prefix);
    return true;
}


// State attached to a "Ctree Item View" widget: the host widget, the custom
// viewer and code viewer created inside it, and the lines being displayed.
struct func_ctree_info_t
{
    TWidget *widget = nullptr;
    TWidget *cv = nullptr;
    TWidget *codeview = nullptr;
    strvec_t sv;

    // Only the host widget is known at construction; the viewers are filled in later
    explicit func_ctree_info_t(TWidget *f) : widget(f) {}
};


// Show a ctree item description in its own custom viewer widget.
//
// ud         - unused
// ctree_item - item description; each ';'-separated part becomes one viewer line
// item_name  - appended to the widget title "Ctree Item View: "
//
// Always returns false.
bool idaapi show_citem_custom_view(void *ud, const qstring& ctree_item, const qstring& item_name)
{
    qstring form_name = "Ctree Item View: ";
    form_name.append(item_name);
    const auto widget = create_empty_widget(form_name.c_str());

    // Handed to the viewer as its line source and user data; it is not freed in this function
    auto si = new func_ctree_info_t(widget);

    istringstream s_citem_str(ctree_item.c_str());
    string tmp_str;
    while (getline(s_citem_str, tmp_str, ';'))
    {
        qstring tmp_qstr = tmp_str.c_str();
        si->sv.push_back(simpleline_t(tmp_qstr));
    }

    // The viewer range runs from the first line to the last one, so the upper
    // bound is the index of the last line rather than the length of the input text
    const int last_line = si->sv.empty() ? 0 : static_cast<int>(si->sv.size() - 1);

    simpleline_place_t s1;
    simpleline_place_t s2(last_line);
    si->cv = create_custom_viewer("", &s1, &s2, &s1, nullptr, &si->sv, nullptr, nullptr, widget);
    si->codeview = create_code_viewer(si->cv, CDVF_NOLINES, widget);
    set_custom_viewer_handlers(si->cv, nullptr, si);
    display_widget(widget, WOPN_RESTORE);

    return false;
}