From 45dd86e289d97676f58f2a890a2392adb097a1d4 Mon Sep 17 00:00:00 2001 From: DrFrugal Date: Sun, 28 Jan 2024 21:38:40 +0100 Subject: [PATCH] refactor to use std::optional instead of ParseResult or EvalResult big refactor for evaluating local variables etc tracking branches and loops FunctionInfo struct has a constructor for parsing from vector of strings --- GhidraParser/FunctionInfo.cpp | 452 ++++++++++++++++++++++++++++++++-- GhidraParser/FunctionInfo.hpp | 29 ++- GhidraParser/GhidraParser.cpp | 269 ++------------------ GhidraParser/Utility.cpp | 23 +- GhidraParser/Utility.hpp | 67 ++++- 5 files changed, 555 insertions(+), 285 deletions(-) diff --git a/GhidraParser/FunctionInfo.cpp b/GhidraParser/FunctionInfo.cpp index dfac6b5..dd0f1ed 100644 --- a/GhidraParser/FunctionInfo.cpp +++ b/GhidraParser/FunctionInfo.cpp @@ -1,11 +1,206 @@ #include "FunctionInfo.hpp" +#include "Utility.hpp" #include #include +#include #include +#include +#include +#include -std::smatch match; +std::regex decl_rgx(R"(^ (?:int|uint|BOOL) (\w+);)"); // currently only considering these types for declarations +std::regex ass_rgx(R"(^ +?(\w+) = (.+);)"); // OwO +std::regex if_rgx(R"(if \((.+)\) )"); +std::regex usg_rgx(R"(,"(Usage: [^;]+).?"[, \)])"); // .? is a workaround to prevent raw string from closing +std::regex usg_vld_rgx(R"(Usage: .+?\((.*?)\))"); +std::regex lua_is_rgx(R"(lua_is(.+?)\(L,(\w+?)\))"); +std::regex lua_to_rgx(R"(lua_to(.+?)\(L,(\w+?)[,\)])"); +std::regex lua_push_rgx(R"(lua_push(.+?)\(())"); +std::regex dowhl_cond_rgx(R"(while \((.+)\);)"); -const std::regex usg_vld_rgx(R"(Usage: .+?\((.*?)\))"); +std::optional prsi_lcls(std::string str, std::unordered_map& lcls) +{ // looks up local variables, before it tries to call prsi + if (lcls.count(str)) return lcls[str]; + return prsi(str); +} + +__forceinline std::optional slv_step(std::stack& vals, std::stack& ops) +{ + int v2 = vals.top(); + vals.pop(); + int v1 = vals.top(); + vals.pop(); + Op op = ops.top(); + ops.pop(); + switch (op) + { + case LOR: + return v1 || v2; + case LAND: + return v1 && v2; + case BOR: + return v1 | v2; + case BXOR: + return v1 ^ v2; + case BAND: + return v1 & v2; + case EQ: + return v1 == v2; + case UEQ: + return v1 != v2; + case LT: + return v1 < v2; + case LTE: + return v1 <= v2; + case BT: + return v1 > v2; + case BTE: + return v1 >= v2; + case PLS: + return v1 + v2; + case MIN: + return v1 - v2; + case MUL: + return v1 * v2; + case DIV: + if (v2 == 0) return std::nullopt; + return v1 / v2; + case MOD: + if (v2 == 0) return std::nullopt; + return v1 % v2; + } + return std::nullopt; // unsupported op +} + +__forceinline std::optional push_op(std::stack& vals, std::stack& ops, Op op) +{ + while (ops.size() >= 1 && op_prec(ops.top()) >= op_prec(op)) + { // last op has greater or same precedence + auto res = slv_step(vals, ops); + if (!res.has_value()) return std::nullopt; + vals.push(res.value()); + } + ops.push(op); +} + +std::regex lua_is_quick(R"(^lua_is\S+?\(L,\S+?\)$)"); +std::regex sstrcmpi_quick(R"(^SStrCmpI\(\S+?,\S+?,\S+?\)$)"); + +std::optional eval(std::string& infix, locals& lcls) +{ + std::smatch match; + if (std::regex_search(infix, match, lua_is_quick)) return 1; + if (std::regex_search(infix, match, sstrcmpi_quick)) return 1; + + if (infix.find_first_of(' ') == std::string::npos) + { // TODO performance escape hatch for simple infix strings - might actually be counter productive... have to check + return prsi_lcls(infix, lcls); + } + + std::stack vals; + std::stack ops; + int tk_start = 0; + std::string tk; + bool was_brr; + for (int tk_end = 0; tk_end < infix.length(); tk_end++) + { + was_brr = false; + switch (infix[tk_end]) + { + case ' ': + tk = infix.substr(tk_start, tk_end - tk_start); + tk_start = tk_end + 1; + break; + case '(': + tk = ""; + ops.push(Op::BRL); + tk_start = tk_end + 1; + continue; + case ')': + tk = infix.substr(tk_start, tk_end - tk_start); + tk_start = tk_end + 1; + was_brr = true; + break; + } + if (tk == "" && tk_end == infix.length() - 1) + { // last token + tk = infix.substr(tk_start, tk_end - tk_start + 1); + } + if (tk == "" && !was_brr) continue; // empty token, nothing to do + + if (tk == "||") push_op(vals, ops, LOR); + else if (tk == "&&") push_op(vals, ops, LAND); + else if (tk == "|") push_op(vals, ops, BOR); + else if (tk == "^") push_op(vals, ops, BXOR); + else if (tk == "&") push_op(vals, ops, BAND); + else if (tk == "==") push_op(vals, ops, EQ); + else if (tk == "!=") push_op(vals, ops, UEQ); + else if (tk == "<") push_op(vals, ops, LT); + else if (tk == "<=") push_op(vals, ops, LTE); + else if (tk == ">") push_op(vals, ops, BT); + else if (tk == ">=") push_op(vals, ops, BTE); + else if (tk == "+") push_op(vals, ops, PLS); + else if (tk == "-") push_op(vals, ops, MIN); + else if (tk == "*") push_op(vals, ops, MUL); + else if (tk == "/") push_op(vals, ops, DIV); + else if (tk == "%") push_op(vals, ops, MOD); + else if (tk != "") + { + auto pr = prsi_lcls(tk, lcls); + if (!pr.has_value()) return std::nullopt; + vals.push(pr.value()); + } + if (was_brr) + { + while (ops.top() != BRL) + { + auto res = slv_step(vals, ops); + if (!res.has_value()) return std::nullopt; + vals.push(res.value()); + } + ops.pop(); // popping left brace + } + tk = ""; + } + while (!ops.empty()) + { + auto res = slv_step(vals, ops); + if (!res.has_value()) return std::nullopt; + vals.push(res.value()); + } + if (vals.size() != 1 || !ops.empty()) return std::nullopt; + return vals.top(); +} + +bool FunctionInfo::prc_varmap_rgx(bool prc_in, const std::string& ln_in, std::regex& rgx, locals& lcls) +{ + std::string ln = ln_in; + bool fnd = false; + varmap& params = prc_in ? in : out; + while (std::regex_search(ln, match, rgx)) + { + fnd = true; + int lua_idx; + std::string lua_type; + lua_type = match[1]; + if (lua_type == "lstring") lua_type = "string"; // lstring is a string! + else if (lua_type == "fstring") lua_type = "string"; // frsting is a string! + if (&rgx == &lua_push_rgx) + { // push parsing uses a global index starting with 1 + lua_idx = out.size() + 1; + } + else lua_idx = prsi_lcls(match[2], lcls).value(); + if (lua_idx == -1) + { // -1 means there wasn't a literal used for accessing the index, so i can not parse it + if (&rgx == &lua_push_rgx) out_cnt = -1; + else in_cnt = -1; // TODO might be possible to parse when evaluating variables + return fnd; + } + params[lua_idx].push_back(lua_type); // always push lua type for now + ln = match.suffix(); + } + return fnd; +} void FunctionInfo::chk_vld() { // run all checks, so we have the full picture @@ -46,19 +241,10 @@ void FunctionInfo::chk_vld() prs_vld = vld; } -bool FunctionInfo::nil_in_varmap(bool proc_in) const +void FunctionInfo::cln_varmap(bool prc_in) { - const varmap& params = proc_in ? in : out; - for (auto& [key, value] : params) - if (std::find(value.begin(), value.end(), "nil") != value.end()) - return true; - return false; -} - -void FunctionInfo::cln_varmap(bool proc_in) -{ - varmap& params = proc_in ? in : out; - int cnt = proc_in ? in_cnt : out_cnt; + varmap& params = prc_in ? in : out; + int cnt = prc_in ? in_cnt : out_cnt; if (cnt == -1) { params.clear(); // dynamic varmap does not need entries @@ -69,7 +255,16 @@ void FunctionInfo::cln_varmap(bool proc_in) std::sort(lua_types.begin(), lua_types.end()); lua_types.erase(std::unique(lua_types.begin(), lua_types.end()), lua_types.end()); } - if (proc_in) in_cnt = in.size(); // input count can only be inferred by in varmap size + if (prc_in) in_cnt = in.size(); // input count can only be inferred by in varmap size +} + +bool FunctionInfo::nil_in_varmap(bool prc_in) const +{ + const varmap& params = prc_in ? in : out; + for (auto& [key, value] : params) + if (std::find(value.begin(), value.end(), "nil") != value.end()) + return true; + return false; } std::string FunctionInfo::str() const @@ -82,9 +277,9 @@ std::string FunctionInfo::str() const return str; } -std::string FunctionInfo::str_varmap(bool proc_in) const +std::string FunctionInfo::str_varmap(bool prc_in) const { - int cnt = proc_in ? in_cnt : out_cnt; + int cnt = prc_in ? in_cnt : out_cnt; switch (cnt) { case -1: @@ -92,7 +287,7 @@ std::string FunctionInfo::str_varmap(bool proc_in) const case 0: return "0 ()"; } - const varmap& params = proc_in ? in : out; + const varmap& params = prc_in ? in : out; if (params.size() > 0) { // cnt and params.size() might differ - lua_push* calls can be undetected std::string str = std::to_string(cnt) + " ("; @@ -108,3 +303,224 @@ std::string FunctionInfo::str_varmap(bool proc_in) const } return std::to_string(cnt) + " ()"; } + +FunctionInfo::FunctionInfo() +{ +} + +FunctionInfo::FunctionInfo(std::vector src) +{ + addr = std::stoi(&src[0][13], 0, 16); // no use of prsi - values occur once -> no caching wanted + nm = src[1].substr(13, src[1].find_first_of('(') - 13); + + std::string ind; // indentation - keeping track of current block level + std::string lp_utl = ""; // currently in a loop until this line is reached + std::string skp_ass_utl = ""; + std::string skp_push_utl = ""; // skip push parsing until this ln has been reached + std::string cond; + std::optional er; // eval result + bool is_if_ln; + bool prc_decl = true; // process variable declarations + bool enc_usg = false; // encountered usg + uint ret_val; // return value + std::string infix; + lp_track lp_track; // keeps track where the loop started + std::unordered_map push_track; // keeps track if on this indent level a push has happened + locals lcls; // local variables + for (int idx = 3; idx < src.size(); idx++) // skip right to the lines which matter + { + std::string& ln = src[idx]; + // reset line tracking variables + is_if_ln = false; + + if (prc_decl) + { // if local variable definition parsing is enabled + if (ln == " ") + { // reached end variable definition block, no further processing required + prc_decl = false; + continue; + } + if (std::regex_search(ln, match, decl_rgx)) lcls[match[1]] = 0; // track with init value 0 + continue; // no need to do further processing + } + int i = 0; + while (i < ln.length()) + { + if (ln[i] != ' ') + { + ind = ln.substr(0, i); + break; + } + i++; + } + switch (ln[i]) + { + case '}': + if (ln == ind + '}') + { // block end, maybe simple while or for loop end + if (lp_track.find(ind) != lp_track.end()) + lp_track.erase(ind); // reached loop end + break; + } + if (std::regex_search(ln, match, dowhl_cond_rgx)) + { + lp_track[ind].iter--; + if (lp_track[ind].iter < 0) + { // max iterations exceeded + prs_msg.push_back(std::format("max iterations exceeded in: {}", idx + 1)); + } + else + { + infix = match[1]; + auto er = eval(infix, lcls); + if (er.has_value() && er.value() != 0) + { // condition was parsable and evaluated to true + + idx = lp_track[ind].idx; + continue; + } + } + if (lp_track.find(ind) != lp_track.end()) lp_track.erase(ind); + break; + } + break; + case 'c': + case 'd': + if (lnsw(ln, i, "do {")) + { // setting loop end, if not already in a loop + lp_track[ind] = { idx + (lnew(ln, "{")) - 1, MAX_ITER}; + break; + } + if (lnsw(ln, i, "case ") || lnsw(ln, i, "default:")) + { + if (push_track[ind]) ssie(skp_push_utl, ind + "}"); + else push_track[ind] = false; // only execute if a previous case didn't already contain a push + break; + } + break; + case 'f': + if (lnsw(ln, i, "for (")) + { + lp_track[ind] = { idx + (lnew(ln, "{")) - 1, MAX_ITER}; + break; + } + break; + case 'i': // check for if + if (lnsw(ln, i, "if ")) + { // enough characters left to be if statement + is_if_ln = true; + break; + } + break; + case 'e': + if (lnsw(ln, i, "else ")) + { // enough characters left to be if statement + if (push_track[ind]) + { + ssie(skp_ass_utl, lnew(ln, "{") ? ind + "}" : "\1"); + ssie(skp_push_utl, lnew(ln, "{") ? ind + "}" : "\1"); + } + else if (lnsw(ln, i + 5, "if ")) is_if_ln = true; + break; + } + break; + case 'g': + if (!out.empty() && lnsw(ln, i, "goto ")) + { + ssie(skp_push_utl, std::string(ln.substr(i + 5, ln.length() - i - 6)) + ":"); + break; + } + break; + case 'r': // check for return + if (out_cnt == -1) break; // this function has a dynamic number of outputs, no need for further processing + if (lnsw(ln, i, "return ")) + { // enough characters left to be the simplest return + cond = std::string(&ln[i + 7], ln.length() - i - 8); + er = eval(cond, lcls); + if (er.has_value()) ret_val = er.value(); + else ret_val = -1; + // check if already encountered return value (except 0) matches; dynamic if not + if (ret_val != 0) + { // TODO probably need to check if i am currently skipping push + out_cnt = (out_cnt == 0 || out_cnt == ret_val) ? ret_val : -1; + if (ret_val == out.size()) + { // found a return statement and return value matches output param count + ssie(skp_push_utl, "\1skip2end"); + } + } + break; + } + break; + case 'w': + if (lnsw(ln, i, "while (")) + { // setting loop end, if not already in a loop + lp_track[ind] = { idx + (lnew(ln, "{")) - 1, MAX_ITER}; + break; + } + break; + } + + if (is_if_ln) + { + std::regex_search(ln, match, if_rgx); + cond = match[1]; + er = eval(cond, lcls); + if(!er.has_value() || er.value() != 0) push_track[ind] = true; + else + { + push_track[ind] = false; + skp_ass_utl = lnew(ln, "{") ? ind + "}" : "\1"; + skp_push_utl = lnew(ln, "{") ? ind + "}" : "\1"; + } + } + + if (skp_ass_utl.empty() && std::regex_search(ln, match, ass_rgx) && lcls.count(match[1])) // only process assignment for variables i still care about + { // assignment regex matched and local variable with the name is tracked + infix = match[2]; + auto er = eval(infix, lcls); + if (er.has_value()) lcls[match[1]] = er.value(); + else lcls.erase(match[1]); + } + + if (!enc_usg) + { // usage string can only occur once anyway + //if (auto match = ctre::match(ln)) + if (std::regex_search(ln, match, usg_rgx)) + { // found usg string + usg = match[1]; + size_t found = -1; // so the first find uses 0 through the increment + while ((found = usg.find("\\\"", found + 1)) != std::string::npos) usg.replace(found, 2, "\""); + enc_usg = true; + ssie(skp_push_utl, "\1"); // sometimes lua_pushfstring is used before lua_error; do not consider this an output! + } + } + + if (in_cnt != -1) + { + prc_varmap_rgx(true, ln, lua_is_rgx, lcls); + prc_varmap_rgx(true, ln, lua_to_rgx, lcls); + } + if (out_cnt != -1 && skp_push_utl.empty() && prc_varmap_rgx(false, ln, lua_push_rgx, lcls)) + { + std::string lvl = is_if_ln ? ind : ind.substr(0, ind.length() - 2); // do not subtract 1 block level, if this was a simple if ln + while (lvl >= " ") + { // if a push was found, track it for this and all lower block levels + push_track[lvl] = true; + lvl = lvl.substr(0, lvl.length() - 2); + } + } + + if (!lp_utl.empty() && ln.starts_with(lp_utl)) lp_utl = ""; + if (lp_utl == "\1") lp_utl = ""; + if (!skp_ass_utl.empty() && ln.starts_with(skp_ass_utl)) skp_ass_utl = ""; + if (skp_ass_utl == "\1") skp_ass_utl = ""; + if (!skp_push_utl.empty() && ln.starts_with(skp_push_utl)) skp_push_utl = ""; // reset skip since ln has been reached now + if (skp_push_utl == "\1") skp_push_utl = ""; // reset temporary skip which was used for 1 ln + + } + + cln_varmap(true); // clean input varmap + cln_varmap(false); // clean output varmap + chk_vld(); // check validity of parsed data + int i = 0; +} diff --git a/GhidraParser/FunctionInfo.hpp b/GhidraParser/FunctionInfo.hpp index a98a529..ddc42b8 100644 --- a/GhidraParser/FunctionInfo.hpp +++ b/GhidraParser/FunctionInfo.hpp @@ -1,13 +1,29 @@ #pragma once +#include "Utility.hpp" #include +#include +#include #include +#include #include +const int MAX_ITER = 256; + +struct LoopInfo +{ + int idx; + int iter; +}; + typedef unsigned int uint; +typedef std::unordered_map lp_track; +typedef std::unordered_map locals; typedef std::map> varmap; struct FunctionInfo { + std::smatch match; + uint addr = 0; std::string nm = ""; std::string usg = ""; @@ -17,9 +33,16 @@ struct FunctionInfo int out_cnt = 0; std::vector prs_msg; bool prs_vld = false; + void chk_vld(); - void cln_varmap(bool proc_in); + void cln_varmap(bool prc_in); + bool nil_in_varmap(bool prc_in) const; + bool prc_varmap_rgx(bool prc_in, const std::string& ln_in, std::regex& rgx, locals& lcls); std::string str() const; - std::string str_varmap(bool proc_in) const; - bool nil_in_varmap(bool proc_in) const; + std::string str_varmap(bool prc_in) const; + + FunctionInfo(); + FunctionInfo(std::vector src); }; + +std::optional eval(std::string& infix, locals& lcls); diff --git a/GhidraParser/GhidraParser.cpp b/GhidraParser/GhidraParser.cpp index ca03eec..a60bb91 100644 --- a/GhidraParser/GhidraParser.cpp +++ b/GhidraParser/GhidraParser.cpp @@ -1,18 +1,13 @@ #include "FunctionInfo.hpp" #include "Utility.hpp" -#include #include -#include #include #include -#include #include #include -#include #include -#include -std::string fl_path; + bool dbg; bool ivld_only; bool use_all = true; @@ -25,54 +20,12 @@ bool use_all = true; * 123 ivld */ - -#define PL(msg) std::cout << msg << '\n' // print line #define PDBG(msg) if(!dbg) PL(msg) // print line if debug flag is set -std::smatch match1; // TODO remove later - -std::regex usage_regex(R"(,"(Usage: [^;]+).?"[, \)])"); // .? is just a workaround, since )" in the raw string would terminate it immediately -std::regex lua_push_regex(R"(lua_push(.+?)\(())"); -std::regex lua_is_regex(R"(lua_is(.+?)\(L,(\w+?)\))"); -std::regex lua_to_regex(R"(lua_to(.+?)\(L,(\w+?)[,\)])"); -std::regex var_decl_regex(R"(^ (?:int|uint|BOOL) (\w+);)"); // currently only considering these types for declarations -std::regex var_ass_regex(R"(^ +?(\w+?) = (\w+) ?(.)? ?(\w+?){0,1};)"); - -FunctionInfo fi; -int lua_index; -std::string lua_type; -int out_index; std::unordered_map fmap; std::chrono::steady_clock::time_point tstart; std::chrono::steady_clock::time_point tend; -bool process_varmap_regex(std::map>& varmap, std::string ln, std::regex* regex) -{ - bool found = false; - while (std::regex_search(ln, match1, *regex)) - { - found = true; - lua_type = match1[1]; - if (lua_type == "lstring") lua_type = "string"; // lstring is a string! - else if (lua_type == "fstring") lua_type = "string"; // frsting is a string! - if (regex == &lua_push_regex) - { // push parsing uses a global index starting with 1 - lua_index = out_index; - out_index++; - } - else lua_index = prsi(match1[2]).val; - if (lua_index == -1) - { // -1 means there wasn't a literal used for accessing the index, so i can not parse it - if (regex == &lua_push_regex) fi.out_cnt = -1; - else fi.in_cnt = -1; // TODO might be possible to parse when evaluating variables - return found; - } - varmap[lua_index].push_back(lua_type); // always push lua type for now - ln = match1.suffix(); - } - return found; -} - void pfmap(bool ivld_only) { for (const auto& [name, fi] : fmap) @@ -116,8 +69,9 @@ int main() * skip scanning for output params, if unparsable index (not into int) has been found for lua_push* (dynamic outputs) * skip scanning for output params, if returns with different non-0 values have been found (or it can not be parsed as int) */ - - tstart = std::chrono::high_resolution_clock::now(); + + std::string fl_path; + if (use_all) { dbg = true; @@ -131,219 +85,36 @@ int main() fl_path = R"(C:\Users\alphaomega\Documents\Wow.exe.c.test.txt)"; } - - std::ifstream source_file(fl_path); if (!source_file.is_open()) { std::cerr << "Unable to open file " << fl_path << std::endl; return 1; } - std::unordered_map func_vars; - std::string skp_push_utl = ""; // skip push parsing until this ln has been reached - std::string lp_utl = ""; - std::unordered_map push_track; - - bool in_func = false; - bool enc_usg = false; // encountered usg - bool prc_def = false; - uint ret_val; // return value - bool is_if_ln; - std::string ind; // indent + tstart = std::chrono::high_resolution_clock::now(); + bool rec_src = false; // record source + std::vector src; std::string ln; - - std::vector::iterator eol; + int cnt = 0; while (std::getline(source_file, ln)) { if (ln.empty()) continue; // skip empty lines if (ln.starts_with("/*")) continue; // skip block comment lines - if (!in_func) - { // searching for next function - if (ln.starts_with("// ADDRESS - ")) - { // found addr ln - create new FunctionInfo and reset variables - fi = {}; - fi.addr = std::stoi(&ln[13], 0, 16); // do not use prsi, these values prb occur only once, so no caching wanted - out_index = 0; - enc_usg = false; - lp_utl = ""; - skp_push_utl = ""; - push_track = {}; - func_vars = {}; - continue; - } - if (ln.starts_with("uint lua_wow_")) - { // found ln with function signature - fi.nm = ln.substr(13, ln.find_first_of("(") - 13); // get part of real function name - in_func = true; // start function parsing from now on - continue; - } + if (ln.starts_with("// ADDRESS - ")) + { + rec_src = true; // found starting line of function soure } - else - { // processing current function - is_if_ln = false; - if (ln == "{") - { // this is the starting block of the function - prc_def = true; // enable regex handling for local variable definitions - continue; - } - if (ln == "}") - { // end of function found - in_func = false; - fi.cln_varmap(true); - fi.cln_varmap(false); - fi.chk_vld(); - fmap[fi.nm] = fi; - continue; - } - if (prc_def) - { // if local variable definition parsing is enabled - if (ln == " ") - { // reached end variable definition block, no further processing required - prc_def = false; - continue; - } - if (std::regex_search(ln, match1, var_decl_regex)) - { // found local variable, track it with init value 0 - func_vars[match1[1]] = 0; - } - - } - int i = 0; - while (i < ln.length()) - { - if (ln[i] != ' ') - { - ind = ln.substr(0, i); - break; - } - i++; - } - switch (ln[i]) - { - case 'c': - case 'd': - if (lnsw(ln, i, "do {")) - { // setting loop end, if not already in a loop - ssie(lp_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1"); // check if this is a single ln do-while loop (who would even program such a thing!? - break; - } - if (lnsw(ln, i, "case ") || lnsw(ln, i, "default:")) - { - if (push_track[ind]) ssie(skp_push_utl, ind + "}"); - else push_track[ind] = false; // only execute if a previous case didn't already contain a push - break; - } - break; - case 'f': - if (lnsw(ln, i, "for (")) - { - ssie(lp_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1"); // check if this is a single ln for loop - break; - } - break; - case 'i': // check for if - if (lnsw(ln, i, "if ")) - { // enough characters left to be if statement - is_if_ln = true; - push_track[ind] = false; - break; - } - break; - case 'e': - if (lnsw(ln, i, "else ")) - { // enough characters left to be if statement - if (push_track[ind]) ssie(skp_push_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1"); - break; - } - break; - case 'g': - if (!fi.out.empty() && lnsw(ln, i, "goto ")) - { - ssie(skp_push_utl, std::string(ln.substr(i + 5, ln.length() - i - 6)) + ":"); - break; - } - break; - case 'r': // check for return - if (fi.out_cnt == -1) break; // this function has a dynamic number of outputs, no need for further processing - if (lnsw(ln, i, "return ")) - { // enough characters left to be the simplest return - ret_val = prsi(std::string(&ln[i + 7], ln.length() - i - 8)).val; - // check if already encountered return value (except 0) matches; dynamic if not - if (ret_val != 0) - { // TODO probably need to check if i am currently skipping push - fi.out_cnt = (fi.out_cnt == 0 || fi.out_cnt == ret_val) ? ret_val : -1; - if (ret_val == fi.out.size()) - { // found a return statement and return value matches output param count - ssie(skp_push_utl, "\1skip2end"); - } - } - break; - } - break; - case 'w': - if (lnsw(ln, i, "while (")) - { // setting loop end, if not already in a loop - ssie(lp_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1"); // check if this is a single ln while loop - break; - } - break; - } - //pdbg(ln); - //pdbg(ind + "<-"); - //if (auto match = ctre::match(ln)) - - if (std::regex_search(ln, match1, var_ass_regex)) - { - - int i = 0; - //auto test = match.get<1>(); - //pdbg("variable assignment happens for " + match.get<1>().to_string()); - } - - if (!enc_usg) - { // usage string can only occur once anyway - //if (auto match = ctre::match(ln)) - if (std::regex_search(ln, match1, usage_regex)) - { // found usg string - fi.usg = match1[1]; - size_t found = -1; // so the first find uses 0 through the increment - while ((found = fi.usg.find("\\\"", found + 1)) != std::string::npos) fi.usg.replace(found, 2, "\""); - enc_usg = true; - skp_push_utl = !skp_push_utl.empty() ? skp_push_utl : "\1"; // sometimes lua_pushfstring is used before lua_error; do not consider this an output! - } - } - - if (fi.in_cnt != -1) - { - process_varmap_regex(fi.in, ln, &lua_is_regex); - process_varmap_regex(fi.in, ln, &lua_to_regex); - } - if (fi.out_cnt != -1 && skp_push_utl.empty() && process_varmap_regex(fi.out, ln, &lua_push_regex)) - { - if (!lp_utl.empty()) - { // found lua_push* inside a loop - fi.out_cnt = -1; - } - else - { // normal processing - - std::string lvl = is_if_ln ? ind : ind.substr(0, ind.length() - 2); // do not subtract 1 block level, if this was a simple if ln - while (lvl >= " ") - { // if a push was found, track it for this and all lower block levels - push_track[lvl] = true; - lvl = lvl.substr(0, lvl.length() - 2); - } - } - - } - - if (!lp_utl.empty() && ln.starts_with(lp_utl)) lp_utl = ""; - if (lp_utl == "\1") lp_utl = ""; - if (!skp_push_utl.empty() && ln.starts_with(skp_push_utl)) skp_push_utl = ""; // reset skip since ln has been reached now - if (skp_push_utl == "\1") skp_push_utl = ""; // reset temporary skip which was used for 1 ln + if (rec_src) src.push_back(ln); + if (ln == "}") + { // end of function code reached + auto fi = FunctionInfo(src); + fmap[fi.nm] = fi; // there is no handling for duplicate entries bc of performance - there shoulnd't be any anyway + //PL(fi.nm); + cnt++; + src.clear(); + rec_src = false; } - } tend = std::chrono::high_resolution_clock::now(); diff --git a/GhidraParser/Utility.cpp b/GhidraParser/Utility.cpp index a49bb44..23da081 100644 --- a/GhidraParser/Utility.cpp +++ b/GhidraParser/Utility.cpp @@ -6,26 +6,26 @@ #include #include -std::unordered_map prs_cache -{ - { "FALSE", { true, 0 } }, - { "TRUE", { true, 1 } }, +std::unordered_map> prs_cache +{ // initialize with constants + { "FALSE", 0 }, + { "TRUE", 1 }, }; -ParseResult prsi(std::string str) +std::optional prsi(std::string str) { auto found = prs_cache.find(str); if (found != prs_cache.end()) return found->second; // cache hit try { // cache miss, trying to parse and caching the result int v; - if (str.starts_with("0x")) v = std::stoi(str.data(), 0, 16); // hex parsing - else v = std::stoi(str.data()); - prs_cache[str] = { true, v }; + if (str.starts_with("0x")) v = std::stoul(str.data(), 0, 16); // hex parsing + else v = std::stoul(str.data()); + prs_cache[str] = v; } catch (std::invalid_argument e) { // was not parsable, cache negative parse result - prs_cache[str] = { false, -1 }; // value doesn't really matter here + prs_cache[str] = std::nullopt; // value doesn't matter } return prs_cache[str]; } @@ -49,6 +49,11 @@ std::vector* lf(const char* path) return vec; } +bool lnew(std::string& ln, const char* ew) +{ + return ln.ends_with(ew); +} + bool lnsw(std::string& ln, int idx, const char* sw) { int len = strlen(sw); diff --git a/GhidraParser/Utility.hpp b/GhidraParser/Utility.hpp index 50e5726..b40b260 100644 --- a/GhidraParser/Utility.hpp +++ b/GhidraParser/Utility.hpp @@ -1,16 +1,71 @@ #pragma once +#include +#include #include #include -struct ParseResult -{ - bool prsbl; // was parsable - int val; // value which it was parsed into +#define PL(msg) std::cout << msg << '\n' // print line + +enum Op +{ // https://en.cppreference.com/w/cpp/language/operator_precedence + BRL, + BRR, + LOR, + LAND, + BOR, + BXOR, + BAND, + EQ, + UEQ, + LT, + LTE, + BT, + BTE, + PLS, + MIN, + MUL, + DIV, + MOD, }; -ParseResult prsi(std::string str); +__forceinline int op_prec(Op op) +{ + switch (op) + { + case BRL: + return -9000; + case LOR: + return -15; + case LAND: + return -14; + case BOR: + return -13; + case BXOR: + return -12; + case BAND: + return -11; + case EQ: + case UEQ: + return -10; + case LT: + case LTE: + case BT: + case BTE: + return -9; + case PLS: + case MIN: + return -6; + case MUL: + case DIV: + case MOD: + return -5; + } + return -1; // unsupporeted op +} + +std::optional prsi(std::string str); std::vector* lf(std::string& path); std::vector* lf(const char* path); +bool lnew(std::string& ln, const char* ew); bool lnsw(std::string& ln, int idx, const char* sw); void ssie(std::string& str, std::string repl); -void pdbg(std::string& msg); \ No newline at end of file