From a0ff3fde4d62a2008dc391a8f1a3ff0cdf94016a Mon Sep 17 00:00:00 2001 From: DrFrugal Date: Tue, 30 Jan 2024 08:37:42 +0100 Subject: [PATCH] improving performance by refactoring: -> declaration lookup (its own loop at the start) -> indent check (starting at index 2, jumping by 2 steps, since block levels increase by 2 spaces each time) -> removed function start and end brackets from src vector --- GhidraParser/FunctionInfo.cpp | 68 ++++++++++++++++++----------------- GhidraParser/GhidraParser.cpp | 11 +++--- 2 files changed, 42 insertions(+), 37 deletions(-) diff --git a/GhidraParser/FunctionInfo.cpp b/GhidraParser/FunctionInfo.cpp index dd0f1ed..cedeec6 100644 --- a/GhidraParser/FunctionInfo.cpp +++ b/GhidraParser/FunctionInfo.cpp @@ -4,11 +4,12 @@ #include #include #include +#include #include #include #include -std::regex decl_rgx(R"(^ (?:int|uint|BOOL) (\w+);)"); // currently only considering these types for declarations +std::regex decl_rgx(R"(^ (\S+) (\S+)(?: \[)?(\d+?)?\]?;$)"); // capturing type, name and array size std::regex ass_rgx(R"(^ +?(\w+) = (.+);)"); // OwO std::regex if_rgx(R"(if \((.+)\) )"); std::regex usg_rgx(R"(,"(Usage: [^;]+).?"[, \)])"); // .? is a workaround to prevent raw string from closing @@ -175,8 +176,8 @@ std::optional eval(std::string& infix, locals& lcls) bool FunctionInfo::prc_varmap_rgx(bool prc_in, const std::string& ln_in, std::regex& rgx, locals& lcls) { std::string ln = ln_in; - bool fnd = false; varmap& params = prc_in ? 
in : out; + bool fnd = false; while (std::regex_search(ln, match, rgx)) { fnd = true; @@ -313,6 +314,24 @@ FunctionInfo::FunctionInfo(std::vector src) addr = std::stoi(&src[0][13], 0, 16); // no use of prsi - values occur once -> no caching wanted nm = src[1].substr(13, src[1].find_first_of('(') - 13); + locals lcls; // local variables + std::set track_types = { "BOOL", "int", "uint", "undefined4"}; // only track locals of these types + std::string type; + int idx = 2; + if (!src[idx].starts_with(" return ")) + { // process local variable declarations + for (; idx < src.size(); idx++) + { + std::string& ln = src[idx]; + if (std::regex_search(ln, match, decl_rgx)) + { + if (!track_types.count(match[1])) continue; // not interested in type + lcls[match[2]] = 0; // track with init value 0 + } + else break; // found no more declaration + } + } + std::string ind; // indentation - keeping track of current block level std::string lp_utl = ""; // currently in a loop until this line is reached std::string skp_ass_utl = ""; @@ -320,47 +339,34 @@ FunctionInfo::FunctionInfo(std::vector src) std::string cond; std::optional er; // eval result bool is_if_ln; - bool prc_decl = true; // process variable declarations bool enc_usg = false; // encountered usg uint ret_val; // return value std::string infix; lp_track lp_track; // keeps track where the loop started std::unordered_map push_track; // keeps track if on this indent level a push has happened - locals lcls; // local variables - for (int idx = 3; idx < src.size(); idx++) // skip right to the lines which matter + for (; idx < src.size(); idx++) // skip right to the lines which matter { std::string& ln = src[idx]; - // reset line tracking variables is_if_ln = false; - if (prc_decl) - { // if local variable definition parsing is enabled - if (ln == " ") - { // reached end variable definition block, no further processing required - prc_decl = false; - continue; - } - if (std::regex_search(ln, match, decl_rgx)) lcls[match[1]] = 0; 
// track with init value 0 - continue; // no need to do further processing - } - int i = 0; + int i = 2; while (i < ln.length()) { - if (ln[i] != ' ') - { - ind = ln.substr(0, i); - break; - } - i++; + if (ln[i] != ' ') break; + i += 2; } + ind = ln.substr(0, i); + switch (ln[i]) { case '}': - if (ln == ind + '}') - { // block end, maybe simple while or for loop end - if (lp_track.find(ind) != lp_track.end()) - lp_track.erase(ind); // reached loop end - break; + if (lp_track.find(ind) != lp_track.end()) + { // closing bracket of a loop + lp_track[ind].iter--; + if (lp_track[ind].iter < 0) + { // max iterations exceeded + prs_msg.push_back(std::format("max iterations exceeded in: {}", idx + 1)); + } } if (std::regex_search(ln, match, dowhl_cond_rgx)) { @@ -516,11 +522,9 @@ FunctionInfo::FunctionInfo(std::vector src) if (skp_ass_utl == "\1") skp_ass_utl = ""; if (!skp_push_utl.empty() && ln.starts_with(skp_push_utl)) skp_push_utl = ""; // reset skip since ln has been reached now if (skp_push_utl == "\1") skp_push_utl = ""; // reset temporary skip which was used for 1 ln - } - cln_varmap(true); // clean input varmap - cln_varmap(false); // clean output varmap + cln_varmap(true); // clean inputs + cln_varmap(false); // clean outputs chk_vld(); // check validity of parsed data - int i = 0; } diff --git a/GhidraParser/GhidraParser.cpp b/GhidraParser/GhidraParser.cpp index a60bb91..2db9b7a 100644 --- a/GhidraParser/GhidraParser.cpp +++ b/GhidraParser/GhidraParser.cpp @@ -101,11 +101,10 @@ int main() { if (ln.empty()) continue; // skip empty lines if (ln.starts_with("/*")) continue; // skip block comment lines - if (ln.starts_with("// ADDRESS - ")) - { - rec_src = true; // found starting line of function soure - } - if (rec_src) src.push_back(ln); + if (ln == "{") continue; // skip function opening bracked + + if (ln.starts_with("// ADDRESS - ")) rec_src = true; // found starting line of function soure + if (ln == "}") { // end of function code reached auto fi = 
FunctionInfo(src); @@ -114,7 +113,9 @@ int main() cnt++; src.clear(); rec_src = false; + continue; } + if (rec_src) src.push_back(ln); } tend = std::chrono::high_resolution_clock::now();