diff --git a/GhidraParser/FunctionInfo.cpp b/GhidraParser/FunctionInfo.cpp new file mode 100644 index 0000000..dfac6b5 --- /dev/null +++ b/GhidraParser/FunctionInfo.cpp @@ -0,0 +1,110 @@ +#include "FunctionInfo.hpp" +#include +#include +#include + +std::smatch match; + +const std::regex usg_vld_rgx(R"(Usage: .+?\((.*?)\))"); + +void FunctionInfo::chk_vld() +{ // run all checks, so we have the full picture + bool vld = true; + if (prs_msg.size() > 0) vld = false; // parsing messages up until this point are hard errors + if (in_cnt > -1) + { // in varmap is not dynamic + if (!usg.empty()) + { // check against usage string + if (std::regex_search(usg, match, usg_vld_rgx)) + { + std::string usg_params = match[1]; + int comma_cnt = 0; + for (char c : usg_params) comma_cnt += c == ','; + if (in_cnt == 0 && !usg_params.empty() || in_cnt != comma_cnt + 1) + { + prs_msg.push_back(std::format("in param count ({}) does not match usage string ({})", in_cnt, !usg_params.empty() * (comma_cnt + 1))); + vld = false; + } + } + else prs_msg.push_back("usage string is malformed"); // do not consider this for validity + } + for (int i = 1; i <= in_cnt; i++) + { // lua indexes start with 1 + if (!in.contains(i)) + { + prs_msg.push_back("in param index not in order"); + vld = false; + break; + } + } + } + if (out_cnt != -1 && out_cnt != out.size()) + { + prs_msg.push_back(std::format("out param count ({}) does not match return value ({})", out.size(), out_cnt)); + vld = false; + } + prs_vld = vld; +} + +bool FunctionInfo::nil_in_varmap(bool proc_in) const +{ + const varmap& params = proc_in ? in : out; + for (auto& [key, value] : params) + if (std::find(value.begin(), value.end(), "nil") != value.end()) + return true; + return false; +} + +void FunctionInfo::cln_varmap(bool proc_in) +{ + varmap& params = proc_in ? in : out; + int cnt = proc_in ? in_cnt : out_cnt; + if (cnt == -1) + { + params.clear(); // dynamic varmap does not need entries + return; + } + for (auto& [lua_index, lua_types] : params) + { + std::sort(lua_types.begin(), lua_types.end()); + lua_types.erase(std::unique(lua_types.begin(), lua_types.end()), lua_types.end()); + } + if (proc_in) in_cnt = in.size(); // input count can only be inferred by in varmap size +} + +std::string FunctionInfo::str() const +{ + std::string str = std::format("{:X} {}{}\n", addr, nm, prs_vld ? "" : " (invalid)"); + if (!usg.empty()) str += " " + usg + "\n"; + str += " in: " + str_varmap(true) + "\n"; + str += " out: " + str_varmap(false); + for (const std::string& err : prs_msg) str += "\n " + err; + return str; +} + +std::string FunctionInfo::str_varmap(bool proc_in) const +{ + int cnt = proc_in ? in_cnt : out_cnt; + switch (cnt) + { + case -1: + return "dynamic"; + case 0: + return "0 ()"; + } + const varmap& params = proc_in ? in : out; + if (params.size() > 0) + { // cnt and params.size() might differ - lua_push* calls can be undetected + std::string str = std::to_string(cnt) + " ("; + for (const auto& [lua_index, lua_types] : params) + { + for (const auto& lua_type : lua_types) str += lua_type + "/"; + str.pop_back(); // remove last slash + str += ", "; + } + str[str.length() - 2] = ')'; // replace last comma with closing bracket + str.pop_back(); // remove space after last comma + return str; + } + return std::to_string(cnt) + " ()"; +} diff --git a/GhidraParser/FunctionInfo.hpp b/GhidraParser/FunctionInfo.hpp new file mode 100644 index 0000000..a98a529 --- /dev/null +++ b/GhidraParser/FunctionInfo.hpp @@ -0,0 +1,25 @@ +#pragma once +#include +#include +#include + +typedef unsigned int uint; +typedef std::map> varmap; + +struct FunctionInfo +{ + uint addr = 0; + std::string nm = ""; + std::string usg = ""; + varmap in{}; + int in_cnt = 0; + varmap out{}; + int out_cnt = 0; + std::vector prs_msg; + bool prs_vld = false; + void chk_vld(); + void cln_varmap(bool proc_in); + std::string str() const; + std::string str_varmap(bool proc_in) const; + bool nil_in_varmap(bool proc_in) const; +}; diff --git a/GhidraParser/GhidraParser.cpp b/GhidraParser/GhidraParser.cpp index 334573d..ca03eec 100644 --- a/GhidraParser/GhidraParser.cpp +++ b/GhidraParser/GhidraParser.cpp @@ -1,3 +1,5 @@ +#include "FunctionInfo.hpp" +#include "Utility.hpp" #include #include #include @@ -6,174 +8,96 @@ #include #include #include +#include #include #include -#include "GhidraParser.hpp" -std::string file_path; +std::string fl_path; bool dbg; -bool dbg_prnt_invld_only; -bool use_all = true; // 26 invalid parses +bool ivld_only; +bool use_all = true; +/* +* 907 cln +* 25 ivld +*/ +/* +* 1742 cln +* 123 ivld +*/ + + +#define PL(msg) std::cout << msg << '\n' // print line +#define PDBG(msg) if(!dbg) PL(msg) // print line if debug flag is set + +std::smatch match1; // TODO remove later + +std::regex usage_regex(R"(,"(Usage: [^;]+).?"[, \)])"); // .? is just a workaround, since )" in the raw string would terminate it immediately +std::regex lua_push_regex(R"(lua_push(.+?)\(())"); +std::regex lua_is_regex(R"(lua_is(.+?)\(L,(\w+?)\))"); +std::regex lua_to_regex(R"(lua_to(.+?)\(L,(\w+?)[,\)])"); +std::regex var_decl_regex(R"(^ (?:int|uint|BOOL) (\w+);)"); // currently only considering these types for declarations +std::regex var_ass_regex(R"(^ +?(\w+?) = (\w+) ?(.)? ?(\w+?){0,1};)"); FunctionInfo fi; -std::string func; -std::smatch match; -std::string lua_type; int lua_index; +std::string lua_type; int out_index; -std::regex lua_push_regex(R"(lua_push(.+?)\()"); -std::regex usage_validity_regex(R"(Usage: .+?\((.*?)\))"); - std::unordered_map fmap; +std::chrono::steady_clock::time_point tstart; +std::chrono::steady_clock::time_point tend; -std::string get_varmap_info(std::map> varmap) -{ - if (varmap.size() == 0) return "()"; - std::string str = "("; - for (auto &[key, values] : varmap) - { - for (auto &value : values) - { - str += value + "/"; - } - str = str.substr(0, str.length() - 1); - str += ", "; - } - str = str.substr(0, str.length() - 2); - str += ")"; - return str; -} - -int parse_int(std::string input) -{ - try - { - if (input.starts_with("0x")) return std::stoi(input, 0, 16); - return std::stoi(input); - } - catch (std::invalid_argument e) {} - return -1; // couldn't parse; -1 is used for dynamic return -} - -void pdbg(std::string msg) -{ - if (!dbg) return; - std::cout << msg << '\n'; -} - -void pfi(FunctionInfo& fi) -{ // print FunctionInfo - char endl = '\n'; - std::cout << std::format("{:X}", fi.address) << " " << fi.func << (fi.valid_parse ? "" : " (invalid)") << endl; - if (!fi.usg.empty()) std::cout << fi.usg << endl; - if (fi.param_in_cnt == -1) std::cout << "in: dynamic" << endl; - else std::cout << "in: " << (fi.param_in_cnt == 0 ? "0" : std::to_string(fi.param_in_cnt) + " " + get_varmap_info(fi.param_in)) << endl; - if (fi.param_out_cnt == -1) std::cout << "out: dynamic" << endl; - else std::cout << "out: " << (fi.param_out_cnt == 0 ? "0" : std::to_string(fi.param_out_cnt) + " " + get_varmap_info(fi.param_out)) << endl; - for (auto &error : fi.parse_errors) std::cout << error << endl; - std::cout << std::endl; -} - -bool process_varmap_regex(std::map> *varmap, std::string line, std::regex* regex) +bool process_varmap_regex(std::map>& varmap, std::string ln, std::regex* regex) { bool found = false; - while (std::regex_search(line, match, *regex)) + while (std::regex_search(ln, match1, *regex)) { found = true; - lua_type = match[1]; + lua_type = match1[1]; if (lua_type == "lstring") lua_type = "string"; // lstring is a string! - if (lua_type == "fstring") lua_type = "string"; // frsting is a string! + else if (lua_type == "fstring") lua_type = "string"; // frsting is a string! if (regex == &lua_push_regex) { // push parsing uses a global index starting with 1 - //if (lua_type == "nil") return false; // skipping lua_pushnil lua_index = out_index; out_index++; } - else lua_index = parse_int(match[2]); + else lua_index = prsi(match1[2]).val; if (lua_index == -1) { // -1 means there wasn't a literal used for accessing the index, so i can not parse it - if (regex == &lua_push_regex) fi.param_out_cnt = -1; - else fi.param_in_cnt = -1; + if (regex == &lua_push_regex) fi.out_cnt = -1; + else fi.in_cnt = -1; // TODO might be possible to parse when evaluating variables return found; } - (*varmap)[lua_index].push_back(lua_type); // always push lua type for now - //if (varmap->count(lua_index) == 0) (*varmap)[lua_index].push_back(lua_type); // new entry - //else if ((*varmap)[lua_index] != lua_type) - //{ // already got an entry, check if it has the same type - // fi.parse_errors.push_back("parameter type mismatch for index " + std::to_string(lua_index) + " - " + (*varmap)[lua_index] + " vs " + lua_type); - //} - line = match.suffix(); + varmap[lua_index].push_back(lua_type); // always push lua type for now + ln = match1.suffix(); } return found; } -void chk_vld(FunctionInfo* fi) -{ // check validity of parsed info - default value is false, so i simply return if invalid - - bool valid = true; - if (fi->parse_errors.size() > 0) valid = false; // any parsing errors occurred - if (fi->param_out_cnt != -1 && fi->param_out_cnt != fi->param_out.size()) +void pfmap(bool ivld_only) +{ + for (const auto& [name, fi] : fmap) { - fi->parse_errors.push_back("output param mismatch between found number of return values (" + std::to_string(fi->param_out_cnt) +") and lua_push* calls (" + std::to_string(fi->param_out.size()) + ")"); - valid = false; + if (!ivld_only || !fi.prs_vld) + PL(fi.str() + "\n"); } - if (fi->param_in_cnt != -1 && !fi->usg.empty()) - { // do extra check against usg string - std::string usage_params; - if (std::regex_search(fi->usg, match, usage_validity_regex)) - { - usage_params = match[1]; - if (fi->param_in_cnt == 0 && !usage_params.empty()) - { - valid = false; - fi->parse_errors.push_back("input param count does not match usage string"); - } - else - { - int comma_cnt = 0; - for (auto ch : usage_params) if (ch == ',') comma_cnt++; - if (fi->param_in_cnt != comma_cnt + 1) - { - valid = false; - fi->parse_errors.push_back("input param count does not match usage string"); - } - } - } - else fi->parse_errors.push_back("usage string malformed"); // if the usg string is malformed, do not consider it for validity - } - if (fi->param_in_cnt > 0) - { // check if input param indexes are in order - for (int i = 1; i <= fi->param_in_cnt; i++) - { // lua indexes start with 1 - if (!fi->param_in.contains(i)) - { - fi->parse_errors.push_back("input param index not in order"); - valid = false; - break; - } - } - } - fi->valid_parse = valid; } -void sort_prune_varmap(std::map> *varmap) -{ - for (auto &[lua_index, lua_types] : *varmap) +void pstats() +{ // print parsing statistics + int ttl = 0; + int cln = 0; + int ivld = 0; + for (const auto& [name, fi] : fmap) { - lua_types.erase(std::unique(lua_types.begin(), lua_types.end()), lua_types.end()); - std::sort(lua_types.begin(), lua_types.end()); + ttl++; + cln += fi.prs_vld && fi.in_cnt != -1 && fi.out_cnt != -1 && !fi.nil_in_varmap(false); + ivld += !fi.prs_vld; } -} - -__forceinline bool ln_is(std::string &ln, int idx, const char* lk_for) -{ - int len = strlen(lk_for); - return ln.length() - idx + 1 > len && ln.substr(idx, len) == lk_for; -} - -__forceinline void set_str_if_empty(std::string *init, std::string repl) -{ - *init = init->empty() ? repl : *init; + auto dur = duration_cast(tend - tstart); + PL(std::format("Functions parsed: {}", ttl)); + PL(std::format("Clean: {}", cln)); + PL(std::format("Invalid: {}", ivld)); + PL(std::format("Duration: {} ms", dur.count())); } int main() @@ -193,82 +117,98 @@ int main() * skip scanning for output params, if returns with different non-0 values have been found (or it can not be parsed as int) */ + tstart = std::chrono::high_resolution_clock::now(); if (use_all) { dbg = true; - dbg_prnt_invld_only = true; - file_path = R"(C:\Users\alphaomega\Documents\Wow.exe.c.txt)"; + ivld_only = true; + fl_path = R"(C:\Users\alphaomega\Documents\Wow.exe.c.txt)"; } else { dbg = true; - dbg_prnt_invld_only = false; - file_path = R"(C:\Users\alphaomega\Documents\Wow.exe.c.test.txt)"; + ivld_only = false; + fl_path = R"(C:\Users\alphaomega\Documents\Wow.exe.c.test.txt)"; } - auto tstart = std::chrono::high_resolution_clock::now(); + - std::ifstream source_file(file_path); + std::ifstream source_file(fl_path); if (!source_file.is_open()) { - std::cerr << "Unable to open file " << file_path << std::endl; + std::cerr << "Unable to open file " << fl_path << std::endl; return 1; } + std::unordered_map func_vars; - std::regex usage_regex(R"(,"(Usage: [^;]+)N{0}"[, \)])"); // N{0} is just a workaround, since )" in the raw string would terminate it immediately - std::regex lua_is_regex(R"(lua_is(.+?)\()"); - std::regex lua_to_regex(R"(lua_to(.+?)\()"); - - std::string skp_push_utl = ""; // skip push parsing until this line has been reached + std::string skp_push_utl = ""; // skip push parsing until this ln has been reached std::string lp_utl = ""; - std::map push_track; + std::unordered_map push_track; + bool in_func = false; bool enc_usg = false; // encountered usg + bool prc_def = false; uint ret_val; // return value bool is_if_ln; std::string ind; // indent - std::string ln; // line + std::string ln; + + std::vector::iterator eol; while (std::getline(source_file, ln)) { - { // check for lines which can instantly be discarded - if (ln.empty()) continue; // skip empty lines - if (ln.starts_with("/*")) continue; // skip comment lines - } + if (ln.empty()) continue; // skip empty lines + if (ln.starts_with("/*")) continue; // skip block comment lines if (!in_func) { // searching for next function if (ln.starts_with("// ADDRESS - ")) - { // found address ln - create new FunctionInfo and reset variables + { // found addr ln - create new FunctionInfo and reset variables fi = {}; - fi.address = parse_int(ln.substr(13)); + fi.addr = std::stoi(&ln[13], 0, 16); // do not use prsi, these values prb occur only once, so no caching wanted out_index = 0; enc_usg = false; lp_utl = ""; skp_push_utl = ""; push_track = {}; + func_vars = {}; continue; } if (ln.starts_with("uint lua_wow_")) { // found ln with function signature - fi.func = ln.substr(13, ln.find_first_of("(") - 13); // get part of real function name + fi.nm = ln.substr(13, ln.find_first_of("(") - 13); // get part of real function name in_func = true; // start function parsing from now on continue; } } else { // processing current function + is_if_ln = false; + if (ln == "{") + { // this is the starting block of the function + prc_def = true; // enable regex handling for local variable definitions + continue; + } if (ln == "}") { // end of function found in_func = false; - if (fi.param_in_cnt == -1) fi.param_in.clear(); - else fi.param_in_cnt = fi.param_in.size(); - if (fi.param_out_cnt == -1) fi.param_out.clear(); - sort_prune_varmap(&fi.param_in); - chk_vld(&fi); - fmap[fi.func] = fi; - //pdbg("# END OF " + fi.func); + fi.cln_varmap(true); + fi.cln_varmap(false); + fi.chk_vld(); + fmap[fi.nm] = fi; continue; } - is_if_ln = false; + if (prc_def) + { // if local variable definition parsing is enabled + if (ln == " ") + { // reached end variable definition block, no further processing required + prc_def = false; + continue; + } + if (std::regex_search(ln, match1, var_decl_regex)) + { // found local variable, track it with init value 0 + func_vars[match1[1]] = 0; + } + + } int i = 0; while (i < ln.length()) { @@ -283,135 +223,133 @@ int main() { case 'c': case 'd': - if (ln_is(ln, i, "do {")) + if (lnsw(ln, i, "do {")) { // setting loop end, if not already in a loop - set_str_if_empty(&lp_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1"); // check if this is a single line do-while loop (who would even program such a thing!? + ssie(lp_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1"); // check if this is a single ln do-while loop (who would even program such a thing!? break; } - if (ln_is(ln, i, "case ") || ln_is(ln, i, "default:")) + if (lnsw(ln, i, "case ") || lnsw(ln, i, "default:")) { - if (push_track[ind.length() / 2]) set_str_if_empty(&skp_push_utl, ind + "}"); - else push_track[ind.length() / 2] = false; // only execute if a previous case didn't already contain a push + if (push_track[ind]) ssie(skp_push_utl, ind + "}"); + else push_track[ind] = false; // only execute if a previous case didn't already contain a push break; } break; case 'f': - if (ln_is(ln, i, "for (")) + if (lnsw(ln, i, "for (")) { - set_str_if_empty(&lp_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1"); // check if this is a single line for loop + ssie(lp_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1"); // check if this is a single ln for loop break; } break; case 'i': // check for if - if (ln_is(ln, i, "if ")) + if (lnsw(ln, i, "if ")) { // enough characters left to be if statement is_if_ln = true; - push_track[ind.length() / 2] = false; + push_track[ind] = false; break; } break; case 'e': - if (ln_is(ln, i, "else ")) + if (lnsw(ln, i, "else ")) { // enough characters left to be if statement - if (push_track[ind.length() / 2]) set_str_if_empty(&skp_push_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1"); + if (push_track[ind]) ssie(skp_push_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1"); break; } break; case 'g': - if (!fi.param_out.empty() && ln_is(ln, i, "goto ")) + if (!fi.out.empty() && lnsw(ln, i, "goto ")) { - set_str_if_empty(&skp_push_utl, ln.substr(i + 5, ln.length() - i - 6) + ":"); + ssie(skp_push_utl, std::string(ln.substr(i + 5, ln.length() - i - 6)) + ":"); break; } break; case 'r': // check for return - if (fi.param_out_cnt == -1) break; // this function has a dynamic number of outputs, no need for further processing - if (ln_is(ln, i, "return ")) + if (fi.out_cnt == -1) break; // this function has a dynamic number of outputs, no need for further processing + if (lnsw(ln, i, "return ")) { // enough characters left to be the simplest return - ret_val = parse_int(ln.substr(i + 7, ln.length() - i - 8)); + ret_val = prsi(std::string(&ln[i + 7], ln.length() - i - 8)).val; // check if already encountered return value (except 0) matches; dynamic if not if (ret_val != 0) { // TODO probably need to check if i am currently skipping push - fi.param_out_cnt = (fi.param_out_cnt == 0 || fi.param_out_cnt == ret_val) ? ret_val : -1; - if (ret_val == fi.param_out.size()) + fi.out_cnt = (fi.out_cnt == 0 || fi.out_cnt == ret_val) ? ret_val : -1; + if (ret_val == fi.out.size()) { // found a return statement and return value matches output param count - set_str_if_empty(&skp_push_utl, "\1skip2end"); + ssie(skp_push_utl, "\1skip2end"); } } break; } break; case 'w': - if (ln_is(ln, i, "while (")) + if (lnsw(ln, i, "while (")) { // setting loop end, if not already in a loop - set_str_if_empty(&lp_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1"); // check if this is a single line while loop + ssie(lp_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1"); // check if this is a single ln while loop break; } break; } //pdbg(ln); //pdbg(ind + "<-"); + //if (auto match = ctre::match(ln)) + + if (std::regex_search(ln, match1, var_ass_regex)) + { + + int i = 0; + //auto test = match.get<1>(); + //pdbg("variable assignment happens for " + match.get<1>().to_string()); + } + if (!enc_usg) { // usage string can only occur once anyway - if (std::regex_search(ln, match, usage_regex)) + //if (auto match = ctre::match(ln)) + if (std::regex_search(ln, match1, usage_regex)) { // found usg string - fi.usg = match[1]; + fi.usg = match1[1]; size_t found = -1; // so the first find uses 0 through the increment while ((found = fi.usg.find("\\\"", found + 1)) != std::string::npos) fi.usg.replace(found, 2, "\""); enc_usg = true; skp_push_utl = !skp_push_utl.empty() ? skp_push_utl : "\1"; // sometimes lua_pushfstring is used before lua_error; do not consider this an output! } + } - } - if (fi.param_in_cnt != -1) + if (fi.in_cnt != -1) { - process_varmap_regex(&fi.param_in, ln, &lua_is_regex); - process_varmap_regex(&fi.param_in, ln, &lua_to_regex); + process_varmap_regex(fi.in, ln, &lua_is_regex); + process_varmap_regex(fi.in, ln, &lua_to_regex); } - if (fi.param_out_cnt != -1 && skp_push_utl.empty() && process_varmap_regex(&fi.param_out, ln, &lua_push_regex)) + if (fi.out_cnt != -1 && skp_push_utl.empty() && process_varmap_regex(fi.out, ln, &lua_push_regex)) { if (!lp_utl.empty()) { // found lua_push* inside a loop - fi.param_out_cnt = -1; + fi.out_cnt = -1; } else { // normal processing - int lvl = is_if_ln ? ind.length() / 2 : (ind.length() - 2) / 2; // do not subtract 1 block level, if this was a simple if line - while (lvl >= 1) + + std::string lvl = is_if_ln ? ind : ind.substr(0, ind.length() - 2); // do not subtract 1 block level, if this was a simple if ln + while (lvl >= " ") { // if a push was found, track it for this and all lower block levels push_track[lvl] = true; - lvl--; + lvl = lvl.substr(0, lvl.length() - 2); } } - + } - if (!lp_utl.empty() && ln.starts_with(lp_utl)) - lp_utl = ""; - if (lp_utl == "\1") - lp_utl = ""; - if (!skp_push_utl.empty() && ln.starts_with(skp_push_utl)) - skp_push_utl = ""; // reset skip since line has been reached now - if (skp_push_utl == "\1") - skp_push_utl = ""; // reset temporary skip which was used for 1 line + if (!lp_utl.empty() && ln.starts_with(lp_utl)) lp_utl = ""; + if (lp_utl == "\1") lp_utl = ""; + if (!skp_push_utl.empty() && ln.starts_with(skp_push_utl)) skp_push_utl = ""; // reset skip since ln has been reached now + if (skp_push_utl == "\1") skp_push_utl = ""; // reset temporary skip which was used for 1 ln } - - } - - auto tend = std::chrono::high_resolution_clock::now(); - auto duration = duration_cast(tend - tstart); - int cnt_invalid = 0; - int cnt_total = 0; - for(auto &[name, fi] : fmap) - { - if (dbg && (!dbg_prnt_invld_only || !fi.valid_parse)) pfi(fi); - cnt_invalid += fi.valid_parse ? 0 : 1; - cnt_total++; + } + + tend = std::chrono::high_resolution_clock::now(); + if (dbg) pfmap(ivld_only); std::cout << "GhidraParser is done..." << std::endl; - std::cout << "Functions parsed: " << std::to_string(cnt_total) << std::endl; - std::cout << "Invalid parses: " << std::to_string(cnt_invalid) << std::endl; - std::cout << "duration: " << duration.count() << " ms" << std::endl; + pstats(); std::cin.ignore(); return 0; } diff --git a/GhidraParser/GhidraParser.hpp b/GhidraParser/GhidraParser.hpp index 91a7b8a..6f70f09 100644 --- a/GhidraParser/GhidraParser.hpp +++ b/GhidraParser/GhidraParser.hpp @@ -1,21 +1 @@ #pragma once -#include -#include - -typedef unsigned int uint; // i am lazy -typedef unsigned char byte; - -//#pragma pack(push, 1) -struct FunctionInfo -{ - uint address = 0; - std::string func = ""; - std::string usg = ""; - std::map> param_in{}; - int param_in_cnt = 0; - std::map> param_out{}; - int param_out_cnt = 0; - std::vector parse_errors; - bool valid_parse = false; -}; -//#pragma pack(pop) diff --git a/GhidraParser/GhidraParser.vcxproj b/GhidraParser/GhidraParser.vcxproj index ae18ace..2a5f6fa 100644 --- a/GhidraParser/GhidraParser.vcxproj +++ b/GhidraParser/GhidraParser.vcxproj @@ -106,6 +106,8 @@ _DEBUG;_CONSOLE;%(PreprocessorDefinitions) true stdcpp20 + + Console @@ -121,6 +123,7 @@ NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true stdcpp20 + C:\Users\alphaomega\.vcpkg\installed\x64-windows\include;%(AdditionalIncludeDirectories) Console @@ -130,12 +133,15 @@ + + + + - - + \ No newline at end of file diff --git a/GhidraParser/GhidraParser.vcxproj.filters b/GhidraParser/GhidraParser.vcxproj.filters index 7243d0e..04fa1d4 100644 --- a/GhidraParser/GhidraParser.vcxproj.filters +++ b/GhidraParser/GhidraParser.vcxproj.filters @@ -18,10 +18,22 @@ Source Files + + Source Files + + + Source Files + Header Files + + Header Files + + + Header Files + \ No newline at end of file diff --git a/GhidraParser/Utility.cpp b/GhidraParser/Utility.cpp new file mode 100644 index 0000000..a49bb44 --- /dev/null +++ b/GhidraParser/Utility.cpp @@ -0,0 +1,61 @@ +#include "Utility.hpp" +#include +#include +#include +#include +#include +#include + +std::unordered_map prs_cache +{ + { "FALSE", { true, 0 } }, + { "TRUE", { true, 1 } }, +}; + +ParseResult prsi(std::string str) +{ + auto found = prs_cache.find(str); + if (found != prs_cache.end()) return found->second; // cache hit + try + { // cache miss, trying to parse and caching the result + int v; + if (str.starts_with("0x")) v = std::stoi(str.data(), 0, 16); // hex parsing + else v = std::stoi(str.data()); + prs_cache[str] = { true, v }; + } + catch (std::invalid_argument e) + { // was not parsable, cache negative parse result + prs_cache[str] = { false, -1 }; // value doesn't really matter here + } + return prs_cache[str]; +} + +std::vector* lf(std::string& path) +{ + return lf(path.c_str()); +} + +std::vector* lf(const char* path) +{ + std::ifstream file(path, std::ios::binary); + if (!file.is_open()) return nullptr; + file.seekg(0, std::ios::end); + auto vec = new std::vector(file.tellg()); + if (vec->capacity() > 0) + { + file.seekg(0, std::ios::beg); + file.read(vec->data(), vec->capacity()); + } + return vec; +} + +bool lnsw(std::string& ln, int idx, const char* sw) +{ + int len = strlen(sw); + return ln.length() - idx + 1 > len && ln.substr(idx, len) == sw; +} + +void ssie(std::string& str, std::string repl) +{ + if (str.empty()) str = repl; +} diff --git a/GhidraParser/Utility.hpp b/GhidraParser/Utility.hpp new file mode 100644 index 0000000..50e5726 --- /dev/null +++ b/GhidraParser/Utility.hpp @@ -0,0 +1,16 @@ +#pragma once +#include +#include + +struct ParseResult +{ + bool prsbl; // was parsable + int val; // value which it was parsed into +}; + +ParseResult prsi(std::string str); +std::vector* lf(std::string& path); +std::vector* lf(const char* path); +bool lnsw(std::string& ln, int idx, const char* sw); +void ssie(std::string& str, std::string repl); +void pdbg(std::string& msg); \ No newline at end of file