#include "FunctionInfo.hpp"
#include "Utility.hpp"

// Standard headers used directly by this translation unit.
#include <chrono>
#include <cstddef>
#include <format>
#include <fstream>
#include <iostream>
#include <map>
#include <regex>
#include <string>
#include <unordered_map>
#include <vector>

std::string fl_path;   // path of the decompiled source file to parse
bool dbg;              // when set, the parsed function map is printed after parsing
bool ivld_only;        // when set, only functions flagged as invalid are printed
bool use_all = true;   // selects the full input file instead of the small test file

/*
 * 907 cln
 * 25 ivld
 */

/*
 * 1742 cln
 * 123 ivld
 */

#define PL(msg) std::cout << msg << '\n' // print line
// print line if debug flag is set (wrapped in do/while so it is safe inside unbraced if/else)
#define PDBG(msg) do { if (dbg) PL(msg); } while (0)

std::smatch match1; // TODO remove later

// .? is just a workaround, since )" in the raw string would terminate it immediately
std::regex usage_regex(R"(,"(Usage: [^;]+).?"[, \)])");
std::regex lua_push_regex(R"(lua_push(.+?)\(())");
std::regex lua_is_regex(R"(lua_is(.+?)\(L,(\w+?)\))");
std::regex lua_to_regex(R"(lua_to(.+?)\(L,(\w+?)[,\)])");
// currently only considering these types for declarations
// NOTE(review): the leading whitespace in this pattern is indentation-sensitive - confirm it matches the decompiler output
std::regex var_decl_regex(R"(^ (?:int|uint|BOOL) (\w+);)");
// TODO not used yet: intended for evaluating variable assignments
std::regex var_ass_regex(R"(^ +?(\w+?) = (\w+) ?(.)? ?(\w+?){0,1};)");

FunctionInfo fi;       // function currently being parsed
int lua_index;         // stack index of the most recently matched lua_* call
std::string lua_type;  // type suffix of the most recently matched lua_* call
int out_index;         // running index for lua_push* calls; reset per function in main
std::unordered_map<std::string, FunctionInfo> fmap; // parsed functions keyed by name

std::chrono::steady_clock::time_point tstart;
std::chrono::steady_clock::time_point tend;

/**
 * Scans one line for every occurrence of the given lua_* regex and records the
 * matched type under its stack index in varmap.
 *
 * For lua_push_regex the index is taken from the global out_index counter (which
 * is incremented per match); for the other regexes the index is parsed from the
 * second capture group via prsi. When the resulting index is -1 the function marks
 * the respective count (fi.out_cnt / fi.in_cnt) as -1 and stops scanning the line.
 *
 * @param varmap index -> list of type names, extended in place
 * @param ln     line to scan (taken by value; consumed match by match)
 * @param regex  one of lua_push_regex, lua_is_regex, lua_to_regex
 * @return true if the regex matched at least once in ln
 */
bool process_varmap_regex(std::map<int, std::vector<std::string>>& varmap, std::string ln, std::regex* regex) {
    const bool is_push = (regex == &lua_push_regex);
    bool found = false;

    while (std::regex_search(ln, match1, *regex)) {
        found = true;

        lua_type = match1[1];
        if (lua_type == "lstring" || lua_type == "fstring") lua_type = "string"; // lstring / fstring are strings!

        if (is_push) { // push parsing uses the running out_index (main resets it to 0 per function)
            lua_index = out_index;
            out_index++;
        } else {
            lua_index = prsi(match1[2]).val;
        }

        if (lua_index == -1) { // -1 means there wasn't a literal used for accessing the index, so it can not be parsed
            if (is_push) fi.out_cnt = -1;
            else fi.in_cnt = -1; // TODO might be possible to parse when evaluating variables
            return found;
        }

        varmap[lua_index].push_back(lua_type); // always push lua type for now
        ln = match1.suffix();                  // continue behind the current match
    }
    return found;
}

/**
 * Prints the string representation of every function in fmap.
 *
 * @param ivld_only when true, only entries whose prs_vld flag is false are printed
 */
void pfmap(bool ivld_only) {
    for (const auto& [name, info] : fmap) {
        if (!ivld_only || !info.prs_vld) PL(info.str() + "\n");
    }
}

/**
 * Prints parsing statistics: total number of functions, number of "clean"
 * functions (prs_vld set, in_cnt and out_cnt both != -1, nil_in_varmap(false)
 * false), number of functions with prs_vld unset, and the elapsed time between
 * tstart and tend in milliseconds.
 */
void pstats() {
    int ttl = 0;
    int cln = 0;
    int ivld = 0;

    for (const auto& [name, info] : fmap) {
        ttl++;
        cln += info.prs_vld && info.in_cnt != -1 && info.out_cnt != -1 && !info.nil_in_varmap(false);
        ivld += !info.prs_vld;
    }

    const auto dur = std::chrono::duration_cast<std::chrono::milliseconds>(tend - tstart);
    PL(std::format("Functions parsed: {}", ttl));
    PL(std::format("Clean: {}", cln));
    PL(std::format("Invalid: {}", ivld));
    PL(std::format("Duration: {} ms", dur.count()));
}

/**
 * Reads the decompiled source file line by line, collects a FunctionInfo for
 * every `uint lua_wow_*` function into fmap and prints the results.
 *
 * @return 1 if the input file could not be opened, otherwise 0
 */
int main() {
    /*
     * PARSING RULES
     * lua_is* calls can occur in if-lines
     * lua_to* calls can occur in if-lines
     * lua_push* calls can NOT occur in if-lines
     * lua_push* calls are not called with an index as param, unlike lua_is* / lua_to* - the index is inferred by the order of calls
     * lua_push* calls can occur in branch structures (if/else, switch) - they must only be considered in one of them
     * wow functions return the number of outputs - 0 when they error or there are none; consider any non-0 return as the real number of outputs
     * wow functions may return a dynamic number of outputs - there is no way to parse this into a single result
     * skip scanning for the usg string if it already has been encountered in the function, there can only be 1
     * skip scanning for input params, if unparsable index (not into int) has been found for lua_is* or lua_to* (dynamic inputs)
     * skip scanning for output params, if unparsable index (not into int) has been found for lua_push* (dynamic outputs)
     * skip scanning for output params, if returns with different non-0 values have been found (or it can not be parsed as int)
     */

    // tstart/tend are steady_clock time points, so take the timestamps from steady_clock
    // (high_resolution_clock is only an alias of it on some standard libraries).
    tstart = std::chrono::steady_clock::now();

    if (use_all) {
        dbg = true;
        ivld_only = true;
        fl_path = R"(C:\Users\alphaomega\Documents\Wow.exe.c.txt)";
    } else {
        dbg = true;
        ivld_only = false;
        fl_path = R"(C:\Users\alphaomega\Documents\Wow.exe.c.test.txt)";
    }

    std::ifstream source_file(fl_path);
    if (!source_file.is_open()) {
        std::cerr << "Unable to open file " << fl_path << std::endl;
        return 1;
    }

    std::unordered_map<std::string, int> func_vars;   // local variables of the current function (not evaluated yet)
    std::string skp_push_utl = "";                    // skip push parsing until this ln has been reached
    std::string lp_utl = "";                          // inside a loop until a ln starting with this has been reached
    std::unordered_map<std::string, bool> push_track; // indent -> a push has been seen on this block level
    bool in_func = false;
    bool enc_usg = false; // encountered usg
    bool prc_def = false; // currently inside the local variable definition block
    bool is_if_ln;
    std::string ind; // indent
    std::string ln;

    // "\1" is used as a marker for lp_utl / skp_push_utl that is only valid for the
    // current ln; it is cleared again at the end of each loop iteration.
    while (std::getline(source_file, ln)) {
        if (ln.empty()) continue;            // skip empty lines
        if (ln.starts_with("/*")) continue;  // skip block comment lines

        if (!in_func) { // searching for next function
            if (ln.starts_with("// ADDRESS - ")) { // found addr ln - create new FunctionInfo and reset variables
                fi = {};
                fi.addr = std::stoi(&ln[13], nullptr, 16); // do not use prsi, these values prb occur only once, so no caching wanted
                out_index = 0;
                enc_usg = false;
                lp_utl = "";
                skp_push_utl = "";
                push_track = {};
                func_vars = {};
                continue;
            }
            if (ln.starts_with("uint lua_wow_")) { // found ln with function signature
                fi.nm = ln.substr(13, ln.find_first_of("(") - 13); // get part of real function name
                in_func = true; // start function parsing from now on
                continue;
            }
            continue;
        }

        // processing current function
        is_if_ln = false;

        if (ln == "{") { // this is the starting block of the function
            prc_def = true; // enable regex handling for local variable definitions
            continue;
        }
        if (ln == "}") { // end of function found
            in_func = false;
            fi.cln_varmap(true);
            fi.cln_varmap(false);
            fi.chk_vld();
            fmap[fi.nm] = fi;
            continue;
        }

        if (prc_def) { // if local variable definition parsing is enabled
            // NOTE(review): whitespace-only marker ln - confirm the number of spaces matches the decompiler output
            if (ln == " ") { // reached end variable definition block, no further processing required
                prc_def = false;
                continue;
            }
            if (std::regex_search(ln, match1, var_decl_regex)) { // found local variable, track it with init value 0
                func_vars[match1[1]] = 0;
            }
        }

        // Determine the indent of this ln. For a ln consisting only of spaces the
        // previous indent is kept and i points behind the last character.
        const std::size_t first_non_space = ln.find_first_not_of(' ');
        int i = static_cast<int>(first_non_space == std::string::npos ? ln.length() : first_non_space);
        if (first_non_space != std::string::npos) ind = ln.substr(0, first_non_space);

        const bool opens_block = ln[ln.length() - 1] == '{';

        switch (ln[i]) {
        case 'c':
        case 'd':
            if (lnsw(ln, i, "do {")) { // setting loop end, if not already in a loop
                ssie(lp_utl, opens_block ? ind + "}" : "\1"); // check if this is a single ln do-while loop
                break;
            }
            if (lnsw(ln, i, "case ") || lnsw(ln, i, "default:")) {
                if (push_track[ind]) ssie(skp_push_utl, ind + "}");
                else push_track[ind] = false; // only execute if a previous case didn't already contain a push
                break;
            }
            break;
        case 'f':
            if (lnsw(ln, i, "for (")) {
                ssie(lp_utl, opens_block ? ind + "}" : "\1"); // check if this is a single ln for loop
                break;
            }
            break;
        case 'i': // check for if
            if (lnsw(ln, i, "if ")) {
                is_if_ln = true;
                push_track[ind] = false;
                break;
            }
            break;
        case 'e': // check for else
            if (lnsw(ln, i, "else ")) {
                if (push_track[ind]) ssie(skp_push_utl, opens_block ? ind + "}" : "\1");
                break;
            }
            break;
        case 'g': // check for goto: skip pushes until the target label ln
            if (!fi.out.empty() && lnsw(ln, i, "goto ")) {
                ssie(skp_push_utl, std::string(ln.substr(i + 5, ln.length() - i - 6)) + ":");
                break;
            }
            break;
        case 'r': // check for return
            if (fi.out_cnt == -1) break; // this function has a dynamic number of outputs, no need for further processing
            if (lnsw(ln, i, "return ")) {
                // Text between "return " and the trailing ';'. substr clamps the count, so a
                // ln that ends right after "return " can not read past the end of the string.
                const uint ret_val = prsi(ln.substr(i + 7, ln.length() - i - 8)).val;
                // check if already encountered return value (except 0) matches; dynamic if not
                if (ret_val != 0) { // TODO probably need to check if i am currently skipping push
                    fi.out_cnt = (fi.out_cnt == 0 || fi.out_cnt == ret_val) ? ret_val : -1;
                    if (ret_val == fi.out.size()) { // found a return statement and return value matches output param count
                        // NOTE(review): presumably no ln starts with this marker, so pushes stay skipped until the next function resets it
                        ssie(skp_push_utl, "\1skip2end");
                    }
                }
                break;
            }
            break;
        case 'w':
            if (lnsw(ln, i, "while (")) { // setting loop end, if not already in a loop
                ssie(lp_utl, opens_block ? ind + "}" : "\1"); // check if this is a single ln while loop
                break;
            }
            break;
        }

        // TODO evaluate variable assignments via var_ass_regex; the search is not run
        // until its result is actually used, since it would cost a regex pass per ln.

        if (!enc_usg) { // usage string can only occur once anyway
            if (std::regex_search(ln, match1, usage_regex)) { // found usg string
                fi.usg = match1[1];
                // turn every escaped quote (\") into a plain quote
                for (std::size_t pos = fi.usg.find("\\\""); pos != std::string::npos; pos = fi.usg.find("\\\"", pos + 1)) {
                    fi.usg.replace(pos, 2, "\"");
                }
                enc_usg = true;
                // sometimes lua_pushfstring is used before lua_error; do not consider this an output!
                if (skp_push_utl.empty()) skp_push_utl = "\1";
            }
        }

        if (fi.in_cnt != -1) {
            process_varmap_regex(fi.in, ln, &lua_is_regex);
            process_varmap_regex(fi.in, ln, &lua_to_regex);
        }

        if (fi.out_cnt != -1 && skp_push_utl.empty() && process_varmap_regex(fi.out, ln, &lua_push_regex)) {
            if (!lp_utl.empty()) { // found lua_push* inside a loop
                fi.out_cnt = -1;
            } else { // normal processing
                // do not subtract 1 block level, if this was a simple if ln
                std::string lvl = ind;
                if (!is_if_ln && lvl.length() >= 2) lvl.resize(lvl.length() - 2);

                // if a push was found, track it for this and all lower block levels
                // NOTE(review): whitespace-only literal - confirm the number of spaces matches one block level
                while (lvl >= " ") {
                    push_track[lvl] = true;
                    if (lvl.length() < 2) break; // can not dedent further; length() - 2 would wrap around and never terminate
                    lvl.resize(lvl.length() - 2);
                }
            }
        }

        if (!lp_utl.empty() && ln.starts_with(lp_utl)) lp_utl = ""; // loop end has been reached
        if (lp_utl == "\1") lp_utl = "";                            // single ln loop ends with this ln
        if (!skp_push_utl.empty() && ln.starts_with(skp_push_utl)) skp_push_utl = ""; // reset skip since ln has been reached now
        if (skp_push_utl == "\1") skp_push_utl = "";                // reset temporary skip which was used for 1 ln
    }

    tend = std::chrono::steady_clock::now();

    if (dbg) pfmap(ivld_only);
    std::cout << "GhidraParser is done..." << std::endl;
    pstats();
    std::cin.ignore();
    return 0;
}