#include "FunctionInfo.hpp"
#include "Utility.hpp"
// NOTE(review): the names of the eight standard headers were not legible in the reviewed text;
// this list is derived from the std facilities used in this file - verify against the original.
#include <algorithm>
#include <format>
#include <optional>
#include <regex>
#include <set>
#include <stack>
#include <string>
#include <unordered_map>

// Patterns applied to the individual source lines handed to FunctionInfo.
std::regex decl_rgx(R"(^ (\S+) (\S+)(?: \[)?(\d+?)?\]?;$)"); // capturing type, name and array size
std::regex ass_rgx(R"(^ +?(\w+) = (.+);)"); // OwO
std::regex if_rgx(R"(if \((.+)\) )");
std::regex usg_rgx(R"(,"(Usage: [^;]+).?"[, \)])"); // .? is a workaround to prevent raw string from closing
std::regex usg_vld_rgx(R"(Usage: .+?\((.*?)\))");
std::regex lua_is_rgx(R"(lua_is(.+?)\(L,(\w+?)\))");
std::regex lua_to_rgx(R"(lua_to(.+?)\(L,(\w+?)[,\)])");
std::regex lua_push_rgx(R"(lua_push(.+?)\(())");
std::regex dowhl_cond_rgx(R"(while \((.+)\);)");

// Resolves a token to an integer: tracked local variables take precedence, everything else
// is handed to prsi.
// NOTE(review): the map type was not legible in the reviewed text; `locals` is what eval
// passes in - confirm it matches the original parameter type.
std::optional<int> prsi_lcls(std::string str, locals& lcls)
{ // looks up local variables, before it tries to call prsi
    const auto it = lcls.find(str); // single lookup instead of count + operator[]
    if (it != lcls.end()) return it->second;
    return prsi(str);
}

// Pops the topmost operator and the two topmost values and returns the result of applying
// the operator to them. Returns std::nullopt for a division/modulo by zero, for an operator
// without an arithmetic meaning, or if the stacks do not hold enough entries.
__forceinline std::optional<int> slv_step(std::stack<int>& vals, std::stack<Op>& ops)
{
    if (ops.empty() || vals.size() < 2) return std::nullopt; // malformed expression - top()/pop() would be undefined

    const int v2 = vals.top();
    vals.pop();
    const int v1 = vals.top();
    vals.pop();
    const Op op = ops.top();
    ops.pop();

    switch (op) {
    case LOR: return v1 || v2;
    case LAND: return v1 && v2;
    case BOR: return v1 | v2;
    case BXOR: return v1 ^ v2;
    case BAND: return v1 & v2;
    case EQ: return v1 == v2;
    case UEQ: return v1 != v2;
    case LT: return v1 < v2;
    case LTE: return v1 <= v2;
    case BT: return v1 > v2;
    case BTE: return v1 >= v2;
    case PLS: return v1 + v2;
    case MIN: return v1 - v2;
    case MUL: return v1 * v2;
    case DIV:
        if (v2 == 0) return std::nullopt;
        return v1 / v2;
    case MOD:
        if (v2 == 0) return std::nullopt;
        return v1 % v2;
    }
    return std::nullopt; // unsupported op
}

// Pushes op onto the operator stack after resolving every stacked operator whose precedence
// is greater than or equal to the precedence of op.
// Returns std::nullopt if one of those resolve steps failed; otherwise an engaged optional
// (the contained value carries no meaning).
__forceinline std::optional<int> push_op(std::stack<int>& vals, std::stack<Op>& ops, Op op)
{
    while (ops.size() >= 1 && op_prec(ops.top()) >= op_prec(op)) { // last op has greater or same precedence
        auto res = slv_step(vals, ops);
        if (!res.has_value()) return std::nullopt;
        vals.push(res.value());
    }
    ops.push(op);
    return 0; // previously control fell off the end of this non-void function (undefined behaviour)
}

// Expressions matching one of these are treated as true by eval without being evaluated.
std::regex lua_is_quick(R"(^lua_is\S+?\(L,\S+?\)$)");
std::regex sstrcmpi_quick(R"(^SStrCmpI\(\S+?,\S+?,\S+?\)$)");
// Evaluates a space separated infix expression to an integer.
// Tokens are separated by spaces and parentheses; operands are resolved through prsi_lcls
// (tracked locals first). Expressions matching lua_is_quick or sstrcmpi_quick evaluate to 1.
// Returns std::nullopt if an operand can not be resolved, a resolve step fails, or the
// expression is malformed (unbalanced parentheses, missing operands).
std::optional<int> eval(std::string& infix, locals& lcls)
{
    std::smatch match;
    if (std::regex_search(infix, match, lua_is_quick)) return 1;
    if (std::regex_search(infix, match, sstrcmpi_quick)) return 1;
    if (infix.find_first_of(' ') == std::string::npos) { // TODO performance escape hatch for simple infix strings - might actually be counter productive... have to check
        return prsi_lcls(infix, lcls);
    }

    static const std::unordered_map<std::string, Op> op_tokens = {
        {"||", LOR}, {"&&", LAND}, {"|", BOR}, {"^", BXOR}, {"&", BAND},
        {"==", EQ}, {"!=", UEQ}, {"<", LT}, {"<=", LTE}, {">", BT}, {">=", BTE},
        {"+", PLS}, {"-", MIN}, {"*", MUL}, {"/", DIV}, {"%", MOD},
    };

    std::stack<int> vals;
    std::stack<Op> ops;

    // resolves the topmost operator; false if operands are missing or the step failed
    auto reduce = [&vals, &ops]() {
        if (vals.size() < 2) return false; // slv_step would pop from an empty stack
        auto res = slv_step(vals, ops);
        if (!res.has_value()) return false;
        vals.push(res.value());
        return true;
    };

    size_t tk_start = 0;
    std::string tk;
    bool was_brr;
    for (size_t tk_end = 0; tk_end < infix.length(); tk_end++) {
        was_brr = false;
        switch (infix[tk_end]) {
        case ' ':
            tk = infix.substr(tk_start, tk_end - tk_start);
            tk_start = tk_end + 1;
            break;
        case '(':
            tk = "";
            ops.push(Op::BRL);
            tk_start = tk_end + 1;
            continue;
        case ')':
            tk = infix.substr(tk_start, tk_end - tk_start);
            tk_start = tk_end + 1;
            was_brr = true;
            break;
        }
        if (tk == "" && tk_end == infix.length() - 1) { // last token
            tk = infix.substr(tk_start, tk_end - tk_start + 1);
        }
        if (tk == "" && !was_brr) continue; // empty token, nothing to do

        if (const auto op_it = op_tokens.find(tk); op_it != op_tokens.end()) {
            push_op(vals, ops, op_it->second);
        }
        else if (tk != "") {
            auto pr = prsi_lcls(tk, lcls);
            if (!pr.has_value()) return std::nullopt;
            vals.push(pr.value());
        }

        if (was_brr) {
            while (!ops.empty() && ops.top() != BRL) {
                if (!reduce()) return std::nullopt;
            }
            if (ops.empty()) return std::nullopt; // closing brace without an opening one
            ops.pop(); // popping left brace
        }
        tk = "";
    }

    while (!ops.empty()) {
        if (!reduce()) return std::nullopt;
    }
    if (vals.size() != 1) return std::nullopt;
    return vals.top();
}

// Applies rgx repeatedly to ln_in and records capture group 1 as a lua type in the in varmap
// (prc_in == true) or the out varmap (prc_in == false).
// For lua_push_rgx the index is the next free out index; otherwise capture group 2 is resolved
// through prsi_lcls. An index of -1 marks the respective count (in_cnt / out_cnt) as dynamic
// and stops processing of the line.
// Returns true if rgx matched at least once.
bool FunctionInfo::prc_varmap_rgx(bool prc_in, const std::string& ln_in, std::regex& rgx, locals& lcls)
{
    std::string ln = ln_in;
    varmap& params = prc_in ? in : out;
    const bool is_push = &rgx == &lua_push_rgx;
    bool fnd = false;
    while (std::regex_search(ln, match, rgx)) {
        fnd = true;
        std::string lua_type = match[1];
        if (lua_type == "lstring") lua_type = "string"; // lstring is a string!
        else if (lua_type == "fstring") lua_type = "string"; // fstring is a string!

        int lua_idx;
        if (is_push) { // push parsing uses a global index starting with 1
            lua_idx = static_cast<int>(out.size()) + 1;
        }
        else {
            // an index which can not be resolved is handled like -1 instead of throwing bad_optional_access
            lua_idx = prsi_lcls(match[2], lcls).value_or(-1);
        }

        if (lua_idx == -1) { // -1 means there wasn't a literal used for accessing the index, so i can not parse it
            if (is_push) out_cnt = -1;
            else in_cnt = -1; // TODO might be possible to parse when evaluating variables
            return fnd;
        }
        params[lua_idx].push_back(lua_type); // always push lua type for now
        ln = match.suffix();
    }
    return fnd;
}

// Runs the consistency checks on the parsed data and stores the verdict in prs_vld.
// Every failed check appends a message to prs_msg; messages already present count as errors.
void FunctionInfo::chk_vld()
{ // run all checks, so we have the full picture
    bool vld = true;
    if (prs_msg.size() > 0) vld = false; // parsing messages up until this point are hard errors
    if (in_cnt > -1) { // in varmap is not dynamic
        if (!usg.empty()) { // check against usage string
            if (std::regex_search(usg, match, usg_vld_rgx)) {
                const std::string usg_params = match[1];
                const int comma_cnt = static_cast<int>(std::count(usg_params.begin(), usg_params.end(), ','));
                // an empty parameter list means 0 params, otherwise one more than there are commas
                const int usg_cnt = usg_params.empty() ? 0 : comma_cnt + 1;
                if (in_cnt != usg_cnt) {
                    prs_msg.push_back(std::format("in param count ({}) does not match usage string ({})", in_cnt, usg_cnt));
                    vld = false;
                }
            }
            else prs_msg.push_back("usage string is malformed"); // do not consider this for validity
        }
        for (int i = 1; i <= in_cnt; i++) { // lua indexes start with 1
            if (!in.contains(i)) {
                prs_msg.push_back("in param index not in order");
                vld = false;
                break;
            }
        }
    }
    if (out_cnt != -1 && static_cast<size_t>(out_cnt) != out.size()) {
        prs_msg.push_back(std::format("out param count ({}) does not match return value ({})", out.size(), out_cnt));
        vld = false;
    }
    prs_vld = vld;
}

// Cleans the in (prc_in == true) or out varmap: a dynamic varmap is emptied, otherwise the
// type list of every index is sorted and de-duplicated. For the in varmap in_cnt is then set
// to the number of entries.
void FunctionInfo::cln_varmap(bool prc_in)
{
    varmap& params = prc_in ? in : out;
    const int cnt = prc_in ? in_cnt : out_cnt;
    if (cnt == -1) {
        params.clear(); // dynamic varmap does not need entries
        return;
    }
    for (auto& [lua_index, lua_types] : params) {
        std::sort(lua_types.begin(), lua_types.end());
        lua_types.erase(std::unique(lua_types.begin(), lua_types.end()), lua_types.end());
    }
    if (prc_in) in_cnt = static_cast<int>(in.size()); // input count can only be inferred by in varmap size
}

// Returns true if any index of the selected varmap lists the type "nil".
bool FunctionInfo::nil_in_varmap(bool prc_in) const
{
    const varmap& params = prc_in ? in : out;
    for (const auto& [key, value] : params) {
        if (std::find(value.begin(), value.end(), "nil") != value.end()) return true;
    }
    return false;
}

// Builds the textual representation: address, name and validity marker, the usage string
// (if any), both varmaps and all parsing messages.
std::string FunctionInfo::str() const
{
    std::string str = std::format("{:X} {}{}\n", addr, nm, prs_vld ? "" : " (invalid)");
    if (!usg.empty()) str += " " + usg + "\n";
    str += " in: " + str_varmap(true) + "\n";
    str += " out: " + str_varmap(false);
    for (const std::string& err : prs_msg) str += "\n " + err;
    return str;
}

// Formats the selected varmap as "<count> (<types of idx 1>, <types of idx 2>, ...)" with
// alternative types of one index joined by '/'. A count of -1 yields "dynamic".
std::string FunctionInfo::str_varmap(bool prc_in) const
{
    const int cnt = prc_in ? in_cnt : out_cnt;
    switch (cnt) {
    case -1: return "dynamic";
    case 0: return "0 ()";
    }
    const varmap& params = prc_in ? in : out;
    if (params.size() > 0) { // cnt and params.size() might differ - lua_push* calls can be undetected
        std::string str = std::to_string(cnt) + " (";
        for (const auto& [lua_index, lua_types] : params) {
            for (const auto& lua_type : lua_types) str += lua_type + "/";
            str.pop_back(); // remove last slash
            str += ", ";
        }
        str[str.length() - 2] = ')'; // replace last comma with closing bracket
        str.pop_back(); // remove space after last comma
        return str;
    }
    return std::to_string(cnt) + " ()";
}

FunctionInfo::FunctionInfo()
{
}

// Parses the source lines of one function: src[0] holds the address (hex, starting at
// column 13), src[1] the name (starting at column 13, up to the first '('), the remaining
// lines the local declarations followed by the body.
// While walking the body the tracked locals are updated from assignments, do-while loops are
// re-run while their condition evaluates to true (bounded by MAX_ITER), and lua_is*/lua_to*/
// lua_push* calls are collected into the in/out varmaps. Finally both varmaps are cleaned and
// the result is validated.
// NOTE(review): lnsw / lnew / ssie are helpers declared elsewhere; from their use they look
// like "line starts with (at offset)", "line ends with" and "set string if empty" - confirm.
FunctionInfo::FunctionInfo(std::vector<std::string> src)
{
    addr = std::stoi(&src[0][13], 0, 16); // no use of prsi - values occur once -> no caching wanted
    nm = src[1].substr(13, src[1].find_first_of('(') - 13);

    locals lcls; // local variables
    std::set<std::string> track_types = { "BOOL", "int", "uint", "undefined4"}; // only track locals of these types
    int idx = 2;
    if (!src[idx].starts_with(" return ")) { // process local variable declarations
        for (; idx < src.size(); idx++) {
            std::string& ln = src[idx];
            if (std::regex_search(ln, match, decl_rgx)) {
                if (!track_types.count(match[1])) continue; // not interested in type
                lcls[match[2]] = 0; // track with init value 0
            }
            else break; // found no more declaration
        }
    }

    std::string ind; // indentation - keeping track of current block level
    std::string lp_utl = ""; // currently in a loop until this line is reached
    std::string skp_ass_utl = "";
    std::string skp_push_utl = ""; // skip push parsing until this ln has been reached
    std::string cond;
    std::optional<int> er; // eval result
    bool is_if_ln;
    bool enc_usg = false; // encountered usg
    uint ret_val; // return value
    std::string infix;
    lp_track lp_track; // keeps track where the loop started
    std::unordered_map<std::string, bool> push_track; // keeps track if on this indent level a push has happened

    for (; idx < src.size(); idx++) // skip right to the lines which matter
    {
        std::string& ln = src[idx];
        is_if_ln = false;
        int i = 2;
        while (i < ln.length()) {
            if (ln[i] != ' ') break;
            i += 2;
        }
        ind = ln.substr(0, i);

        // i may lie behind the end of a short or blank line; reading ln[i] there would be out of bounds
        const char first = static_cast<size_t>(i) < ln.length() ? ln[i] : '\0';
        switch (first) {
        case '}':
            if (lp_track.find(ind) != lp_track.end()) { // closing bracket of a loop
                lp_track[ind].iter--;
                if (lp_track[ind].iter < 0) { // max iterations exceeded
                    prs_msg.push_back(std::format("max iterations exceeded in: {}", idx + 1));
                }
            }
            if (std::regex_search(ln, match, dowhl_cond_rgx)) {
                lp_track[ind].iter--;
                if (lp_track[ind].iter < 0) { // max iterations exceeded
                    prs_msg.push_back(std::format("max iterations exceeded in: {}", idx + 1));
                }
                else {
                    infix = match[1];
                    auto er = eval(infix, lcls);
                    if (er.has_value() && er.value() != 0) { // condition was parsable and evaluated to true
                        idx = lp_track[ind].idx; // jump back to the loop start; the for increment moves on from there
                        continue;
                    }
                }
                if (lp_track.find(ind) != lp_track.end()) lp_track.erase(ind);
                break;
            }
            break;
        case 'c':
        case 'd':
            if (lnsw(ln, i, "do {")) { // setting loop end, if not already in a loop
                lp_track[ind] = { idx + (lnew(ln, "{")) - 1, MAX_ITER};
                break;
            }
            if (lnsw(ln, i, "case ") || lnsw(ln, i, "default:")) {
                if (push_track[ind]) ssie(skp_push_utl, ind + "}");
                else push_track[ind] = false; // only execute if a previous case didn't already contain a push
                break;
            }
            break;
        case 'f':
            if (lnsw(ln, i, "for (")) {
                lp_track[ind] = { idx + (lnew(ln, "{")) - 1, MAX_ITER};
                break;
            }
            break;
        case 'i': // check for if
            if (lnsw(ln, i, "if ")) { // enough characters left to be if statement
                is_if_ln = true;
                break;
            }
            break;
        case 'e':
            if (lnsw(ln, i, "else ")) { // enough characters left to be if statement
                if (push_track[ind]) {
                    ssie(skp_ass_utl, lnew(ln, "{") ? ind + "}" : "\1");
                    ssie(skp_push_utl, lnew(ln, "{") ? ind + "}" : "\1");
                }
                else if (lnsw(ln, i + 5, "if ")) is_if_ln = true;
                break;
            }
            break;
        case 'g':
            if (!out.empty() && lnsw(ln, i, "goto ")) {
                ssie(skp_push_utl, std::string(ln.substr(i + 5, ln.length() - i - 6)) + ":");
                break;
            }
            break;
        case 'r': // check for return
            if (out_cnt == -1) break; // this function has a dynamic number of outputs, no need for further processing
            if (lnsw(ln, i, "return ")) { // enough characters left to be the simplest return
                cond = std::string(&ln[i + 7], ln.length() - i - 8);
                er = eval(cond, lcls);
                if (er.has_value()) ret_val = er.value();
                else ret_val = -1;
                // check if already encountered return value (except 0) matches; dynamic if not
                if (ret_val != 0) { // TODO probably need to check if i am currently skipping push
                    out_cnt = (out_cnt == 0 || out_cnt == ret_val) ? ret_val : -1;
                    if (ret_val == out.size()) { // found a return statement and return value matches output param count
                        ssie(skp_push_utl, "\1skip2end");
                    }
                }
                break;
            }
            break;
        case 'w':
            if (lnsw(ln, i, "while (")) { // setting loop end, if not already in a loop
                lp_track[ind] = { idx + (lnew(ln, "{")) - 1, MAX_ITER};
                break;
            }
            break;
        }

        if (is_if_ln) {
            std::regex_search(ln, match, if_rgx);
            cond = match[1];
            er = eval(cond, lcls);
            if (!er.has_value() || er.value() != 0) push_track[ind] = true;
            else { // condition evaluated to false: skip assignments and pushes of the guarded statement/block
                push_track[ind] = false;
                skp_ass_utl = lnew(ln, "{") ? ind + "}" : "\1";
                skp_push_utl = lnew(ln, "{") ? ind + "}" : "\1";
            }
        }

        if (skp_ass_utl.empty() && std::regex_search(ln, match, ass_rgx) && lcls.count(match[1])) // only process assignment for variables i still care about
        { // assignment regex matched and local variable with the name is tracked
            infix = match[2];
            auto er = eval(infix, lcls);
            if (er.has_value()) lcls[match[1]] = er.value();
            else lcls.erase(match[1]); // value is unknown from here on - stop tracking
        }

        if (!enc_usg) { // usage string can only occur once anyway
            //if (auto match = ctre::match(ln))
            if (std::regex_search(ln, match, usg_rgx)) { // found usg string
                usg = match[1];
                size_t found = -1; // so the first find uses 0 through the increment
                while ((found = usg.find("\\\"", found + 1)) != std::string::npos) usg.replace(found, 2, "\"");
                enc_usg = true;
                ssie(skp_push_utl, "\1"); // sometimes lua_pushfstring is used before lua_error; do not consider this an output!
            }
        }

        if (in_cnt != -1) {
            prc_varmap_rgx(true, ln, lua_is_rgx, lcls);
            prc_varmap_rgx(true, ln, lua_to_rgx, lcls);
        }
        if (out_cnt != -1 && skp_push_utl.empty() && prc_varmap_rgx(false, ln, lua_push_rgx, lcls)) {
            std::string lvl = is_if_ln ? ind : ind.substr(0, ind.length() - 2); // do not subtract 1 block level, if this was a simple if ln
            while (lvl >= " ") { // if a push was found, track it for this and all lower block levels
                push_track[lvl] = true;
                // a level shorter than one indent step has no parent; without this guard the
                // unsigned subtraction wraps, substr returns lvl unchanged and the loop never ends
                if (lvl.length() < 2) break;
                lvl = lvl.substr(0, lvl.length() - 2);
            }
        }

        if (!lp_utl.empty() && ln.starts_with(lp_utl)) lp_utl = "";
        if (lp_utl == "\1") lp_utl = "";
        if (!skp_ass_utl.empty() && ln.starts_with(skp_ass_utl)) skp_ass_utl = "";
        if (skp_ass_utl == "\1") skp_ass_utl = "";
        if (!skp_push_utl.empty() && ln.starts_with(skp_push_utl)) skp_push_utl = ""; // reset skip since ln has been reached now
        if (skp_push_utl == "\1") skp_push_utl = ""; // reset temporary skip which was used for 1 ln
    }

    cln_varmap(true); // clean inputs
    cln_varmap(false); // clean outputs
    chk_vld(); // check validity of parsed data
}