refactor to use std::optional instead of ParseResult or EvalResult
Big refactor: local variables are now evaluated, and branches and loops are tracked. The FunctionInfo struct gains a constructor that parses a function from a vector of source lines.
This commit is contained in:
parent
82f367f068
commit
45dd86e289
|
@ -1,11 +1,206 @@
|
|||
#include "FunctionInfo.hpp"
|
||||
#include "Utility.hpp"
|
||||
#include <algorithm>
|
||||
#include <format>
|
||||
#include <optional>
|
||||
#include <regex>
|
||||
#include <stack>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
std::smatch match;
|
||||
std::regex decl_rgx(R"(^ (?:int|uint|BOOL) (\w+);)"); // currently only considering these types for declarations
|
||||
std::regex ass_rgx(R"(^ +?(\w+) = (.+);)"); // OwO
|
||||
std::regex if_rgx(R"(if \((.+)\) )");
|
||||
std::regex usg_rgx(R"(,"(Usage: [^;]+).?"[, \)])"); // .? is a workaround to prevent raw string from closing
|
||||
std::regex usg_vld_rgx(R"(Usage: .+?\((.*?)\))");
|
||||
std::regex lua_is_rgx(R"(lua_is(.+?)\(L,(\w+?)\))");
|
||||
std::regex lua_to_rgx(R"(lua_to(.+?)\(L,(\w+?)[,\)])");
|
||||
std::regex lua_push_rgx(R"(lua_push(.+?)\(())");
|
||||
std::regex dowhl_cond_rgx(R"(while \((.+)\);)");
|
||||
|
||||
const std::regex usg_vld_rgx(R"(Usage: .+?\((.*?)\))");
|
||||
std::optional<int> prsi_lcls(std::string str, std::unordered_map<std::string, int>& lcls)
|
||||
{ // looks up local variables, before it tries to call prsi
|
||||
if (lcls.count(str)) return lcls[str];
|
||||
return prsi(str);
|
||||
}
|
||||
|
||||
// Performs one reduction step of the infix evaluation: pops the two topmost
// operands and the topmost operator and applies the operator to them.
//   vals - operand stack; the right-hand operand is on top
//   ops  - operator stack
// Returns the result, or std::nullopt when the stacks do not hold enough
// entries (malformed expression), on division/modulo by zero, or when the
// operator is not a supported binary operator (e.g. a left-over bracket).
// The caller is responsible for pushing the result back onto vals.
__forceinline std::optional<int> slv_step(std::stack<int>& vals, std::stack<Op>& ops)
{
    // calling top()/pop() on an empty stack is undefined behaviour, so bail out first
    if (vals.size() < 2 || ops.empty()) return std::nullopt;

    const int v2 = vals.top(); // right-hand operand was pushed last
    vals.pop();
    const int v1 = vals.top();
    vals.pop();
    const Op op = ops.top();
    ops.pop();

    switch (op)
    {
    case LOR:
        return v1 || v2;
    case LAND:
        return v1 && v2;
    case BOR:
        return v1 | v2;
    case BXOR:
        return v1 ^ v2;
    case BAND:
        return v1 & v2;
    case EQ:
        return v1 == v2;
    case UEQ:
        return v1 != v2;
    case LT:
        return v1 < v2;
    case LTE:
        return v1 <= v2;
    case BT:
        return v1 > v2;
    case BTE:
        return v1 >= v2;
    case PLS:
        return v1 + v2;
    case MIN:
        return v1 - v2;
    case MUL:
        return v1 * v2;
    case DIV:
        if (v2 == 0) return std::nullopt; // division by zero is not evaluable
        return v1 / v2;
    case MOD:
        if (v2 == 0) return std::nullopt; // modulo by zero is not evaluable
        return v1 % v2;
    default:
        break;
    }
    return std::nullopt; // unsupported op
}
|
||||
|
||||
__forceinline std::optional<int> push_op(std::stack<int>& vals, std::stack<Op>& ops, Op op)
|
||||
{
|
||||
while (ops.size() >= 1 && op_prec(ops.top()) >= op_prec(op))
|
||||
{ // last op has greater or same precedence
|
||||
auto res = slv_step(vals, ops);
|
||||
if (!res.has_value()) return std::nullopt;
|
||||
vals.push(res.value());
|
||||
}
|
||||
ops.push(op);
|
||||
}
|
||||
|
||||
std::regex lua_is_quick(R"(^lua_is\S+?\(L,\S+?\)$)");
|
||||
std::regex sstrcmpi_quick(R"(^SStrCmpI\(\S+?,\S+?,\S+?\)$)");
|
||||
|
||||
std::optional<int> eval(std::string& infix, locals& lcls)
|
||||
{
|
||||
std::smatch match;
|
||||
if (std::regex_search(infix, match, lua_is_quick)) return 1;
|
||||
if (std::regex_search(infix, match, sstrcmpi_quick)) return 1;
|
||||
|
||||
if (infix.find_first_of(' ') == std::string::npos)
|
||||
{ // TODO performance escape hatch for simple infix strings - might actually be counter productive... have to check
|
||||
return prsi_lcls(infix, lcls);
|
||||
}
|
||||
|
||||
std::stack<int> vals;
|
||||
std::stack<Op> ops;
|
||||
int tk_start = 0;
|
||||
std::string tk;
|
||||
bool was_brr;
|
||||
for (int tk_end = 0; tk_end < infix.length(); tk_end++)
|
||||
{
|
||||
was_brr = false;
|
||||
switch (infix[tk_end])
|
||||
{
|
||||
case ' ':
|
||||
tk = infix.substr(tk_start, tk_end - tk_start);
|
||||
tk_start = tk_end + 1;
|
||||
break;
|
||||
case '(':
|
||||
tk = "";
|
||||
ops.push(Op::BRL);
|
||||
tk_start = tk_end + 1;
|
||||
continue;
|
||||
case ')':
|
||||
tk = infix.substr(tk_start, tk_end - tk_start);
|
||||
tk_start = tk_end + 1;
|
||||
was_brr = true;
|
||||
break;
|
||||
}
|
||||
if (tk == "" && tk_end == infix.length() - 1)
|
||||
{ // last token
|
||||
tk = infix.substr(tk_start, tk_end - tk_start + 1);
|
||||
}
|
||||
if (tk == "" && !was_brr) continue; // empty token, nothing to do
|
||||
|
||||
if (tk == "||") push_op(vals, ops, LOR);
|
||||
else if (tk == "&&") push_op(vals, ops, LAND);
|
||||
else if (tk == "|") push_op(vals, ops, BOR);
|
||||
else if (tk == "^") push_op(vals, ops, BXOR);
|
||||
else if (tk == "&") push_op(vals, ops, BAND);
|
||||
else if (tk == "==") push_op(vals, ops, EQ);
|
||||
else if (tk == "!=") push_op(vals, ops, UEQ);
|
||||
else if (tk == "<") push_op(vals, ops, LT);
|
||||
else if (tk == "<=") push_op(vals, ops, LTE);
|
||||
else if (tk == ">") push_op(vals, ops, BT);
|
||||
else if (tk == ">=") push_op(vals, ops, BTE);
|
||||
else if (tk == "+") push_op(vals, ops, PLS);
|
||||
else if (tk == "-") push_op(vals, ops, MIN);
|
||||
else if (tk == "*") push_op(vals, ops, MUL);
|
||||
else if (tk == "/") push_op(vals, ops, DIV);
|
||||
else if (tk == "%") push_op(vals, ops, MOD);
|
||||
else if (tk != "")
|
||||
{
|
||||
auto pr = prsi_lcls(tk, lcls);
|
||||
if (!pr.has_value()) return std::nullopt;
|
||||
vals.push(pr.value());
|
||||
}
|
||||
if (was_brr)
|
||||
{
|
||||
while (ops.top() != BRL)
|
||||
{
|
||||
auto res = slv_step(vals, ops);
|
||||
if (!res.has_value()) return std::nullopt;
|
||||
vals.push(res.value());
|
||||
}
|
||||
ops.pop(); // popping left brace
|
||||
}
|
||||
tk = "";
|
||||
}
|
||||
while (!ops.empty())
|
||||
{
|
||||
auto res = slv_step(vals, ops);
|
||||
if (!res.has_value()) return std::nullopt;
|
||||
vals.push(res.value());
|
||||
}
|
||||
if (vals.size() != 1 || !ops.empty()) return std::nullopt;
|
||||
return vals.top();
|
||||
}
|
||||
|
||||
bool FunctionInfo::prc_varmap_rgx(bool prc_in, const std::string& ln_in, std::regex& rgx, locals& lcls)
|
||||
{
|
||||
std::string ln = ln_in;
|
||||
bool fnd = false;
|
||||
varmap& params = prc_in ? in : out;
|
||||
while (std::regex_search(ln, match, rgx))
|
||||
{
|
||||
fnd = true;
|
||||
int lua_idx;
|
||||
std::string lua_type;
|
||||
lua_type = match[1];
|
||||
if (lua_type == "lstring") lua_type = "string"; // lstring is a string!
|
||||
else if (lua_type == "fstring") lua_type = "string"; // frsting is a string!
|
||||
if (&rgx == &lua_push_rgx)
|
||||
{ // push parsing uses a global index starting with 1
|
||||
lua_idx = out.size() + 1;
|
||||
}
|
||||
else lua_idx = prsi_lcls(match[2], lcls).value();
|
||||
if (lua_idx == -1)
|
||||
{ // -1 means there wasn't a literal used for accessing the index, so i can not parse it
|
||||
if (&rgx == &lua_push_rgx) out_cnt = -1;
|
||||
else in_cnt = -1; // TODO might be possible to parse when evaluating variables
|
||||
return fnd;
|
||||
}
|
||||
params[lua_idx].push_back(lua_type); // always push lua type for now
|
||||
ln = match.suffix();
|
||||
}
|
||||
return fnd;
|
||||
}
|
||||
|
||||
void FunctionInfo::chk_vld()
|
||||
{ // run all checks, so we have the full picture
|
||||
|
@ -46,19 +241,10 @@ void FunctionInfo::chk_vld()
|
|||
prs_vld = vld;
|
||||
}
|
||||
|
||||
bool FunctionInfo::nil_in_varmap(bool proc_in) const
|
||||
void FunctionInfo::cln_varmap(bool prc_in)
|
||||
{
|
||||
const varmap& params = proc_in ? in : out;
|
||||
for (auto& [key, value] : params)
|
||||
if (std::find(value.begin(), value.end(), "nil") != value.end())
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
void FunctionInfo::cln_varmap(bool proc_in)
|
||||
{
|
||||
varmap& params = proc_in ? in : out;
|
||||
int cnt = proc_in ? in_cnt : out_cnt;
|
||||
varmap& params = prc_in ? in : out;
|
||||
int cnt = prc_in ? in_cnt : out_cnt;
|
||||
if (cnt == -1)
|
||||
{
|
||||
params.clear(); // dynamic varmap does not need entries
|
||||
|
@ -69,7 +255,16 @@ void FunctionInfo::cln_varmap(bool proc_in)
|
|||
std::sort(lua_types.begin(), lua_types.end());
|
||||
lua_types.erase(std::unique(lua_types.begin(), lua_types.end()), lua_types.end());
|
||||
}
|
||||
if (proc_in) in_cnt = in.size(); // input count can only be inferred by in varmap size
|
||||
if (prc_in) in_cnt = in.size(); // input count can only be inferred by in varmap size
|
||||
}
|
||||
|
||||
bool FunctionInfo::nil_in_varmap(bool prc_in) const
|
||||
{
|
||||
const varmap& params = prc_in ? in : out;
|
||||
for (auto& [key, value] : params)
|
||||
if (std::find(value.begin(), value.end(), "nil") != value.end())
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string FunctionInfo::str() const
|
||||
|
@ -82,9 +277,9 @@ std::string FunctionInfo::str() const
|
|||
return str;
|
||||
}
|
||||
|
||||
std::string FunctionInfo::str_varmap(bool proc_in) const
|
||||
std::string FunctionInfo::str_varmap(bool prc_in) const
|
||||
{
|
||||
int cnt = proc_in ? in_cnt : out_cnt;
|
||||
int cnt = prc_in ? in_cnt : out_cnt;
|
||||
switch (cnt)
|
||||
{
|
||||
case -1:
|
||||
|
@ -92,7 +287,7 @@ std::string FunctionInfo::str_varmap(bool proc_in) const
|
|||
case 0:
|
||||
return "0 ()";
|
||||
}
|
||||
const varmap& params = proc_in ? in : out;
|
||||
const varmap& params = prc_in ? in : out;
|
||||
if (params.size() > 0)
|
||||
{ // cnt and params.size() might differ - lua_push* calls can be undetected
|
||||
std::string str = std::to_string(cnt) + " (";
|
||||
|
@ -108,3 +303,224 @@ std::string FunctionInfo::str_varmap(bool proc_in) const
|
|||
}
|
||||
return std::to_string(cnt) + " ()";
|
||||
}
|
||||
|
||||
// Creates an empty FunctionInfo; all members keep their in-class default values.
FunctionInfo::FunctionInfo() = default;
|
||||
|
||||
// Parses one function from its recorded source lines.
//   src - source lines of a single function. src[0] is expected to carry the
//         hexadecimal address starting at column 13 and src[1] the function
//         name starting at column 13 up to the first '('. Presumably these are
//         the "// ADDRESS - " and "uint lua_wow_" lines - verify against the
//         caller. Processing of the body starts at src[3].
// While walking the body the constructor tracks local variables, if/else
// branches, switch cases, gotos, returns and do-while loops in order to decide
// which lua_is / lua_to / lua_push calls count as inputs and outputs.
// Afterwards the varmaps are cleaned and the result is validated.
// Throws whatever std::stoul throws when the address is not a hex number.
FunctionInfo::FunctionInfo(std::vector<std::string> src)
{
    if (src.size() < 2 || src[0].length() <= 13 || src[1].length() < 13)
    { // header lines missing or too short - indexing them would be out of bounds
        prs_msg.push_back("malformed function source: address or signature line missing");
        return;
    }
    // stoul instead of stoi: addresses >= 0x80000000 do not fit into int and made stoi throw
    addr = static_cast<uint>(std::stoul(src[0].substr(13), nullptr, 16)); // no use of prsi - values occur once -> no caching wanted
    nm = src[1].substr(13, src[1].find_first_of('(') - 13);

    std::string ind; // indentation - keeping track of current block level
    std::string skp_ass_utl; // skip assignment processing until a line starting with this has been reached
    std::string skp_push_utl; // skip push parsing until this ln has been reached
    std::string cond;
    std::string infix;
    std::optional<int> er; // eval result
    bool prc_decl = true; // process variable declarations
    bool enc_usg = false; // encountered usg
    uint ret_val; // return value
    lp_track loops; // keeps track where the loop started
    std::unordered_map<std::string, bool> push_track; // keeps track if on this indent level a push has happened
    locals lcls; // local variables

    for (int idx = 3; idx < static_cast<int>(src.size()); idx++) // skip right to the lines which matter
    {
        std::string& ln = src[idx];
        bool is_if_ln = false; // line tracking variable, reset for every line

        if (prc_decl)
        { // if local variable definition parsing is enabled
            if (ln == " ")
            { // reached end variable definition block, no further processing required
                prc_decl = false;
                continue;
            }
            if (std::regex_search(ln, match, decl_rgx)) lcls[match[1]] = 0; // track with init value 0
            continue; // no need to do further processing
        }

        // determine the indentation; a line consisting only of spaces keeps the previous one
        const size_t first = ln.find_first_not_of(' ');
        if (first != std::string::npos) ind = ln.substr(0, first);
        const int i = static_cast<int>(first != std::string::npos ? first : ln.length());

        switch (ln[i]) // for i == length this reads the terminating '\0' and matches no case
        {
        case '}':
            if (ln == ind + '}')
            { // block end, maybe simple while or for loop end
                loops.erase(ind); // reached loop end (no-op when nothing is tracked)
                break;
            }
            if (std::regex_search(ln, match, dowhl_cond_rgx))
            {
                LoopInfo& lp = loops[ind];
                lp.iter--;
                if (lp.iter < 0)
                { // max iterations exceeded
                    prs_msg.push_back(std::format("max iterations exceeded in: {}", idx + 1));
                }
                else
                {
                    infix = match[1];
                    const auto lp_er = eval(infix, lcls);
                    if (lp_er.has_value() && lp_er.value() != 0)
                    { // condition was parsable and evaluated to true - jump back to the loop start
                        idx = lp.idx;
                        continue;
                    }
                }
                loops.erase(ind);
                break;
            }
            break;
        case 'c':
        case 'd':
            if (lnsw(ln, i, "do {"))
            { // remember where the loop starts
                loops[ind] = { idx + (lnew(ln, "{")) - 1, MAX_ITER };
                break;
            }
            if (lnsw(ln, i, "case ") || lnsw(ln, i, "default:"))
            {
                if (push_track[ind]) ssie(skp_push_utl, ind + "}");
                else push_track[ind] = false; // only execute if a previous case didn't already contain a push
                break;
            }
            break;
        case 'f':
            if (lnsw(ln, i, "for ("))
            {
                loops[ind] = { idx + (lnew(ln, "{")) - 1, MAX_ITER };
                break;
            }
            break;
        case 'i': // check for if
            if (lnsw(ln, i, "if "))
            { // enough characters left to be if statement
                is_if_ln = true;
                break;
            }
            break;
        case 'e':
            if (lnsw(ln, i, "else "))
            { // enough characters left to be else statement
                if (push_track[ind])
                { // the taken branch already pushed - ignore assignments and pushes of the else branch
                    ssie(skp_ass_utl, lnew(ln, "{") ? ind + "}" : "\1");
                    ssie(skp_push_utl, lnew(ln, "{") ? ind + "}" : "\1");
                }
                else if (lnsw(ln, i + 5, "if ")) is_if_ln = true;
                break;
            }
            break;
        case 'g':
            if (!out.empty() && lnsw(ln, i, "goto "))
            { // skip pushes until the goto label has been reached
                ssie(skp_push_utl, std::string(ln.substr(i + 5, ln.length() - i - 6)) + ":");
                break;
            }
            break;
        case 'r': // check for return
            if (out_cnt == -1) break; // this function has a dynamic number of outputs, no need for further processing
            if (lnsw(ln, i, "return "))
            { // enough characters left to be the simplest return
                cond = ln.substr(i + 7, ln.length() - i - 8); // expression between "return " and the last character; substr clamps the length
                er = eval(cond, lcls);
                ret_val = static_cast<uint>(er.value_or(-1)); // unparsable return values count as dynamic
                // check if already encountered return value (except 0) matches; dynamic if not
                if (ret_val != 0)
                { // TODO probably need to check if i am currently skipping push
                    out_cnt = (out_cnt == 0 || static_cast<uint>(out_cnt) == ret_val) ? static_cast<int>(ret_val) : -1;
                    if (ret_val == out.size())
                    { // found a return statement and return value matches output param count
                        ssie(skp_push_utl, "\1skip2end");
                    }
                }
                break;
            }
            break;
        case 'w':
            if (lnsw(ln, i, "while ("))
            { // remember where the loop starts
                loops[ind] = { idx + (lnew(ln, "{")) - 1, MAX_ITER };
                break;
            }
            break;
        }

        if (is_if_ln)
        {
            std::regex_search(ln, match, if_rgx);
            cond = match[1];
            er = eval(cond, lcls);
            if (!er.has_value() || er.value() != 0) push_track[ind] = true; // unknown or true condition: treat the branch as taken
            else
            { // condition is known to be false: ignore assignments and pushes of this branch
                push_track[ind] = false;
                skp_ass_utl = lnew(ln, "{") ? ind + "}" : "\1";
                skp_push_utl = lnew(ln, "{") ? ind + "}" : "\1";
            }
        }

        if (skp_ass_utl.empty() && std::regex_search(ln, match, ass_rgx) && lcls.count(match[1])) // only process assignment for variables i still care about
        { // assignment regex matched and local variable with the name is tracked
            infix = match[2];
            const auto ass_er = eval(infix, lcls);
            if (ass_er.has_value()) lcls[match[1]] = ass_er.value();
            else lcls.erase(match[1]); // value is unknown from now on - stop tracking
        }

        if (!enc_usg)
        { // usage string can only occur once anyway
            if (std::regex_search(ln, match, usg_rgx))
            { // found usg string
                usg = match[1];
                size_t found = -1; // so the first find uses 0 through the increment
                while ((found = usg.find("\\\"", found + 1)) != std::string::npos) usg.replace(found, 2, "\"");
                enc_usg = true;
                ssie(skp_push_utl, "\1"); // sometimes lua_pushfstring is used before lua_error; do not consider this an output!
            }
        }

        if (in_cnt != -1)
        {
            prc_varmap_rgx(true, ln, lua_is_rgx, lcls);
            prc_varmap_rgx(true, ln, lua_to_rgx, lcls);
        }
        if (out_cnt != -1 && skp_push_utl.empty() && prc_varmap_rgx(false, ln, lua_push_rgx, lcls))
        {
            std::string lvl = is_if_ln ? ind : ind.substr(0, ind.length() - 2); // do not subtract 1 block level, if this was a simple if ln
            while (lvl >= " ")
            { // if a push was found, track it for this and all lower block levels
                push_track[lvl] = true;
                lvl = lvl.substr(0, lvl.length() - 2);
            }
        }

        if (!skp_ass_utl.empty() && ln.starts_with(skp_ass_utl)) skp_ass_utl = "";
        if (skp_ass_utl == "\1") skp_ass_utl = "";
        if (!skp_push_utl.empty() && ln.starts_with(skp_push_utl)) skp_push_utl = ""; // reset skip since ln has been reached now
        if (skp_push_utl == "\1") skp_push_utl = ""; // reset temporary skip which was used for 1 ln
    }

    cln_varmap(true); // clean input varmap
    cln_varmap(false); // clean output varmap
    chk_vld(); // check validity of parsed data
}
|
||||
|
|
|
@ -1,13 +1,29 @@
|
|||
#pragma once
|
||||
#include "Utility.hpp"
|
||||
#include <map>
|
||||
#include <optional>
|
||||
#include <regex>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
const int MAX_ITER = 256;
|
||||
|
||||
// Bookkeeping for one loop that is currently being tracked by the parser.
struct LoopInfo
{
    int idx;  // source line index the parser jumps back to when the loop repeats
    int iter; // remaining iteration budget; the parser initialises it with MAX_ITER
};
|
||||
|
||||
typedef unsigned int uint;
|
||||
typedef std::unordered_map<std::string, LoopInfo> lp_track;
|
||||
typedef std::unordered_map<std::string, int> locals;
|
||||
typedef std::map<int, std::vector<std::string>> varmap;
|
||||
|
||||
struct FunctionInfo
|
||||
{
|
||||
std::smatch match;
|
||||
|
||||
uint addr = 0;
|
||||
std::string nm = "";
|
||||
std::string usg = "";
|
||||
|
@ -17,9 +33,16 @@ struct FunctionInfo
|
|||
int out_cnt = 0;
|
||||
std::vector<std::string> prs_msg;
|
||||
bool prs_vld = false;
|
||||
|
||||
void chk_vld();
|
||||
void cln_varmap(bool proc_in);
|
||||
void cln_varmap(bool prc_in);
|
||||
bool nil_in_varmap(bool prc_in) const;
|
||||
bool prc_varmap_rgx(bool prc_in, const std::string& ln_in, std::regex& rgx, locals& lcls);
|
||||
std::string str() const;
|
||||
std::string str_varmap(bool proc_in) const;
|
||||
bool nil_in_varmap(bool proc_in) const;
|
||||
std::string str_varmap(bool prc_in) const;
|
||||
|
||||
FunctionInfo();
|
||||
FunctionInfo(std::vector<std::string> src);
|
||||
};
|
||||
|
||||
std::optional<int> eval(std::string& infix, locals& lcls);
|
||||
|
|
|
@ -1,18 +1,13 @@
|
|||
#include "FunctionInfo.hpp"
|
||||
#include "Utility.hpp"
|
||||
#include <thread>
|
||||
#include <chrono>
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <queue>
|
||||
#include <regex>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
#include <map>
|
||||
|
||||
std::string fl_path;
|
||||
|
||||
bool dbg;
|
||||
bool ivld_only;
|
||||
bool use_all = true;
|
||||
|
@ -25,54 +20,12 @@ bool use_all = true;
|
|||
* 123 ivld
|
||||
*/
|
||||
|
||||
|
||||
#define PL(msg) std::cout << msg << '\n' // print line
|
||||
#define PDBG(msg) if(!dbg) PL(msg) // print line if debug flag is set
|
||||
|
||||
std::smatch match1; // TODO remove later
|
||||
|
||||
std::regex usage_regex(R"(,"(Usage: [^;]+).?"[, \)])"); // .? is just a workaround, since )" in the raw string would terminate it immediately
|
||||
std::regex lua_push_regex(R"(lua_push(.+?)\(())");
|
||||
std::regex lua_is_regex(R"(lua_is(.+?)\(L,(\w+?)\))");
|
||||
std::regex lua_to_regex(R"(lua_to(.+?)\(L,(\w+?)[,\)])");
|
||||
std::regex var_decl_regex(R"(^ (?:int|uint|BOOL) (\w+);)"); // currently only considering these types for declarations
|
||||
std::regex var_ass_regex(R"(^ +?(\w+?) = (\w+) ?(.)? ?(\w+?){0,1};)");
|
||||
|
||||
FunctionInfo fi;
|
||||
int lua_index;
|
||||
std::string lua_type;
|
||||
int out_index;
|
||||
std::unordered_map<std::string, FunctionInfo> fmap;
|
||||
std::chrono::steady_clock::time_point tstart;
|
||||
std::chrono::steady_clock::time_point tend;
|
||||
|
||||
bool process_varmap_regex(std::map<int, std::vector<std::string>>& varmap, std::string ln, std::regex* regex)
|
||||
{
|
||||
bool found = false;
|
||||
while (std::regex_search(ln, match1, *regex))
|
||||
{
|
||||
found = true;
|
||||
lua_type = match1[1];
|
||||
if (lua_type == "lstring") lua_type = "string"; // lstring is a string!
|
||||
else if (lua_type == "fstring") lua_type = "string"; // frsting is a string!
|
||||
if (regex == &lua_push_regex)
|
||||
{ // push parsing uses a global index starting with 1
|
||||
lua_index = out_index;
|
||||
out_index++;
|
||||
}
|
||||
else lua_index = prsi(match1[2]).val;
|
||||
if (lua_index == -1)
|
||||
{ // -1 means there wasn't a literal used for accessing the index, so i can not parse it
|
||||
if (regex == &lua_push_regex) fi.out_cnt = -1;
|
||||
else fi.in_cnt = -1; // TODO might be possible to parse when evaluating variables
|
||||
return found;
|
||||
}
|
||||
varmap[lua_index].push_back(lua_type); // always push lua type for now
|
||||
ln = match1.suffix();
|
||||
}
|
||||
return found;
|
||||
}
|
||||
|
||||
void pfmap(bool ivld_only)
|
||||
{
|
||||
for (const auto& [name, fi] : fmap)
|
||||
|
@ -116,8 +69,9 @@ int main()
|
|||
* skip scanning for output params, if unparsable index (not into int) has been found for lua_push* (dynamic outputs)
|
||||
* skip scanning for output params, if returns with different non-0 values have been found (or it can not be parsed as int)
|
||||
*/
|
||||
|
||||
tstart = std::chrono::high_resolution_clock::now();
|
||||
|
||||
std::string fl_path;
|
||||
|
||||
if (use_all)
|
||||
{
|
||||
dbg = true;
|
||||
|
@ -131,219 +85,36 @@ int main()
|
|||
fl_path = R"(C:\Users\alphaomega\Documents\Wow.exe.c.test.txt)";
|
||||
}
|
||||
|
||||
|
||||
|
||||
std::ifstream source_file(fl_path);
|
||||
if (!source_file.is_open())
|
||||
{
|
||||
std::cerr << "Unable to open file " << fl_path << std::endl;
|
||||
return 1;
|
||||
}
|
||||
std::unordered_map<std::string, int> func_vars;
|
||||
|
||||
std::string skp_push_utl = ""; // skip push parsing until this ln has been reached
|
||||
std::string lp_utl = "";
|
||||
std::unordered_map<std::string, bool> push_track;
|
||||
|
||||
bool in_func = false;
|
||||
bool enc_usg = false; // encountered usg
|
||||
bool prc_def = false;
|
||||
uint ret_val; // return value
|
||||
bool is_if_ln;
|
||||
std::string ind; // indent
|
||||
tstart = std::chrono::high_resolution_clock::now();
|
||||
bool rec_src = false; // record source
|
||||
std::vector<std::string> src;
|
||||
std::string ln;
|
||||
|
||||
std::vector<char>::iterator eol;
|
||||
int cnt = 0;
|
||||
while (std::getline(source_file, ln))
|
||||
{
|
||||
if (ln.empty()) continue; // skip empty lines
|
||||
if (ln.starts_with("/*")) continue; // skip block comment lines
|
||||
if (!in_func)
|
||||
{ // searching for next function
|
||||
if (ln.starts_with("// ADDRESS - "))
|
||||
{ // found addr ln - create new FunctionInfo and reset variables
|
||||
fi = {};
|
||||
fi.addr = std::stoi(&ln[13], 0, 16); // do not use prsi, these values prb occur only once, so no caching wanted
|
||||
out_index = 0;
|
||||
enc_usg = false;
|
||||
lp_utl = "";
|
||||
skp_push_utl = "";
|
||||
push_track = {};
|
||||
func_vars = {};
|
||||
continue;
|
||||
}
|
||||
if (ln.starts_with("uint lua_wow_"))
|
||||
{ // found ln with function signature
|
||||
fi.nm = ln.substr(13, ln.find_first_of("(") - 13); // get part of real function name
|
||||
in_func = true; // start function parsing from now on
|
||||
continue;
|
||||
}
|
||||
if (ln.starts_with("// ADDRESS - "))
|
||||
{
|
||||
rec_src = true; // found starting line of function soure
|
||||
}
|
||||
else
|
||||
{ // processing current function
|
||||
is_if_ln = false;
|
||||
if (ln == "{")
|
||||
{ // this is the starting block of the function
|
||||
prc_def = true; // enable regex handling for local variable definitions
|
||||
continue;
|
||||
}
|
||||
if (ln == "}")
|
||||
{ // end of function found
|
||||
in_func = false;
|
||||
fi.cln_varmap(true);
|
||||
fi.cln_varmap(false);
|
||||
fi.chk_vld();
|
||||
fmap[fi.nm] = fi;
|
||||
continue;
|
||||
}
|
||||
if (prc_def)
|
||||
{ // if local variable definition parsing is enabled
|
||||
if (ln == " ")
|
||||
{ // reached end variable definition block, no further processing required
|
||||
prc_def = false;
|
||||
continue;
|
||||
}
|
||||
if (std::regex_search(ln, match1, var_decl_regex))
|
||||
{ // found local variable, track it with init value 0
|
||||
func_vars[match1[1]] = 0;
|
||||
}
|
||||
|
||||
}
|
||||
int i = 0;
|
||||
while (i < ln.length())
|
||||
{
|
||||
if (ln[i] != ' ')
|
||||
{
|
||||
ind = ln.substr(0, i);
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
switch (ln[i])
|
||||
{
|
||||
case 'c':
|
||||
case 'd':
|
||||
if (lnsw(ln, i, "do {"))
|
||||
{ // setting loop end, if not already in a loop
|
||||
ssie(lp_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1"); // check if this is a single ln do-while loop (who would even program such a thing!?
|
||||
break;
|
||||
}
|
||||
if (lnsw(ln, i, "case ") || lnsw(ln, i, "default:"))
|
||||
{
|
||||
if (push_track[ind]) ssie(skp_push_utl, ind + "}");
|
||||
else push_track[ind] = false; // only execute if a previous case didn't already contain a push
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 'f':
|
||||
if (lnsw(ln, i, "for ("))
|
||||
{
|
||||
ssie(lp_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1"); // check if this is a single ln for loop
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 'i': // check for if
|
||||
if (lnsw(ln, i, "if "))
|
||||
{ // enough characters left to be if statement
|
||||
is_if_ln = true;
|
||||
push_track[ind] = false;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 'e':
|
||||
if (lnsw(ln, i, "else "))
|
||||
{ // enough characters left to be if statement
|
||||
if (push_track[ind]) ssie(skp_push_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 'g':
|
||||
if (!fi.out.empty() && lnsw(ln, i, "goto "))
|
||||
{
|
||||
ssie(skp_push_utl, std::string(ln.substr(i + 5, ln.length() - i - 6)) + ":");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 'r': // check for return
|
||||
if (fi.out_cnt == -1) break; // this function has a dynamic number of outputs, no need for further processing
|
||||
if (lnsw(ln, i, "return "))
|
||||
{ // enough characters left to be the simplest return
|
||||
ret_val = prsi(std::string(&ln[i + 7], ln.length() - i - 8)).val;
|
||||
// check if already encountered return value (except 0) matches; dynamic if not
|
||||
if (ret_val != 0)
|
||||
{ // TODO probably need to check if i am currently skipping push
|
||||
fi.out_cnt = (fi.out_cnt == 0 || fi.out_cnt == ret_val) ? ret_val : -1;
|
||||
if (ret_val == fi.out.size())
|
||||
{ // found a return statement and return value matches output param count
|
||||
ssie(skp_push_utl, "\1skip2end");
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 'w':
|
||||
if (lnsw(ln, i, "while ("))
|
||||
{ // setting loop end, if not already in a loop
|
||||
ssie(lp_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1"); // check if this is a single ln while loop
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
//pdbg(ln);
|
||||
//pdbg(ind + "<-");
|
||||
//if (auto match = ctre::match<var_ass_regex>(ln))
|
||||
|
||||
if (std::regex_search(ln, match1, var_ass_regex))
|
||||
{
|
||||
|
||||
int i = 0;
|
||||
//auto test = match.get<1>();
|
||||
//pdbg("variable assignment happens for " + match.get<1>().to_string());
|
||||
}
|
||||
|
||||
if (!enc_usg)
|
||||
{ // usage string can only occur once anyway
|
||||
//if (auto match = ctre::match<usage_regex>(ln))
|
||||
if (std::regex_search(ln, match1, usage_regex))
|
||||
{ // found usg string
|
||||
fi.usg = match1[1];
|
||||
size_t found = -1; // so the first find uses 0 through the increment
|
||||
while ((found = fi.usg.find("\\\"", found + 1)) != std::string::npos) fi.usg.replace(found, 2, "\"");
|
||||
enc_usg = true;
|
||||
skp_push_utl = !skp_push_utl.empty() ? skp_push_utl : "\1"; // sometimes lua_pushfstring is used before lua_error; do not consider this an output!
|
||||
}
|
||||
}
|
||||
|
||||
if (fi.in_cnt != -1)
|
||||
{
|
||||
process_varmap_regex(fi.in, ln, &lua_is_regex);
|
||||
process_varmap_regex(fi.in, ln, &lua_to_regex);
|
||||
}
|
||||
if (fi.out_cnt != -1 && skp_push_utl.empty() && process_varmap_regex(fi.out, ln, &lua_push_regex))
|
||||
{
|
||||
if (!lp_utl.empty())
|
||||
{ // found lua_push* inside a loop
|
||||
fi.out_cnt = -1;
|
||||
}
|
||||
else
|
||||
{ // normal processing
|
||||
|
||||
std::string lvl = is_if_ln ? ind : ind.substr(0, ind.length() - 2); // do not subtract 1 block level, if this was a simple if ln
|
||||
while (lvl >= " ")
|
||||
{ // if a push was found, track it for this and all lower block levels
|
||||
push_track[lvl] = true;
|
||||
lvl = lvl.substr(0, lvl.length() - 2);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (!lp_utl.empty() && ln.starts_with(lp_utl)) lp_utl = "";
|
||||
if (lp_utl == "\1") lp_utl = "";
|
||||
if (!skp_push_utl.empty() && ln.starts_with(skp_push_utl)) skp_push_utl = ""; // reset skip since ln has been reached now
|
||||
if (skp_push_utl == "\1") skp_push_utl = ""; // reset temporary skip which was used for 1 ln
|
||||
if (rec_src) src.push_back(ln);
|
||||
if (ln == "}")
|
||||
{ // end of function code reached
|
||||
auto fi = FunctionInfo(src);
|
||||
fmap[fi.nm] = fi; // there is no handling for duplicate entries bc of performance - there shoulnd't be any anyway
|
||||
//PL(fi.nm);
|
||||
cnt++;
|
||||
src.clear();
|
||||
rec_src = false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
tend = std::chrono::high_resolution_clock::now();
|
||||
|
|
|
@ -6,26 +6,26 @@
|
|||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
std::unordered_map<std::string, ParseResult> prs_cache
|
||||
{
|
||||
{ "FALSE", { true, 0 } },
|
||||
{ "TRUE", { true, 1 } },
|
||||
std::unordered_map<std::string, std::optional<int>> prs_cache
|
||||
{ // initialize with constants
|
||||
{ "FALSE", 0 },
|
||||
{ "TRUE", 1 },
|
||||
};
|
||||
|
||||
ParseResult prsi(std::string str)
|
||||
std::optional<int> prsi(std::string str)
|
||||
{
|
||||
auto found = prs_cache.find(str);
|
||||
if (found != prs_cache.end()) return found->second; // cache hit
|
||||
try
|
||||
{ // cache miss, trying to parse and caching the result
|
||||
int v;
|
||||
if (str.starts_with("0x")) v = std::stoi(str.data(), 0, 16); // hex parsing
|
||||
else v = std::stoi(str.data());
|
||||
prs_cache[str] = { true, v };
|
||||
if (str.starts_with("0x")) v = std::stoul(str.data(), 0, 16); // hex parsing
|
||||
else v = std::stoul(str.data());
|
||||
prs_cache[str] = v;
|
||||
}
|
||||
catch (std::invalid_argument e)
|
||||
{ // was not parsable, cache negative parse result
|
||||
prs_cache[str] = { false, -1 }; // value doesn't really matter here
|
||||
prs_cache[str] = std::nullopt; // value doesn't matter
|
||||
}
|
||||
return prs_cache[str];
|
||||
}
|
||||
|
@ -49,6 +49,11 @@ std::vector<char>* lf(const char* path)
|
|||
return vec;
|
||||
}
|
||||
|
||||
// "line ends with": reports whether ln ends with the character sequence ew.
bool lnew(std::string& ln, const char* ew)
{
    const size_t sfx_len = strlen(ew);
    if (sfx_len > ln.length()) return false; // suffix longer than the line can never match
    return ln.compare(ln.length() - sfx_len, sfx_len, ew) == 0;
}
|
||||
|
||||
bool lnsw(std::string& ln, int idx, const char* sw)
|
||||
{
|
||||
int len = strlen(sw);
|
||||
|
|
|
@ -1,16 +1,71 @@
|
|||
#pragma once
|
||||
#include <iostream>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
struct ParseResult
|
||||
{
|
||||
bool prsbl; // was parsable
|
||||
int val; // value which it was parsed into
|
||||
#define PL(msg) std::cout << msg << '\n' // print line
|
||||
|
||||
// Operators understood by the infix evaluator.
// Ordering follows https://en.cppreference.com/w/cpp/language/operator_precedence
enum Op
{
    BRL = 0,   // left bracket
    BRR = 1,   // right bracket
    LOR = 2,   // ||
    LAND = 3,  // &&
    BOR = 4,   // |
    BXOR = 5,  // ^
    BAND = 6,  // &
    EQ = 7,    // ==
    UEQ = 8,   // !=
    LT = 9,    // <
    LTE = 10,  // <=
    BT = 11,   // >
    BTE = 12,  // >=
    PLS = 13,  // +
    MIN = 14,  // -
    MUL = 15,  // *
    DIV = 16,  // /
    MOD = 17,  // %
};
|
||||
|
||||
ParseResult prsi(std::string str);
|
||||
// Returns the precedence of an operator; a larger value binds tighter.
// The left bracket gets an extremely low value, so it is never reduced by a
// precedence comparison. Operators without an entry yield -1.
__forceinline int op_prec(Op op)
{
    int prec = -1; // unsupported op
    switch (op)
    {
    case BRL:
        prec = -9000;
        break;
    case LOR:
        prec = -15;
        break;
    case LAND:
        prec = -14;
        break;
    case BOR:
        prec = -13;
        break;
    case BXOR:
        prec = -12;
        break;
    case BAND:
        prec = -11;
        break;
    case EQ:
    case UEQ:
        prec = -10;
        break;
    case LT:
    case LTE:
    case BT:
    case BTE:
        prec = -9;
        break;
    case PLS:
    case MIN:
        prec = -6;
        break;
    case MUL:
    case DIV:
    case MOD:
        prec = -5;
        break;
    default:
        break;
    }
    return prec;
}
|
||||
|
||||
std::optional<int> prsi(std::string str);
|
||||
std::vector<char>* lf(std::string& path);
|
||||
std::vector<char>* lf(const char* path);
|
||||
bool lnew(std::string& ln, const char* ew);
|
||||
bool lnsw(std::string& ln, int idx, const char* sw);
|
||||
void ssie(std::string& str, std::string repl);
|
||||
void pdbg(std::string& msg);
|
Loading…
Reference in New Issue