lots of refactoring...
new FunctionInfo files - most functions are now members of FunctionInfo; stable version before refactoring for loop parsing
This commit is contained in:
parent
cee06594d9
commit
82f367f068
|
@ -0,0 +1,110 @@
|
|||
#include "FunctionInfo.hpp"
|
||||
#include <algorithm>
|
||||
#include <format>
|
||||
#include <regex>
|
||||
|
||||
std::smatch match;
|
||||
|
||||
const std::regex usg_vld_rgx(R"(Usage: .+?\((.*?)\))");
|
||||
|
||||
// Validates the parsed information and stores the verdict in prs_vld.
// Every check is executed (no early exit), so prs_msg ends up holding the full picture.
void FunctionInfo::chk_vld()
{
    // parsing messages collected up until this point are hard errors
    bool vld = prs_msg.empty();
    if (in_cnt > -1)
    { // in varmap is not dynamic
        if (!usg.empty())
        { // check against usage string
            std::smatch usg_match; // local match object, so no file-scope state is touched
            if (std::regex_search(usg, usg_match, usg_vld_rgx))
            {
                const std::string usg_params = usg_match[1];
                // an empty parameter list stands for 0 params, otherwise params are comma separated
                const int usg_cnt = usg_params.empty()
                    ? 0
                    : static_cast<int>(std::count(usg_params.begin(), usg_params.end(), ',')) + 1;
                // compare against the very same number that is reported in the message;
                // the former condition rejected "0 vs 0" and accepted "1 vs 0"
                if (in_cnt != usg_cnt)
                {
                    prs_msg.push_back(std::format("in param count ({}) does not match usage string ({})", in_cnt, usg_cnt));
                    vld = false;
                }
            }
            else prs_msg.push_back("usage string is malformed"); // do not consider this for validity
        }
        for (int i = 1; i <= in_cnt; i++)
        { // lua indexes start with 1
            if (!in.contains(i))
            {
                prs_msg.push_back("in param index not in order");
                vld = false;
                break;
            }
        }
    }
    // -1 marks a dynamic number of outputs, which can not be checked against the out varmap
    if (out_cnt != -1 && static_cast<varmap::size_type>(out_cnt) != out.size())
    {
        prs_msg.push_back(std::format("out param count ({}) does not match return value ({})", out.size(), out_cnt));
        vld = false;
    }
    prs_vld = vld;
}
|
||||
|
||||
bool FunctionInfo::nil_in_varmap(bool proc_in) const
|
||||
{
|
||||
const varmap& params = proc_in ? in : out;
|
||||
for (auto& [key, value] : params)
|
||||
if (std::find(value.begin(), value.end(), "nil") != value.end())
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
void FunctionInfo::cln_varmap(bool proc_in)
|
||||
{
|
||||
varmap& params = proc_in ? in : out;
|
||||
int cnt = proc_in ? in_cnt : out_cnt;
|
||||
if (cnt == -1)
|
||||
{
|
||||
params.clear(); // dynamic varmap does not need entries
|
||||
return;
|
||||
}
|
||||
for (auto& [lua_index, lua_types] : params)
|
||||
{
|
||||
std::sort(lua_types.begin(), lua_types.end());
|
||||
lua_types.erase(std::unique(lua_types.begin(), lua_types.end()), lua_types.end());
|
||||
}
|
||||
if (proc_in) in_cnt = in.size(); // input count can only be inferred by in varmap size
|
||||
}
|
||||
|
||||
// Builds the printable, multi-line summary of this function:
// address (hex) and name with an "(invalid)" tag if prs_vld is false, the usage string
// (if there is one), the in and out varmaps and finally every parsing message.
std::string FunctionInfo::str() const
{
    const char* vld_tag = prs_vld ? "" : " (invalid)";
    std::string res = std::format("{:X} {}{}\n", addr, nm, vld_tag);
    if (!usg.empty())
    {
        res += " ";
        res += usg;
        res += "\n";
    }
    res += " in: ";
    res += str_varmap(true);
    res += "\n";
    res += " out: ";
    res += str_varmap(false);
    for (const std::string& msg : prs_msg)
    { // every message goes on its own line
        res += "\n ";
        res += msg;
    }
    return res;
}
|
||||
|
||||
std::string FunctionInfo::str_varmap(bool proc_in) const
|
||||
{
|
||||
int cnt = proc_in ? in_cnt : out_cnt;
|
||||
switch (cnt)
|
||||
{
|
||||
case -1:
|
||||
return "dynamic";
|
||||
case 0:
|
||||
return "0 ()";
|
||||
}
|
||||
const varmap& params = proc_in ? in : out;
|
||||
if (params.size() > 0)
|
||||
{ // cnt and params.size() might differ - lua_push* calls can be undetected
|
||||
std::string str = std::to_string(cnt) + " (";
|
||||
for (const auto& [lua_index, lua_types] : params)
|
||||
{
|
||||
for (const auto& lua_type : lua_types) str += lua_type + "/";
|
||||
str.pop_back(); // remove last slash
|
||||
str += ", ";
|
||||
}
|
||||
str[str.length() - 2] = ')'; // replace last comma with closing bracket
|
||||
str.pop_back(); // remove space after last comma
|
||||
return str;
|
||||
}
|
||||
return std::to_string(cnt) + " ()";
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
#pragma once
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using uint = unsigned int;
// lua index -> every lua type which was recorded for that index
using varmap = std::map<int, std::vector<std::string>>;

// Everything which was parsed for a single function.
struct FunctionInfo
{
    uint addr = 0;                    // address of the function, printed as hex by str()
    std::string nm;                   // name of the function
    std::string usg;                  // usage string, empty if none was found
    varmap in;                        // input parameters
    int in_cnt = 0;                   // number of input parameters, -1 means dynamic
    varmap out;                       // output parameters
    int out_cnt = 0;                  // number of output parameters, -1 means dynamic
    std::vector<std::string> prs_msg; // messages collected while parsing / validating
    bool prs_vld = false;             // verdict of chk_vld()

    // runs all validity checks, fills prs_msg and sets prs_vld
    void chk_vld();
    // cleans the in (proc_in == true) or out varmap
    void cln_varmap(bool proc_in);
    // printable summary of this function
    std::string str() const;
    // printable form of the in (proc_in == true) or out varmap
    std::string str_varmap(bool proc_in) const;
    // true if "nil" is listed as a type in the in (proc_in == true) or out varmap
    bool nil_in_varmap(bool proc_in) const;
};
|
|
@ -1,3 +1,5 @@
|
|||
#include "FunctionInfo.hpp"
|
||||
#include "Utility.hpp"
|
||||
#include <thread>
|
||||
#include <chrono>
|
||||
#include <algorithm>
|
||||
|
@ -6,174 +8,96 @@
|
|||
#include <queue>
|
||||
#include <regex>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
#include <map>
|
||||
#include "GhidraParser.hpp"
|
||||
|
||||
std::string file_path;
|
||||
std::string fl_path;
|
||||
bool dbg;
|
||||
bool dbg_prnt_invld_only;
|
||||
bool use_all = true; // 26 invalid parses
|
||||
bool ivld_only;
|
||||
bool use_all = true;
|
||||
/*
|
||||
* 907 cln
|
||||
* 25 ivld
|
||||
*/
|
||||
/*
|
||||
* 1742 cln
|
||||
* 123 ivld
|
||||
*/
|
||||
|
||||
|
||||
#define PL(msg) std::cout << msg << '\n' // print line
|
||||
#define PDBG(msg) if(!dbg) PL(msg) // print line if debug flag is set
|
||||
|
||||
std::smatch match1; // TODO remove later
|
||||
|
||||
std::regex usage_regex(R"(,"(Usage: [^;]+).?"[, \)])"); // .? is just a workaround, since )" in the raw string would terminate it immediately
|
||||
std::regex lua_push_regex(R"(lua_push(.+?)\(())");
|
||||
std::regex lua_is_regex(R"(lua_is(.+?)\(L,(\w+?)\))");
|
||||
std::regex lua_to_regex(R"(lua_to(.+?)\(L,(\w+?)[,\)])");
|
||||
std::regex var_decl_regex(R"(^ (?:int|uint|BOOL) (\w+);)"); // currently only considering these types for declarations
|
||||
std::regex var_ass_regex(R"(^ +?(\w+?) = (\w+) ?(.)? ?(\w+?){0,1};)");
|
||||
|
||||
FunctionInfo fi;
|
||||
std::string func;
|
||||
std::smatch match;
|
||||
std::string lua_type;
|
||||
int lua_index;
|
||||
std::string lua_type;
|
||||
int out_index;
|
||||
std::regex lua_push_regex(R"(lua_push(.+?)\()");
|
||||
std::regex usage_validity_regex(R"(Usage: .+?\((.*?)\))");
|
||||
|
||||
std::unordered_map<std::string, FunctionInfo> fmap;
|
||||
std::chrono::steady_clock::time_point tstart;
|
||||
std::chrono::steady_clock::time_point tend;
|
||||
|
||||
// Renders a varmap as "(a/b, c)": the types of one index are joined with '/',
// the indexes are joined with ", ". An empty varmap is rendered as "()".
std::string get_varmap_info(std::map<int, std::vector<std::string>> varmap)
{
    std::string str = "(";
    bool first_entry = true;
    for (const auto& [key, values] : varmap)
    {
        // separators are written in front of every element but the first one - no trailing
        // separator has to be cut off (which copied the whole string for every index)
        // and an entry without any types can not damage the output
        if (!first_entry) str += ", ";
        first_entry = false;
        bool first_value = true;
        for (const auto& value : values)
        {
            if (!first_value) str += "/";
            first_value = false;
            str += value;
        }
    }
    str += ")";
    return str;
}
|
||||
|
||||
// Parses a decimal or "0x" prefixed hexadecimal number.
// Returns -1 if the input is not a number or does not fit into an int;
// -1 is used for dynamic return.
int parse_int(std::string input)
{
    try
    {
        const bool is_hex = input.compare(0, 2, "0x") == 0;
        return std::stoi(input, nullptr, is_hex ? 16 : 10);
    }
    catch (const std::invalid_argument&) {} // not a number at all
    catch (const std::out_of_range&) {}     // a number, but too big for an int - used to terminate the program
    return -1; // couldn't parse
}
|
||||
|
||||
void pdbg(std::string msg)
|
||||
{
|
||||
if (!dbg) return;
|
||||
std::cout << msg << '\n';
|
||||
}
|
||||
|
||||
// Prints a FunctionInfo to stdout: address (hex) and name, the usage string (if any),
// the in and out parameters and every parse error, followed by an empty line.
void pfi(FunctionInfo& fi)
{
    const char nl = '\n';

    // headline, tagged if the parse was not valid
    std::cout << std::format("{:X}", fi.address) << " " << fi.func;
    if (!fi.valid_parse) std::cout << " (invalid)";
    std::cout << nl;

    if (!fi.usg.empty())
    {
        std::cout << fi.usg << nl;
    }

    // input parameters, -1 means dynamic
    if (fi.param_in_cnt == -1)
    {
        std::cout << "in: dynamic" << nl;
    }
    else
    {
        std::cout << "in: ";
        if (fi.param_in_cnt == 0) std::cout << "0";
        else std::cout << std::to_string(fi.param_in_cnt) + " " + get_varmap_info(fi.param_in);
        std::cout << nl;
    }

    // output parameters, -1 means dynamic
    if (fi.param_out_cnt == -1)
    {
        std::cout << "out: dynamic" << nl;
    }
    else
    {
        std::cout << "out: ";
        if (fi.param_out_cnt == 0) std::cout << "0";
        else std::cout << std::to_string(fi.param_out_cnt) + " " + get_varmap_info(fi.param_out);
        std::cout << nl;
    }

    for (const auto& error : fi.parse_errors)
    {
        std::cout << error << nl;
    }
    std::cout << std::endl;
}
|
||||
|
||||
bool process_varmap_regex(std::map<int, std::vector<std::string>> *varmap, std::string line, std::regex* regex)
|
||||
bool process_varmap_regex(std::map<int, std::vector<std::string>>& varmap, std::string ln, std::regex* regex)
|
||||
{
|
||||
bool found = false;
|
||||
while (std::regex_search(line, match, *regex))
|
||||
while (std::regex_search(ln, match1, *regex))
|
||||
{
|
||||
found = true;
|
||||
lua_type = match[1];
|
||||
lua_type = match1[1];
|
||||
if (lua_type == "lstring") lua_type = "string"; // lstring is a string!
|
||||
if (lua_type == "fstring") lua_type = "string"; // frsting is a string!
|
||||
else if (lua_type == "fstring") lua_type = "string"; // frsting is a string!
|
||||
if (regex == &lua_push_regex)
|
||||
{ // push parsing uses a global index starting with 1
|
||||
//if (lua_type == "nil") return false; // skipping lua_pushnil
|
||||
lua_index = out_index;
|
||||
out_index++;
|
||||
}
|
||||
else lua_index = parse_int(match[2]);
|
||||
else lua_index = prsi(match1[2]).val;
|
||||
if (lua_index == -1)
|
||||
{ // -1 means there wasn't a literal used for accessing the index, so i can not parse it
|
||||
if (regex == &lua_push_regex) fi.param_out_cnt = -1;
|
||||
else fi.param_in_cnt = -1;
|
||||
if (regex == &lua_push_regex) fi.out_cnt = -1;
|
||||
else fi.in_cnt = -1; // TODO might be possible to parse when evaluating variables
|
||||
return found;
|
||||
}
|
||||
(*varmap)[lua_index].push_back(lua_type); // always push lua type for now
|
||||
//if (varmap->count(lua_index) == 0) (*varmap)[lua_index].push_back(lua_type); // new entry
|
||||
//else if ((*varmap)[lua_index] != lua_type)
|
||||
//{ // already got an entry, check if it has the same type
|
||||
// fi.parse_errors.push_back("parameter type mismatch for index " + std::to_string(lua_index) + " - " + (*varmap)[lua_index] + " vs " + lua_type);
|
||||
//}
|
||||
line = match.suffix();
|
||||
varmap[lua_index].push_back(lua_type); // always push lua type for now
|
||||
ln = match1.suffix();
|
||||
}
|
||||
return found;
|
||||
}
|
||||
|
||||
void chk_vld(FunctionInfo* fi)
|
||||
{ // check validity of parsed info - default value is false, so i simply return if invalid
|
||||
|
||||
bool valid = true;
|
||||
if (fi->parse_errors.size() > 0) valid = false; // any parsing errors occurred
|
||||
if (fi->param_out_cnt != -1 && fi->param_out_cnt != fi->param_out.size())
|
||||
void pfmap(bool ivld_only)
|
||||
{
|
||||
for (const auto& [name, fi] : fmap)
|
||||
{
|
||||
fi->parse_errors.push_back("output param mismatch between found number of return values (" + std::to_string(fi->param_out_cnt) +") and lua_push* calls (" + std::to_string(fi->param_out.size()) + ")");
|
||||
valid = false;
|
||||
if (!ivld_only || !fi.prs_vld)
|
||||
PL(fi.str() + "\n");
|
||||
}
|
||||
if (fi->param_in_cnt != -1 && !fi->usg.empty())
|
||||
{ // do extra check against usg string
|
||||
std::string usage_params;
|
||||
if (std::regex_search(fi->usg, match, usage_validity_regex))
|
||||
{
|
||||
usage_params = match[1];
|
||||
if (fi->param_in_cnt == 0 && !usage_params.empty())
|
||||
{
|
||||
valid = false;
|
||||
fi->parse_errors.push_back("input param count does not match usage string");
|
||||
}
|
||||
else
|
||||
{
|
||||
int comma_cnt = 0;
|
||||
for (auto ch : usage_params) if (ch == ',') comma_cnt++;
|
||||
if (fi->param_in_cnt != comma_cnt + 1)
|
||||
{
|
||||
valid = false;
|
||||
fi->parse_errors.push_back("input param count does not match usage string");
|
||||
}
|
||||
}
|
||||
}
|
||||
else fi->parse_errors.push_back("usage string malformed"); // if the usg string is malformed, do not consider it for validity
|
||||
}
|
||||
if (fi->param_in_cnt > 0)
|
||||
{ // check if input param indexes are in order
|
||||
for (int i = 1; i <= fi->param_in_cnt; i++)
|
||||
{ // lua indexes start with 1
|
||||
if (!fi->param_in.contains(i))
|
||||
{
|
||||
fi->parse_errors.push_back("input param index not in order");
|
||||
valid = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
fi->valid_parse = valid;
|
||||
}
|
||||
|
||||
void sort_prune_varmap(std::map<int, std::vector<std::string>> *varmap)
|
||||
{
|
||||
for (auto &[lua_index, lua_types] : *varmap)
|
||||
void pstats()
|
||||
{ // print parsing statistics
|
||||
int ttl = 0;
|
||||
int cln = 0;
|
||||
int ivld = 0;
|
||||
for (const auto& [name, fi] : fmap)
|
||||
{
|
||||
lua_types.erase(std::unique(lua_types.begin(), lua_types.end()), lua_types.end());
|
||||
std::sort(lua_types.begin(), lua_types.end());
|
||||
ttl++;
|
||||
cln += fi.prs_vld && fi.in_cnt != -1 && fi.out_cnt != -1 && !fi.nil_in_varmap(false);
|
||||
ivld += !fi.prs_vld;
|
||||
}
|
||||
}
|
||||
|
||||
// Checks whether the text lk_for is found in ln exactly at position idx.
// Returns false if idx is negative or if there are not enough characters left in ln;
// formerly an idx beyond the end of ln slipped through the (unsigned) length check
// and made substr throw.
inline bool ln_is(std::string &ln, int idx, const char* lk_for)
{
    if (idx < 0 || lk_for == nullptr) return false;
    const std::string::size_type pos = static_cast<std::string::size_type>(idx);
    const std::string::size_type len = std::char_traits<char>::length(lk_for);
    if (pos > ln.length() || ln.length() - pos < len) return false; // not enough characters left
    return ln.compare(pos, len, lk_for) == 0; // compares in place, no temporary string
}
|
||||
|
||||
__forceinline void set_str_if_empty(std::string *init, std::string repl)
|
||||
{
|
||||
*init = init->empty() ? repl : *init;
|
||||
auto dur = duration_cast<std::chrono::milliseconds>(tend - tstart);
|
||||
PL(std::format("Functions parsed: {}", ttl));
|
||||
PL(std::format("Clean: {}", cln));
|
||||
PL(std::format("Invalid: {}", ivld));
|
||||
PL(std::format("Duration: {} ms", dur.count()));
|
||||
}
|
||||
|
||||
int main()
|
||||
|
@ -193,82 +117,98 @@ int main()
|
|||
* skip scanning for output params, if returns with different non-0 values have been found (or it can not be parsed as int)
|
||||
*/
|
||||
|
||||
tstart = std::chrono::high_resolution_clock::now();
|
||||
if (use_all)
|
||||
{
|
||||
dbg = true;
|
||||
dbg_prnt_invld_only = true;
|
||||
file_path = R"(C:\Users\alphaomega\Documents\Wow.exe.c.txt)";
|
||||
ivld_only = true;
|
||||
fl_path = R"(C:\Users\alphaomega\Documents\Wow.exe.c.txt)";
|
||||
}
|
||||
else
|
||||
{
|
||||
dbg = true;
|
||||
dbg_prnt_invld_only = false;
|
||||
file_path = R"(C:\Users\alphaomega\Documents\Wow.exe.c.test.txt)";
|
||||
ivld_only = false;
|
||||
fl_path = R"(C:\Users\alphaomega\Documents\Wow.exe.c.test.txt)";
|
||||
}
|
||||
|
||||
auto tstart = std::chrono::high_resolution_clock::now();
|
||||
|
||||
|
||||
std::ifstream source_file(file_path);
|
||||
std::ifstream source_file(fl_path);
|
||||
if (!source_file.is_open())
|
||||
{
|
||||
std::cerr << "Unable to open file " << file_path << std::endl;
|
||||
std::cerr << "Unable to open file " << fl_path << std::endl;
|
||||
return 1;
|
||||
}
|
||||
std::unordered_map<std::string, int> func_vars;
|
||||
|
||||
std::regex usage_regex(R"(,"(Usage: [^;]+)N{0}"[, \)])"); // N{0} is just a workaround, since )" in the raw string would terminate it immediately
|
||||
std::regex lua_is_regex(R"(lua_is(.+?)\()");
|
||||
std::regex lua_to_regex(R"(lua_to(.+?)\()");
|
||||
|
||||
std::string skp_push_utl = ""; // skip push parsing until this line has been reached
|
||||
std::string skp_push_utl = ""; // skip push parsing until this ln has been reached
|
||||
std::string lp_utl = "";
|
||||
std::map<int, bool> push_track;
|
||||
std::unordered_map<std::string, bool> push_track;
|
||||
|
||||
bool in_func = false;
|
||||
bool enc_usg = false; // encountered usg
|
||||
bool prc_def = false;
|
||||
uint ret_val; // return value
|
||||
bool is_if_ln;
|
||||
std::string ind; // indent
|
||||
std::string ln; // line
|
||||
std::string ln;
|
||||
|
||||
std::vector<char>::iterator eol;
|
||||
while (std::getline(source_file, ln))
|
||||
{
|
||||
{ // check for lines which can instantly be discarded
|
||||
if (ln.empty()) continue; // skip empty lines
|
||||
if (ln.starts_with("/*")) continue; // skip comment lines
|
||||
}
|
||||
if (ln.empty()) continue; // skip empty lines
|
||||
if (ln.starts_with("/*")) continue; // skip block comment lines
|
||||
if (!in_func)
|
||||
{ // searching for next function
|
||||
if (ln.starts_with("// ADDRESS - "))
|
||||
{ // found address ln - create new FunctionInfo and reset variables
|
||||
{ // found addr ln - create new FunctionInfo and reset variables
|
||||
fi = {};
|
||||
fi.address = parse_int(ln.substr(13));
|
||||
fi.addr = std::stoi(&ln[13], 0, 16); // do not use prsi, these values prb occur only once, so no caching wanted
|
||||
out_index = 0;
|
||||
enc_usg = false;
|
||||
lp_utl = "";
|
||||
skp_push_utl = "";
|
||||
push_track = {};
|
||||
func_vars = {};
|
||||
continue;
|
||||
}
|
||||
if (ln.starts_with("uint lua_wow_"))
|
||||
{ // found ln with function signature
|
||||
fi.func = ln.substr(13, ln.find_first_of("(") - 13); // get part of real function name
|
||||
fi.nm = ln.substr(13, ln.find_first_of("(") - 13); // get part of real function name
|
||||
in_func = true; // start function parsing from now on
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
{ // processing current function
|
||||
is_if_ln = false;
|
||||
if (ln == "{")
|
||||
{ // this is the starting block of the function
|
||||
prc_def = true; // enable regex handling for local variable definitions
|
||||
continue;
|
||||
}
|
||||
if (ln == "}")
|
||||
{ // end of function found
|
||||
in_func = false;
|
||||
if (fi.param_in_cnt == -1) fi.param_in.clear();
|
||||
else fi.param_in_cnt = fi.param_in.size();
|
||||
if (fi.param_out_cnt == -1) fi.param_out.clear();
|
||||
sort_prune_varmap(&fi.param_in);
|
||||
chk_vld(&fi);
|
||||
fmap[fi.func] = fi;
|
||||
//pdbg("# END OF " + fi.func);
|
||||
fi.cln_varmap(true);
|
||||
fi.cln_varmap(false);
|
||||
fi.chk_vld();
|
||||
fmap[fi.nm] = fi;
|
||||
continue;
|
||||
}
|
||||
is_if_ln = false;
|
||||
if (prc_def)
|
||||
{ // if local variable definition parsing is enabled
|
||||
if (ln == " ")
|
||||
{ // reached end variable definition block, no further processing required
|
||||
prc_def = false;
|
||||
continue;
|
||||
}
|
||||
if (std::regex_search(ln, match1, var_decl_regex))
|
||||
{ // found local variable, track it with init value 0
|
||||
func_vars[match1[1]] = 0;
|
||||
}
|
||||
|
||||
}
|
||||
int i = 0;
|
||||
while (i < ln.length())
|
||||
{
|
||||
|
@ -283,135 +223,133 @@ int main()
|
|||
{
|
||||
case 'c':
|
||||
case 'd':
|
||||
if (ln_is(ln, i, "do {"))
|
||||
if (lnsw(ln, i, "do {"))
|
||||
{ // setting loop end, if not already in a loop
|
||||
set_str_if_empty(&lp_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1"); // check if this is a single line do-while loop (who would even program such a thing!?
|
||||
ssie(lp_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1"); // check if this is a single ln do-while loop (who would even program such a thing!?
|
||||
break;
|
||||
}
|
||||
if (ln_is(ln, i, "case ") || ln_is(ln, i, "default:"))
|
||||
if (lnsw(ln, i, "case ") || lnsw(ln, i, "default:"))
|
||||
{
|
||||
if (push_track[ind.length() / 2]) set_str_if_empty(&skp_push_utl, ind + "}");
|
||||
else push_track[ind.length() / 2] = false; // only execute if a previous case didn't already contain a push
|
||||
if (push_track[ind]) ssie(skp_push_utl, ind + "}");
|
||||
else push_track[ind] = false; // only execute if a previous case didn't already contain a push
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 'f':
|
||||
if (ln_is(ln, i, "for ("))
|
||||
if (lnsw(ln, i, "for ("))
|
||||
{
|
||||
set_str_if_empty(&lp_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1"); // check if this is a single line for loop
|
||||
ssie(lp_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1"); // check if this is a single ln for loop
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 'i': // check for if
|
||||
if (ln_is(ln, i, "if "))
|
||||
if (lnsw(ln, i, "if "))
|
||||
{ // enough characters left to be if statement
|
||||
is_if_ln = true;
|
||||
push_track[ind.length() / 2] = false;
|
||||
push_track[ind] = false;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 'e':
|
||||
if (ln_is(ln, i, "else "))
|
||||
if (lnsw(ln, i, "else "))
|
||||
{ // enough characters left to be if statement
|
||||
if (push_track[ind.length() / 2]) set_str_if_empty(&skp_push_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1");
|
||||
if (push_track[ind]) ssie(skp_push_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 'g':
|
||||
if (!fi.param_out.empty() && ln_is(ln, i, "goto "))
|
||||
if (!fi.out.empty() && lnsw(ln, i, "goto "))
|
||||
{
|
||||
set_str_if_empty(&skp_push_utl, ln.substr(i + 5, ln.length() - i - 6) + ":");
|
||||
ssie(skp_push_utl, std::string(ln.substr(i + 5, ln.length() - i - 6)) + ":");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 'r': // check for return
|
||||
if (fi.param_out_cnt == -1) break; // this function has a dynamic number of outputs, no need for further processing
|
||||
if (ln_is(ln, i, "return "))
|
||||
if (fi.out_cnt == -1) break; // this function has a dynamic number of outputs, no need for further processing
|
||||
if (lnsw(ln, i, "return "))
|
||||
{ // enough characters left to be the simplest return
|
||||
ret_val = parse_int(ln.substr(i + 7, ln.length() - i - 8));
|
||||
ret_val = prsi(std::string(&ln[i + 7], ln.length() - i - 8)).val;
|
||||
// check if already encountered return value (except 0) matches; dynamic if not
|
||||
if (ret_val != 0)
|
||||
{ // TODO probably need to check if i am currently skipping push
|
||||
fi.param_out_cnt = (fi.param_out_cnt == 0 || fi.param_out_cnt == ret_val) ? ret_val : -1;
|
||||
if (ret_val == fi.param_out.size())
|
||||
fi.out_cnt = (fi.out_cnt == 0 || fi.out_cnt == ret_val) ? ret_val : -1;
|
||||
if (ret_val == fi.out.size())
|
||||
{ // found a return statement and return value matches output param count
|
||||
set_str_if_empty(&skp_push_utl, "\1skip2end");
|
||||
ssie(skp_push_utl, "\1skip2end");
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 'w':
|
||||
if (ln_is(ln, i, "while ("))
|
||||
if (lnsw(ln, i, "while ("))
|
||||
{ // setting loop end, if not already in a loop
|
||||
set_str_if_empty(&lp_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1"); // check if this is a single line while loop
|
||||
ssie(lp_utl, ln[ln.length() - 1] == '{' ? ind + "}" : "\1"); // check if this is a single ln while loop
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
//pdbg(ln);
|
||||
//pdbg(ind + "<-");
|
||||
//if (auto match = ctre::match<var_ass_regex>(ln))
|
||||
|
||||
if (std::regex_search(ln, match1, var_ass_regex))
|
||||
{
|
||||
|
||||
int i = 0;
|
||||
//auto test = match.get<1>();
|
||||
//pdbg("variable assignment happens for " + match.get<1>().to_string());
|
||||
}
|
||||
|
||||
if (!enc_usg)
|
||||
{ // usage string can only occur once anyway
|
||||
if (std::regex_search(ln, match, usage_regex))
|
||||
//if (auto match = ctre::match<usage_regex>(ln))
|
||||
if (std::regex_search(ln, match1, usage_regex))
|
||||
{ // found usg string
|
||||
fi.usg = match[1];
|
||||
fi.usg = match1[1];
|
||||
size_t found = -1; // so the first find uses 0 through the increment
|
||||
while ((found = fi.usg.find("\\\"", found + 1)) != std::string::npos) fi.usg.replace(found, 2, "\"");
|
||||
enc_usg = true;
|
||||
skp_push_utl = !skp_push_utl.empty() ? skp_push_utl : "\1"; // sometimes lua_pushfstring is used before lua_error; do not consider this an output!
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
if (fi.param_in_cnt != -1)
|
||||
if (fi.in_cnt != -1)
|
||||
{
|
||||
process_varmap_regex(&fi.param_in, ln, &lua_is_regex);
|
||||
process_varmap_regex(&fi.param_in, ln, &lua_to_regex);
|
||||
process_varmap_regex(fi.in, ln, &lua_is_regex);
|
||||
process_varmap_regex(fi.in, ln, &lua_to_regex);
|
||||
}
|
||||
if (fi.param_out_cnt != -1 && skp_push_utl.empty() && process_varmap_regex(&fi.param_out, ln, &lua_push_regex))
|
||||
if (fi.out_cnt != -1 && skp_push_utl.empty() && process_varmap_regex(fi.out, ln, &lua_push_regex))
|
||||
{
|
||||
if (!lp_utl.empty())
|
||||
{ // found lua_push* inside a loop
|
||||
fi.param_out_cnt = -1;
|
||||
fi.out_cnt = -1;
|
||||
}
|
||||
else
|
||||
{ // normal processing
|
||||
int lvl = is_if_ln ? ind.length() / 2 : (ind.length() - 2) / 2; // do not subtract 1 block level, if this was a simple if line
|
||||
while (lvl >= 1)
|
||||
|
||||
std::string lvl = is_if_ln ? ind : ind.substr(0, ind.length() - 2); // do not subtract 1 block level, if this was a simple if ln
|
||||
while (lvl >= " ")
|
||||
{ // if a push was found, track it for this and all lower block levels
|
||||
push_track[lvl] = true;
|
||||
lvl--;
|
||||
lvl = lvl.substr(0, lvl.length() - 2);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
if (!lp_utl.empty() && ln.starts_with(lp_utl))
|
||||
lp_utl = "";
|
||||
if (lp_utl == "\1")
|
||||
lp_utl = "";
|
||||
if (!skp_push_utl.empty() && ln.starts_with(skp_push_utl))
|
||||
skp_push_utl = ""; // reset skip since line has been reached now
|
||||
if (skp_push_utl == "\1")
|
||||
skp_push_utl = ""; // reset temporary skip which was used for 1 line
|
||||
if (!lp_utl.empty() && ln.starts_with(lp_utl)) lp_utl = "";
|
||||
if (lp_utl == "\1") lp_utl = "";
|
||||
if (!skp_push_utl.empty() && ln.starts_with(skp_push_utl)) skp_push_utl = ""; // reset skip since ln has been reached now
|
||||
if (skp_push_utl == "\1") skp_push_utl = ""; // reset temporary skip which was used for 1 ln
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
auto tend = std::chrono::high_resolution_clock::now();
|
||||
auto duration = duration_cast<std::chrono::milliseconds>(tend - tstart);
|
||||
int cnt_invalid = 0;
|
||||
int cnt_total = 0;
|
||||
for(auto &[name, fi] : fmap)
|
||||
{
|
||||
if (dbg && (!dbg_prnt_invld_only || !fi.valid_parse)) pfi(fi);
|
||||
cnt_invalid += fi.valid_parse ? 0 : 1;
|
||||
cnt_total++;
|
||||
|
||||
}
|
||||
|
||||
tend = std::chrono::high_resolution_clock::now();
|
||||
if (dbg) pfmap(ivld_only);
|
||||
std::cout << "GhidraParser is done..." << std::endl;
|
||||
std::cout << "Functions parsed: " << std::to_string(cnt_total) << std::endl;
|
||||
std::cout << "Invalid parses: " << std::to_string(cnt_invalid) << std::endl;
|
||||
std::cout << "duration: " << duration.count() << " ms" << std::endl;
|
||||
pstats();
|
||||
std::cin.ignore();
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,21 +1 @@
|
|||
#pragma once
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
typedef unsigned int uint; // i am lazy
|
||||
typedef unsigned char byte;
|
||||
|
||||
//#pragma pack(push, 1)
|
||||
struct FunctionInfo
|
||||
{
|
||||
uint address = 0;
|
||||
std::string func = "";
|
||||
std::string usg = "";
|
||||
std::map<int, std::vector<std::string>> param_in{};
|
||||
int param_in_cnt = 0;
|
||||
std::map<int, std::vector<std::string>> param_out{};
|
||||
int param_out_cnt = 0;
|
||||
std::vector<std::string> parse_errors;
|
||||
bool valid_parse = false;
|
||||
};
|
||||
//#pragma pack(pop)
|
||||
|
|
|
@ -106,6 +106,8 @@
|
|||
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<LanguageStandard>stdcpp20</LanguageStandard>
|
||||
<AdditionalIncludeDirectories>
|
||||
</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
|
@ -121,6 +123,7 @@
|
|||
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<LanguageStandard>stdcpp20</LanguageStandard>
|
||||
<AdditionalIncludeDirectories>C:\Users\alphaomega\.vcpkg\installed\x64-windows\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
|
@ -130,12 +133,15 @@
|
|||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="FunctionInfo.cpp" />
|
||||
<ClCompile Include="GhidraParser.cpp" />
|
||||
<ClCompile Include="Utility.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="FunctionInfo.hpp" />
|
||||
<ClInclude Include="GhidraParser.hpp" />
|
||||
<ClInclude Include="Utility.hpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="ExtensionTargets" />
|
||||
</Project>
|
|
@ -18,10 +18,22 @@
|
|||
<ClCompile Include="GhidraParser.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Utility.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="FunctionInfo.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="GhidraParser.hpp">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Utility.hpp">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="FunctionInfo.hpp">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
</Project>
|
|
@ -0,0 +1,61 @@
|
|||
#include "Utility.hpp"
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
std::unordered_map<std::string, ParseResult> prs_cache
|
||||
{
|
||||
{ "FALSE", { true, 0 } },
|
||||
{ "TRUE", { true, 1 } },
|
||||
};
|
||||
|
||||
// Parses a decimal or "0x" prefixed hexadecimal number and caches the outcome in prs_cache,
// so every distinct text is only parsed once.
// If str is not parsable (not a number, or too big for an int), the result has
// prsbl == false and val == -1; this negative outcome is cached as well.
ParseResult prsi(std::string str)
{
    if (const auto hit = prs_cache.find(str); hit != prs_cache.end()) return hit->second; // cache hit

    // cache miss, trying to parse and caching the result
    ParseResult res{ false, -1 }; // value doesn't really matter for the not parsable case
    try
    {
        const bool is_hex = str.starts_with("0x");
        res = { true, std::stoi(str, nullptr, is_hex ? 16 : 10) };
    }
    catch (const std::invalid_argument&) {} // not a number at all
    catch (const std::out_of_range&) {}     // a number, but too big for an int - used to terminate the program
    prs_cache.emplace(str, res);
    return res;
}
|
||||
|
||||
std::vector<char>* lf(std::string& path)
|
||||
{
|
||||
return lf(path.c_str());
|
||||
}
|
||||
|
||||
// Loads the whole file at path (binary mode) into a newly allocated vector.
// Returns nullptr if the file could not be opened or its size could not be determined.
// The returned vector was created with new - the caller has to delete it.
std::vector<char>* lf(const char* path)
{
    if (path == nullptr) return nullptr;
    std::ifstream file(path, std::ios::binary);
    if (!file.is_open()) return nullptr;
    file.seekg(0, std::ios::end);
    const std::streamoff len = file.tellg();
    if (len < 0) return nullptr; // tellg reports failure as -1, which must not be used as a size
    auto vec = new std::vector<char>(static_cast<std::vector<char>::size_type>(len));
    if (!vec->empty())
    {
        file.seekg(0, std::ios::beg);
        // size() is the number of valid elements - capacity() is allowed to be bigger
        file.read(vec->data(), static_cast<std::streamsize>(vec->size()));
        vec->resize(static_cast<std::vector<char>::size_type>(file.gcount())); // keep only what was really read
    }
    return vec;
}
|
||||
|
||||
// "line starts with": checks whether the text sw is found in ln exactly at position idx.
// Returns false if idx is negative or if there are not enough characters left in ln;
// formerly an idx beyond the end of ln slipped through the (unsigned) length check
// and made substr throw.
bool lnsw(std::string& ln, int idx, const char* sw)
{
    if (idx < 0 || sw == nullptr) return false;
    const std::string::size_type pos = static_cast<std::string::size_type>(idx);
    const std::string::size_type len = std::char_traits<char>::length(sw);
    if (pos > ln.length() || ln.length() - pos < len) return false; // not enough characters left
    return ln.compare(pos, len, sw) == 0; // compares in place, no temporary string
}
|
||||
|
||||
// "set string if empty": str takes the value of repl, but only if str is empty;
// a non-empty str is left untouched.
void ssie(std::string& str, std::string repl)
{
    if (!str.empty()) return;
    str = repl;
}
|
|
@ -0,0 +1,16 @@
|
|||
#pragma once
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// Outcome of an integer parse.
// The default member initializers make sure a default constructed ParseResult
// never carries indeterminate values; it stays an aggregate, so { true, 1 } still works.
struct ParseResult
{
    bool prsbl = false; // was parsable
    int val = 0;        // value which it was parsed into
};
|
||||
|
||||
ParseResult prsi(std::string str);
|
||||
std::vector<char>* lf(std::string& path);
|
||||
std::vector<char>* lf(const char* path);
|
||||
bool lnsw(std::string& ln, int idx, const char* sw);
|
||||
void ssie(std::string& str, std::string repl);
|
||||
void pdbg(std::string& msg);
|
Loading…
Reference in New Issue