LuaFunctionRegisterSpreadsheet/GhidraParser/GhidraParser.cpp

127 lines
3.5 KiB
C++
Raw Normal View History

#include "FunctionInfo.hpp"
#include "Utility.hpp"
#include <chrono>
#include <format>
#include <fstream>
#include <iostream>
#include <regex>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
// Assigned in main(); when true, main() dumps the parsed function map through pfmap() after parsing.
bool dbg;
// Assigned in main() and handed to pfmap(): when true, only entries whose prs_vld is false are printed.
bool ivld_only;
// Compile-time switch read by main(): selects which hard-coded input file is parsed
// and which value ivld_only receives.
bool use_all = true;
// NOTE(review): the two comment blocks below look like recorded "Clean" / "Invalid"
// counts (as printed by pstats()) from earlier runs - confirm with the author.
/*
* 907 cln
* 25 ivld
*/
/*
* 1742 cln
* 123 ivld
*/
// Print msg through PL only when the global debug flag `dbg` is set.
// The do/while(0) wrapper makes the macro behave like one statement, so a
// following `else` in the calling code cannot attach to the macro's own `if`.
#define PDBG(msg) do { if (dbg) PL(msg); } while (0)
// Every parsed function, keyed by the nm member of its FunctionInfo.
// Filled while the input file is parsed; read by pfmap() and pstats().
std::unordered_map<std::string, FunctionInfo> fmap;
// Time point taken in main() right before the parsing loop starts.
std::chrono::steady_clock::time_point tstart;
// Time point taken in main() right after the parsing loop ends; pstats() reports tend - tstart.
std::chrono::steady_clock::time_point tend;
/*
 * Print the string form (str()) of the entries stored in fmap, each followed
 * by an additional "\n".
 *
 * ivld_only - when true, entries whose prs_vld flag is set are skipped, so only
 *             the invalid ones are printed; when false, every entry is printed.
 *             Note: this parameter shadows the global flag of the same name.
 */
void pfmap(bool ivld_only)
{
    for (const auto& entry : fmap)
    {
        const auto& fi = entry.second;
        // Only filter when the caller asked for invalid entries exclusively.
        if (ivld_only && fi.prs_vld)
        {
            continue;
        }
        PL(fi.str() + "\n");
    }
}
/*
 * Print the parsing statistics gathered from fmap:
 *   - the number of entries,
 *   - the number of "clean" entries: prs_vld is set, in_cnt and out_cnt are both
 *     different from -1, and nil_in_varmap(false) returns false,
 *   - the number of entries whose prs_vld is not set,
 *   - the time between the global time points tstart and tend in milliseconds.
 */
void pstats()
{
    int total = 0;
    int clean = 0;
    int invalid = 0;
    for (const auto& entry : fmap)
    {
        const auto& info = entry.second;
        ++total;
        if (info.prs_vld && info.in_cnt != -1 && info.out_cnt != -1 && !info.nil_in_varmap(false))
        {
            ++clean;
        }
        if (!info.prs_vld)
        {
            ++invalid;
        }
    }
    const auto elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(tend - tstart).count();
    PL(std::format("Functions parsed: {}", total));
    PL(std::format("Clean: {}", clean));
    PL(std::format("Invalid: {}", invalid));
    PL(std::format("Duration: {} ms", elapsed_ms));
}
/*
 * Read the decompiler output from `in` line by line, collect the lines of each
 * function and store one FunctionInfo per function in fmap (keyed by its nm).
 *
 * A function starts at a line beginning with "// ADDRESS - " and ends at the
 * next line that consists of exactly "}". Empty lines and lines starting with
 * a block-comment opener are dropped. Lines outside of a function are ignored.
 */
static void parse_function_blocks(std::istream& in)
{
    bool rec_src = false; // true while the lines of a function are being recorded
    std::vector<std::string> src;
    std::string ln;
    while (std::getline(in, ln))
    {
        if (ln.empty()) continue;           // skip empty lines
        if (ln.starts_with("/*")) continue; // skip block comment lines
        if (ln.starts_with("// ADDRESS - "))
        {
            rec_src = true; // found the starting line of a function's source
        }
        // A line (including a stray "}") that does not belong to a recorded
        // function must not produce a FunctionInfo built from an empty source.
        if (!rec_src) continue;
        src.push_back(ln);
        if (ln != "}") continue;
        // End of function code reached.
        auto fi = FunctionInfo(src);
        // There is no handling for duplicate entries because of performance -
        // there shouldn't be any anyway; a later entry replaces an earlier one.
        // fi is not used afterwards, so it can be moved into the map.
        fmap[fi.nm] = std::move(fi);
        src.clear();
        rec_src = false;
    }
}

/*
 * Entry point: parses the decompiled source file, optionally dumps the parsed
 * functions, prints the statistics and waits for input before exiting.
 *
 * argv[1] (optional) overrides the built-in input file path; without it the
 * path selected by use_all is used.
 *
 * Returns 1 if the input file cannot be opened, otherwise 0.
 */
int main(int argc, char* argv[])
{
    /*
    * PARSING RULES
    * lua_is* calls can occur in if-lines
    * lua_to* calls can occur in if-lines
    * lua_push* calls can NOT occur in if-lines
    * lua_push* calls are not called with an index as param, unlike lua_is* / lua_to* - the index is inferred by the order of calls
    * lua_push* calls can occur in branch structures (if/else, switch) - they must only be considered in one of them
    * wow functions return the number of outputs - 0 when they error or there are none; consider any non-0 return as the real number of outputs
    * wow functions may return a dynamic number of outputs - there is no way to parse this into a single result
    * skip scanning for the usg string if it already has been encountered in the function, there can only be 1
    * skip scanning for input params, if unparsable index (not into int) has been found for lua_is* or lua_to* (dynamic inputs)
    * skip scanning for output params, if unparsable index (not into int) has been found for lua_push* (dynamic outputs)
    * skip scanning for output params, if returns with different non-0 values have been found (or it can not be parsed as int)
    */
    dbg = true;          // both configurations dump the parsed map after parsing
    ivld_only = use_all; // full input: show only invalid entries; test input: show all
    std::string fl_path = use_all
        ? R"(C:\Users\alphaomega\Documents\Wow.exe.c.txt)"
        : R"(C:\Users\alphaomega\Documents\Wow.exe.c.test.txt)";
    if (argc > 1)
    {
        fl_path = argv[1]; // explicit path from the command line wins over the default
    }

    std::ifstream source_file(fl_path);
    if (!source_file.is_open())
    {
        std::cerr << "Unable to open file " << fl_path << std::endl;
        return 1;
    }

    // tstart / tend are steady_clock time points, so they must be taken from
    // steady_clock; high_resolution_clock is a different type on some
    // standard libraries.
    tstart = std::chrono::steady_clock::now();
    parse_function_blocks(source_file);
    tend = std::chrono::steady_clock::now();

    if (dbg) pfmap(ivld_only);
    std::cout << "GhidraParser is done..." << std::endl;
    pstats();
    std::cin.ignore(); // keep the console open until the user presses enter
    return 0;
}