From 5aa83f0b84a6384e852654f8e27c566467102b76 Mon Sep 17 00:00:00 2001 From: Vidya Date: Mon, 25 Aug 2025 18:17:08 +0000 Subject: [PATCH 01/14] Scalability update1 : Added helper functions in syspartUtility, replace vector with unordered_set in func args etc --- analysis/app/src/syspart.cpp | 6 +- analysis/app/src/syspartUtility.cpp | 1708 ++++++++++++++------------- analysis/app/src/syspartUtility.h | 110 +- 3 files changed, 1020 insertions(+), 804 deletions(-) diff --git a/analysis/app/src/syspart.cpp b/analysis/app/src/syspart.cpp index bf6538e..a6213a5 100644 --- a/analysis/app/src/syspart.cpp +++ b/analysis/app/src/syspart.cpp @@ -2285,7 +2285,7 @@ void Syspart::getArgumentValue(bool icanalysisFlag, bool typearmorFlag, string f //ip_callgraph.addNssEdges(); SyspartUtility util(program, &ip_callgraph, 0); util.initialize(); - vector res; + std::unordered_set res; util.getArgumentsPassedToFunction(func, reg , res); } @@ -2318,13 +2318,13 @@ void Syspart::printDlArgs(string dlname) initFuncs = ip_callgraph.getInitFuncs(); SyspartUtility util(program, &ip_callgraph, 0); util.initialize(); - vector res; int reg; if(dlname == "dlopen") reg = 7; else if(dlname == "dlsym") reg = 6; - util.getArgumentsPassedToFunction(func, reg , res); + std::unordered_set res; + util.getArgumentsPassedToFunction(func, reg , res); } diff --git a/analysis/app/src/syspartUtility.cpp b/analysis/app/src/syspartUtility.cpp index 755cf8a..dd200b2 100644 --- a/analysis/app/src/syspartUtility.cpp +++ b/analysis/app/src/syspartUtility.cpp @@ -21,195 +21,373 @@ using namespace std; #include "log/log.h" -void SyspartUtility::getArgumentsPassedToFunction(Function* func, int reg, vector &result) +vector SyspartUtility::matchLoadMemoryToStores(Function* func, UDState* orig, set storeNodes) { - auto gl = ip_callgraph->getGlobalATList(); - if(gl.count(func) != 0) - { - UDResult res {0, 0, "unknown", func}; - if(find(result.begin(), result.end(), res) == result.end()) + vector result; + auto orig_instr 
= orig->getInstruction(); + auto workingSet = df.getWorkingSet(func); + auto cfg = workingSet->getCFG(); + + Block* orig_block = NULL; + for(auto block : CIter::children(func)) { - result.push_back(res); + for(auto instr : CIter::children(block)) + { + if(instr->getAddress() == orig_instr->getAddress()) + { + orig_block = block; + } + } } - return; - } - tuple visited_tup{func->getName(), func->getAddress(), reg}; - if(find(visitedFuncRegs.begin(), visitedFuncRegs.end(), visited_tup) != visitedFuncRegs.end()) - { - - UDResult res {0, 0, "unknown", func}; - if(find(result.begin(), result.end(), res) == result.end()) + + stack st; + st.push(orig_block); + set already_pushed; + already_pushed.insert(orig_block); + bool foundFlag = false; + while(!st.empty()) { - result.push_back(res); + foundFlag = false; + auto cur_block = st.top(); + auto node_id = cfg->getIDFor(cur_block); + auto node = cfg->get(node_id); + st.pop(); + vector reversed; + for(auto instr : CIter::children(cur_block)) + { + reversed.push_back(instr); + } + for(auto it = reversed.rbegin(); it != reversed.rend(); ++it) + { + auto instr = *it; + auto instr_state = workingSet->getState(instr); + if(storeNodes.find(instr_state) != storeNodes.end()) + { foundFlag = true; + //LOG(1,"Found store instruction "<getAddress()); + result.push_back(instr_state); + break; + } + + } + if(foundFlag) + { + //LOG(1, "NOT ADDING NEIGHBORS OF "<getName()); + continue; + } + for(auto link : node->backwardLinks()) + { + auto neighbor_block = cfg->get(link->getTargetID())->getBlock(); + if(already_pushed.find(neighbor_block) == already_pushed.end()) + { + st.push(neighbor_block); + already_pushed.insert(neighbor_block); + //LOG(1,"ADD BLOCK "<getName()<<" -> "<getName()); + } + } + } + return result; +} + + +void SyspartUtility::getArgumentsPassedToFunction(Function* func, int reg, std::unordered_set& collectedResults) +{ + if(recursion_iter > MAX_ITER) + { + UDResult unknownRes{0, 0, "unknown", func}; + 
collectedResults.insert(unknownRes); return; } - vector ir_result; - Inter_result ir = {func->getName(), func->getAddress(), reg, ir_result}; - auto found_it = find(found_results.begin(), found_results.end(), ir); - if(found_it != found_results.end()) - { - auto found_vec = (*found_it).res; - result.insert(result.end(), found_vec.begin(), found_vec.end()); + recursion_iter++; + LOG(1, "Iter "<getName()<<" at reg "<getName(), func->getAddress(), reg, {}}; + auto foundIt = found_results.find(searchKey); + if (foundIt != found_results.end()) { + LOG(1, "ICALL_RESOLVE RESULT_EXISTS " << std::dec << reg << " " << func->getName()); + collectedResults.insert((foundIt->res).begin(), (foundIt->res).end()); + recursion_iter--; return; + } + + + getArgumentsPassedToFunction_helper(func, reg, collectedResults); + found_results.emplace(func->getName(), func->getAddress(), reg, collectedResults); + recursion_iter--; + LOG(1,"Iter "<getName()<<" "<& collectedResults) +{ + if(recursion_iter > MAX_ITER) + { + UDResult unknownRes{0, 0, "unknown", func}; + collectedResults.insert(unknownRes); + return; } - vector temp_res; + recursion_iter++; + + LOG(1, "Iter "<getName() << " for register " << reg); + + // Helpers + auto pushUnknownIfNeeded = [&](std::unordered_set& resVec, Function* f) { + LOG(1,"Adding unknown "<getName()); + UDResult unknownRes{0, 0, "unknown", f}; + resVec.insert(unknownRes); + }; - arg_count++; - tuple tup1(func->getName(), func->getAddress(), reg); - visitedFuncRegs.push_back(tup1); //Add (func, reg) to the visited list + auto isVisited = [&](Function* f, int r) { + auto tup = std::make_tuple(f->getName(), f->getAddress(), r); + return std::find(visitedFuncRegs.begin(), visitedFuncRegs.end(), tup) != visitedFuncRegs.end(); + }; + auto markVisited = [&](Function* f, int r) { + visitedFuncRegs.emplace_back(f->getName(), f->getAddress(), r); + }; + + auto unmarkVisited = [&](Function* f, int r) { + auto tup = std::make_tuple(f->getName(), f->getAddress(), r); + 
auto it = std::find(visitedFuncRegs.begin(), visitedFuncRegs.end(), tup); + if (it != visitedFuncRegs.end()) + visitedFuncRegs.erase(it); + }; + + // Early exit if func is in global AT list + auto globalATList = ip_callgraph->getGlobalATList(); + if (globalATList.count(func) != 0) { + pushUnknownIfNeeded(collectedResults, func); + LOG(1, "ICALL_RESOLVE AT_FUNC " << std::dec << reg << " " << func->getName()); + recursion_iter--; + return; + } - auto cur_node = ip_callgraph->getNode(func); - if(cur_node == NULL) + if (isVisited(func, reg)) { + pushUnknownIfNeeded(collectedResults, func); + LOG(1, "ICALL_RESOLVE VISITED_FUNC_REG " << std::dec << reg << " " << func->getName()); + recursion_iter--; return; - auto parentType = cur_node->getParentWithType(); + } + + + markVisited(func, reg); + bool isCalled = false; - vector alreadyPrinted; //for dlsym & dlopen + auto curNode = ip_callgraph->getNode(func); + if (!curNode) { + LOG(1, "ICALL_RESOLVE NO_IPNODE " << std::dec << reg << " " << func->getName()); + unmarkVisited(func, reg); + pushUnknownIfNeeded(collectedResults, func); + recursion_iter--; + return; + } - for(auto pt : parentType) - { - address_t calling_addr; + auto parentTypes = curNode->getParentWithType(); + vector alreadyPrinted; // For printing results only once + + for (auto& pt : parentTypes) { + + address_t callingAddr; IPCallGraphNode* parent; - tie(calling_addr, parent) = pt.first; - bool type = pt.second; - if(type == false) - { - if(!parent->isIcallResolved(calling_addr)) - continue; + bool type; + std::tie(callingAddr, parent) = pt.first; + type = pt.second; + + if (!type && !parent->isIcallResolved(callingAddr)) { + LOG(1, "Unresolved icall of parent " << parent->getFunction()->getName() << " @ " << std::hex << callingAddr); + continue; } - //Only functions which directly invoke and indirect calls which are fully resolved considered - auto calling_fn = parent->getFunction(); - - auto working = df.getWorkingSet(calling_fn); - vector fp_vec; - 
if(analysisType != 0) - { - FPath fp{cur_function->getName(),cur_instr->getAddress(),fp_vec}; - auto icPath_iter = find(icPath.begin(), icPath.end(), fp); - if(icPath_iter != icPath.end()) - { - auto path_vec = (*icPath_iter).path; - if(find(path_vec.begin(), path_vec.end(), calling_fn) == path_vec.end()) - { - path_vec.push_back(calling_fn); - (*icPath_iter).path = path_vec; + + auto callingFn = parent->getFunction(); + + auto working = df.getWorkingSet(callingFn); + std::vector fp_vec; + + if (analysisType != 0) { + FPath fp{cur_function->getName(), cur_instr->getAddress(), fp_vec}; + auto icPath_iter = std::find(icPath.begin(), icPath.end(), fp); + if (icPath_iter != icPath.end()) { + auto& path_vec = (*icPath_iter).path; + if (std::find(path_vec.begin(), path_vec.end(), callingFn) == path_vec.end()) { + path_vec.push_back(callingFn); } - } - else - { - fp_vec.push_back(calling_fn); + } else { + fp_vec.push_back(callingFn); icPath.push_back(fp); } } - for(auto block : CIter::children(calling_fn)) - { - for(auto instr : CIter::children(block)) - { - if(instr->getAddress() == calling_addr) - { + + for (auto block : CIter::children(callingFn)) { + for (auto instr : CIter::children(block)) { + if (instr->getAddress() == callingAddr) { isCalled = true; auto state = working->getState(instr); - if(analysisType == 0) - cout<<"FUNC "<getName()<<" CALLINGFN "<getName()<<" CALLINGADDR "<dumpState(); - bool refFlag = false; - for(auto& s : state->getRegRef(reg)) { + cout<<"FUNC "<getName()<<" CALLINGFN "<getName()<<" CALLINGADDR "<getRegRef(reg)) { refFlag = true; - findRegDef(calling_fn, s, reg, temp_res); + LOG(1, "Withing getArgs - invoking findRegDef for " << callingFn->getName() << " at reg " << std::dec << reg); + std::unordered_set inter_result; + findRegDef(callingFn, s, reg, inter_result); + collectedResults.insert(inter_result.begin(), inter_result.end()); } - if(!refFlag) - { - if(reg == 7 || reg == 6 || reg == 2 || reg == 1 || reg == 8 || reg == 9) //Argument 
registers? - { - getArgumentsPassedToFunction(calling_fn, reg, temp_res); - } - else + + + if (!refFlag) { + if (reg == 7 || reg == 6 || reg == 2 || reg == 1 || reg == 8 || reg == 9) + { // Argument registers + LOG(1, "Within getArgs - Finding arguments passed to " << callingFn->getName() << " at reg "); + getArgumentsPassedToFunction(callingFn, reg, collectedResults); + + } + else { - UDResult res {0, 0, "unknown", func}; - if(find(temp_res.begin(), temp_res.end(), res) == temp_res.end()) - { - temp_res.push_back(res); - } + LOG(1, "Within getArgs - No refFlag and not arguments"); + pushUnknownIfNeeded(collectedResults, func); } } - if(analysisType == 0) - { - for(auto tr : temp_res) - { - if(find(alreadyPrinted.begin(), alreadyPrinted.end(), tr) == alreadyPrinted.end()) - { + + if (analysisType == 0) { + for (auto& tr : collectedResults) { + if (std::find(alreadyPrinted.begin(), alreadyPrinted.end(), tr) == alreadyPrinted.end()) { alreadyPrinted.push_back(tr); - auto mod = (Module*)(tr.func)->getParent()->getParent(); - cout<<"TYPE "<getName()<<" MODULE " <getName(); - if((tr.type == 0) | (tr.type == 3)) - { - cout<<" UNKNOWN"<(tr.func->getParent()->getParent()); + std::cout << "TYPE " << tr.type << "\tADDRESS " << std::hex << tr.addr << " DESC " << tr.desc + << " FUNC " << tr.func->getName() << " MODULE " << mod->getName(); + + if (tr.type == 0 || tr.type == 3) { + std::cout << " UNKNOWN" << std::endl; continue; } - if(tr.desc == "0x0") - { - cout<<" SYM NULL"<getElfSpace()->getElfMap(); + + ElfMap* elf = mod->getElfSpace()->getElfMap(); auto section = elf->findSection(".rodata"); - auto rodata = elf->getSectionReadPtr(".rodata"); + auto rodata = elf->getSectionReadPtr(".rodata"); + auto vaddr = section->getVirtualAddress(); + auto size = section->getSize(); auto offset = section->convertVAToOffset(tr.addr); - char* value = rodata + offset; - cout<<" SYM "<= vaddr && tr.addr <= section_end) { + char* value = rodata + offset; + std::cout << " SYM " << value << 
std::endl; + } } } - cout<<"END"<getName()); + pushUnknownIfNeeded(collectedResults, func); } - result.insert(result.end(), temp_res.begin(), temp_res.end()); - auto it1 = find(visitedFuncRegs.begin(), visitedFuncRegs.end(),tup1); - // Check if Iterator is valid - if(it1 != visitedFuncRegs.end()) + unmarkVisited(func, reg); + LOG(1,"Iter "<& result) +{ + if(recursion_iter > MAX_ITER) { - - visitedFuncRegs.erase(it1); + UDResult unknownRes{0, 0, "unknown", func}; + result.insert(unknownRes); + return; + } + + recursion_iter++; + LOG(1, "Iter "<getInstruction()->getAddress()<<" in function "<getName()); + + + // 1. Check the cache first. If a result exists, return it immediately. + Intra_result ir_key{func->getName(), func->getAddress(), state->getInstruction()->getAddress(), reg, {}}; + auto found_it = found_state_results.find(ir_key); + if(found_it != found_state_results.end()) + { + LOG(1, "Found results for "<getName()<<" "<res.begin(), found_it->res.end()); + recursion_iter--; + return; } - arg_count--; - Inter_result ir1{func->getName(), func->getAddress(), reg, temp_res}; - found_results.push_back(ir1); + + + // 2. Call recursive helper + findRegDef_helper(func, state, reg, result); + + // 3. Cache the final result + found_state_results.emplace(func->getName(), func->getAddress(), state->getInstruction()->getAddress(), reg, result); + + LOG(1, "Iter "<getName()<<" at reg "< &final_result) +void SyspartUtility::findRegDef_helper(Function* func, UDState *state, int reg, std::unordered_set& result) { - stack_depth++; - //if(stack_depth>100) - //return; - //cout<<"STACK "<getName()< MAX_ITER) + { + UDResult unknownRes{0, 0, "unknown", func}; + result.insert(unknownRes); + return; + } + + recursion_iter++; + + if(state->getInstruction() == NULL) + { + LOG(1, "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
NULL !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"); + recursion_iter--; + return; + } + LOG(1, "Iter "<getInstruction()->getAddress()<<" in function "<getName()); + + auto instr = state->getInstruction(); + + //Already visited? + + tuple visited_tup{instr, reg}; + if(find(visitedStates.begin(),visitedStates.end(),visited_tup) != visitedStates.end()) + { + LOG(1, "Already visited state."<getName()<<" "<getAddress()); + recursion_iter--; + return; + } + visitedStates.push_back(visited_tup); + + // Patterns + + //R0: (deref (+ %R5 -12)) nodeType=1 typedef TreePatternUnary, TreePatternTerminal>>>MemoryAddress1; - + //R0: (deref (- %R5 -12)) nodeType=2 typedef TreePatternUnary, - TreePatternTerminal>>>MemoryAddress2; + TreePatternTerminal>>>MemoryAddress2; //R2: (+ %R5 20) nodeType=3 typedef @@ -221,9 +399,9 @@ void SyspartUtility::findRegDef(Function* func, UDState *state, int reg, vector< TreePatternBinary>, TreePatternCapture>> MemoryAddress4; - + //R0: (deref %R5) nodeType=9 - typedef TreePatternUnary>> MemoryAddress5; //R0: 0 nodeType=5 typedef TreePatternCapture>ConstantValue; @@ -236,7 +414,6 @@ void SyspartUtility::findRegDef(Function* func, UDState *state, int reg, vector< TreePatternCapture>, TreePatternCapture> > RIPValue; - //R0: (deref (+ %rip=0x749 -101)) nodeType=8 typedef TreePatternUnary> >>> RIPDerefValue; - + //rsi: (+ %rsi %rax) nodeType=10 typedef @@ -259,604 +436,536 @@ void SyspartUtility::findRegDef(Function* func, UDState *state, int reg, vector< TreePatternCapture>> RegisterDifference; - auto instr = state->getInstruction(); - state->dumpState(); - tuple visited_tup{instr, reg}; - if(find(visitedStates.begin(),visitedStates.end(),visited_tup) != visitedStates.end()) + + //Define helper functions + + auto pushUnknownIfNeeded = [&](std::unordered_set& results) { + UDResult unknownRes{0, 0, "unknown", func}; + results.insert(unknownRes); + }; + + auto isArgRegister = [](int reg) { + return reg == 7 || reg == 6 || reg == 2 || reg == 1 || reg == 8 || reg == 9; 
+ }; + + auto handleRIPValue = [&](TreeCapture& cap) { + LOG(1, "Matches RIPValue"); + auto ripTree = dynamic_cast(cap.get(0)); + auto dispTree = dynamic_cast(cap.get(1)); + if (!ripTree || !dispTree) + { + LOG(1,"riptree and disptree RIPValue NULL"); + pushUnknownIfNeeded(result); + } + else { - UDResult res {0, 0, "unknown", func}; - if(find(final_result.begin(), final_result.end(), res) == final_result.end()) + address_t new_val = dispTree->getValue() + ripTree->getValue(); + std::stringstream sstream; + sstream << "0x" << std::hex << new_val; + std::string new_str = sstream.str(); + result.emplace(1, new_val, new_str, func); + } + }; + + auto handleRIPDerefValue = [&](TreeCapture& cap) { + LOG(1, "Matches RIPDerefValue"); + TreeCapture cap1; + if (RIPValue::matches(cap.get(0), cap1)) { + auto ripTree = dynamic_cast(cap1.get(0)); + auto dispTree = dynamic_cast(cap1.get(1)); + if (!ripTree || !dispTree) { - final_result.push_back(res); + LOG(1,"riptree and disptree RIPDerefValue NULL"); + pushUnknownIfNeeded(result); + } + else + { + address_t new_val = dispTree->getValue() + ripTree->getValue(); + std::stringstream sstream; + sstream << "val(0x" << std::hex << new_val << ")"; + std::string new_str = sstream.str(); + result.emplace(2, new_val, new_str, func); } - return; } - vector ir_res; - Intra_result ir{func->getName(), func->getAddress(), instr->getAddress(), reg, ir_res}; - auto found_it = find(found_state_results.begin(), found_state_results.end(), ir); - if(found_it != found_state_results.end()) + }; + + auto resolveRegister = [&](int reg, UDState* state, Function* func, std::unordered_set& out) { + bool refFlag = false; + for (auto& rf : state->getRegRef(reg)) { + refFlag = true; + LOG(1, "findRegDefHelper invoking findRegDef for "<getName()<<" reg "); + findRegDef(func, rf, reg, out); + } + if (!refFlag && isArgRegister(reg)) { + LOG(1, "findRegDefHeloper invoking Check argument registers for reg " << reg<<" function "<getName()); + std::unordered_set 
inter_result; + getArgumentsPassedToFunction(func, reg, inter_result); // must also return std::vector& a, + const std::unordered_set& b, + BinaryOpType op, + std::unordered_set& out) + { + LOG(1, "Res1.size "< result; - reg_count++; - tuple tup1(state->getInstruction(), reg); - visitedStates.push_back(tup1); - - bool stop = true; - set> vs; - stack> pending; - tuple pending_tup(state,reg); - pending.push(pending_tup); - do - { - auto st_top = pending.top(); - UDState* state; - int reg; - tie(state, reg) = st_top; - pending.pop(); - if(vs.find({state, reg}) != vs.end()) + else if(op == BinaryOpType::SUB) { - break; + LOG(1, "SUB"); } - state->dumpState(); - tuple vstup(state, reg); - vs.insert(vstup); - stop = true; - bool defFlag = false; - - if(auto def = state->getRegDef(reg)) + for (const auto& res1 : a) { - defFlag = true; - TreeCapture cap; - if( MemoryAddress1::matches(def, cap) || MemoryAddress2::matches(def, cap) || MemoryAddress5::matches(def, cap)) + for (const auto& res2 : b) { - MemLocation loadLoc(cap.get(0)); - bool memFlag = false; - for(auto& ms : state->getMemRef(reg)) + if (res1.type == 0 || res2.type == 0) { - bool flag = false; - memFlag = true; - ms->dumpState(); - for(auto& mem : ms->getMemDefList()) - { - flag = true; - MemLocation storeLoc(mem.second); - if(loadLoc == storeLoc) - { - bool refFlag = false; - for(auto& mss : ms->getRegRef(mem.first)) - { - refFlag = true; - stop = false; - tuple pending_tup(mss,mem.first); - pending.push(pending_tup); - - } - if(!refFlag) - { - if(mem.first == 7 || mem.first == 6 || mem.first == 2 || mem.first == 1 || mem.first == 8 || mem.first == 9) //Argument registers? 
- { - getArgumentsPassedToFunction(func, mem.first, result); - } - else - { - UDResult res {0, 0, "unknown", func}; - if(find(result.begin(), result.end(), res) == result.end()) - { - result.push_back(res); - } - } - } - } - } - if(!flag) - { - UDResult res {0, 0, "unknown", func}; - if(find(result.begin(), result.end(), res) == result.end()) - { - result.push_back(res); - } - } + pushUnknownIfNeeded(out); + + } + else if (res1.type == 1 && res2.type == 1) + { + address_t new_val = (op == BinaryOpType::ADD) + ? res1.addr + res2.addr + : res1.addr - res2.addr; + std::stringstream sstream; + sstream << "0x" << std::hex << new_val; + std::string new_str = sstream.str(); + out.emplace(1, new_val, new_str, func); + } + else + { + std::string op_str = (op == BinaryOpType::ADD) ? " + " : " - "; + std::string new_str = res1.desc + op_str + res2.desc; + out.emplace(3, 0, new_str, func); } - if(!memFlag) - { - auto node1 = loadLoc.getRegTree(); - auto offset = loadLoc.getOffset(); - bool refFlag = false; - if(auto regnode = dynamic_cast(node1)) - { - auto reg2 = regnode->getRegister(); - vector inter_res; - for(auto& rs : state->getRegRef(reg2)) - { - refFlag = true; - findRegDef(func, rs, reg2, inter_res); - } - if(refFlag) - { - for(auto res1 : inter_res) - { - if(res1.type == 0) - { - result.push_back(res1); - continue; - } - if(offset == 0 && res1.type == 1) - { - std::stringstream sstream; - sstream << "val(0x" << std::hex << res1.addr<<")"; - std::string new_str = sstream.str(); - UDResult res{3, 0, new_str, func}; - result.push_back(res); - continue; - } - std::stringstream sstream; - sstream << "val(" << res1.desc << " + 0x" << std::hex << offset <<")"; - std::string new_str = sstream.str(); - UDResult res{3, 0, new_str, func}; - result.push_back(res); - - } - } - } - if(!refFlag) - { - UDResult res {0, 0, "unknown", func}; - if(find(result.begin(), result.end(), res) == result.end()) - { - result.push_back(res); - } - } - } } - else if(MemoryAddress3::matches(def, 
cap)) + } + }; + + enum class ConstOpType { ADD, SUB }; + auto combineWithConstant = [&](const std::unordered_set& vals, + address_t constant, + ConstOpType op, + std::unordered_set& out) + { + LOG(1, "Res1.size "<(cap.get(0))->getRegister(); - auto const_val = dynamic_cast(cap.get(1))->getValue(); - vector inter_result; - bool refFlag = false; - bool unknown = true; - for(auto & rf : state->getRegRef(reg1)) - { - refFlag = true; - findRegDef(func, rf, reg1, inter_result); - unknown = false; - } - if(!refFlag) + LOG(1,"Combine with constant"); + pushUnknownIfNeeded(out); + } + else if (udres.type == 1) + { + address_t new_val = (op == ConstOpType::ADD) + ? udres.addr + constant + : udres.addr - constant; + std::stringstream sstream; + sstream << "0x" << std::hex << new_val; + out.emplace(1, new_val, sstream.str(), func); + } + else + { + std::stringstream sstream; + if (op == ConstOpType::ADD) { - if(reg1 == 7 || reg1 == 6 || reg1 == 2 || reg1 == 1 || reg1 == 8 || reg1 == 9) //Argument registers? 
- { - getArgumentsPassedToFunction(func, reg1, inter_result); - unknown = false; - } - else - { - UDResult res {0, 0, "unknown", func}; - if(find(result.begin(), result.end(), res) == result.end()) - { - result.push_back(res); - } - } - } - if(!unknown) + sstream << std::hex << udres.desc << " + 0x" << constant; + } + else { - for(auto udres : inter_result) - { - if(udres.type == 0) - { - UDResult res {0, 0, "unknown", func}; - if(find(result.begin(), result.end(), res) == result.end()) - { - result.push_back(res); - } - - } - else if(udres.type == 1) - { - auto new_val = const_val + udres.addr; - std::stringstream sstream; - sstream << "0x" << std::hex << new_val; - std::string new_str = sstream.str(); - - UDResult res {1, new_val, new_str, func}; - result.push_back(res); - } - else if(udres.type == 2 || udres.type == 3) - { - std::stringstream sstream; - sstream << "0x" << std::hex << const_val << " + " << udres.desc; - std::string new_str = sstream.str(); - UDResult res {3, 0, new_str, func}; - result.push_back(res); - } - } + sstream << udres.desc << " - 0x" << std::hex << constant; } - + out.emplace(3, 0, sstream.str(), func); } - else if(MemoryAddress4::matches(def, cap)) + } + }; + + auto resolveMemRefsForState = [&](Function* func, UDState* memState, const MemLocation& loadLoc, std::unordered_set& out) { + bool foundMatch = false; + for (auto& mem : memState->getMemDefList()) + { + MemLocation storeLoc(mem.second); + if (loadLoc == storeLoc) { - auto reg1 = dynamic_cast(cap.get(0))->getRegister(); - auto const_val = dynamic_cast(cap.get(1))->getValue(); - vector inter_result; + foundMatch = true; bool refFlag = false; - bool unknown = true; - for(auto & rf : state->getRegRef(reg1)) + + if(!memState->getRegRef(mem.first).empty()) { refFlag = true; - findRegDef(func, rf, reg1, inter_result); - unknown = false; - } - if(!refFlag) - { - if(reg1 == 7 || reg1 == 6 || reg1 == 2 || reg1 == 1 || reg1 == 8 || reg1 == 9) //Argument registers? 
+ LOG(1, "resolveMemRefs invoking resolveRegister"); + resolveRegister(mem.first, memState, func, out); + if (out.empty()) { - getArgumentsPassedToFunction(func, reg1, inter_result); - unknown = false; - } - else - { - UDResult res {0, 0, "unknown", func}; - if(find(result.begin(), result.end(), res) == result.end()) - { - result.push_back(res); - } - } + LOG(1, "mem ref empty"); + pushUnknownIfNeeded(out); + } } - if(!unknown) + if (!refFlag) { - for(auto udres : inter_result) + if (isArgRegister(mem.first)) { - if(udres.type == 0) - { - UDResult res {0, 0, "unknown", func}; - if(find(result.begin(), result.end(), res) == result.end()) - { - result.push_back(res); - } - } - else if(udres.type == 1) - { - auto new_val = udres.addr - const_val; - std::stringstream sstream; - sstream << "0x" << std::hex << new_val; - std::string new_str = sstream.str(); - UDResult res {1, new_val, new_str, func}; - result.push_back(res); - //cout<<"DESC "<getName()<<" "<getAddress()<getName()<<" "<getAddress()< inter_result; + LOG(1, "resolveMemRefs invoking getArguments"); + getArgumentsPassedToFunction(func, mem.first,inter_result); + out.insert(inter_result.begin(), inter_result.end()); + } + else + { + LOG(1,"No refFlag and no args"); + pushUnknownIfNeeded(out); } } } - else if(ConstantValue::matches(def, cap)) + } + if (!foundMatch) + { + LOG(1,"No matching memory definition"); + pushUnknownIfNeeded(out); + } + }; + + //Main logic + if(auto def = state->getRegDef(reg)) //If there is a register definition for reg in this instruction + { + TreeCapture cap; + if (RIPValue::matches(def, cap)) + { + handleRIPValue(cap); + } + else if (RIPDerefValue::matches(def, cap)) + { + handleRIPDerefValue(cap); + } + else if (RegisterSum::matches(def, cap)) + { + LOG(1, "Register sum"); + + auto reg1 = dynamic_cast(cap.get(0))->getRegister(); + auto reg2 = dynamic_cast(cap.get(1))->getRegister(); + + std::unordered_set inter_res1; + std::unordered_set inter_res2; + + resolveRegister(reg1, state, 
func, inter_res1); + resolveRegister(reg2, state, func, inter_res2); + + if (inter_res1.empty() && inter_res2.empty()) + { + + LOG(1, "DFD ? (NO REGREF) FUNC : " << func->getName() + << std::hex << " " << instr->getAddress() + << " " << std::dec << reg1 << " " << reg2); + pushUnknownIfNeeded(result); + + } + else + { + + LOG(1, "Sum of registers"); + combineResults(inter_res1, inter_res2, BinaryOpType::ADD, result); + } + } + else if (RegisterDifference::matches(def, cap)) + { + LOG(1, "Register Difference"); + + auto reg1 = dynamic_cast(cap.get(0))->getRegister(); + auto reg2 = dynamic_cast(cap.get(1))->getRegister(); + + std::unordered_set inter_res1; + std::unordered_set inter_res2; + + resolveRegister(reg1, state, func, inter_res1); + resolveRegister(reg2, state, func, inter_res2); + + if (inter_res1.empty() && inter_res2.empty()) + { + + LOG(1, "DFD ? (NO REGREF) FUNC: " << func->getName() + << " " << std::hex << instr->getAddress() + << " " << std::dec << reg1 << " " << reg2); + pushUnknownIfNeeded(result); + + } + else { - auto const_val = dynamic_cast(cap.get(0))->getValue(); + LOG(1, "Diff of results (nested loop)"); + combineResults(inter_res1, inter_res2, BinaryOpType::SUB, result); + } + } + else if (RegisterValue::matches(def, cap)) + { + + LOG(1, "Matches register value"); + auto reg1 = dynamic_cast(cap.get(0))->getRegister(); + resolveRegister(reg1, state, func, result); + + if (result.empty()) + { + LOG(1,"No ref found and not an argument register"); + pushUnknownIfNeeded(result); + } + } + else if (ConstantValue::matches(def, cap)) + { + + LOG(1, "Matches constant value"); + auto const_node = dynamic_cast(cap.get(0)); + + if(!const_node) + { + LOG(1, "Unknown CONST NODE"); + pushUnknownIfNeeded(result); + } + else + { + address_t const_val = const_node->getValue(); std::stringstream sstream; sstream << "0x" << std::hex << const_val; - std::string new_str = sstream.str(); - UDResult res {1, const_val, new_str, func}; - result.push_back(res); - 
//cout<<"DESC "<getName()<<" "<getAddress()<(cap.get(0))->getRegister(); + auto const_node = dynamic_cast(cap.get(1)); + + if(!const_node) { - auto reg1 = dynamic_cast(cap.get(0))->getRegister(); - bool refFlag = false; - for(auto & rf : state->getRegRef(reg1)) + LOG(1, "Unknown CONST NODE"); + pushUnknownIfNeeded(result); + } + else + { + address_t const_val = const_node->getValue(); + std::unordered_set inter_result; + resolveRegister(reg1, state, func, inter_result); + + if (inter_result.empty()) { - refFlag = true; - stop = false; - tuple pending_tup(rf,reg1); - pending.push(pending_tup); + LOG(1, "CAUTION UNKNOWN MEMDEF @ " << std::hex << state->getInstruction()->getAddress()); + pushUnknownIfNeeded(result); - } - if(!refFlag) + } + else { - if(reg1 == 7 || reg1 == 6 || reg1 == 2 || reg1 == 1 || reg1 == 8 || reg1 == 9) //Argument registers? - { - getArgumentsPassedToFunction(func, reg1, result); - } - else - { - UDResult res {0, 0, "unknown", func}; - if(find(result.begin(), result.end(), res) == result.end()) - { - result.push_back(res); - } - } + LOG(1, "Resolved value found"); + combineWithConstant(inter_result, const_val, ConstOpType::ADD, result); } + } + } + else if (MemoryAddress4::matches(def, cap)) + { + LOG(1, "Matches MemoryAddress4"); + auto reg1 = dynamic_cast(cap.get(0))->getRegister(); + auto const_node = dynamic_cast(cap.get(1)); + + if(!const_node) + { + LOG(1, "Unknown CONST NODE"); + pushUnknownIfNeeded(result); } - else if(RegisterSum::matches(def,cap)) + else { - auto reg1 = dynamic_cast(cap.get(0))->getRegister(); - auto reg2 = dynamic_cast(cap.get(1))->getRegister(); - vector inter_res1, inter_res2; - bool refFlag1 = false; - for(auto & rf : state->getRegRef(reg1)) + address_t const_val = const_node->getValue(); + std::unordered_set inter_result; + resolveRegister(reg1, state, func, inter_result); + + if (inter_result.empty()) { - refFlag1 = true; - findRegDef(func, rf, reg1, inter_res1); - } - bool refFlag2 = false; - for(auto & rf : 
state->getRegRef(reg2)) + LOG(1, "CAUTION UNKNOWN MEMDEF @ " << std::hex << state->getInstruction()->getAddress()); + pushUnknownIfNeeded(result); + } + else { - refFlag2 = true; - findRegDef(func, rf, reg2, inter_res2); + LOG(1, "Resolved value found"); + combineWithConstant(inter_result, const_val, ConstOpType::SUB, result); } - if(!refFlag1) + } + } + else if (MemoryAddress1::matches(def, cap) || MemoryAddress2::matches(def, cap) || MemoryAddress5::matches(def, cap)) + { + + LOG(1, "Matches memoryaddress 1,2,5"); + MemLocation loadLoc(cap.get(0)); + std::set storeNodes; + for (auto& memState : state->getMemRef(reg)) + { + for (auto& mem : memState->getMemDefList()) { - if(reg1 == 7 || reg1 == 6 || reg1 == 2 || reg1 == 1 || reg1 == 8 || reg1 == 9) //Argument registers? + MemLocation storeLoc(mem.second); + if (loadLoc == storeLoc) { - getArgumentsPassedToFunction(func, reg1, inter_res1); - refFlag1 = true; + + storeNodes.insert(memState); + LOG(1, "Adding store state " << std::hex << memState->getInstruction()->getAddress()); } } - if(!refFlag2) + } + + std::vector matchingStores; + if (!storeNodes.empty()) + { + + LOG(1, "Finding matching stores for load at " << func->getName() << " @ " << std::hex << state->getInstruction()->getAddress()); + matchingStores = matchLoadMemoryToStores(func, state, storeNodes); + } + + bool memFlag = false; + for (auto& ms : state->getMemRef(reg)) + { + if (std::find(matchingStores.begin(), matchingStores.end(), ms) == matchingStores.end()) { - if(reg2 == 7 || reg2 == 6 || reg2 == 2 || reg2 == 1 || reg2 == 8 || reg2 == 9) //Argument registers? 
- { - getArgumentsPassedToFunction(func, reg2, inter_res2); - refFlag2 = true; - } + + LOG(1, "##### NOT a matching store instruction"); + continue; } - if(!refFlag1 && !refFlag2) + + LOG(1, "Finding memory references @ " << std::hex << ms->getInstruction()->getAddress()); + memFlag = true; + resolveMemRefsForState(func, ms, loadLoc, result); + } + if (!memFlag) + { + LOG(1, "CAUTION UNKNOWN MEMREF @ " << std::hex << state->getInstruction()->getAddress()); + + auto regTree = loadLoc.getRegTree(); + auto offset = loadLoc.getOffset(); + if(auto regNode = dynamic_cast(regTree)) + { + auto memReg = regNode->getRegister(); + std::unordered_set inter_result; + resolveRegister(memReg, state, func, inter_result); + if(inter_result.empty()) { - //cout<<"DFD ? (NO REGREF) FUNC : "<getName()<getAddress()<<" "<getName()<<" "<getAddress()<getInstruction()->getAddress()); + pushUnknownIfNeeded(result); } else { - for(auto res1 : inter_res1) + for(auto res : inter_result) { - for(auto res2 : inter_res2) + if(res.type == 0) { - if(res1.type == 0 || res2.type == 0) - { - UDResult res {0, 0, "unknown", func}; - //cout<<"DESC unknown "<getName()<<" "<getAddress()<getName()<<" "<getAddress()<getName()<<" "<getAddress()<getAddress()); + + auto pushReturnVals = [&](Function* targetFunc) { - auto reg1 = dynamic_cast(cap.get(0))->getRegister(); - auto reg2 = dynamic_cast(cap.get(1))->getRegister(); - vector inter_res1, inter_res2; - bool refFlag1 = false; - for(auto & rf : state->getRegRef(reg1)) - { - refFlag1 = true; - findRegDef(func, rf, reg1, inter_res1); - } - bool refFlag2 = false; - for(auto & rf : state->getRegRef(reg2)) - { - refFlag2 = true; - findRegDef(func, rf, reg2, inter_res2); - } - if(!refFlag1) + LOG(1, "NO DEF FLAG " << std::hex << instr->getAddress() << " targets " << targetFunc->getName()); + + auto target_ipnode = ip_callgraph->getNode(targetFunc); + if (target_ipnode != nullptr) { - if(reg1 == 7 || reg1 == 6 || reg1 == 2 || reg1 == 1 || reg1 == 8 || reg1 == 9) 
//Argument registers? + auto returnvals = target_ipnode->getATReturn(); + for (auto r : returnvals) { - getArgumentsPassedToFunction(func, reg1, inter_res1); - refFlag1 = true; + foundreturnAT = true; + std::stringstream sstream; + sstream << "0x" << std::hex << r->getAddress(); + std::string new_str = sstream.str(); + result.emplace(1, r->getAddress(), new_str, func); + LOG(1, "RETVAL " << std::hex << instr->getAddress() << " " << r->getAddress() << " " << r->getName()); } } - if(!refFlag2) + }; + + if (auto cfi = dynamic_cast(instr->getSemantic())) + { + auto link = cfi->getLink(); + auto target = link->getTarget(); + + if (auto func_target = dynamic_cast(target)) { - if(reg2 == 7 || reg2 == 6 || reg2 == 2 || reg2 == 1 || reg2 == 8 || reg2 == 9) //Argument registers? - { - getArgumentsPassedToFunction(func, reg2, inter_res2); - refFlag2 = true; - } + pushReturnVals(func_target); } - if(!refFlag1 && !refFlag2) + else if (auto plt = dynamic_cast(target)) // Call via PLT { - //cout<<"DFD ? 
(NO REGREF) FUNC : "<getName()<getAddress()<<" "<(plt->getTarget())) { - result.push_back(res); + pushReturnVals(ext_target); } - //cout<<"DESC unknown "<getName()<<" "<getAddress()<getName()<<" "<getAddress()<getName()<<" "<getAddress()<getName()<<" "<getAddress()<(cap.get(0)); - auto dispTree = dynamic_cast(cap.get(1)); - auto new_val = dispTree->getValue() + ripTree->getValue(); - std::stringstream sstream; - sstream << "0x" << std::hex << new_val; - std::string new_str = sstream.str(); - UDResult res {1, new_val, new_str, func}; - result.push_back(res); - //cout<<"DESC "<getName()<<" "<getAddress()<(cap1.get(0)); - auto dispTree = dynamic_cast(cap1.get(1)); - auto new_val = dispTree->getValue() + ripTree->getValue(); - std::stringstream sstream; - sstream << "val(0x" << std::hex << new_val << ")"; - std::string new_str = sstream.str(); - UDResult res {2, new_val, new_str, func}; - result.push_back(res); - } } - } - - - if(!defFlag) - { - bool foundreturnAT = false; - if(analysisType == 1) //Only for indirect call analysis - { - state->dumpState(); - - if(auto cfi = dynamic_cast(instr->getSemantic())) - { - auto link = cfi->getLink(); - auto target = link->getTarget(); - if(auto func_target = dynamic_cast(target)) - { - auto target_ipnode = ip_callgraph->getNode(func_target); - if(target_ipnode != NULL) - { - auto returnvals = target_ipnode->getATReturn(); - for(auto r : returnvals) - { - foundreturnAT = true; - std::stringstream sstream; - sstream << "0x" << std::hex << r->getAddress(); - std::string new_str = sstream.str(); - UDResult res {1, r->getAddress(), new_str, func}; - result.push_back(res); - - } - } - } - else if(auto plt = dynamic_cast(target)) //Call to a library function called through PLt - { - if (auto ext_target = dynamic_cast(plt->getTarget())) - { - auto target_ipnode = ip_callgraph->getNode(func_target); - if(target_ipnode != NULL) { - auto returnvals = target_ipnode->getATReturn(); - - for(auto r : returnvals) - { - foundreturnAT = true; - 
std::stringstream sstream; - sstream << "0x" << std::hex << r->getAddress(); - std::string new_str = sstream.str(); - UDResult res {1, r->getAddress(), new_str, func}; - result.push_back(res); - } - } - } - } - - } - } - if(!foundreturnAT) - { - //cout<<"DFD ? (NO REGDEF) FUNC : "<getName()<getAddress()<<" "<getAddress(), INT_MIN); - instr->getSemantic()->accept(&instrDumper); - //cout<getName()<<" "<getAddress()<>().swap(vs); - final_result.insert(final_result.end(), result.begin(), result.end()); - - auto it1 = find(visitedStates.begin(), visitedStates.end(), tup1); - if(it1 != visitedStates.end()) + } + if(!foundreturnAT) { - visitedStates.erase(it1); + LOG(1,"DFD ? (NO REGDEF) FUNC : "<getName()<getAddress()<<" "<getName(), func->getAddress(), instr->getAddress(), reg, result}; - found_state_results.push_back(ir1); - + } + + + + //Cleanup logic + auto it1 = find(visitedStates.begin(), visitedStates.end(), visited_tup); + if(it1 != visitedStates.end()) + { + visitedStates.erase(it1); + } + recursion_iter--; + LOG(1,"Iter "<getName()<<" "<getName()<<" MODULE : " <<(res.func)->getParent()->getParent()->getName()<getName()<<" MODULE : " <<(res.func)->getParent()->getParent()->getName()); } @@ -876,173 +985,186 @@ string SyspartUtility::getFunctionName(address_t addr) bool SyspartUtility::findIndirectCallTargets(IPCallGraphNode* n) { + recursion_iter++; auto function = n->getFunction(); bool resolvedFlag = false; auto working = df.getWorkingSet(function); - for(auto block : CIter::children(function)) { - for(auto instruction : CIter::children(block)) { + + auto pushUnknownIfNeeded = [&](std::unordered_set& results, Function* function) + { + LOG(1, "Unknown value at "<getName()); + UDResult unknownRes{0, 0, "unknown", function}; + results.insert(unknownRes); + }; + + auto isArgRegister = [](int reg) + { + return reg == 7 || reg == 6 || reg == 2 || reg == 1 || reg == 8 || reg == 9; + }; + + auto resolveRegister = [&](int reg, UDState* state, Function* func, 
std::unordered_set& results) + { + bool refFlag = false; + for (auto& rf : state->getRegRef(reg)) + { + refFlag = true; + findRegDef(func, rf, reg, results); + LOG(1,"Resolving register "< + refFlag = true; + } + else + { + LOG(1, "No reg ref and arguments registers"); + pushUnknownIfNeeded(results, func); + } + } + }; + + + + for(auto block : CIter::children(function)) + { + for(auto instruction : CIter::children(block)) + { auto semantic = instruction->getSemantic(); auto state = working->getState(instruction); - auto ici = dynamic_cast(semantic); - auto iji = dynamic_cast(semantic); + auto ici = dynamic_cast(semantic); + auto iji = dynamic_cast(semantic); - if(iji) - { - if(iji->isForJumpTable()) - { - continue; - } - } - - if(ici || iji) { - //cout<<"Enter"< icTargets; - cur_function = function; - cur_instr = instruction; - auto continueFlag = true; - if(iter > 1) //Not the first iteration + if (iji && iji->isForJumpTable()) + { + continue; + } + + if (!(ici || iji)) + { + continue; + } + + vector icTargets; + cur_function = function; + cur_instr = instruction; + auto continueFlag = true; + if (iter > 1) + { + continueFlag = false; + LOG(1, "Unsetting continue flag for "<getName()<<" "<getAddress()); + vector fp_vec; + FPath fp{cur_function->getName(), cur_instr->getAddress(), fp_vec}; + auto icPath_iter = find(icPath.begin(), icPath.end(), fp); + if (icPath_iter != icPath.end()) { - continueFlag = false; - vector fp_vec; - FPath fp{cur_function->getName(),cur_instr->getAddress(), fp_vec}; - auto icPath_iter = find(icPath.begin(), icPath.end(), fp); - if(icPath_iter != icPath.end()) + auto& path_set = (*icPath_iter).path; + for (auto p : path_set) { - - auto path_set = (*icPath_iter).path; - for(auto p : path_set) + for (auto r : prev_resolvedFns) { - for(auto r : prev_resolvedFns) + if (r->getName() == p->getName() && r->getAddress() == p->getAddress()) { - if((r->getName() == p->getName()) && (r->getAddress() == p->getAddress())) - { - continueFlag = true; - 
path_set.clear(); - path_set.push_back(cur_function); - (*icPath_iter).path = path_set; - break; - } - } - if(continueFlag) + continueFlag = true; + LOG(1, "Setting continue flag for "<getName()<<" "<getAddress()); + path_set.clear(); + path_set.push_back(cur_function); + (*icPath_iter).path = path_set; break; + } + } + if (continueFlag) + { + break; } } } - else - { - vector fp_vec; - fp_vec.push_back(cur_function); - FPath fp{cur_function->getName(),cur_instr->getAddress(), fp_vec}; - icPath.push_back(fp); - } + } + else + { + vector fp_vec; + fp_vec.push_back(cur_function); + FPath fp{cur_function->getName(),cur_instr->getAddress(), fp_vec}; + icPath.push_back(fp); + } - if(!continueFlag) - continue; - if(n->isIcallResolved(instruction->getAddress())) - continue; - if(ici) - ici->clearAllTargets(); - InstrDumper instrDumper(instruction->getAddress(), INT_MIN); - instruction->getSemantic()->accept(&instrDumper); - - stack_depth = 0; - if(ici && ici->hasMemoryOperand()) - { + if(!continueFlag) + { + LOG(1, "Continuing... Flag set"); + continue; + } + if(n->isIcallResolved(instruction->getAddress())) + { + LOG(1, "Continuing... 
Already resolved address"<getAddress()); + continue; + } + + if(ici) + { + ici->clearAllTargets(); + } + + stack_depth = 0; + std::unordered_set results; + if(ici && ici->hasMemoryOperand()) + { IndirectCallTarget target(instruction->getAddress()); target.setUnknown(); icTargets.push_back(target); - } - else - { - int reg; - if(ici) - { - reg = X86Register::convertToPhysical(ici->getRegister()); - } - else if(iji) - { - reg = X86Register::convertToPhysical(iji->getRegister()); - } - - DisasmHandle handle(true); - - vector results; - bool refFlag = false; - for(auto& s : state->getRegRef(reg)) - { - refFlag = true; - findRegDef(function, s, reg, results); - } - if(!refFlag) + } + else + { + int reg; + if(ici) + { + reg = X86Register::convertToPhysical(ici->getRegister()); + } + else if(iji) + { + reg = X86Register::convertToPhysical(iji->getRegister()); + } + resolveRegister(reg, state, function, results); + + for (auto& r : results) { - if(reg == 7 || reg == 6 || reg == 2 || reg == 1 || reg == 8 || reg == 9) //Argument registers? 
+ printResult(r); + if(r.type == 0 | r.type == 3) { - getArgumentsPassedToFunction(function, reg, results); - } - else - { - UDResult res {0, 0, "unknown", function}; - if(find(results.begin(), results.end(), res) == results.end()) - { - results.push_back(res); - } + IndirectCallTarget target(r.addr); + target.setUnknown(); + icTargets.push_back(target); + continue; } - } - - for(auto r : results) + if(r.type == 1) { - if(r.type == 0 | r.type == 3) - { - IndirectCallTarget target(r.addr); - target.setUnknown(); - //ici->addIndirectCallTarget(target); - icTargets.push_back(target); - continue; - } - auto mod = (Module*)(r.func)->getParent()->getParent(); - auto addr_in_egalito = r.addr + mod->getBaseAddress(); - auto ds = mod->getDataRegionList()->findDataSectionContaining(addr_in_egalito); - if(ds != NULL) + IndirectCallTarget target(r.addr); + auto func_name = getFunctionName(r.addr); + if(!func_name.empty()) { - auto dsType = ds->getType(); - - + target.setName(func_name); } - else + else if(r.addr != 0) { - //cout<<"UNMATCHED_DATASECTION"; - } - if(r.type == 1) - { - IndirectCallTarget target(r.addr); - auto func_name = getFunctionName(r.addr); - if(!func_name.empty()) - { - target.setName(func_name); - } - else if(r.addr != 0) - { - target.setGlobal(); - } - //ici->addIndirectCallTarget(target); - icTargets.push_back(target); - } - else - { - IndirectCallTarget target(r.addr); - target.setUnknown(); - //ici->addIndirectCallTarget(target); - icTargets.push_back(target); + target.setGlobal(); } + icTargets.push_back(target); + } + else + { + IndirectCallTarget target(r.addr); + target.setUnknown(); + icTargets.push_back(target); } + } } //Add resolved indirect calls - //auto icTargets = ici->getIndirectCallTargets(); bool resolved = true; set targets; if(icTargets.size() == 0) @@ -1085,7 +1207,7 @@ bool SyspartUtility::findIndirectCallTargets(IPCallGraphNode* n) resolvedFlag = true; } - } + } } @@ -1095,14 +1217,14 @@ bool 
SyspartUtility::findIndirectCallTargets(IPCallGraphNode* n) vector>().swap(visitedFuncRegs); found_results.clear(); found_state_results.clear(); - + recursion_iter--; return resolvedFlag; } -vector SyspartUtility::getFunctionByAddress(address_t addr, Module* mod) +set SyspartUtility::getFunctionByAddress(address_t addr, Module* mod) { - vector func_set; + set func_set; bool found = false; for(auto func : CIter::functions(mod)) @@ -1117,10 +1239,12 @@ vector SyspartUtility::getFunctionByAddress(address_t addr, Module* m flag = false; } if(flag) //No duplicates found - func_set.push_back(func); + { + func_set.insert(func); + } } } - + if(!found) { for(auto module : CIter::children(program)) @@ -1138,7 +1262,9 @@ vector SyspartUtility::getFunctionByAddress(address_t addr, Module* m flag = false; } if(flag) //No duplicates found - func_set.push_back(func); + { + func_set.insert(func); + } } } } diff --git a/analysis/app/src/syspartUtility.h b/analysis/app/src/syspartUtility.h index eb03b1a..cf93c25 100644 --- a/analysis/app/src/syspartUtility.h +++ b/analysis/app/src/syspartUtility.h @@ -2,6 +2,8 @@ #define SYSPARTUTILITY #include +#include + #include "ipcallgraph.h" #include "analysis/usedef.h" @@ -14,7 +16,11 @@ struct UDResult address_t addr; string desc; Function* func; - bool operator==(const UDResult& l) + + UDResult(int t, address_t a, const std::string& d, Function* f) + : type(t), addr(a), desc(d), func(f) {} + + bool operator==(const UDResult& l) const { if((l.type == this->type) && (l.addr == this->addr) && (l.desc == this->desc) && ((l.func)->getName() == (this->func)->getName())) return true; @@ -22,13 +28,42 @@ struct UDResult } }; +//Required for using std::unordered_set +namespace std { + template<> struct hash + { + size_t operator()(const UDResult& k) const + { + + // Get hashes for all four key members + size_t h1 = std::hash()(k.type); + size_t h2 = std::hash()(k.addr); + size_t h3 = std::hash()(k.desc); + size_t h4 = k.func ? 
std::hash()(k.func->getName()) : 0; + + // Combine them robustly + size_t seed = h1; + seed ^= h2 + 0x9e3779b9 + (seed << 6) + (seed >> 2); + seed ^= h3 + 0x9e3779b9 + (seed << 6) + (seed >> 2); + seed ^= h4 + 0x9e3779b9 + (seed << 6) + (seed >> 2); // <-- Added hash for func name + + return seed; + } + }; +} + struct Inter_result { string fname; address_t addr; int reg; - vector res; - bool operator==(const Inter_result& l) + std::unordered_set res; + + Inter_result(const std::string& name, address_t a, int r, const std::unordered_set& resultSet) + : fname(name), addr(a), reg(r), res(resultSet) + {} + + bool operator==(const Inter_result& l) const { if((l.fname == this->fname) &&(l.addr == this->addr) && (l.reg == this->reg)) return true; @@ -36,20 +71,69 @@ struct Inter_result } }; +namespace std { + template<> struct hash + { + size_t operator()(const Inter_result& k) const + { + size_t h1 = std::hash()(k.fname); + size_t h2 = std::hash()(k.addr); + size_t h3 = std::hash()(k.reg); + + // Combine the hashes robustly + size_t seed = h1; + seed ^= h2 + 0x9e3779b9 + (seed << 6) + (seed >> 2); + seed ^= h3 + 0x9e3779b9 + (seed << 6) + (seed >> 2); + return seed; + } + }; +} + struct Intra_result { string fname; address_t faddr; address_t iaddr; int reg; - vector res; - bool operator==(const Intra_result& l) + std::unordered_set res; + + + Intra_result(const std::string& name, address_t func_addr, address_t instr_addr, int r, const std::unordered_set& resultSet) + : fname(name), + faddr(func_addr), + iaddr(instr_addr), + reg(r), + res(resultSet) + {} + + bool operator==(const Intra_result& l) const { if((l.fname == this->fname) &&(l.faddr == this->faddr) && (l.iaddr == this->iaddr) && (l.reg == this->reg)) return true; return false; } + }; +namespace std { + template<> struct hash + { + size_t operator()(const Intra_result& k) const + { + size_t h1 = std::hash()(k.fname); + size_t h2 = std::hash()(k.faddr); + size_t h3 = std::hash()(k.iaddr); + size_t h4 = 
std::hash()(k.reg); + + // Combine the four hashes robustly + size_t seed = h1; + seed ^= h2 + 0x9e3779b9 + (seed << 6) + (seed >> 2); + seed ^= h3 + 0x9e3779b9 + (seed << 6) + (seed >> 2); + seed ^= h4 + 0x9e3779b9 + (seed << 6) + (seed >> 2); + return seed; + } + }; +} + struct FPath { string fname; @@ -76,19 +160,21 @@ class SyspartUtility } void initialize(); - void getArgumentsPassedToFunction(Function* func, int reg, vector &result); - void findRegDef(Function* func, UDState *state, int reg, vector &final_result); + void getArgumentsPassedToFunction(Function* func, int reg, std::unordered_set& results_out); + void findRegDef(Function* func, UDState *state, int reg, std::unordered_set& results_out); bool findIndirectCallTargets(IPCallGraphNode* n); string getFunctionName(address_t addr); void printResult(UDResult res); - vector getFunctionByAddress(address_t addr, Module* mod); + set getFunctionByAddress(address_t addr, Module* mod); vector new_resolvedFns; vector prev_resolvedFns; int iter=0; private : + int recursion_iter=0; + int MAX_ITER=100; int analysisType; ////0 when passed to find values of argument register, 1 for indirect call target analysis, 2 for others - vector found_results; - vector found_state_results; + std::unordered_set found_results; + std::unordered_set found_state_results; vector> visitedFuncRegs; Program *program=NULL; IPCallGraph *ip_callgraph=NULL; @@ -101,5 +187,9 @@ class SyspartUtility int stack_depth=0; DataFlow df; + void getArgumentsPassedToFunction_helper(Function* func, int reg, std::unordered_set& results_out); + void findRegDef_helper(Function* func, UDState *state, int reg, std::unordered_set& results_out); + vector matchLoadMemoryToStores(Function* func, UDState* orig, set storeNodes); + }; #endif From c71a7af4c652df5f1d0fc386d48469fc99c833b7 Mon Sep 17 00:00:00 2001 From: Vidya Date: Wed, 27 Aug 2025 10:47:07 +0000 Subject: [PATCH 02/14] Scalability update2 : Added helpers in ipcallgraph. 
Other features : handling forward DFD through memory, ifuncs, AT returned from funcs, tail jumps, ijumps in first basic block --- analysis/app/src/ipcallgraph.cpp | 650 ++++++++++++++++++++----------- analysis/app/src/ipcallgraph.h | 14 +- 2 files changed, 433 insertions(+), 231 deletions(-) diff --git a/analysis/app/src/ipcallgraph.cpp b/analysis/app/src/ipcallgraph.cpp index 31acef4..ab9ba2d 100644 --- a/analysis/app/src/ipcallgraph.cpp +++ b/analysis/app/src/ipcallgraph.cpp @@ -1087,291 +1087,483 @@ void IPCallGraph::findDirectEdges(Function* f) } +Instruction* IPCallGraph::findInstructionInFunction(Function* func, address_t addr) { + for (auto block : CIter::children(func)) { + for (auto instr : CIter::children(block)) { + if (instr->getAddress() == addr) { + return instr; + } + } + } + return NULL; +} + bool IPCallGraph::handleArgumentFnPtr(int reg, Function* f, Instruction* instr, Function* atfunc) { - auto ipnode = getNode(f); - if(f == NULL) + // Check the member cache, which is safer than a static cache. 
+ auto key = std::make_tuple(reg, f, instr); + if (handle_arg_cache.count(key)) { + LOG(1, "$$$ RETURN CACHED RESULT "<getName()<<" "<getName()<<" "<getDirectChildren(); + auto it = direct_children.find(instr->getAddress()); + + if(it == direct_children.end()) + { + LOG(1, "No function invokation within "<getName()<<" @ "<getAddress()); + handle_arg_cache[key] = false; + return false; + } + // Loop through all functions called at this site + for (auto s : it->second) + { + auto ch_func = s->getFunction(); + LOG(1,"FDF ARGS_EXAMINING_FUNC "<getName()<<" "<getName()); + + found = false; + auto working = df.getWorkingSet(ch_func); + + typedef TreePatternCapture>RegisterValue; + + found = false; + for(auto bl : CIter::children(ch_func)) + { + for(auto ins : CIter::children(bl)) + { + auto state = working->getState(ins); + for(auto& def : state->getRegDefList()) + { + TreeCapture cap; + if(RegisterValue::matches(def.second, cap)) + { + auto reg2 = dynamic_cast(cap.get(0))->getRegister(); + if(reg2 == reg) //First instruction where reg is referenced + { + //startState = state; //State found + auto use1 = state->getRegUse(def.first); + auto use2 = state->getRegUse(reg2); + for(auto u : use1) + { + auto tempFlag = searchDownDef(u, def.first, atfunc); + LOG(20, std::hex<getAddress()<<" Received "<getAddress()<<" Received "<getName()<<" "<getName()); + found = true; + break; + } + } + } + if(!found) + { + auto treenode = state->getMemDef(reg); + if(treenode != NULL) //Handling cases when assigned to a memory address + { + //cout<<"WARNING : WRITE TO MEMORY DETECTED @ "<getInstruction()->getAddress()<getName()<<" "<getName()); + InstrDumper instrdumper(ins->getAddress(), INT_MIN); + ins->getSemantic()->accept(&instrdumper); + + //Case 2.1 : Tracking value through memory when memuselist is available + bool enteredFlag = false; + flag = true; + //Get the address corresponding to treenode + for(auto& mu : state->getMemUse(reg)) + { + enteredFlag = true; + auto tempFlag = 
searchDownDef(mu, -1, atfunc); + flag = tempFlag & flag; + } + state->dumpState(); + if(!enteredFlag) + { + handle_arg_cache[key] = false; + return false; + } + found = true; + + } + LOG(1,"FDF ARGS_NO_REG_USE "<getName()<<" "<getName()); + } + else + break; + } + } + //Check for cases where this argument flows to a call/jmp + //Check for the last instruction of first block + int bl_iter=0; + Instruction* lastinstr = NULL; + bool regFlag = false; + for(auto bl : CIter::children(ch_func)) + { + if(bl_iter > 0) + break; + for(auto ins : CIter::children(bl)) + { + auto st = working->getState(ins); + if(st->getRegDef(reg) != NULL) + { + regFlag = true; + break; + } + lastinstr = ins; + } + if(regFlag) + break; + bl_iter++; + + } + if(!regFlag) + { + if(auto cfi = dynamic_cast(lastinstr->getSemantic())) + { + auto link = cfi->getLink(); + auto target = link->getTarget(); + if(auto func_target = dynamic_cast(target)) //Ends with a direct call/jump + { + //cout<getAddress()<<" targets "<getName()<getName()<<" "<getName()<<" "<getAddress()); + + if(found) + { + lastflag = flag & lastflag; + } + LOG(1,"FDF ARGS_FIRST_CALL_1 "<getName()<<" "<getName()); + handle_arg_cache[key] = lastflag; + return lastflag; + + } + + } + if(auto ici = dynamic_cast(lastinstr->getSemantic())) //If is an argument to indirect call, return false + { + handle_arg_cache[key] = false; + return false; + } + } + if(found) + break; + //Case 2.2 : Checks for tail jump + lastinstr = NULL; + for(auto bl : CIter::children(ch_func)) + { + for(auto ins : CIter::children(bl)) + { + lastinstr = ins; + } + } + + auto lastinstrSemantic = lastinstr->getSemantic(); + if(auto cfi = dynamic_cast(lastinstr->getSemantic())) + { + auto link = cfi->getLink(); + auto target = link->getTarget(); + if (auto func_target = dynamic_cast(target)) + { + LOG(1,"TAIL JUMP @ "<getName()<getAddress()); + auto lastflag = handleArgumentFnPtr(reg, ch_func, lastinstr, atfunc); + return lastflag; + } + } + + //Check if the reg value is 
used in any indirect jumps + for(auto bl : CIter::children(ch_func)) + { + for(auto ins : CIter::children(bl)) + { + auto ins_semantic = ins->getSemantic(); + if(auto ij = dynamic_cast(ins_semantic)) + { + if(ij->getRegister() != X86_REG_RIP) + { + int jmp_reg = X86Register::convertToPhysical(ij->getRegister()); + + if(reg == jmp_reg) + { + auto ijump_state = working->getState(ins); + auto ijump_ref_reg = ijump_state->getRegRef(reg); + if(ijump_ref_reg.size() == 0) //This handles when AT is passed as argument to a function and is directly used in ijump. There won't be any reg references for that register. It is done in this way because usedef chain cannot capture the first use of register in ijump + { + found = true; + LOG(1,"FDF USED_IN_IJUMP "<getName()<<" used in ijump "<getName()<<" @ instr "<getAddress()); + handle_arg_cache[key] = true; + return true; + } + } + } + } + + } + } + + + + } + + if(!found) + { + LOG(1,"FDF ARGS_NO_REG_USE "<getName()); + handle_arg_cache[key] = false; + return false; + } + // Store result in the member cache and return. 
+ handle_arg_cache[key] = flag; + return flag; +} + +bool IPCallGraph::handleReturnInstruction(UDState* state, int reg1, Function* atfunc) +{ + auto instr = state->getInstruction(); + auto cur_func = (Function*)instr->getParent()->getParent(); + if(reg1 != 0) { - return false; + LOG(1,"FDF NO_USE_END_OF_FUNC "<getName()<<" "<getName()); + return true; } - auto dir_ch = ipnode->getDirectChildren(); - auto dir_it = dir_ch.find(instr->getAddress()); + bool flag = true; - bool found = false; - if(dir_it != dir_ch.end()) + LOG(1,"FDF ENDS_IN_RETURN "<getName()<<" "<getName()); + if(cur_func->isIFunc()) { - auto dir_ch_set = dir_it->second; - for(auto s : dir_ch_set) - { - auto ch_func = s->getFunction(); - auto graph = new ControlFlowGraph(ch_func); - auto config = new UDConfiguration(graph); - auto working = new UDRegMemWorkingSet(ch_func, graph); - auto usedef = new UseDef(config, working); - SccOrder order(graph); - order.genFull(0); - usedef->analyze(order.get()); - - UDState* startState=NULL; - typedef TreePatternCapture>RegisterValue; - - found = false; - for(auto bl : CIter::children(ch_func)) - { - for(auto ins : CIter::children(bl)) - { - auto state = working->getState(ins); - for(auto& def : state->getRegDefList()) - { - TreeCapture cap; - if(RegisterValue::matches(def.second, cap)) - { - - auto reg2 = dynamic_cast(cap.get(0))->getRegister(); - - if(reg2 == reg) //First instruction where reg is referenced - { - startState = state; //State found - auto use1 = state->getRegUse(def.first); - auto use2 = state->getRegUse(reg2); - for(auto u : use1) - { - auto tempFlag = searchDownDef(u, def.first, atfunc); - LOG(20, std::hex<getAddress()<<" Received "<getAddress()<<" Received "<getMemDef(reg); - if(treenode != NULL) //Assigned to a memory address, return false - { - return false; - } - } - else - break; + LOG(1,cur_func<<" is an IFUNC. 
Returning"); + return 0; + } + auto ipnode = getNode(cur_func); + if (ipnode == NULL) + { + LOG(1, "Could not find call graph node for function " << cur_func->getName()); + return false; + } + ipnode->addATReturn(atfunc); + for (const auto& parent_call_info : ipnode->getParentWithType()) + { + address_t call_site_address; + IPCallGraphNode* caller_node; + tie(call_site_address, caller_node) = parent_call_info.first; + + bool ptype = parent_call_info.second; + + auto caller_func = caller_node->getFunction(); + if(ptype == false) + continue; + Instruction* call_instr = findInstructionInFunction(caller_func, call_site_address); + + auto pworking = df.getWorkingSet(caller_func); + auto caller_state = pworking->getState(call_instr); + auto tempflag = searchDownDef(caller_state, reg1, atfunc); + flag = flag & tempflag; - } - if(found) - break; - } - } } - if(!found) - return false; return flag; } -bool IPCallGraph::searchDownDef(UDState* state, int reg1, Function* atfunc) +bool IPCallGraph::handleIcallOrIjump(UDState* state, int reg1, Function* atfunc) { - - for(auto v : visited_states) - { - Instruction* ins; - int rr; - tie(ins, rr) = v; - if((ins == state->getInstruction()) && (rr == reg1)) - return true; - } - tuple tup1(state->getInstruction(),reg1); - visited_states.insert(tup1); + auto instr = state->getInstruction(); auto cur_func = (Function*)instr->getParent()->getParent(); - if(dynamic_cast(instr->getSemantic())) //DF ends in a return statement - { - if(reg1 == 0) //RAX - { - bool flag = true; - //cout<<"FDF ENDS_IN_RETURN "<getName()<<" "<getName()<addATReturn(atfunc); - if(ipnode != NULL) - { - auto p = ipnode->getParentWithType(); - for(auto pp : p) - { - address_t paddr; - IPCallGraphNode* pnode; - tie(paddr, pnode) = pp.first; - auto pfunc = pnode->getFunction(); - bool ptype = pp.second; - if(ptype == false) - continue; - Instruction* pinstr = NULL; - for(auto bl : CIter::children(pfunc)) - { - for(auto instr : CIter::children(bl)) - { - 
if(instr->getAddress() == paddr) - { - pinstr = instr; - break; - } - } - } - auto pgraph = new ControlFlowGraph(pfunc); - auto pconfig = new UDConfiguration(pgraph); - auto pworking = new UDRegMemWorkingSet(pfunc, pgraph); - auto pusedef = new UseDef(pconfig, pworking); - SccOrder order(pgraph); - order.genFull(0); - pusedef->analyze(order.get()); - auto pstate = pworking->getState(pinstr); - auto tempflag = searchDownDef(pstate, reg1, atfunc); - //cout<getName()<<" invoked from "<getName()<<" "<getName()<<" "<getName()<(instr->getSemantic())) //DF ends in an indirect call + if(reg1 == 7 || reg1 == 6 || reg1 == 2 || reg1 == 1 || reg1 == 8 || reg1 == 9) //Argument to indirect call? { - if(reg1 == 7 || reg1 == 6 || reg1 == 2 || reg1 == 1 || reg1 == 8 || reg1 == 9) //Argument to indirect call? - { + LOG(1,"FDF ARGS_TO_ICALL "<getName()<<" "<getName()); return false; - } - else //Flowing into an indirect call. Adding as an edge at this indirect call - { - addEdge(instr->getAddress(), (Function*)instr->getParent()->getParent(), atfunc, false); - return true; - } - } - else if(auto iji = dynamic_cast(instr->getSemantic())) //DF ends in an indirect jump tail call recursion + else //Flowing into an indirect call. Adding as an edge at this indirect call { + LOG(1,"FDF USED_IN_ICALL "<getName()<<" used in icall "<getName()<<" @ instr "<getAddress()); + addEdge(instr->getAddress(), cur_func, atfunc, false); + return true; + } +} - if(!iji->isForJumpTable()) - { - if(reg1 == 7 || reg1 == 6 || reg1 == 2 || reg1 == 1 || reg1 == 8 || reg1 == 9) //Argument to indirect call? - { - return false; - } - else //Flowing into an indirect call. 
Adding as an edge at this indirect call - { - addEdge(instr->getAddress(), (Function*)instr->getParent()->getParent(), atfunc, false); - return true; - } - } +bool IPCallGraph::handleDirectCall(UDState* state, ControlFlowInstruction* cfi, int reg1, Function* atfunc) +{ + auto mnemonic = cfi->getMnemonic(); + auto instr = state->getInstruction(); + auto cur_func = (Function*)instr->getParent()->getParent(); - } - else if(dynamic_cast(instr->getSemantic())) + if(reg1 == 7 || reg1 == 6 || reg1 == 2 || reg1 == 1 || reg1 == 8 || reg1 == 9) //Argument to CFI? { - if(reg1 == 7 || reg1 == 6 || reg1 == 2 || reg1 == 1 || reg1 == 8 || reg1 == 9) //Argument to data link instruction - { - return false; - } - else - { //If any other register, then it is actually not used in this instruction - } - return true; + LOG(1,"FDF USED_AS_ARGS_TO_CALL "<getName()<<" used as args from callerfn "<getName()<<" @ "<getAddress()); + bool res = handleArgumentFnPtr(reg1, cur_func, instr, atfunc); + return res; } - if(auto cfi = dynamic_cast(instr->getSemantic())) //DF ends in a call/jmp + LOG(1,"FDF USED_IN_CALL "<getName()<<" "<getName()); + return true; +} + +bool IPCallGraph::handleDataLinked(UDState* state, int reg1, Function* atfunc) +{ + auto instr = state->getInstruction(); + auto cur_func = (Function*)instr->getParent()->getParent(); + if(reg1 == 7 || reg1 == 6 || reg1 == 2 || reg1 == 1 || reg1 == 8 || reg1 == 9) //Argument to data link instruction { - auto mnemonic = cfi->getMnemonic(); - if(mnemonic.find("call") == string::npos) //Not a call instruction - { + LOG(1,"FDF ENDS_IN_DFD "<getName()<<" "<getName()); return false; - } - if(reg1 == 7 || reg1 == 6 || reg1 == 2 || reg1 == 1 || reg1 == 8 || reg1 == 9) //Argument to CFI? 
- { - bool res = handleArgumentFnPtr(reg1, (Function*)instr->getParent()->getParent(), instr, atfunc); - return res; - } - - - return true; } - + LOG(1,"FDF DFD_NOT_ARGS "<getName()<<" "<getName()); + return true; +} + + + +bool IPCallGraph::handleRegisterDefinition(UDState* state, int reg1, Function* atfunc, bool& out_result) +{ bool regFlag = false; bool flag = true; - int reg=-1; - for(auto& def : state->getRegDefList()) //Register definition is found + int reg = -1; + + auto instr = state->getInstruction(); + auto cur_func = (Function*)instr->getParent()->getParent(); + for (auto& def : state->getRegDefList()) { regFlag = true; - - reg = def.first; - if(reg1 != -2) //Handling the starting call of this function when reg1 value is not set + + if(reg1 != -2 && reg1 != -1) //Handling the starting call of this function when reg1 value is not set { typedef TreePatternCapture>RegisterValue; TreeCapture cap; - if(RegisterValue::matches(def.second, cap)) + if(RegisterValue::matches(def.second, cap)) { auto reg2 = dynamic_cast(cap.get(0))->getRegister(); if(reg2 != reg1) { - return false; + LOG(1,"FDF REGDEF_MISMATCH1 "<getName()<<" "<getName()); + out_result = false; // Set the out-parameter + return true; } } - else - { - return false; - } - - } - auto reguse = state->getRegUse(reg); - - for(auto use : reguse) - { + } + + for (auto& use : state->getRegUse(reg)) + { auto tempFlag = searchDownDef(use, reg, atfunc); - flag = tempFlag & flag; + flag = flag & tempFlag; } } - if(regFlag) - { - - return flag; + + if (regFlag) { + out_result = flag; // Set the out-parameter with the final flag + LOG(1,"FDF RETURN_FROM_REGFLAG "<getName()<<" "<getName()); + return true; // Signal that we are done and have a result } - auto memdef = state->getMemDefList(); + + // If we reach here, we have no conclusive answer. Signal the caller to continue. 
+ return false; +} + +bool IPCallGraph::handleMemoryDefinition(UDState* state, int reg1, Function* atfunc) +{ + auto instr = state->getInstruction(); + auto cur_func = (Function*)instr->getParent()->getParent(); bool memFlag = false; - flag = true; - for(auto def : memdef) //If memory definition is found, return false - { - if(def.first != reg1) //Iff reg1 not used in memory definition, then we can return true + bool flag = true; + for(auto& def : state->getMemDefList()) //If memory definition is found, return false + { + if(def.first != reg1) //If reg1 not used in memory definition, then we can return true + { + LOG(1,"FDF NOT_ENDING_IN_MEMDEF "<getName()<<" "<getName()); flag = true; + } else - flag = false; + { + //cout<<"******** USES OF "<getInstruction()->getAddress()<getMemUse(reg1)) + { + //cout<<"*********** Found memuse of "<getInstruction()->getAddress()<getName()); + return flag; +} +bool IPCallGraph::searchDownDef(UDState* state, int reg1, Function* atfunc) { + + // Efficient search of visited_states + auto key = std::make_tuple(state->getInstruction(), reg1); + if (visited_states.count(key)) { + return true; // Already visited, assume success to break cycle } - if(flag) + visited_states.insert(key); + + auto instr = state->getInstruction(); + auto semantic = instr->getSemantic(); + + // Dispatch to the correct handler based on instruction type + // Handle return instruction + if (auto return_instr = dynamic_cast(semantic)) { - //cout<getAddress()<<" MEMDEF TRUE "<(semantic)) + { + return handleIcallOrIjump(state, reg1, atfunc); + } + else if (auto iji = dynamic_cast(semantic)) + { + if(!iji->isForJumpTable()) + { + return handleIcallOrIjump(state, reg1, atfunc); + } + } + else if(dynamic_cast(semantic)) + { + return handleDataLinked(state, reg1, atfunc); + } + else if (auto cfi = dynamic_cast(semantic)) + { + // This handles direct calls and jumps after the more specific cases + return handleDirectCall(state, cfi, reg1, atfunc); + } + + bool 
regdef_result; + + // Call the helper. If it returns true, we have our final answer. + if (handleRegisterDefinition(state, reg1, atfunc, regdef_result)) + { + return regdef_result; + } + //If it flows to a memory address + return handleMemoryDefinition(state, reg1, atfunc); } -bool IPCallGraph::forwardDataFlow(Function* f, Instruction* instr, Function* atfunc) +bool IPCallGraph::forwardDataFlow(Function* f, Instruction* instr, Function* atfunc, int analysisType) { - auto graph = new ControlFlowGraph(f); - auto config = new UDConfiguration(graph); - auto working = new UDRegMemWorkingSet(f, graph); - auto usedef = new UseDef(config, working); - SccOrder order(graph); - order.genFull(0); - usedef->analyze(order.get()); - auto state = working->getState(instr); - - + forwardDfAnalysisType = analysisType; + auto working = df.getWorkingSet(f); + auto state = working->getState(instr); + LOG(1,"FDF ENTER "<getName()<getAddress()); auto res = searchDownDef(state, -2, atfunc); //We don't have a register value to pass, so passing -2 visited_states.clear(); + LOG(1,"FDF EXIT "<getName()<getAddress()); return res; } diff --git a/analysis/app/src/ipcallgraph.h b/analysis/app/src/ipcallgraph.h index 819f86b..b27f754 100644 --- a/analysis/app/src/ipcallgraph.h +++ b/analysis/app/src/ipcallgraph.h @@ -1,6 +1,7 @@ #ifndef IPCALLGRAPH_ANALYSIS #define IPCALLGRAPH_ANALYSIS +#include "instr/linked-x86_64.h" #include "conductor/setup.h" #include "chunk/concrete.h" #include "analysis/usedef.h" @@ -200,6 +201,8 @@ class IPCallGraph DataFlow df; int totResolvedIcTarget=0; int totTypeArmorTarget=0; + std::map, bool> handle_arg_cache; + int forwardDfAnalysisType=0; public: map nodeMap; void addFunctionRoot(Function* func); @@ -248,7 +251,7 @@ class IPCallGraph } void setTypeArmor(bool flag) //typeArmorFlag determines if you want to add typearmor analysis to filter indirect call targets { - this->typeArmorFlag = true; + this->typeArmorFlag = flag; } void setTypeArmorPath(string path) { @@ 
-265,7 +268,7 @@ class IPCallGraph void printCallGraphWithCallsites(); void printIndirectEdges(); void printDirectEdges(); - bool forwardDataFlow(Function* f, Instruction* instr, Function* atfunc); + bool forwardDataFlow(Function* f, Instruction* instr, Function* atfunc, int analysisType=0); set getFunctionByAddress(address_t addr); Function* getNSSFunctionByName(string name); @@ -289,6 +292,13 @@ class IPCallGraph nssFuncNames.push_back(fname); } void addNssEdges(); + Instruction* findInstructionInFunction(Function* func, address_t addr); + bool handleReturnInstruction(UDState* state, int reg1, Function* atfunc); + bool handleIcallOrIjump(UDState* state, int reg1, Function* atfunc); + bool handleDirectCall(UDState* state, ControlFlowInstruction* cfi, int reg1, Function* atfunc); + bool handleDataLinked(UDState* state, int reg1, Function* atfunc); + bool handleRegisterDefinition(UDState* state, int reg1, Function* atfunc, bool& out_result); + bool handleMemoryDefinition(UDState* state, int reg1, Function* atfunc); }; #endif From 8f50d49fdd5c85b8bdcc894fa9a6137ce01d25e4 Mon Sep 17 00:00:00 2001 From: Vidya Date: Wed, 27 Aug 2025 11:21:16 +0000 Subject: [PATCH 03/14] Scalability update3 : changed functionRoots to deque and added functionRootSet, changes to addATFunction() --- analysis/app/src/ipcallgraph.cpp | 64 +++++++++++++++++++------------- analysis/app/src/ipcallgraph.h | 20 +++------- 2 files changed, 44 insertions(+), 40 deletions(-) diff --git a/analysis/app/src/ipcallgraph.cpp b/analysis/app/src/ipcallgraph.cpp index ab9ba2d..b0488db 100644 --- a/analysis/app/src/ipcallgraph.cpp +++ b/analysis/app/src/ipcallgraph.cpp @@ -240,8 +240,11 @@ map> IPCallGraphNode::getATList() void IPCallGraph::addFunctionRoot(Function* func) { - //if(visitedFunctions.count(func) != 0) - //s return; + if (functionRootSet.find(func) != functionRootSet.end()) + { + return; + } + functionRootSet.insert(func); functionRoots.push_back(func); } @@ -249,6 +252,7 @@ void 
IPCallGraph::setRoot(Function* func) { startfunc = func; functionRoots.push_back(func); + functionRootSet.insert(func); } vector IPCallGraph::getFiniFuncs() @@ -572,15 +576,14 @@ void IPCallGraph::findData() if(dataSection->getName()==".fini" || dataSection->getName()==".fini_array" || dataSection->getName()==".dtors") { LOG(20, "FINI Function "<getName()<<" found in "<getName()); - functionRoots.push_back(target); + addFunctionRoot(target); finiFuncs.push_back(target); } if(dataSection->getName()==".preinit_array" || dataSection->getName()==".init_array" || dataSection->getName()==".ctors") { LOG(20, "INIT Function "<getName()<<" found in "<getName()); - - functionRoots.push_back(target); + addFunctionRoot(target); initFuncs.push_back(target); } @@ -598,13 +601,13 @@ void IPCallGraph::findData() auto init = CIter::named(module->getFunctionList())->find("_init"); if (init) { - functionRoots.push_back(init); - initFuncs.push_back(init); + addFunctionRoot(init); + initFuncs.push_back(init); } auto fini = CIter::named(module->getFunctionList())->find("_fini"); if (fini) { - functionRoots.push_back(fini); + addFunctionRoot(fini); finiFuncs.push_back(fini); } } @@ -2025,22 +2028,26 @@ void IPCallGraph::generateDirectCallGraph() { checkForSymbols(); findData(); - while(functionRoots.size()) + + + while(!functionRoots.empty()) { - auto it = functionRoots.begin(); - auto next = *it; - functionRoots.erase(it); + auto next = functionRoots.front(); + functionRoots.pop_front(); findDirectEdges(next); } - copy(visited_direct.begin(), visited_direct.end(), back_inserter(functionRoots)); - while(functionRoots.size()) + for (Function* func : visited_direct) { + addFunctionRoot(func); + } + + while(!functionRoots.empty()) { - auto it = functionRoots.begin(); - auto next = *it; - functionRoots.erase(it); + auto next = functionRoots.front(); + functionRoots.pop_front(); findATList(next); } + } void IPCallGraph::generate() @@ -2048,20 +2055,25 @@ void IPCallGraph::generate() 
checkForSymbols(); findData(); - while(functionRoots.size()) + while(!functionRoots.empty()) { - auto it = functionRoots.begin(); - auto next = *it; - functionRoots.erase(it); + auto next = functionRoots.front(); + functionRoots.pop_front(); + functionRootSet.erase(next); findDirectEdges(next); } - copy(visited_direct.begin(), visited_direct.end(), back_inserter(functionRoots)); - while(functionRoots.size()) + LOG(1,"SIZE OF VISITED DIRECT "<getName()); + addFunctionRoot(func); + } + + while(!functionRoots.empty()) { - auto it = functionRoots.begin(); - auto next = *it; - functionRoots.erase(it); + auto next = functionRoots.front(); + functionRoots.pop_front(); + functionRootSet.erase(next); findATList(next); } diff --git a/analysis/app/src/ipcallgraph.h b/analysis/app/src/ipcallgraph.h index b27f754..3068286 100644 --- a/analysis/app/src/ipcallgraph.h +++ b/analysis/app/src/ipcallgraph.h @@ -1,6 +1,8 @@ #ifndef IPCALLGRAPH_ANALYSIS #define IPCALLGRAPH_ANALYSIS +#include + #include "instr/linked-x86_64.h" #include "conductor/setup.h" #include "chunk/concrete.h" @@ -51,19 +53,8 @@ class IPCallGraphNode } void addATFunction(address_t addr, IPCallGraphNode* t) { - auto at_iter = ATFunctions.find(addr); - if(at_iter != ATFunctions.end()) - { - auto at_set = at_iter->second; - at_set.insert(t); - at_iter->second = at_set; - } - else - { - set at_set; - at_set.insert(t); - ATFunctions[addr] = at_set; - } + auto& at_set = ATFunctions[addr]; // creates if not exists, returns reference + at_set.insert(t); // insert directly into the set } void setFunction(Function* f); @@ -187,7 +178,8 @@ class IPCallGraph void generateIndirectEdgesWithTypeArmor(IPCallGraphNode* n, address_t addr, set at); void parseTypeArmor(); void printNodeInfo(); - vector functionRoots; + std::deque functionRoots; + std::unordered_set functionRootSet; vector dataRoots; vector nssFunctions; vector nssFuncNames; From 7bfaa4e9b5c4b3672588f5c3885ddadfdedf1650 Mon Sep 17 00:00:00 2001 From: Vidya Date: Wed, 
27 Aug 2025 11:37:55 +0000 Subject: [PATCH 04/14] Added constructor to FPath, initializing recursion_iter --- analysis/app/src/syspartUtility.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/analysis/app/src/syspartUtility.h b/analysis/app/src/syspartUtility.h index cf93c25..d8781fc 100644 --- a/analysis/app/src/syspartUtility.h +++ b/analysis/app/src/syspartUtility.h @@ -139,6 +139,10 @@ struct FPath string fname; address_t iaddr; vector path; + + FPath(const std::string& fname_, address_t iaddr_, const std::vector& path_) + : fname(fname_), iaddr(iaddr_), path(path_) {} + bool operator==(const FPath& l) { if((l.fname == this->fname) &&(l.iaddr == this->iaddr)) @@ -157,6 +161,7 @@ class SyspartUtility this->ip_callgraph = ipc; this->iter = 0; this->analysisType = analysisType; + this->recursion_iter = 0; } void initialize(); From dadcf493d946039d83b725dfb1ddd579e563456b Mon Sep 17 00:00:00 2001 From: Vidya Date: Wed, 27 Aug 2025 12:08:27 +0000 Subject: [PATCH 05/14] Handling indirect calls with memory operands --- analysis/app/src/syspartUtility.cpp | 219 +++++++++++++++++++++++++--- 1 file changed, 196 insertions(+), 23 deletions(-) diff --git a/analysis/app/src/syspartUtility.cpp b/analysis/app/src/syspartUtility.cpp index dd200b2..e876809 100644 --- a/analysis/app/src/syspartUtility.cpp +++ b/analysis/app/src/syspartUtility.cpp @@ -1028,32 +1028,140 @@ bool SyspartUtility::findIndirectCallTargets(IPCallGraphNode* n) } }; + enum class BinaryOpType { ADD, SUB }; + auto combineResults = [&](const std::unordered_set& a, + const std::unordered_set& b, + BinaryOpType op, + std::unordered_set& out) + { + LOG(1, "Res1.size "< tot_count) + { + break; + } + for (const auto& res2 : b) + { + if(i > tot_count) + { + break; + } + i++; + if (res1.type == 0 || res2.type == 0) + { + pushUnknownIfNeeded(out, function); + + } + else if (res1.type == 1 && res2.type == 1) + { + address_t new_val = (op == BinaryOpType::ADD) + ? 
res1.addr + res2.addr + : res1.addr - res2.addr; + std::stringstream sstream; + sstream << "0x" << std::hex << new_val; + std::string new_str = sstream.str(); + out.emplace(1, new_val, new_str, function); + } + else + { + std::string op_str = (op == BinaryOpType::ADD) ? " + " : " - "; + std::string new_str = res1.desc + op_str + res2.desc; + out.emplace(3, 0, new_str, function); + } + } + } + }; + + enum class ConstOpType { ADD, SUB, MULT }; + auto combineWithConstant = [&](const std::unordered_set& vals, + address_t constant, + ConstOpType op, + std::unordered_set& out) + { + + LOG(1, "Res1.size "<getSemantic(); auto state = working->getState(instruction); - auto ici = dynamic_cast(semantic); - auto iji = dynamic_cast(semantic); + auto ici = dynamic_cast(semantic); + auto iji = dynamic_cast(semantic); - if (iji && iji->isForJumpTable()) - { + if (iji && iji->isForJumpTable()) + { continue; - } + } - if (!(ici || iji)) - { + if (!(ici || iji)) + { continue; - } + } + + // Handle iteration path logic + cur_function = function; + cur_instr = instruction; + bool continueFlag = true; - vector icTargets; - cur_function = function; - cur_instr = instruction; - auto continueFlag = true; if (iter > 1) { continueFlag = false; @@ -1084,13 +1192,11 @@ bool SyspartUtility::findIndirectCallTargets(IPCallGraphNode* n) } } } - } - else + } + else { - vector fp_vec; - fp_vec.push_back(cur_function); - FPath fp{cur_function->getName(),cur_instr->getAddress(), fp_vec}; - icPath.push_back(fp); + vector fp_vec{cur_function}; + icPath.emplace_back(cur_function->getName(), cur_instr->getAddress(), fp_vec); } if(!continueFlag) @@ -1110,6 +1216,8 @@ bool SyspartUtility::findIndirectCallTargets(IPCallGraphNode* n) } stack_depth = 0; + + vector icTargets; std::unordered_set results; if(ici && ici->hasMemoryOperand()) { @@ -1117,10 +1225,76 @@ bool SyspartUtility::findIndirectCallTargets(IPCallGraphNode* n) target.setUnknown(); icTargets.push_back(target); + auto indexReg = 
X86Register::convertToPhysical(ici->getIndexRegister()); + auto reg = X86Register::convertToPhysical(ici->getRegister()); + auto scale = ici->getScale(); + auto disp = ici->getDisplacement(); + + InstrDumper instrdumper(instruction->getAddress(), INT_MIN); + instruction->getSemantic()->accept(&instrdumper); + LOG(1, "ICALL_MEM_OP REG " << std::dec << reg << " INDEXREG = "<< indexReg << " SCALE = " << ici->getScale() << std::hex << " DISP = " << ici->getDisplacement()); + + std::unordered_set regResult; + std::unordered_set indexResult; + + if(reg != -1) + { + resolveRegister(reg, state, function, regResult); + } + + if(indexReg != -1) + { + resolveRegister(indexReg, state, function, indexResult); + } + instruction->getSemantic()->accept(&instrdumper); + + // disp(reg, index, scale) = value at memory address (reg + index * scale + disp) + if(indexReg == -1) // reg + disp + { + if(disp > 0) + combineWithConstant(regResult, (address_t)disp, ConstOpType::ADD, results); + else if(disp < 0) + { + disp = (-1) * disp; + combineWithConstant(regResult, (address_t)disp, ConstOpType::SUB, results); + } + else //disp = 0 + { + results = regResult; + } + } + else // reg + index * scale + disp + { + // index*scale + std::unordered_set inter_result1; + combineWithConstant(indexResult, scale, ConstOpType::MULT, inter_result1); + + // reg + (index*scale) + std::unordered_set inter_result2; + combineResults(inter_result1, regResult, BinaryOpType::ADD, inter_result2); + + // reg + (index*scale) + disp + if(disp > 0) + combineWithConstant(inter_result2, (address_t)disp, ConstOpType::ADD, results); + else if(disp < 0) + { + disp = (-1) * disp; + combineWithConstant(inter_result2, (address_t)disp, ConstOpType::SUB, results); + } + else // disp = 0 + { + results = inter_result2; + } + } + + LOG(1, "ICALL_RESOLVE UNRESOLVE_MEMORY_OPERAND " << std::hex << instruction->getAddress() + << " " << std::dec << instruction->getAddress() << " " << function->getName()); + ////indent(); + LOG(1, 
"CAUTION : UNRESOLVED IC DUE TO MEMORY OPERAND"); } else { - int reg; + int reg = -1; if(ici) { reg = X86Register::convertToPhysical(ici->getRegister()); @@ -1130,7 +1304,7 @@ bool SyspartUtility::findIndirectCallTargets(IPCallGraphNode* n) reg = X86Register::convertToPhysical(iji->getRegister()); } resolveRegister(reg, state, function, results); - + } for (auto& r : results) { printResult(r); @@ -1163,7 +1337,6 @@ bool SyspartUtility::findIndirectCallTargets(IPCallGraphNode* n) } } - } //Add resolved indirect calls bool resolved = true; set targets; From a8e90b7004e8801cc05c380232256395824d6f41 Mon Sep 17 00:00:00 2001 From: Vidya Date: Mon, 8 Sep 2025 20:19:46 +0000 Subject: [PATCH 06/14] Print caller function address while printing the callgraph --- analysis/app/src/ipcallgraph.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/analysis/app/src/ipcallgraph.cpp b/analysis/app/src/ipcallgraph.cpp index b0488db..74ca433 100644 --- a/analysis/app/src/ipcallgraph.cpp +++ b/analysis/app/src/ipcallgraph.cpp @@ -910,7 +910,7 @@ void IPCallGraph::printCallGraphWithCallsites() auto offset = c.first - (n.first)->getAddress(); for(auto nn : c.second) { - cout<<"DIRECT "<getName()<<" "<getFunction()->getAddress()<<" "<getFunction()->getName()<<" "<getFunction()->getParent()->getParent()->getName()<getName()<<" "<getFunction()->getAddress()<<" "<getFunction()->getName()<<" "<getFunction()->getParent()->getParent()->getName()<<" "<<(n.first)->getAddress()<getIndirectChildren()) @@ -921,7 +921,7 @@ void IPCallGraph::printCallGraphWithCallsites() //cout<<"IRESOLVEDCALLSITE "<getName()<getName()<<" "<getFunction()->getAddress()<<" "<getFunction()->getName()<<" "<getFunction()->getParent()->getParent()->getName()<getName()<<" "<getFunction()->getAddress()<<" "<getFunction()->getName()<<" "<getFunction()->getParent()->getParent()->getName()<<" "<<(n.first)->getAddress()<getName()<getName()<<" "<getFunction()->getAddress()<<" "<getFunction()->getName()<<" 
"<getFunction()->getParent()->getParent()->getName()<getName()<<" "<getFunction()->getAddress()<<" "<getFunction()->getName()<<" "<getFunction()->getParent()->getParent()->getName()<<" "<<(n.first)->getAddress()< Date: Mon, 8 Sep 2025 20:21:06 +0000 Subject: [PATCH 07/14] Added options to parse a list of start functions and generate callgraph, direct syscalls, allfunctions and printing execution time to standard error --- analysis/app/src/syspart.cpp | 76 ++++++++++++++++++++++++++++++++ analysis/app/src/syspart.h | 4 ++ analysis/app/src/testSyspart.cpp | 58 ++++++++++++++++++------ 3 files changed, 124 insertions(+), 14 deletions(-) diff --git a/analysis/app/src/syspart.cpp b/analysis/app/src/syspart.cpp index a6213a5..627816d 100644 --- a/analysis/app/src/syspart.cpp +++ b/analysis/app/src/syspart.cpp @@ -23,6 +23,11 @@ void Syspart::setStartFunc(Function *func) this->start_func = func; } +void Syspart::setStartFuncFile(string file) +{ + this->startFuncFile = file; +} + void Syspart::setProgram(Program *program) { this->program = program; @@ -125,6 +130,39 @@ void Syspart::findDirectSyscallsOfModule(Module* m) } } +void Syspart::printDirectSyscalls() +{ + for(auto m : CIter::children(program)) + { + for(auto f : CIter::functions(m)) + { + FindSyscalls findSyscalls; + f->accept(&findSyscalls); + auto list = findSyscalls.getNumberMap(); + + for(auto kv : list) + { + auto syscallValues = kv.second; + for(auto value : syscallValues) + { + cout<getName()<<" "<getAddress()<<" "<getName()<<" " <getName()<<" "<getAddress()<<" "<getName()< finiFuncs; vector initFuncs; //std::map system_calls = {{0,"read"}, {1,"write"}, {2,"open"}, {3,"close"}, {4,"stat"}, {5,"fstat"}, {6,"lstat"}, {7,"poll"}, {8,"lseek"}, {9,"mmap"}, {10,"mprotect"}, {11,"munmap"}, {12,"brk"}, {13,"rt_sigaction"}, {14,"rt_sigprocmask"}, {15,"rt_sigreturn"}, {16,"ioctl"}, {17,"pread64"}, {18,"pwrite64"}, {19,"readv"}, {20,"writev"}, {21,"access"}, {22,"pipe"}, {23,"select"}, {24,"sched_yield"}, 
{25,"mremap"}, {26,"msync"}, {27,"mincore"}, {28,"madvise"}, {29,"shmget"}, {30,"shmat"}, {31,"shmctl"}, {32,"dup"}, {33,"dup2"}, {34,"pause"}, {35,"nanosleep"}, {36,"getitimer"}, {37,"alarm"}, {38,"setitimer"}, {39,"getpid"}, {40,"sendfile"}, {41,"socket"}, {42,"connect"}, {43,"accept"}, {44,"sendto"}, {45,"recvfrom"}, {46,"sendmsg"}, {47,"recvmsg"}, {48,"shutdown"}, {49,"bind"}, {50,"listen"}, {51,"getsockname"}, {52,"getpeername"}, {53,"socketpair"}, {54,"setsockopt"}, {55,"getsockopt"}, {56,"clone"}, {57,"fork"}, {58,"vfork"}, {59,"execve"}, {60,"exit"}, {61,"wait4"}, {62,"kill"}, {63,"uname"}, {64,"semget"}, {65,"semop"}, {66,"semctl"}, {67,"shmdt"}, {68,"msgget"}, {69,"msgsnd"}, {70,"msgrcv"}, {71,"msgctl"}, {72,"fcntl"}, {73,"flock"}, {74,"fsync"}, {75,"fdatasync"}, {76,"truncate"}, {77,"ftruncate"}, {78,"getdents"}, {79,"getcwd"}, {80,"chdir"}, {81,"fchdir"}, {82,"rename"}, {83,"mkdir"}, {84,"rmdir"}, {85,"creat"}, {86,"link"}, {87,"unlink"}, {88,"symlink"}, {89,"readlink"}, {90,"chmod"}, {91,"fchownhmod"}, {92,"chown"}, {93,"fchown"}, {94,"lchown"}, {95,"umask"}, {96,"gettimeofday"}, {97,"getrlimit"}, {98,"getrusage"}, {99,"sysinfo"}, {100,"times"}, {101,"ptrace"}, {102,"getuid"}, {103,"syslog"}, {104,"getgid"}, {105,"setuid"}, {106,"setgid"}, {107,"geteuid"}, {108,"getegid"}, {109,"setpgid"}, {110,"getppid"}, {111,"getpgrp"}, {112,"setsid"}, {113,"setreuid"}, {114,"setregid"}, {115,"getgroups"}, {116,"setgroups"}, {117,"setresuid"}, {118,"getresuid"}, {119,"setresgid"}, {120,"getresgid"}, {121,"getpgid"}, {122,"setfsuid"}, {123,"setfsgid"}, {124,"getsid"}, {125,"capget"}, {126,"capset"}, {127,"rt_sigpending"}, {128,"rt_sigtimedwait"}, {129,"rt_sigqueueinfo"}, {130,"rt_sigsuspend"}, {131,"sigaltstack"}, {132,"utime"}, {133,"mknod"}, {134,"uselib"}, {135,"personality"}, {136,"ustat"}, {137,"statfs"}, {138,"fstatfs"}, {139,"sysfs"}, {140,"getpriority"}, {141,"setpriority"}, {142,"sched_setparam"}, {143,"sched_getparam"}, {144,"sched_setscheduler"}, 
{145,"sched_getscheduler"}, {146,"sched_get_priority_max"}, {147,"sched_get_priority_min"}, {148,"sched_rr_get_interval"}, {149,"mlock"}, {150,"munlock"}, {151,"mlockall"}, {152,"munlockall"}, {153,"vhangup"}, {154,"modify_ldt"}, {155,"pivot_root"}, {156,"_sysctl"}, {157,"prctl"}, {158,"arch_prctl"}, {159,"adjtimex"}, {160,"setrlimit"}, {161,"chroot"}, {162,"sync"}, {163,"acct"}, {164,"settimeofday"}, {165,"mount"}, {166,"umount2"}, {167,"swapon"}, {168,"swapoff"}, {169,"reboot"}, {170,"sethostname"}, {171,"setdomainname"}, {172,"iopl"}, {173,"ioperm"}, {174,"create_module"}, {175,"init_module"}, {176,"delete_module"}, {177,"get_kernel_syms"}, {178,"query_module"}, {179,"quotactl"}, {180,"nfsservctl"}, {181,"getpmsg"}, {182,"putpmsg"}, {183,"afs_syscall"}, {184,"tuxcall"}, {185,"security"}, {186,"gettid"}, {187,"readahead"}, {188,"setxattr"}, {189,"lsetxattr"}, {190,"fsetxattr"}, {191,"getxattr"}, {192,"lgetxattr"}, {193,"fgetxattr"}, {194,"listxattr"}, {195,"llistxattr"}, {196,"flistxattr"}, {197,"removexattr"}, {198,"lremovexattr"}, {199,"fremovexattr"}, {200,"tkill"}, {201,"time"}, {202,"futex"}, {203,"sched_setaffinity"}, {204,"sched_getaffinity"}, {205,"set_thread_area"}, {206,"io_setup"}, {207,"io_destroy"}, {208,"io_getevents"}, {209,"io_submit"}, {210,"io_cancel"}, {211,"get_thread_area"}, {212,"lookup_dcookie"}, {213,"epoll_create"}, {214,"epoll_ctl_old"}, {215,"epoll_wait_old"}, {216,"remap_file_pages"}, {217,"getdents64"}, {218,"set_tid_address"}, {219,"restart_syscall"}, {220,"semtimedop"}, {221,"fadvise64"}, {222,"timer_create"}, {223,"timer_settime"}, {224,"timer_gettime"}, {225,"timer_getoverrun"}, {226,"timer_delete"}, {227,"clock_settime"}, {228,"clock_gettime"}, {229,"clock_getres"}, {230,"clock_nanosleep"}, {231,"exit_group"}, {232,"epoll_wait"}, {233,"epoll_ctl"}, {234,"tgkill"}, {235,"utimes"}, {236,"vserver"}, {237,"mbind"}, {238,"set_mempolicy"}, {239,"get_mempolicy"}, {240,"mq_open"}, {241,"mq_unlink"}, {242,"mq_timedsend"}, 
{243,"mq_timedreceive"}, {244,"mq_notify"}, {245,"mq_getsetattr"}, {246,"kexec_load"}, {247,"waitid"}, {248,"add_key"}, {249,"request_key"}, {250,"keyctl"}, {251,"ioprio_set"}, {252,"ioprio_get"}, {253,"inotify_init"}, {254,"inotify_add_watch"}, {255,"inotify_rm_watch"}, {256,"migrate_pages"}, {257,"openat"}, {258,"mkdirat"}, {259,"mknodat"}, {260,"fchownat"}, {261,"futimesat"}, {262,"newfstatat"}, {263,"unlinkat"}, {264,"renameat"}, {265,"linkat"}, {266,"symlinkat"}, {267,"readlinkat"}, {268,"fchmodat"}, {269,"faccessat"}, {270,"pselect6"}, {271,"ppoll"}, {272,"unshare"}, {273,"set_robust_list"}, {274,"get_robust_list"}, {275,"splice"}, {276,"tee"}, {277,"sync_file_range"}, {278,"vmsplice"}, {279,"move_pages"}, {280,"utimensat"}, {281,"epoll_pwait"}, {282,"signalfd"}, {283,"timerfd_create"}, {284,"eventfd"}, {285,"fallocate"}, {286,"timerfd_settime"}, {287,"timerfd_gettime"}, {288,"accept4"}, {289,"signalfd4"}, {290,"eventfd2"}, {291,"epoll_create1"}, {292,"dup3"}, {293,"pipe2"}, {294,"inotify_init1"}, {295,"preadv"}, {296,"pwritev"}, {297,"rt_tgsigqueueinfo"}, {298,"perf_event_open"}, {299,"recvmmsg"}, {300,"fanotify_init"}, {301,"fanotify_mark"}, {302,"prlimit64"}, {303,"name_to_handle_at"}, {304,"open_by_handle_at"}, {305,"clock_adjtime"}, {306,"syncfs"}, {307,"sendmmsg"}, {308,"setns"}, {309,"getcpu"}, {310,"process_vm_readv"}, {311,"process_vm_writev"}, {312,"kcmp"}, {313,"finit_module"}, {314,"sched_setattr"}, {315,"sched_getattr"}, {316,"renameat2"}, {317,"seccomp"}, {318,"getrandom"}, {319,"memfd_create"}, {320,"kexec_file_load"}, {321,"bpf"}, {322,"execveat"}, {323,"userfaultfd"}, {324,"membarrier"}, {325,"mlock2"}, {326,"copy_file_range"}, {327,"preadv2"}, {328,"pwritev2"}, {329,"pkey_mprotect"}, {330,"pkey_alloc"}, {331,"pkey_free"}, {332,"statx"}, {333,"io_pgetevents"}, {334,"rseq"}}; @@ -49,6 +50,7 @@ class Syspart public : //Initialization and general functionalities void setStartFunc(Function *func); + void setStartFuncFile(string file); void 
setProgram(Program *program); void setConductorSetup(ConductorSetup *setup); Function* findFunctionByName(string fname); @@ -119,5 +121,7 @@ class Syspart int getNoReturnFnCount(); void printFunctions(); void printDlArgs(string dlname); + void printDirectSyscalls(); + void run15(bool direct, bool icanalysisFlag, bool typearmorFlag); }; #endif diff --git a/analysis/app/src/testSyspart.cpp b/analysis/app/src/testSyspart.cpp index 52f8fb5..0d6c1fc 100644 --- a/analysis/app/src/testSyspart.cpp +++ b/analysis/app/src/testSyspart.cpp @@ -17,8 +17,10 @@ using namespace std::chrono; char *filename=NULL; char *func_name; char *func_addr; +string start_func_file; bool funcFlag = false; -bool isAddr = true; +bool isAddr = false; +bool isFile = false; bool typearmorFlag = false; string typearmorPath; bool icanalysisFlag = false; @@ -38,11 +40,15 @@ static int parse_opt (int key, char *arg, struct argp_state *state) case 's': if(arg[0] == '0' && arg[1] == 'x') { func_addr = arg; + isAddr = true; } - else + else if(arg[0] == '/') + { + isFile = true; + start_func_file = arg; + } { - isAddr = false; func_name = arg; } @@ -110,10 +116,14 @@ int main(int argc, char *argv[]) 19. Prints if fork() and pthread() functions are invoked within the application \n \ 20. Print all functions of all modules \n \ 21. Prints the arguments to dlopen() \n \ - 22. Prints the arguments to dlsym()" }, + 22. Prints the arguments to dlsym() \n \ + 23. Prints the callgraph from a set of start functions \n \ + 24. Prints the direct syscalls \n \ + 25. 
Prints all functions with their addresses and modules \n \ + "}, { 0 } }; - struct argp argp = { options, parse_opt }; + struct argp argp = { options, parse_opt }; argp_parse (&argp, argc, argv, 0, 0, 0); if(filename == NULL) { @@ -146,16 +156,21 @@ int main(int argc, char *argv[]) prog->accept(&collapsePLT); Function *start_func = NULL; - if(!isAddr) + if(isFile) { - start_func = sp.findFunctionByName(func_name); + sp.setStartFuncFile(start_func_file); } - else + else if(isAddr) { address_t address = (address_t)strtol(func_addr, NULL, 16); start_func = sp.findFunctionByAddress(address); + sp.setStartFunc(start_func); + } + else + { + start_func = sp.findFunctionByName(func_name); + sp.setStartFunc(start_func); } - sp.setStartFunc(start_func); if(typearmorFlag) sp.setTypeArmorPath(typearmorPath); switch(option) @@ -470,13 +485,28 @@ int main(int argc, char *argv[]) } case 21 : { - sp.printDlArgs("dlopen@"); - break; + sp.printDlArgs("dlopen@"); + break; } case 22 : { - sp.printDlArgs("dlsym@"); - break; + sp.printDlArgs("dlsym@"); + break; + } + case 23 : + { + sp.run15(direct_flag, icanalysisFlag, typearmorFlag); + break; + } + case 24 : + { + sp.printDirectSyscalls(); + break; + } + case 25 : + { + sp.printAllFunctions(); + break; } default : { cout<<"\nInvalid option"<(stop - start); - //cout << "Time taken for the analysis: "< Date: Mon, 8 Sep 2025 20:21:49 +0000 Subject: [PATCH 08/14] Added scripts to compute system calls of a binary program --- analysis/app/src/scripts/compute_syscalls.sh | 60 ++++++ analysis/app/src/scripts/getsyscalls.py | 176 ++++++++++++++++++ .../src/scripts/parse_callgraph_to_json.py | 143 ++++++++++++++ 3 files changed, 379 insertions(+) create mode 100755 analysis/app/src/scripts/compute_syscalls.sh create mode 100644 analysis/app/src/scripts/getsyscalls.py create mode 100644 analysis/app/src/scripts/parse_callgraph_to_json.py diff --git a/analysis/app/src/scripts/compute_syscalls.sh b/analysis/app/src/scripts/compute_syscalls.sh new 
file mode 100755 index 0000000..98d66f3 --- /dev/null +++ b/analysis/app/src/scripts/compute_syscalls.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +show_help() { + cat << EOF +Usage: $0 arg1 arg2 arg3 [--log] + +Arguments: + arg1 ELF binary to be analysed + arg2 Folder to store outputs + arg3 Path to a file containing one or more start functions (one per line) + +Options: + --log This will log the paths to the system calls from the start functions in logfile.txt in the output folder + -h, --help + Show this help message and exit + +Outputs: + syscalls.txt List of system calls reachable from the start functions + callgraph.txt Callgraph of the ELF binary and its libraries + syscalls_with_callsites.txt The functions within the call graph where system calls are invoked + allfunctions.txt List of all functions in ELF binary and its libraries along with their addresses + logfile.txt Contains paths to the system calls from the start functions +EOF +} + +if [[ $# -lt 3 || "$1" == "-h" || "$1" == "--help" ]]; then + show_help + exit 0 +fi + +APP=$1 +OUT=$(realpath $2) +STARTFILE=$(realpath $3) +shift 3 + +log_flag=false +while [[ $# -gt 0 ]]; do + case $1 in + --log) log_flag=true ;; + esac + shift +done + +temp_start_func=$(head -1 $STARTFILE) +./syspart -p $APP -s $temp_start_func -a 25 > $OUT/allfunctions.txt +./syspart -p $APP -s $temp_start_func -a 24 > $OUT/syscalls_with_callsites.txt +./syspart -p $APP -s $STARTFILE -i -a 23 | python3 src/scripts/parse_callgraph_to_json.py - $OUT/callgraph.json + +rm "${OUT}/startfuncs_with_addr.txt" +while read -r START_FUNC; do + start_addr=$(awk -v fname="$START_FUNC" '$1 == fname {print $2}' "$OUT/allfunctions.txt") + if [ -n "$start_addr" ]; then + echo "${START_FUNC}_${start_addr}" >> "${OUT}/startfuncs_with_addr.txt" + fi +done < "$STARTFILE" +if [ "$log_flag" = true ]; then + python3 src/scripts/getsyscalls.py $OUT/callgraph.json $OUT/syscalls_with_callsites.txt $OUT/startfuncs_with_addr.txt --log $OUT/logfile.txt > $OUT/syscalls.txt 
+else + python3 src/scripts/getsyscalls.py $OUT/callgraph.json $OUT/syscalls_with_callsites.txt $OUT/startfuncs_with_addr.txt > $OUT/syscalls.txt +fi diff --git a/analysis/app/src/scripts/getsyscalls.py b/analysis/app/src/scripts/getsyscalls.py new file mode 100644 index 0000000..7b4d37d --- /dev/null +++ b/analysis/app/src/scripts/getsyscalls.py @@ -0,0 +1,176 @@ +import sys +import argparse +import logging +import json +from collections import deque + +class DirectedGraph: + def __init__(self): + self.graph = {} + self.parents = {} + self.nedges = 0; + + def add_node(self, node): + if node not in self.graph: + self.graph[node] = [] + if node not in self.parents: + self.parents[node] = [] + + def add_edge(self, src, dest, edge_type): + self.add_node(src) + self.add_node(dest) + self.graph[src].append((dest, edge_type)) + self.parents[dest].append((src, edge_type)) + self.nedges = self.nedges + 1 + + def get_neighbors(self, node): + return self.graph.get(node, []) + + def get_parents(self, node): + return self.parents.get(node, []) + + def print_edges(self): + for src, neighbors in self.graph.items(): + for dest, edge_type in neighbors: + print(f"{src} -[{edge_type}]-> {dest}") + +graph = DirectedGraph() +targets = set() +startfuncs = set() + +def generate_callgraph(filename): + with open(filename, 'r') as f: + for line in f: + parts = line.strip().split() + if len(parts) >= 8: + edgetype = parts[0] + src = parts[3] + "_" + parts[7] + dest = parts[5] + "_" + parts[4] + graph.add_edge(src,dest,edgetype); + +def generate_callgraph_from_json(json_file): + """ + Reads the callgraph JSON file and constructs a DirectedGraph object. + + Args: + json_file (str): Path to the JSON callgraph. 
+ """ + with open(json_file, 'r') as f: + data = json.load(f) + + # Build a mapping from function ID to function info + func_id_to_info = {func["id"]: func for func in data["functions"]} + + # Process call graph edges + for edge in data["call_graph"]: + caller_id = edge["caller_id"] + caller_info = func_id_to_info[caller_id] + caller_str = f"{caller_info['name']}_{caller_info['addr']}" + callsite = edge.get("callsite", "") + + if edge["callee_type"] == "individual": + callee_id = edge["callee_id"] + callee_info = func_id_to_info[callee_id] + callee_str = f"{callee_info['name']}_{callee_info['addr']}" + graph.add_edge(f"{caller_str}", callee_str, edge["edge_type"]) + elif edge["callee_type"] == "group": + group_name = edge["callee_ref"] + for callee_id in data["callee_groups"][group_name]: + callee_info = func_id_to_info[callee_id] + callee_str = f"{callee_info['name']}_{callee_info['addr']}" + graph.add_edge(f"{caller_str}", callee_str, edge["edge_type"]) + + +def read_syscalls(filename): + with open(filename, 'r') as f: + for line in f: + parts = line.strip().split() + if len(parts) >= 5: + src = parts[0] + "_" + parts[1] + syscall = parts[3] + graph.add_edge(src,syscall,"syscall") + targets.add(syscall) + +def read_start_funcs(filename): + with open(filename, 'r') as f: + for line in f: + parts = line.strip().split() + if len(parts) >= 1: + func = parts[0] + startfuncs.add(func) + +def find_reachable_syscalls(debug=False): + visited = set() + reachable_targets = set() + parent = {} + + queue = deque(startfuncs) + for s in startfuncs: + visited.add(s) + parent[s] = (None, None) + + while queue: + node = queue.popleft() + + #if debug: + # print(f"Processing {node}") + if node in targets: + reachable_targets.add(node) + if debug: + path = [] + cur = node + while cur is not None: + prev, edge_type = parent.get(cur, (None,None)) + if prev is not None: + path.append(f"{prev} - [{edge_type}]-> {cur}") + cur = prev + path.reverse() + if debug: + logging.debug(f"\nPath to 
{node}:\n " + "\n ".join(path))
+
+        for neighbor,edge_type in graph.get_neighbors(node):
+            if neighbor not in visited:
+                visited.add(neighbor)
+                parent[neighbor] = (node, edge_type)
+                queue.append(neighbor)
+                #if debug:
+                #    print(f" Processing edge : {node} -> {neighbor}")
+
+    return list(reachable_targets)
+
+
+def main():
+    """Load the call graph, syscall sites and start functions, then print every reachable syscall."""
+    parser = argparse.ArgumentParser(description="Compute reachable syscalls from startfunc")
+    parser.add_argument("args1", help="Callgraph file")
+    parser.add_argument("args2", help="Syscalls with callsite info")
+    parser.add_argument("args3", help="file with list of start functions")
+    parser.add_argument("--log", type=str, help="Enable logging")
+    args = parser.parse_args()
+    log_flag = bool(args.log)
+    if log_flag:
+        logging.basicConfig(filename=args.log,
+                            filemode='w', # overwrite existing file
+                            level=logging.DEBUG,
+                            format='%(message)s')
+    else:
+        logging.basicConfig(level=logging.CRITICAL) # suppress debug/info logs
+
+    callgraphfile = args.args1
+    syscallfile = args.args2
+    startfuncfile = args.args3
+
+    # The call graph file is the JSON written by parse_callgraph_to_json.py, so only
+    # the JSON loader runs. generate_callgraph() expects the raw 8-column text
+    # format; applied to the JSON file it would turn every line that has eight or
+    # more whitespace-separated tokens into a bogus edge.
+    generate_callgraph_from_json(callgraphfile)
+    read_syscalls(syscallfile)
+    read_start_funcs(startfuncfile)
+    reachable = find_reachable_syscalls(log_flag)
+
+    for r in reachable:
+        print(r)
+
+if __name__ == "__main__":
+    main()
diff --git a/analysis/app/src/scripts/parse_callgraph_to_json.py b/analysis/app/src/scripts/parse_callgraph_to_json.py
new file mode 100644
index 0000000..1b750ba
--- /dev/null
+++ b/analysis/app/src/scripts/parse_callgraph_to_json.py
@@ -0,0 +1,143 @@
+import sys
+import json
+from collections import defaultdict
+
+def stream_callgraph_to_json_per_callsite(output_filepath, input_stream, min_group_occurrences=2):
+    """
+    Memory-efficient streaming parser that reads a callgraph and writes JSON.
+    Groups callees per caller function and specific callsite.
+    """
+    # (name, addr) -> id. A bare name is not a safe key: the same name can occur in more
+    # than one module. (name, addr) is also how getsyscalls.py labels its "name_addr" nodes.
+    func_to_id = {}
+    func_info = {}  # full info for each function: name, module, addr
+    func_counter = 1
+    caller_to_callees = defaultdict(list)  # key = ((caller_fn, caller_addr), callsite)
+
+    # --- Step 1: read edges and assign function IDs ---
+    for line in input_stream:
+        parts = line.strip().split()
+        if len(parts) < 8:
+            if line.strip():
+                print(f"Skipping malformed line: {line.strip()}", file=sys.stderr)
+            continue
+
+        edge_type = parts[0]
+        caller_module = parts[1]
+        callsite = parts[2]
+        caller_fn = parts[3]
+        callee_addr = parts[4]
+        callee_fn = parts[5]
+        callee_module = parts[6]
+        caller_addr = parts[7]
+
+        # assign function IDs and store info; a function is identified by (name, addr)
+        for fn, mod, addr in [(caller_fn, caller_module, caller_addr), (callee_fn, callee_module, callee_addr)]:
+            if (fn, addr) not in func_to_id:
+                fid = f"func_{func_counter}"
+                func_to_id[(fn, addr)] = fid
+                func_info[fid] = {"id": fid, "name": fn, "module": mod, "addr": addr}
+                func_counter += 1
+
+        # append edge info keyed by ((caller_fn, caller_addr), callsite)
+        caller_to_callees[((caller_fn, caller_addr), callsite)].append({
+            "callee_key": (callee_fn, callee_addr),
+            "edge_type": edge_type
+        })
+
+    # --- Step 2: detect common callee groups per callsite ---
+    callee_set_counts = defaultdict(int)
+    for edges in caller_to_callees.values():
+        callees_tuple = tuple(sorted(e["callee_key"] for e in edges))
+        callee_set_counts[callees_tuple] += 1
+
+    callee_group_definitions = {}
+    callee_groups = {}
+    next_group_id = 1
+    for callees_tuple, count in callee_set_counts.items():
+        if count >= min_group_occurrences and len(callees_tuple) > 1:
+            group_name = f"group_{next_group_id}"
+            callee_group_definitions[callees_tuple] = group_name
+            callee_groups[group_name] = [func_to_id[key] for key in callees_tuple]
+            next_group_id += 1
+
+    # --- Step 3: write JSON incrementally ---
+    with open(output_filepath, 'w') as f:
+        f.write('{\n')
+
+        # Functions
+        f.write(' "functions": [\n')
+        funcs = sorted(func_info.values(), key=lambda x: x["id"])
+        for i, info in enumerate(funcs):
+            f.write(f' {json.dumps(info)}')
+            f.write(',\n' if i < len(funcs)-1 else '\n')
+        f.write(' ],\n')
+
+        # Callee groups
+        f.write(' "callee_groups": {\n')
+        for i, (group_name, callee_ids) in enumerate(callee_groups.items()):
+            f.write(f' "{group_name}": {json.dumps(callee_ids)}')
+            f.write(',\n' if i < len(callee_groups)-1 else '\n')
+        f.write(' },\n')
+
+        # Call graph edges; count them first so the last one is written without a trailing comma
+        f.write(' "call_graph": [\n')
+        total_edges = 0
+        for edges in caller_to_callees.values():
+            callees_tuple = tuple(sorted(e["callee_key"] for e in edges))
+            if callees_tuple in callee_group_definitions:
+                total_edges += 1 # group counts as 1
+            else:
+                total_edges += len(edges) # individual edges count separately
+
+        written = 0
+        for (caller_key, callsite), edges in caller_to_callees.items():
+            caller_id = func_to_id[caller_key]
+            callees_tuple = tuple(sorted(e["callee_key"] for e in edges))
+
+            # Check if this callsite's callee set is a group
+            if callees_tuple in callee_group_definitions:
+                edge_obj = {
+                    "caller_id": caller_id,
+                    "callee_type": "group",
+                    "callee_ref": callee_group_definitions[callees_tuple],
+                    "edge_type": edges[0]["edge_type"], # assume same type
+                    "callsite": callsite
+                }
+                f.write(f' {json.dumps(edge_obj)}')
+                written += 1
+                f.write(',\n' if written < total_edges else '\n')
+            else:
+                for edge in edges:
+                    edge_obj = {
+                        "caller_id": caller_id,
+                        "callee_type": "individual",
+                        "callee_id": func_to_id[edge["callee_key"]],
+                        "edge_type": edge["edge_type"],
+                        "callsite": callsite
+                    }
+                    f.write(f' {json.dumps(edge_obj)}')
+                    written += 1
+                    f.write(',\n' if written < total_edges else '\n')
+
+        f.write(' ]\n')
+        f.write('}\n')
+
+    print(f"Call graph saved to {output_filepath}", file=sys.stderr)
+
+
+# --- Main ---
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print(f"Usage: {sys.argv[0]} <callgraph.txt|-> <output.json>", file=sys.stderr)
+        sys.exit(1)
+
+    input_arg = sys.argv[1]
+    output_file = sys.argv[2]
+
+    if input_arg == "-":
+        stream_callgraph_to_json_per_callsite(output_file, sys.stdin)
+    else:
+        with
open(input_arg, 'r') as f: + stream_callgraph_to_json_per_callsite(output_file, f) + From d1ce1180ac87450755a97157f9a3b84587e02c4f Mon Sep 17 00:00:00 2001 From: Vidya Date: Mon, 8 Sep 2025 20:26:12 +0000 Subject: [PATCH 09/14] Added build file for upgraded Egalito --- build.sh | 2 ++ build_upgraded_egalito.sh | 10 ++++++++++ 2 files changed, 12 insertions(+) create mode 100755 build_upgraded_egalito.sh diff --git a/build.sh b/build.sh index 8d0eb80..3c7a86d 100755 --- a/build.sh +++ b/build.sh @@ -3,6 +3,8 @@ cd analysis/tools/egalito export USE_LOADER=0 git checkout syspart-updated +make clean make -j 8 cd ../../app +make clean make diff --git a/build_upgraded_egalito.sh b/build_upgraded_egalito.sh new file mode 100755 index 0000000..f9cf4ce --- /dev/null +++ b/build_upgraded_egalito.sh @@ -0,0 +1,10 @@ +#!/bin/sh + + +cd analysis/tools/egalito +git checkout egalito-upgrade +make clean +make -j 8 +cd ../../app +make clean +make From cdbb15d791d3702b15434a0f1693010808e88943 Mon Sep 17 00:00:00 2001 From: Vidya Lakshmi Rajagopalan Date: Mon, 8 Sep 2025 15:29:12 -0400 Subject: [PATCH 10/14] Update README with latest update Updated README with the latest update to the tool with new-features branch. --- README.md | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 20b3054..de7992c 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,9 @@ We update this repository to add more features and fix bugs. In case of any quer ## Upgrades and fixes +**⚡ LATEST UPDATE (SCALABLE TOOL WITH NEWER OPTIONS)!! ⚡** +We have introduced new features, including improved scalability to handle larger binaries more efficiently, as well as added scripts that simplify and accelerate the analysis process. For more information, please see the [section](#build-new-features)(#run-new-features-and-options). +**⚡ UPGRADE TO LATEST OS!! ⚡** We have upgraded SysPart for use in latest ubuntu versions. (ubuntu 18.04+). 
We have tested in ubuntu 22.04. Please find more info about how to upgrade in the [section](#run-in-latest-oS-versions). ## Basic Requirements @@ -46,13 +49,6 @@ Once you ensure your public keys are configured, you an clone the repository re git clone --recursive https://github.com/vidyalakshmir/SysPartCode.git ``` -#### Run in latest OS versions -The initial repo was tested in ubuntu 18.04. We have upgraded it to work on latest ubuntu versions. As of now, we have tested on ubuntu 22.04. Please issue the following commands for the upgrade -``` -cd SysPartCode/analysis/tools/egalito -git checkout egalito-upgrade -``` - ### Install all dependencies ``` @@ -61,14 +57,43 @@ sudo apt-get install make g++ libreadline-dev gdb lsb-release unzip libc6-dbg li sudo apt install libunwind-dev python3 ``` +### Build new features +If you want to use the newly added features of the tool including scalability updates and other additional features, please checkout the following git branch + +``` +git checkout new-features +``` + ### Building the tool +#### Older versions until Ubuntu 18.04 ``` ./build.sh ``` +#### Run in latest OS versions +The initial repo was tested in ubuntu 18.04. We have upgraded it to work on latest ubuntu versions. As of now, we have tested on ubuntu 22.04. Please issue the following commands for the upgrade +``` +./build_upgraded_egalito.sh +``` + ## Using the tool +### Run new features and options +**To generate callgraph and compute system calls of a binary** + +Please make sure you are on the `new-features` branch of the git repository and have build it correctly. 
+ +In order to obtain the system calls of a ELF binary reachable from a list of system calls, use the following command + +`cd analysis/app +src/scripts/compute_syscalls.sh $BINARY $OUT $STARTFILE --log` + +where $BINARY is the ELF binary to be analyzed + $OUT is the directory where output files will be stored + $STARTFILE is the file containing a list of start functions + --log is optional parameter that logs the paths from start functions to the system calls in logfile.txt within the output directory +This will output the callgraph as well the system calls in different files. To get more information about the script,please run the script with --help option.` ### Generate callgraph Uses static analysis to generate the callgraph of the application as well as its dependent libraries. From 97a0306b5d717541a20a2c88416f4b28dda07dff Mon Sep 17 00:00:00 2001 From: Vidya Lakshmi Rajagopalan Date: Mon, 8 Sep 2025 15:30:13 -0400 Subject: [PATCH 11/14] Improve README links in upgrades and fixes section --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index de7992c..e90353c 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,8 @@ We update this repository to add more features and fix bugs. In case of any quer ## Upgrades and fixes **⚡ LATEST UPDATE (SCALABLE TOOL WITH NEWER OPTIONS)!! ⚡** -We have introduced new features, including improved scalability to handle larger binaries more efficiently, as well as added scripts that simplify and accelerate the analysis process. For more information, please see the [section](#build-new-features)(#run-new-features-and-options). +We have introduced new features, including improved scalability to handle larger binaries more efficiently, as well as added scripts that simplify and accelerate the analysis process. For more information, please see the [section](#build-new-features) and [section](#run-new-features-and-options). + **⚡ UPGRADE TO LATEST OS!! 
⚡** We have upgraded SysPart for use in latest ubuntu versions. (ubuntu 18.04+). We have tested in ubuntu 22.04. Please find more info about how to upgrade in the [section](#run-in-latest-oS-versions). From d9a0eb3710e203dfa1c4978f6db0a3c8d4b71337 Mon Sep 17 00:00:00 2001 From: Vidya Lakshmi Rajagopalan Date: Mon, 8 Sep 2025 15:31:38 -0400 Subject: [PATCH 12/14] Fix section links and improve readability in README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e90353c..1851b2c 100644 --- a/README.md +++ b/README.md @@ -9,10 +9,10 @@ We update this repository to add more features and fix bugs. In case of any quer ## Upgrades and fixes **⚡ LATEST UPDATE (SCALABLE TOOL WITH NEWER OPTIONS)!! ⚡** -We have introduced new features, including improved scalability to handle larger binaries more efficiently, as well as added scripts that simplify and accelerate the analysis process. For more information, please see the [section](#build-new-features) and [section](#run-new-features-and-options). +We have introduced new features, including improved scalability to handle larger binaries more efficiently, as well as added scripts that simplify and accelerate the analysis process. For more information, please see the sections [build new features](#build-new-features) and [run new features and options](#run-new-features-and-options). **⚡ UPGRADE TO LATEST OS!! ⚡** -We have upgraded SysPart for use in latest ubuntu versions. (ubuntu 18.04+). We have tested in ubuntu 22.04. Please find more info about how to upgrade in the [section](#run-in-latest-oS-versions). +We have upgraded SysPart for use in latest ubuntu versions. (ubuntu 18.04+). We have tested in ubuntu 22.04. Please find more info about how to upgrade in the section [run in latest OS versions](#run-in-latest-oS-versions). 
## Basic Requirements - Works on **linux binaries (ELF)** which run on **x86-64** architecture From 3f679f8e39a4d814f603d0a6662ccfceb30728d3 Mon Sep 17 00:00:00 2001 From: Vidya Lakshmi Rajagopalan Date: Mon, 8 Sep 2025 15:57:15 -0400 Subject: [PATCH 13/14] Revise README for clarity and updated information Updated README to clarify features and usage instructions. --- README.md | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 1851b2c..3e5c332 100644 --- a/README.md +++ b/README.md @@ -12,12 +12,12 @@ We update this repository to add more features and fix bugs. In case of any quer We have introduced new features, including improved scalability to handle larger binaries more efficiently, as well as added scripts that simplify and accelerate the analysis process. For more information, please see the sections [build new features](#build-new-features) and [run new features and options](#run-new-features-and-options). **⚡ UPGRADE TO LATEST OS!! ⚡** -We have upgraded SysPart for use in latest ubuntu versions. (ubuntu 18.04+). We have tested in ubuntu 22.04. Please find more info about how to upgrade in the section [run in latest OS versions](#run-in-latest-oS-versions). +We have upgraded SysPart for use in latest ubuntu versions. (ubuntu 18.04+). We have tested in ubuntu 22.04. Please find more info about how to upgrade in the section [run in latest OS versions](#in-latest-os-versions). ## Basic Requirements - Works on **linux binaries (ELF)** which run on **x86-64** architecture - Tested with **ELF binaries with symbols** (application as well as libraries). It will work with stripped binaries, only that the results (like callgraph) might be overapproximated. -- The tool was initially tested on **Ubuntu 18.04** and currently we have upgraded to later versions having been tested in ubuntu 22.04. Refer [section](#run-in-latest-oS-versions) to upgrade the repo for the latest ubuntu. 
+- The tool was initially tested on **Ubuntu 18.04** and has since been upgraded to later versions (tested on Ubuntu 22.04). Refer to [run in latest OS versions](#in-latest-os-versions) to upgrade the repo for the latest Ubuntu. ## Capabilities of the tool ### Any application @@ -65,14 +65,14 @@ If you want to use the newly added features of the tool including scalability up git checkout new-features ``` -### Building the tool +### Building the tool #### Older versions until Ubuntu 18.04 ``` ./build.sh ``` -#### Run in latest OS versions +#### In latest OS versions The initial repo was tested in ubuntu 18.04. We have upgraded it to work on latest ubuntu versions. As of now, we have tested on ubuntu 22.04. Please issue the following commands for the upgrade ``` ./build_upgraded_egalito.sh @@ -84,17 +84,21 @@ The initial repo was tested in ubuntu 18.04. We have upgraded it to work on late Please make sure you are on the `new-features` branch of the git repository and have build it correctly. -In order to obtain the system calls of a ELF binary reachable from a list of system calls, use the following command +In order to obtain the system calls of an ELF binary and its dependent libraries that are reachable from a list of start functions, use the following command -`cd analysis/app +``` +cd analysis/app src/scripts/compute_syscalls.sh $BINARY $OUT $STARTFILE --log` where $BINARY is the ELF binary to be analyzed - $OUT is the directory where output files will be stored + $OUT is the directory where output files will be stored (output directory) $STARTFILE is the file containing a list of start functions --log is optional parameter that logs the paths from start functions to the system calls in logfile.txt within the output directory +``` + +This will produce the callgraph as well as the reachable system calls in different files within the output directory. To get more information about the script, please run the script `compute_syscalls.sh` with the `--help` option.
-This will output the callgraph as well the system calls in different files. To get more information about the script,please run the script with --help option.` +This does not produce results of dynamic libraries. Please refer to section [Dynamic Library Profiling](#dynamic-library-profiling) for more info. ### Generate callgraph Uses static analysis to generate the callgraph of the application as well as its dependent libraries. From 949e3339023dca94cfd2e67beefe409187e5da81 Mon Sep 17 00:00:00 2001 From: Vidya Lakshmi Rajagopalan Date: Tue, 16 Sep 2025 11:53:47 -0400 Subject: [PATCH 14/14] Update build script to include git pull command --- build_upgraded_egalito.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/build_upgraded_egalito.sh b/build_upgraded_egalito.sh index f9cf4ce..2a4888b 100755 --- a/build_upgraded_egalito.sh +++ b/build_upgraded_egalito.sh @@ -3,6 +3,7 @@ cd analysis/tools/egalito git checkout egalito-upgrade +git pull make clean make -j 8 cd ../../app