Ghidra CookBook For Agent


Ghidra API 使用手册 (Cookbook for AI)

基于 GhidraSnippets 项目总结的 Ghidra API 常用模式和最佳实践。


版权信息

本文档由 Claude Sonnet 4.5 协作生成,内容基于 GhidraSnippets 项目整理而成。


目录

  1. 核心 API 概览
  2. 程序与项目操作
  3. 函数操作
  4. 反编译器使用
  5. 指令与汇编
  6. 变量与内存
  7. PCode 操作
  8. 交叉引用
  9. 常用工具函数
  10. 最佳实践
  11. 常见问题
  12. 参考资源

核心 API 概览

三大核心 API

Ghidra 提供三个主要的扁平化 API(Flat API),简化了常见操作:

1. FlatProgramAPI - 程序操作核心

 1from ghidra.program.flatapi import FlatProgramAPI
 2
 3state = getState()
 4program = state.getCurrentProgram()
 5fpapi = FlatProgramAPI(program)
 6
 7# 访问程序相关功能
 8print(fpapi.currentProgram)
 9print(fpapi.firstFunction)
10print(fpapi.firstInstruction)

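常用方法:getFunctionAt / getFunctionContaining / getGlobalFunctions(函数查找)、toAddr(地址转换)、getByte / getBytes(内存读取)、getReferencesTo / getReferencesFrom(交叉引用)、getInstructionAt(指令获取)。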

2. FlatDecompilerAPI - 反编译器接口

 1from ghidra.app.decompiler.flatapi import FlatDecompilerAPI
 2from ghidra.program.flatapi import FlatProgramAPI
 3
 4fpapi = FlatProgramAPI(getState().getCurrentProgram())
 5fdapi = FlatDecompilerAPI(fpapi)
 6
 7# 反编译函数
 8main_func = fpapi.getFunction('main')
 9decomp_result = fdapi.decompile(main_func)
10print(decomp_result)

3. FlatDebuggerAPI - 调试器接口

# FlatDebuggerAPI is provided by the debugger module (ghidra.debug.flatapi),
# not by ghidra.app.flatapi.
from ghidra.debug.flatapi import FlatDebuggerAPI

# Used for debugger-related operations.

程序与项目操作

获取当前程序信息

# Option 1: use the script-global `currentProgram`.
program = currentProgram
print("Program name:", program.getName())
print("Executable path:", program.getExecutablePath())

# Option 2: go through the script state object.
state = getState()
program = state.getCurrentProgram()

获取程序段信息

# List every memory block (section) of the current program.
mem = currentProgram.getMemory()
for block in mem.getBlocks():
    name = block.getName()
    start = block.getStart()
    size = block.getSize()
    print("Section: {} @ 0x{} (size: {})".format(name, start, size))

项目操作

 1# 获取项目信息
 2state = getState()
 3project = state.getProject()
 4print("Project name:", project.getName())
 5print("Project location:", project.getProjectLocator())
 6
 7# 列出项目中的所有程序
 8project_data = project.getProjectData()
 9root_folder = project_data.getRootFolder()
10for df in root_folder.getFiles():
11    print("Program:", df.getName())

函数操作

遍历所有函数

 1# 方法 1: 使用 Flat API
 2func = getFirstFunction()
 3while func is not None:
 4    print("Function: {} @ 0x{}".format(func.getName(), func.getEntryPoint()))
 5    func = getFunctionAfter(func)
 6
 7# 方法 2: 使用 FunctionManager
 8fm = currentProgram.getFunctionManager()
 9funcs = fm.getFunctions(True)  # True = 正向遍历
10for func in funcs:
11    print("Function: {} @ 0x{}".format(func.getName(), func.getEntryPoint()))

通过地址获取函数

 1def getAddress(offset):
 2    return currentProgram.getAddressFactory().getDefaultAddressSpace().getAddress(offset)
 3
 4fm = currentProgram.getFunctionManager()
 5addr = getAddress(0x00100690)
 6
 7# 获取入口点处的函数(仅适用于函数入口地址)
 8func = fm.getFunctionAt(addr)
 9
10# 获取包含该地址的函数(推荐)
11func = fm.getFunctionContaining(addr)
12
13# 检查地址是否在函数内
14is_in_func = fm.isInFunction(addr)

通过名称获取函数

# Note: several functions may share the same name, so a list is returned.
name = "main"
funcs = getGlobalFunctions(name)
print("Found {} function(s) with name '{}'".format(len(funcs), name))

for func in funcs:
    print("{} is located at 0x{}".format(func.getName(), func.getEntryPoint()))

重命名函数

from ghidra.program.model.symbol import SourceType

# Look up the function to rename; the list is empty when the name is unknown.
funcs = getGlobalFunctions("old_name")
if len(funcs) == 0:
    print("Function 'old_name' not found")
else:
    # Program modifications are wrapped in a transaction.
    tx = currentProgram.startTransaction("Rename function")
    committed = False
    try:
        funcs[0].setName("new_name", SourceType.USER_DEFINED)
        committed = True
    finally:
        # Commit on success, roll back if setName raised.
        currentProgram.endTransaction(tx, committed)

获取函数交叉引用

 1fm = currentProgram.getFunctionManager()
 2funcs = fm.getFunctions(True)
 3
 4for func in funcs:
 5    if func.getName() == "system":
 6        print("\nFound 'system' @ 0x{}".format(func.getEntryPoint()))
 7        entry_point = func.getEntryPoint()
 8        references = getReferencesTo(entry_point)
 9
10        for xref in references:
11            print("From: {} To: {} Type: {}".format(
12                xref.getFromAddress(),
13                xref.getToAddress(),
14                xref.getReferenceType()
15            ))

反编译器使用

基本反编译

 1from ghidra.app.decompiler import DecompInterface
 2from ghidra.util.task import ConsoleTaskMonitor
 3
 4program = getCurrentProgram()
 5ifc = DecompInterface()
 6ifc.openProgram(program)
 7
 8# 反编译指定函数
 9function = getGlobalFunctions('main')[0]
10results = ifc.decompileFunction(function, 0, ConsoleTaskMonitor())
11
12# 获取伪 C 代码
13c_code = results.getDecompiledFunction().getC()
14print(c_code)

高级反编译配置

 1from ghidra.app.decompiler import DecompileOptions
 2from ghidra.app.decompiler import DecompInterface
 3from ghidra.util.task import ConsoleTaskMonitor
 4
 5options = DecompileOptions()
 6monitor = ConsoleTaskMonitor()
 7ifc = DecompInterface()
 8ifc.setOptions(options)
 9ifc.openProgram(currentProgram)
10
11# 反编译(带超时)
12func = getGlobalFunctions('main')[0]
13res = ifc.decompileFunction(func, 60, monitor)  # 60秒超时
14
15# 获取高级函数对象
16high_func = res.getHighFunction()

获取变量信息

 1from ghidra.app.decompiler import DecompileOptions
 2from ghidra.app.decompiler import DecompInterface
 3from ghidra.util.task import ConsoleTaskMonitor
 4
 5func = getGlobalFunctions("target_func")[0]
 6options = DecompileOptions()
 7monitor = ConsoleTaskMonitor()
 8ifc = DecompInterface()
 9ifc.setOptions(options)
10ifc.openProgram(func.getProgram())
11
12res = ifc.decompileFunction(func, 60, monitor)
13high_func = res.getHighFunction()
14lsm = high_func.getLocalSymbolMap()
15symbols = lsm.getSymbols()
16
17for symbol in symbols:
18    print("Name:", symbol.name)
19    print("Type:", symbol.dataType)
20    print("Address:", symbol.getPCAddress())
21    print("Size:", symbol.size)
22    print("Storage:", symbol.storage)
23    print("Is Parameter:", symbol.parameter)
24    print("Is ReadOnly:", symbol.readOnly)

分析函数调用参数

 1from ghidra.app.decompiler import DecompileOptions
 2from ghidra.app.decompiler import DecompInterface
 3from ghidra.util.task import ConsoleTaskMonitor
 4
 5# 目标调用地址
 6TARGET_ADDR = toAddr(0x00434f6c)
 7
 8options = DecompileOptions()
 9monitor = ConsoleTaskMonitor()
10ifc = DecompInterface()
11ifc.setOptions(options)
12ifc.openProgram(currentProgram)
13
14# 获取包含该地址的函数
15fm = currentProgram.getFunctionManager()
16func = fm.getFunctionContaining(TARGET_ADDR)
17
18# 反编译
19res = ifc.decompileFunction(func, 60, monitor)
20hf = res.getHighFunction()
21
22# 遍历 PCode 操作
23opiter = hf.getPcodeOps(TARGET_ADDR)
24while opiter.hasNext():
25    op = opiter.next()
26    if op.getMnemonic() == "CALL":
27        # 获取调用参数
28        for i in range(1, op.getNumInputs()):
29            arg = op.getInput(i)
30            print("Argument {}: {}".format(i, arg))

指令与汇编

遍历函数中的所有指令

 1from binascii import hexlify
 2
 3listing = currentProgram.getListing()
 4main_func = getGlobalFunctions("main")[0]
 5addrSet = main_func.getBody()
 6
 7# 获取地址集合中的所有指令
 8instructions = listing.getInstructions(addrSet, True)  # True = 正向
 9
10for instr in instructions:
11    addr = instr.getAddress()
12    mnemonic = instr.getMnemonicString()
13    opcode = hexlify(instr.getBytes()).decode('utf-8')
14
15    print("0x{}: {} ({})".format(addr, mnemonic, opcode))

查找特定指令模式

 1# 查找所有寄存器调用和跳转
 2listing = currentProgram.getListing()
 3func = getGlobalFunctions("target")[0]
 4addrSet = func.getBody()
 5instructions = listing.getInstructions(addrSet, True)
 6
 7for instr in instructions:
 8    mnemonic = instr.getMnemonicString()
 9
10    # 查找 CALL/JMP 到寄存器
11    if mnemonic in ["CALL", "JMP"]:
12        num_operands = instr.getNumOperands()
13        if num_operands > 0:
14            op_type = instr.getOperandType(0)
15            # 检查是否为寄存器操作数
16            if op_type & instr.OP_TYPE_REGISTER:
17                print("Found: {} @ 0x{}".format(instr, instr.getAddress()))

统计指令助记符

 1from collections import Counter
 2
 3listing = currentProgram.getListing()
 4instructions = listing.getInstructions(True)
 5
 6mnemonics = []
 7for instr in instructions:
 8    mnemonics.append(instr.getMnemonicString())
 9
10# 统计出现次数
11counter = Counter(mnemonics)
12for mnemonic, count in counter.most_common(10):
13    print("{}: {}".format(mnemonic, count))

变量与内存

读取内存

⚠️ 规范:批量读取内存一律使用 flat_api.getBytes()

memory.getBytes(addr, bytearray, 0, n) 在 PyGhidra 中不会将数据写入 Python bytearray(返回值 n 正确但 buffer 全零),原因是 PyGhidra 的 Java 方法 out 参数不映射到 Python 对象。

正确方式:flat_api.getBytes(addr, n) 返回 Java byte[],转 bytearray 后数据正确。

 1from ghidra.program.flatapi import FlatProgramAPI
 2
 3flat_api = FlatProgramAPI(currentProgram)
 4
 5# ✅ 正确:批量读取 n 个字节
 6def read_bytes(addr, length):
 7    try:
 8        raw = flat_api.getBytes(addr, length)
 9        return bytearray(b & 0xff for b in raw)  # Java byte 有符号,需 & 0xff
10    except Exception as e:
11        print("Read error at {}: {}".format(addr, e))
12        return bytearray(length)  # 失败返回全零
13
14# ✅ 正确:读取以 null 结尾的字符串(逐字节,用 memory.getByte)
15def read_cstring(addr):
16    mem = currentProgram.getMemory()
17    result = ""
18    while True:
19        byte = mem.getByte(addr.add(len(result))) & 0xff
20        if byte == 0:
21            return result
22        result += chr(byte)
23
24# ❌ 错误:下面这种写法 buf 始终全零
25# buf = bytearray(n)
26# memory.getBytes(addr, buf, 0, n)  # buf 不会被写入!
27
28# 使用示例
29addr = toAddr(0x00401000)
30data = read_bytes(addr, 16)
31print("bytes: {}".format(data.hex()))
32
33string_value = read_cstring(addr)
34print("string: {}".format(string_value))

写入内存(Patch 字符串)

⚠️ 规范:Patch 内存需要使用事务(Transaction)

 1from ghidra.program.flatapi import FlatProgramAPI
 2
 3flat_api = FlatProgramAPI(currentProgram)
 4mem = currentProgram.getMemory()
 5listing = currentProgram.getListing()
 6
 7# 目标地址和要写入的字符串
 8target_addr = toAddr(0x00357598)
 9decrypted_str = "getFingerprintedPartit"
10dec_bytes = decrypted_str.encode('utf-8') + b'\x00'  # 添加 null 结尾
11
12# 开始事务并 Patch
13tx = currentProgram.startTransaction("patch string")
14try:
15    # 1. 写入字节到内存
16    mem.setBytes(target_addr, dec_bytes)
17    print("[+] Patched {} bytes".format(len(dec_bytes)))
18
19    # 2. 清除旧的数据定义
20    try:
21        listing.clearCodeUnits(target_addr, target_addr.add(len(dec_bytes) - 1), False)
22    except:
23        pass
24
25    # 3. 创建 ASCII 字符串(让 Ghidra 识别为字符串)
26    flat_api.createAsciiString(target_addr, len(dec_bytes))
27    print("[+] Created ASCII string at {}".format(target_addr))
28
29    currentProgram.endTransaction(tx, True)
30    print("[OK] Done!")
31except Exception as e:
32    currentProgram.endTransaction(tx, False)
33    print("[!] Error: {}".format(e))

获取栈变量

 1def get_stack_var_from_varnode(func, varnode):
 2    """
 3    从 Varnode 获取栈变量
 4
 5    参数:
 6        func: Function 对象
 7        varnode: Varnode 或 VarnodeAST 对象
 8
 9    返回:
10        栈变量列表
11    """
12    if varnode.isUnique():
13        # 如果是 unique 空间,需要找到定义
14        high_var = varnode.getHigh()
15        if high_var:
16            varnode = high_var.getRepresentative()
17
18    if varnode.isAddress():
19        addr = varnode.getAddress()
20        offset = addr.getOffset()
21
22        # 获取栈变量
23        stack_vars = func.getAllVariables()
24        results = []
25        for var in stack_vars:
26            if var.isStackVariable():
27                if var.getStackOffset() == offset:
28                    results.append(var)
29        return results
30
31    return []

PCode 操作

PCode 基础

PCode 是 Ghidra 的中间表示(IR),用于表示指令的语义。

 1from ghidra.app.decompiler import DecompInterface
 2from ghidra.util.task import ConsoleTaskMonitor
 3
 4ifc = DecompInterface()
 5ifc.openProgram(currentProgram)
 6
 7func = getGlobalFunctions('target')[0]
 8res = ifc.decompileFunction(func, 60, ConsoleTaskMonitor())
 9high_func = res.getHighFunction()
10
11# 遍历所有 PCode 操作
12opiter = high_func.getPcodeOps()
13while opiter.hasNext():
14    op = opiter.next()
15    mnemonic = op.getMnemonic()
16
17    print("PCode Op: {}".format(mnemonic))
18
19    # 获取输入
20    for i in range(op.getNumInputs()):
21        input_var = op.getInput(i)
22        print("  Input {}: {}".format(i, input_var))
23
24    # 获取输出
25    output = op.getOutput()
26    if output:
27        print("  Output: {}".format(output))

PCode 操作类型

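常见的 PCode 操作:COPY(赋值)、LOAD / STORE(内存读写)、BRANCH / CBRANCH(无条件/条件跳转)、CALL / CALLIND(直接/间接调用)、RETURN(返回)、INT_ADD / INT_SUB 等算术运算、PTRADD / PTRSUB(指针运算)、MULTIEQUAL(SSA 的 phi 节点)、INDIRECT(间接影响)。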

函数模拟(Emulation)

 1from ghidra.app.emulator import EmulatorHelper
 2from ghidra.program.model.symbol import SymbolUtilities
 3
 4def getAddress(offset):
 5    return currentProgram.getAddressFactory().getDefaultAddressSpace().getAddress(offset)
 6
 7def getSymbolAddress(symbolName):
 8    symbol = SymbolUtilities.getLabelOrFunctionSymbol(currentProgram, symbolName, None)
 9    if symbol:
10        return symbol.getAddress()
11    raise Exception("Failed to locate symbol: {}".format(symbolName))
12
13# 创建模拟器
14emuHelper = EmulatorHelper(currentProgram)
15
16# 设置起始地址
17mainFunctionEntry = getSymbolAddress("main")
18mainFunctionEntryLong = int("0x{}".format(mainFunctionEntry), 16)
19emuHelper.writeRegister(emuHelper.getPCRegister(), mainFunctionEntryLong)
20
21# 设置寄存器初始值
22emuHelper.writeRegister("RAX", 0x20)
23emuHelper.writeRegister("RSP", 0x000000002FFF0000)
24emuHelper.writeRegister("RBP", 0x000000002FFF0000)
25
26# 写入内存
27emuHelper.writeMemoryValue(getAddress(0x000000000008C000), 4, 0x99AABBCC)
28emuHelper.writeMemory(getAddress(0x00000000000CF000), b'\x99\xAA\xBB\xCC')
29
30# 单步执行
31while True:
32    executionAddress = emuHelper.getExecutionAddress()
33
34    # 打印当前指令
35    instr = getInstructionAt(executionAddress)
36    print("Address: 0x{} ({})".format(executionAddress, instr))
37
38    # 读取寄存器
39    rax = emuHelper.readRegister("RAX")
40    print("  RAX = {:#018x}".format(rax))
41
42    # 执行一步
43    success = emuHelper.step(monitor)
44    if not success:
45        lastError = emuHelper.getLastError()
46        print("Emulation Error: '{}'".format(lastError))
47        break
48
49    # 检查终止条件
50    if executionAddress == getAddress(0x0):
51        break
52
53# 清理
54emuHelper.dispose()

交叉引用

获取引用到某地址的所有位置

 1target_addr = toAddr(0x00401000)
 2references = getReferencesTo(target_addr)
 3
 4for xref in references:
 5    from_addr = xref.getFromAddress()
 6    to_addr = xref.getToAddress()
 7    ref_type = xref.getReferenceType()
 8
 9    print("From: 0x{} To: 0x{} Type: {}".format(
10        from_addr, to_addr, ref_type
11    ))

获取从某地址引用的所有位置

# Print the destination of every reference that originates at `source_addr`.
source_addr = toAddr(0x00401000)

for xref in getReferencesFrom(source_addr):
    print("Reference to: 0x{}".format(xref.getToAddress()))

查找函数调用者

 1def get_callers(target_func):
 2    """获取调用目标函数的所有函数"""
 3    entry_point = target_func.getEntryPoint()
 4    xrefs = getReferencesTo(entry_point)
 5
 6    fm = currentProgram.getFunctionManager()
 7    callers = []
 8
 9    for xref in xrefs:
10        from_addr = xref.getFromAddress()
11        caller = fm.getFunctionContaining(from_addr)
12        if caller and caller not in callers:
13            callers.append(caller)
14
15    return callers
16
17# 使用示例
18target = getGlobalFunctions("system")[0]
19callers = get_callers(target)
20
21for caller in callers:
22    print("Caller: {} @ 0x{}".format(caller.getName(), caller.getEntryPoint()))

常用工具函数

地址转换

# Build an address from a numeric offset.
def getAddress(offset):
    """Return the address at `offset` in the program's default address space."""
    space = currentProgram.getAddressFactory().getDefaultAddressSpace()
    return space.getAddress(offset)


# Use toAddr (built into Ghidra scripts).
addr = toAddr(0x00401000)

# Parse an address from its string form.
addr = currentProgram.getAddressFactory().getAddress("00401000")

符号查找

 1from ghidra.program.model.symbol import SymbolUtilities
 2
 3def getSymbolAddress(symbolName):
 4    symbol = SymbolUtilities.getLabelOrFunctionSymbol(
 5        currentProgram, symbolName, None
 6    )
 7    if symbol:
 8        return symbol.getAddress()
 9    return None
10
11# 使用示例
12main_addr = getSymbolAddress("main")

批量函数重命名

 1from ghidra.app.decompiler import DecompileOptions
 2from ghidra.app.decompiler import DecompInterface
 3from ghidra.util.task import ConsoleTaskMonitor
 4from ghidra.program.model.symbol import SourceType
 5
 6def getString(addr):
 7    mem = currentProgram.getMemory()
 8    result = ""
 9    while True:
10        byte = mem.getByte(addr.add(len(result)))
11        if byte == 0:
12            return result
13        result += chr(byte)
14
15# 设置反编译器
16options = DecompileOptions()
17monitor = ConsoleTaskMonitor()
18ifc = DecompInterface()
19ifc.setOptions(options)
20ifc.openProgram(currentProgram)
21
22# 找到 register_function
23fm = currentProgram.getFunctionManager()
24register_function = None
25for func in fm.getFunctions(True):
26    if func.getName() == "register_function":
27        register_function = func
28        break
29
30# 获取所有调用者
31entry_point = register_function.getEntryPoint()
32xrefs = getReferencesTo(entry_point)
33callers = []
34for xref in xrefs:
35    from_addr = xref.getFromAddress()
36    caller = fm.getFunctionContaining(from_addr)
37    if caller and caller not in callers:
38        callers.append(caller)
39
40# 处理每个调用者
41for caller in callers:
42    if not caller:
43        continue
44
45    res = ifc.decompileFunction(caller, 60, monitor)
46    hf = res.getHighFunction()
47    opiter = hf.getPcodeOps()
48
49    while opiter.hasNext():
50        op = opiter.next()
51        if op.getMnemonic() == "CALL":
52            call_target = op.getInput(0)
53            if call_target.getAddress() == entry_point:
54                # 提取字符串参数和函数地址参数
55                func_name_varnode = op.getInput(2)
56                func_addr_varnode = op.getInput(3)
57
58                # 获取字符串
59                func_name_def = func_name_varnode.getDef()
60                func_name_addr = toAddr(func_name_def.getInput(0).getOffset())
61                func_name = getString(func_name_addr)
62
63                # 获取函数地址
64                func_addr = toAddr(func_addr_varnode.getDef().getInput(1).getOffset())
65                func_obj = fm.getFunctionAt(func_addr)
66
67                # 重命名
68                if func_obj:
69                    func_obj.setName(func_name, SourceType.USER_DEFINED)
70                    print("Renamed function @ 0x{} to '{}'".format(func_addr, func_name))

最佳实践

1. 错误处理

 1try:
 2    func = getGlobalFunctions("target")[0]
 3except IndexError:
 4    print("Function 'target' not found")
 5    exit()
 6
 7# 检查 None
 8addr = toAddr(0x00401000)
 9func = fm.getFunctionAt(addr)
10if func is None:
11    print("No function at address")

2. 资源清理

 1# 反编译器使用后清理
 2ifc = DecompInterface()
 3ifc.openProgram(currentProgram)
 4try:
 5    # ... 使用反编译器
 6    pass
 7finally:
 8    ifc.dispose()
 9
10# 模拟器使用后清理
11emuHelper = EmulatorHelper(currentProgram)
12try:
13    # ... 使用模拟器
14    pass
15finally:
16    emuHelper.dispose()

3. 性能优化

# Iterate via the FunctionManager instead of calling getGlobalFunctions repeatedly.
fm = currentProgram.getFunctionManager()
funcs = fm.getFunctions(True)

# Cache frequently used objects.
listing = currentProgram.getListing()
memory = currentProgram.getMemory()
addr_factory = currentProgram.getAddressFactory()

4. 事务管理(修改程序时)

 1from ghidra.program.model.symbol import SourceType
 2
 3# 开始事务
 4txId = currentProgram.startTransaction("Rename Functions")
 5try:
 6    func = getGlobalFunctions("old_name")[0]
 7    func.setName("new_name", SourceType.USER_DEFINED)
 8
 9    # 提交事务
10    currentProgram.endTransaction(txId, True)
11except Exception as e:
12    # 回滚事务
13    currentProgram.endTransaction(txId, False)
14    print("Error: {}".format(e))

常见问题

Q: 如何区分 getFunctionAt 和 getFunctionContaining?

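A: getFunctionAt(addr) 仅当 addr 恰好是函数入口点时才返回该函数,否则返回 None;getFunctionContaining(addr) 返回函数体包含 addr 的函数,入口点或函数内部的任意地址均可。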

推荐使用 getFunctionContaining

Q: 如何处理多个同名函数?

A: Ghidra 允许多个函数使用同一个名称(例如重载函数),所以 getGlobalFunctions(name) 返回的是列表,使用前应检查其长度:

funcs = getGlobalFunctions("main")
if len(funcs) == 0:
    # Nothing matched; avoid indexing into an empty list.
    print("Function 'main' not found")
    func = None
else:
    if len(funcs) > 1:
        print("Multiple functions found, selecting first")
    func = funcs[0]

Q: PCode 的 unique 空间是什么?

A: Unique 空间是临时存储空间,用于中间计算结果。要获取实际变量,需要追踪定义:

# Swap a unique-space varnode for the representative of its high variable.
if varnode.isUnique():
    high_var = varnode.getHigh()
    if high_var is not None:
        varnode = high_var.getRepresentative()

Q: 如何安全地修改程序?

A: 始终使用事务(Transaction):

txId = currentProgram.startTransaction("Description")
committed = False
try:
    # ... perform the modifications here
    committed = True
finally:
    # Commit on success, roll back otherwise; any exception still propagates
    # instead of being silently swallowed.
    currentProgram.endTransaction(txId, committed)

参考资源