Skip to content

Commit f7c3a49

Browse files
committed
Add derived strings and string recognizer API (WIP)
1 parent eee045f commit f7c3a49

File tree

8 files changed

+545
-17
lines changed

8 files changed

+545
-17
lines changed

binaryninjaapi.h

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4074,6 +4074,7 @@ namespace BinaryNinja {
40744074
class Segment;
40754075
class Component;
40764076
class TypeArchive;
4077+
struct DerivedString;
40774078

40784079
/*!
40794080

@@ -4107,6 +4108,8 @@ namespace BinaryNinja {
41074108

41084109
static void StringFoundCallback(void* ctxt, BNBinaryView* data, BNStringType type, uint64_t offset, size_t len);
41094110
static void StringRemovedCallback(void* ctxt, BNBinaryView* data, BNStringType type, uint64_t offset, size_t len);
4111+
static void DerivedStringFoundCallback(void* ctxt, BNBinaryView* data, BNDerivedString* str);
4112+
static void DerivedStringRemovedCallback(void* ctxt, BNBinaryView* data, BNDerivedString* str);
41104113
static void TypeDefinedCallback(void* ctxt, BNBinaryView* data, BNQualifiedName* name, BNType* type);
41114114
static void TypeUndefinedCallback(void* ctxt, BNBinaryView* data, BNQualifiedName* name, BNType* type);
41124115
static void TypeReferenceChangedCallback(void* ctx, BNBinaryView* data, BNQualifiedName* name, BNType* type);
@@ -4203,6 +4206,8 @@ namespace BinaryNinja {
42034206
UndoEntryTaken = 1ULL << 50,
42044207
RedoEntryTaken = 1ULL << 51,
42054208
Rebased = 1ULL << 52,
4209+
DerivedStringFound = 1ULL << 53,
4210+
DerivedStringRemoved = 1ULL << 54,
42064211

42074212
BinaryDataUpdates = DataWritten | DataInserted | DataRemoved,
42084213
FunctionLifetime = FunctionAdded | FunctionRemoved,
@@ -4213,7 +4218,7 @@ namespace BinaryNinja {
42134218
TagUpdates = TagLifetime | TagUpdated,
42144219
SymbolLifetime = SymbolAdded | SymbolRemoved,
42154220
SymbolUpdates = SymbolLifetime | SymbolUpdated,
4216-
StringUpdates = StringFound | StringRemoved,
4221+
StringUpdates = StringFound | StringRemoved | DerivedStringFound | DerivedStringRemoved,
42174222
TypeLifetime = TypeDefined | TypeUndefined,
42184223
TypeUpdates = TypeLifetime | TypeReferenceChanged | TypeFieldReferenceChanged,
42194224
SegmentLifetime = SegmentAdded | SegmentRemoved,
@@ -4350,6 +4355,16 @@ namespace BinaryNinja {
43504355
(void)offset;
43514356
(void)len;
43524357
}
4358+
/*! Default handler for the "derived string found" notification; intentionally does nothing.

	Subclasses override this to react to the event. The arguments are explicitly discarded
	so the default implementation produces no unused-parameter warnings.

	\param data View supplied with the notification
	\param str Derived string supplied with the notification
*/
virtual void OnDerivedStringFound(BinaryView* data, const DerivedString& str)
{
	static_cast<void>(data);
	static_cast<void>(str);
}
4363+
/*! Default handler for the "derived string removed" notification; intentionally does nothing.

	Subclasses override this to react to the event. The arguments are explicitly discarded
	so the default implementation produces no unused-parameter warnings.

	\param data View supplied with the notification
	\param str Derived string supplied with the notification
*/
virtual void OnDerivedStringRemoved(BinaryView* data, const DerivedString& str)
{
	static_cast<void>(data);
	static_cast<void>(str);
}
43534368
virtual void OnTypeDefined(BinaryView* data, const QualifiedName& name, Type* type)
43544369
{
43554370
(void)data;
@@ -4822,7 +4837,7 @@ namespace BinaryNinja {
48224837

48234838
const char* c_str() const;
48244839
size_t size() const;
4825-
BNStringRef* GetObject() { return m_ref; }
4840+
BNStringRef* GetObject() const { return m_ref; }
48264841

48274842
bool operator==(const StringRef& other) const { return this->operator std::string_view() == other.operator std::string_view(); }
48284843
bool operator!=(const StringRef& other) const { return this->operator std::string_view() != other.operator std::string_view(); }
@@ -5356,6 +5371,42 @@ namespace BinaryNinja {
53565371
std::vector<TypeReferenceSource> typeRefs;
53575372
};
53585373

5374+
/*! C++ wrapper around a core BNCustomStringType handle.

	Instances are created from a raw core pointer and expose the type's name.
*/
class CustomStringType: public StaticCoreRefCountObject<BNCustomStringType>
5375+
{
5376+
public:
5377+
/*! Wraps an existing core handle.

	\param type Core custom string type handle to wrap
*/
CustomStringType(BNCustomStringType* type);
5378+
/*! Fetches the name of this custom string type from the core.

	\return The type name as an owned std::string
*/
std::string GetName() const;
5379+
};
5380+
5381+
struct DerivedString
5382+
{
5383+
StringRef value;
5384+
std::optional<std::pair<uint64_t, uint64_t>> addrAndLength;
5385+
Ref<CustomStringType> customType;
5386+
5387+
bool operator==(const DerivedString& other) const
5388+
{
5389+
if (value != other.value)
5390+
return false;
5391+
if (addrAndLength != other.addrAndLength)
5392+
return false;
5393+
return customType == other.customType;
5394+
}
5395+
5396+
bool operator<(const DerivedString& other) const
5397+
{
5398+
if (value < other.value)
5399+
return true;
5400+
if (other.value < value)
5401+
return false;
5402+
if (addrAndLength < other.addrAndLength)
5403+
return true;
5404+
if (other.addrAndLength < addrAndLength)
5405+
return false;
5406+
return customType < other.customType;
5407+
}
5408+
};
5409+
53595410
struct QualifiedNameAndType;
53605411
struct PossibleValueSet;
53615412
class Metadata;
@@ -7131,6 +7182,10 @@ namespace BinaryNinja {
71317182
*/
71327183
std::vector<BNStringReference> GetStrings(uint64_t start, uint64_t len);
71337184

7185+
std::vector<DerivedString> GetDerivedStrings();
7186+
std::vector<ReferenceSource> GetDerivedStringCodeReferences(
7187+
const DerivedString& str, std::optional<size_t> maxItems = std::nullopt);
7188+
71347189
/*! Sets up a call back function to be called when analysis has been completed.
71357190

71367191
This is helpful when using `UpdateAnalysis` which does not wait for analysis completion before returning.

binaryninjacore.h

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,8 @@ extern "C"
312312
typedef struct BNIndirectBranchInfo BNIndirectBranchInfo;
313313
typedef struct BNArchitectureAndAddress BNArchitectureAndAddress;
314314
typedef struct BNConstantRenderer BNConstantRenderer;
315+
typedef struct BNStringRecognizer BNStringRecognizer;
316+
typedef struct BNCustomStringType BNCustomStringType;
315317

316318
typedef bool(*BNProgressFunction)(void*, size_t, size_t);
317319

@@ -1631,6 +1633,15 @@ extern "C"
16311633
size_t nameCount;
16321634
} BNQualifiedName;
16331635

1636+
// C representation of a derived string as exchanged with the core: a string reference,
// an optional (addr, len) location, and an optional custom string type.
typedef struct BNDerivedString
1637+
{
1638+
// Reference to the string contents.
BNStringRef* value;
1639+
// True when addr and len carry a location; the C++ wrappers ignore both fields otherwise.
bool addrValid;
1640+
// Address associated with the string (meaningful only when addrValid is true).
uint64_t addr;
1641+
// Length associated with the string (meaningful only when addrValid is true).
uint64_t len;
1642+
// Custom string type handle; the C++ wrappers treat a null pointer as "no custom type".
BNCustomStringType* customType;
1643+
} BNDerivedString;
1644+
16341645
typedef struct BNBinaryDataNotification
16351646
{
16361647
void* context;
@@ -1656,6 +1667,8 @@ extern "C"
16561667
void (*symbolUpdated)(void* ctxt, BNBinaryView* view, BNSymbol* sym);
16571668
void (*stringFound)(void* ctxt, BNBinaryView* view, BNStringType type, uint64_t offset, size_t len);
16581669
void (*stringRemoved)(void* ctxt, BNBinaryView* view, BNStringType type, uint64_t offset, size_t len);
1670+
void (*derivedStringFound)(void* ctxt, BNBinaryView* view, BNDerivedString* str);
1671+
void (*derivedStringRemoved)(void* ctxt, BNBinaryView* view, BNDerivedString* str);
16591672
void (*typeDefined)(void* ctxt, BNBinaryView* view, BNQualifiedName* name, BNType* type);
16601673
void (*typeUndefined)(void* ctxt, BNBinaryView* view, BNQualifiedName* name, BNType* type);
16611674
void (*typeReferenceChanged)(void* ctxt, BNBinaryView* view, BNQualifiedName* name, BNType* type);
@@ -3817,6 +3830,31 @@ extern "C"
38173830
BNOperatorPrecedence precedence);
38183831
} BNCustomConstantRenderer;
38193832

3833+
// Result structure passed by pointer to BNCustomStringRecognizer::recognizeConstantPointer.
// NOTE(review): presumably filled in by the callback when it reports a recognized string - confirm.
typedef struct BNCustomStringRecognizerResult
3834+
{
3835+
// Custom string type of the recognized string.
BNCustomStringType* type;
3836+
// Presumably indicates whether addr/len are meaningful, mirroring BNDerivedString - TODO confirm.
bool addrValid;
3837+
uint64_t addr;
3838+
// NOTE(review): size_t here, whereas BNDerivedString::len is uint64_t - confirm this is intended.
size_t len;
3839+
// Buffer holding the recognized string contents.
BNDataBuffer* value;
3840+
} BNCustomStringRecognizerResult;
3841+
3842+
// Callback table describing a custom string recognizer; this is the structure handed to
// BNRegisterStringRecognizer.
typedef struct BNCustomStringRecognizer
3843+
{
3844+
// Opaque user pointer; presumably passed back as `ctxt` to every callback below - TODO confirm.
void* context;
3845+
// Presumably called to release `context` when the recognizer is no longer needed - TODO confirm.
void (*freeObject)(void* ctxt);
3846+
// Returns whether this recognizer applies to `type` within the given HLIL function.
bool (*isValidForType)(void* ctxt, BNHighLevelILFunction* hlil, BNType* type);
3847+
// Examines the constant pointer value `val` at expression index `expr`; writes to `result`.
// NOTE(review): the boolean return presumably signals that `result` was populated - confirm.
bool (*recognizeConstantPointer)(void* ctxt, BNHighLevelILFunction* hlil, size_t expr, BNType* type,
3848+
int64_t val, BNCustomStringRecognizerResult* result);
3849+
} BNCustomStringRecognizer;
3850+
3851+
// Description of a custom string type; this is the structure handed to BNRegisterCustomStringType.
typedef struct BNCustomStringTypeInfo
3852+
{
3853+
// Name of the custom string type.
char* name;
3854+
// Prefix text for strings of this type.
// NOTE(review): presumably emitted before the string when it is displayed - confirm.
char* stringPrefix;
3855+
// Postfix text for strings of this type.
// NOTE(review): presumably emitted after the string when it is displayed - confirm.
char* stringPostfix;
3856+
} BNCustomStringTypeInfo;
3857+
38203858
BINARYNINJACOREAPI char* BNAllocString(const char* contents);
38213859
BINARYNINJACOREAPI char* BNAllocStringWithLength(const char* contents, size_t len);
38223860
BINARYNINJACOREAPI void BNFreeString(char* str);
@@ -5260,6 +5298,11 @@ extern "C"
52605298
BNBinaryView* view, uint64_t start, uint64_t len, size_t* count);
52615299
BINARYNINJACOREAPI void BNFreeStringReferenceList(BNStringReference* strings);
52625300

5301+
BINARYNINJACOREAPI BNDerivedString* BNGetDerivedStrings(BNBinaryView* view, size_t* count);
5302+
BINARYNINJACOREAPI BNReferenceSource* BNGetDerivedStringCodeReferences(
5303+
BNBinaryView* view, BNDerivedString* str, size_t* count, bool limit, size_t maxItems);
5304+
BINARYNINJACOREAPI void BNFreeDerivedStringList(BNDerivedString* strings, size_t count);
5305+
52635306
BINARYNINJACOREAPI BNVariableNameAndType* BNGetStackLayout(BNFunction* func, size_t* count);
52645307
BINARYNINJACOREAPI void BNFreeVariableNameAndTypeList(BNVariableNameAndType* vars, size_t count);
52655308
BINARYNINJACOREAPI void BNCreateAutoStackVariable(
@@ -8691,9 +8734,25 @@ extern "C"
86918734
const char* name, BNCustomConstantRenderer* renderer);
86928735
BINARYNINJACOREAPI BNConstantRenderer* BNGetConstantRendererByName(const char* name);
86938736
BINARYNINJACOREAPI BNConstantRenderer** BNGetConstantRendererList(size_t* count);
8694-
BINARYNINJACOREAPI void BNFreeConstantRendererList(BNLanguageRepresentationFunctionType** renderers);
8737+
BINARYNINJACOREAPI void BNFreeConstantRendererList(BNConstantRenderer** renderers);
86958738
BINARYNINJACOREAPI char* BNGetConstantRendererName(BNConstantRenderer* renderer);
86968739

8740+
// String recognizers
8741+
BINARYNINJACOREAPI BNCustomStringType* BNRegisterCustomStringType(BNCustomStringTypeInfo* info);
8742+
BINARYNINJACOREAPI BNCustomStringType* BNGetCustomStringTypeByName(const char* name);
8743+
BINARYNINJACOREAPI BNCustomStringType* BNGetCustomStringTypeByID(uint32_t id);
8744+
BINARYNINJACOREAPI BNCustomStringType** BNGetCustomStringTypeList(size_t* count);
8745+
BINARYNINJACOREAPI void BNFreeCustomStringTypeList(BNCustomStringType** types);
8746+
BINARYNINJACOREAPI char* BNGetCustomStringTypeName(BNCustomStringType* type);
8747+
BINARYNINJACOREAPI char* BNGetCustomStringTypePrefix(BNCustomStringType* type);
8748+
BINARYNINJACOREAPI char* BNGetCustomStringTypePostfix(BNCustomStringType* type);
8749+
BINARYNINJACOREAPI BNStringRecognizer* BNRegisterStringRecognizer(
8750+
const char* name, BNCustomStringRecognizer* recognizer);
8751+
BINARYNINJACOREAPI BNStringRecognizer* BNGetStringRecognizerByName(const char* name);
8752+
BINARYNINJACOREAPI BNStringRecognizer** BNGetStringRecognizerList(size_t* count);
8753+
BINARYNINJACOREAPI void BNFreeStringRecognizerList(BNStringRecognizer** recognizers);
8754+
BINARYNINJACOREAPI char* BNGetStringRecognizerName(BNStringRecognizer* recognizer);
8755+
86978756
#ifdef __cplusplus
86988757
}
86998758
#endif

binaryview.cpp

Lines changed: 97 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,32 @@ void BinaryDataNotification::StringRemovedCallback(
222222
}
223223

224224

225+
void BinaryDataNotification::DerivedStringFoundCallback(void* ctxt, BNBinaryView* data, BNDerivedString* str)
226+
{
227+
BinaryDataNotification* notify = (BinaryDataNotification*)ctxt;
228+
Ref<BinaryView> view = new BinaryView(BNNewViewReference(data));
229+
DerivedString derivedStr;
230+
derivedStr.value = StringRef(BNDuplicateStringRef(str->value));
231+
derivedStr.customType = str->customType ? new CustomStringType(str->customType) : nullptr;
232+
if (str->addrValid)
233+
derivedStr.addrAndLength = {str->addr, str->len};
234+
notify->OnDerivedStringFound(view, derivedStr);
235+
}
236+
237+
238+
void BinaryDataNotification::DerivedStringRemovedCallback(void* ctxt, BNBinaryView* data, BNDerivedString* str)
239+
{
240+
BinaryDataNotification* notify = (BinaryDataNotification*)ctxt;
241+
Ref<BinaryView> view = new BinaryView(BNNewViewReference(data));
242+
DerivedString derivedStr;
243+
derivedStr.value = StringRef(BNDuplicateStringRef(str->value));
244+
derivedStr.customType = str->customType ? new CustomStringType(str->customType) : nullptr;
245+
if (str->addrValid)
246+
derivedStr.addrAndLength = {str->addr, str->len};
247+
notify->OnDerivedStringRemoved(view, derivedStr);
248+
}
249+
250+
225251
void BinaryDataNotification::TypeDefinedCallback(void* ctxt, BNBinaryView* data, BNQualifiedName* name, BNType* type)
226252
{
227253
BinaryDataNotification* notify = (BinaryDataNotification*)ctxt;
@@ -556,6 +582,8 @@ BinaryDataNotification::BinaryDataNotification()
556582
m_callbacks.symbolRemoved = SymbolRemovedCallback;
557583
m_callbacks.stringFound = StringFoundCallback;
558584
m_callbacks.stringRemoved = StringRemovedCallback;
585+
m_callbacks.derivedStringFound = DerivedStringFoundCallback;
586+
m_callbacks.derivedStringRemoved = DerivedStringRemovedCallback;
559587
m_callbacks.typeDefined = TypeDefinedCallback;
560588
m_callbacks.typeUndefined = TypeUndefinedCallback;
561589
m_callbacks.typeReferenceChanged = TypeReferenceChangedCallback;
@@ -615,6 +643,8 @@ BinaryDataNotification::BinaryDataNotification(NotificationTypes notifications)
615643
m_callbacks.symbolRemoved = (notifications & NotificationType::SymbolRemoved) ? SymbolRemovedCallback : nullptr;
616644
m_callbacks.stringFound = (notifications & NotificationType::StringFound) ? StringFoundCallback : nullptr;
617645
m_callbacks.stringRemoved = (notifications & NotificationType::StringRemoved) ? StringRemovedCallback : nullptr;
646+
m_callbacks.derivedStringFound = (notifications & NotificationType::DerivedStringFound) ? DerivedStringFoundCallback : nullptr;
647+
m_callbacks.derivedStringRemoved = (notifications & NotificationType::DerivedStringRemoved) ? DerivedStringRemovedCallback : nullptr;
618648
m_callbacks.typeDefined = (notifications & NotificationType::TypeDefined) ? TypeDefinedCallback : nullptr;
619649
m_callbacks.typeUndefined = (notifications & NotificationType::TypeUndefined) ? TypeUndefinedCallback : nullptr;
620650
m_callbacks.typeReferenceChanged = (notifications & NotificationType::TypeReferenceChanged) ? TypeReferenceChangedCallback : nullptr;
@@ -664,7 +694,7 @@ StringRef::StringRef(BNStringRef* ref)
664694

665695
StringRef::StringRef(const StringRef& other)
666696
{
667-
m_ref = BNDuplicateStringRef(other.m_ref);
697+
m_ref = other.m_ref ? BNDuplicateStringRef(other.m_ref) : nullptr;
668698
}
669699

670700

@@ -4007,6 +4037,57 @@ vector<BNStringReference> BinaryView::GetStrings(uint64_t start, uint64_t len)
40074037
}
40084038

40094039

4040+
// Returns every derived string the core reports for this view.
//
// Each entry gets its own duplicated string reference, so the core-owned list can be released
// before returning. Returns an empty vector when the core reports no derived strings.
vector<DerivedString> BinaryView::GetDerivedStrings()
{
	// Initialized so the loop bound is defined even if the core call leaves it untouched.
	size_t count = 0;
	BNDerivedString* strings = BNGetDerivedStrings(m_object, &count);

	vector<DerivedString> result;
	result.reserve(count);
	for (size_t i = 0; i < count; i++)
	{
		// Build the entry in place: copying a finished DerivedString into the vector would
		// duplicate the string reference a second time.
		DerivedString& entry = result.emplace_back();
		entry.value = StringRef(BNDuplicateStringRef(strings[i].value));
		if (strings[i].customType)
			entry.customType = new CustomStringType(strings[i].customType);
		// The location is only meaningful when the core flags it as valid.
		if (strings[i].addrValid)
			entry.addrAndLength = {strings[i].addr, strings[i].len};
	}

	BNFreeDerivedStringList(strings, count);
	return result;
}
4057+
4058+
4059+
// Returns the code references to the given derived string.
//
// str:      derived string to look up; its members are passed to the core without taking new references.
// maxItems: when set, forwarded to the core as the item limit; when empty, no limit is requested.
vector<ReferenceSource> BinaryView::GetDerivedStringCodeReferences(
	const DerivedString& str, std::optional<size_t> maxItems)
{
	// Zero-initialize so addr/len hold defined values even when the string has no location;
	// the whole structure is handed across the C API boundary.
	BNDerivedString derivedStr = {};
	derivedStr.value = str.value.GetObject();
	derivedStr.customType = str.customType ? str.customType->GetObject() : nullptr;
	derivedStr.addrValid = str.addrAndLength.has_value();
	if (str.addrAndLength.has_value())
	{
		derivedStr.addr = str.addrAndLength->first;
		derivedStr.len = str.addrAndLength->second;
	}

	// Initialized so the loop bound is defined even if the core call leaves it untouched.
	size_t count = 0;
	BNReferenceSource* refs = BNGetDerivedStringCodeReferences(
		m_object, &derivedStr, &count, maxItems.has_value(), maxItems.value_or(0));

	vector<ReferenceSource> result;
	result.reserve(count);
	for (size_t i = 0; i < count; i++)
	{
		// Fill the element in place to avoid copying the reference-counted members.
		ReferenceSource& src = result.emplace_back();
		src.func = new Function(BNNewFunctionReference(refs[i].func));
		src.arch = new CoreArchitecture(refs[i].arch);
		src.addr = refs[i].addr;
	}

	BNFreeCodeReferences(refs, count);
	return result;
}
4089+
4090+
40104091
// The caller of this function must hold a reference to the returned Ref<AnalysisCompletionEvent>.
40114092
// Otherwise, it can be freed before the callback is triggered, leading to a crash.
40124093
Ref<AnalysisCompletionEvent> BinaryView::AddAnalysisCompletionEvent(const function<void()>& callback)
@@ -5943,3 +6024,18 @@ void SymbolQueue::Process()
59436024
{
59446025
BNProcessSymbolQueue(m_object);
59456026
}
6027+
6028+
6029+
// Wraps a core custom string type handle; the pointer is stored as-is in m_object.
CustomStringType::CustomStringType(BNCustomStringType* type)
{
	this->m_object = type;
}
6033+
6034+
6035+
// Returns the name of this custom string type. The core hands back an allocated C string,
// which is copied into a std::string and then released with BNFreeString.
string CustomStringType::GetName() const
{
	char* rawName = BNGetCustomStringTypeName(m_object);
	string name(rawName);
	BNFreeString(rawName);
	return name;
}

python/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
from .lineformatter import *
8484
from .renderlayer import *
8585
from .constantrenderer import *
86+
from .stringrecognizer import *
8687
# We import each of these by name to prevent conflicts between
8788
# log.py and the function 'log' which we don't import below
8889
from .log import (

0 commit comments

Comments
 (0)