Skip to content

Commit ead81d4

Browse files
andrewchiGitHub Enterprise
authored andcommitted
Merge pull request #354 from network-intelligence/dev
Add STUN telemetry and a document on JSON output guidelines
2 parents 6e46935 + 460bf0c commit ead81d4

File tree

6 files changed

+94
-2
lines changed

6 files changed

+94
-2
lines changed

doc/guidelines.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
## Best Practices for JSON Output
2+
3+
4+
5+
This note provides guidance for developers of code that generates JSON output, with the goal of producing JSON that works well with Parquet and `jq`.
6+
7+
#### Principles
8+
9+
- All names and strings must be valid UTF-8, with JSON special characters escaped.
10+
- Data from packets is not trusted to be in the correct format.
11+
- No spaces or dashes in names.
12+
- Prefer lowercase.
13+
- There should be no empty JSON objects.
14+
- For compressibility, highly variable fields (e.g. IP.ID) should be at the tail end of a record, not the front.
15+
- Avoid using network data as JSON keys, so that keys are consistent (and thus Parquet-friendly) and follow the other guidelines.
16+
- There should be no empty JSON arrays (exceptions can be made where semantically necessary, provided that we pre-deploy the json2parquet schema).
17+
- In an array of objects, the objects can have distinct schemas, as long as any name that appears in more than one object schema has the same type in all objects.
18+
- Prefer flat schemas where possible; avoid arrays of objects unless necessary.
19+
20+
#### Resources
21+
22+
The class utf8_safe_string
23+
https://wwwin-github.cisco.com/network-intelligence/mercury-transition/blob/dev/src/libmerc/utf8.hpp#L931
24+
can be used to safely convert packet data into a string that can be
25+
used as e.g. a JSON array or object name.

src/libmerc/buffer_stream.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1176,6 +1176,13 @@ class output_buffer : public buffer_stream {
11761176
return buffer;
11771177
}
11781178

1179+
std::pair<const uint8_t *, const uint8_t *> get_datum() const {
1180+
if (trunc) {
1181+
return { nullptr, nullptr };
1182+
}
1183+
return { (uint8_t *)buffer, (uint8_t *)buffer + doff };
1184+
}
1185+
11791186
};
11801187

11811188
#endif /* BUFFER_STREAM_H */

src/libmerc/pkt_proc.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,13 @@ struct do_observation {
228228
analysis_.reset_user_agent();
229229
}
230230

231+
void operator()(stun::message &m) {
    // build the event for this STUN message and push its event
    // string onto the queue feeding the data/stats aggregator
    //
    event_string stun_event{k_, analysis_, m};
    mq_->push(stun_event.construct_event_string());

    // only after the event string has been pushed, reset the user
    // agent held by the analysis context
    //
    analysis_.reset_user_agent();
}
237+
231238
template <typename T>
232239
void operator()(T &) { }
233240

src/libmerc/stun.h

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
#include "utf8.hpp"
1111
#include "match.h"
1212
#include "fingerprint.h"
13+
#include "result.h"
14+
#include "util_obj.h"
1315
#include <unordered_map>
1416

1517
namespace stun {
@@ -471,6 +473,8 @@ namespace stun {
471473

472474
// accessor: returns the `type` member
uint16_t get_type() const { return type; }
473475

476+
// accessor: returns a copy of the `value` member (a datum)
datum get_value() const { return value; }
477+
474478
};
475479

476480

@@ -772,6 +776,7 @@ namespace stun {
772776
class message : public base_protocol {
773777
header hdr;
774778
datum body;
779+
datum software;
775780

776781
public:
777782

@@ -959,6 +964,13 @@ namespace stun {
959964
} else {
960965
; // by default, attribute information is not included in fingerprint
961966
}
967+
968+
// remember SOFTWARE for later use in analysis
969+
//
970+
if (attr.value.get_type() == attr_type::SOFTWARE) {
971+
software = attr.value.get_value();
972+
}
973+
962974
} else {
963975
break;
964976
}
@@ -970,10 +982,33 @@ namespace stun {
970982
// analyzes the dst_ip, dst_port, and SOFTWARE attribute
971983
// value, using a classifier selected by the stun fingerprint
972984
//
973-
bool do_analysis(const struct key &, struct analysis_context &, classifier*) {
985+
// request format: dst_addr, dst_port
986+
// response format: src_addr, src_port
987+
988+
bool do_analysis(const struct key &flow_key, struct analysis_context &ac, classifier*) {
989+
990+
// create a json-friendly utf8 copy of the SOFTWARE atribute's value field
974991
//
975-
// TBD
992+
utf8_safe_string<MAX_USER_AGENT_LEN> utf8_software{software};
993+
994+
// handle message classes appropriately: reverse the
995+
// addresses and ports in the flow key for responses,
996+
// leave the flow key untouched for requests, and ignore
997+
// all other message classes
976998
//
999+
key k{flow_key};
1000+
if ((hdr.get_message_class() & 0b10) == 0b10) {
1001+
//
1002+
// success_resp and error_resp: swap addrs and ports
1003+
//
1004+
k.reverse();
1005+
}
1006+
ac.destination.init({nullptr,nullptr}, // domain name
1007+
utf8_software.get_datum(), // user agent
1008+
{nullptr,nullptr}, // alpn
1009+
k // flow key, used for dst_addr and dst_port
1010+
);
1011+
9771012
return false;
9781013
}
9791014

src/libmerc/utf8.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -960,6 +960,10 @@ class utf8_safe_string {
960960
return buf.get_buffer_start();
961961
}
962962

963+
/// returns the result of `buf.get_datum()` as a `datum`
///
datum get_datum() const {
    return buf.get_datum();
}
966+
963967
/// performs unit tests for \ref class utf8_safe_string and
964968
/// returns `true` if they all pass, and `false` otherwise
965969
///

src/libmerc/util_obj.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,20 @@ struct key {
177177
snprintf(src_port_string, MAX_PORT_STR_LEN, "%u", src_port);
178178
}
179179

180+
void reverse() {
181+
std::swap(src_port, dst_port);
182+
switch (ip_vers) {
183+
case 4:
184+
std::swap(addr.ipv4.src, addr.ipv4.dst);
185+
break;
186+
case 6:
187+
std::swap(addr.ipv6.src, addr.ipv6.dst);
188+
break;
189+
default:
190+
;
191+
}
192+
}
193+
180194
};
181195

182196
struct eth_addr : public datum {

0 commit comments

Comments
 (0)