Skip to content

Commit b0a5cf5

Browse files
authored
adding metadata to logs and bug fixes (#39)
* adding metadata to logs and bug fixes * update version
1 parent f41e963 commit b0a5cf5

9 files changed

+122
-53
lines changed
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
name: 🐛 Bug Report/Support
2+
description: Ask a question or report an issue
3+
labels: [bug]
4+
body:
5+
- type: markdown
6+
attributes:
7+
value: |
8+
Thank you for submitting a bug report. It helps make PerfSpect better.
9+
10+
Please try to include as much information as possible.
11+
- type: textarea
12+
attributes:
13+
label: Verbose output from perf-collect
14+
render: shell
15+
description: Copy the output of `./perf-collect` with `-v` flag (it will automatically format as a code block)
16+
- type: textarea
17+
attributes:
18+
label: Verbose output from perf-postprocess
19+
render: shell
20+
description: Copy the output of `./perf-postprocess` with `-v` flag (it will automatically format as a code block)
21+
- type: textarea
22+
attributes:
23+
label: What steps can reproduce the bug?
24+
description: Explain the bug, system setup, and provide a code snippet that can reproduce it.
25+
validations:
26+
required: true
27+
- type: textarea
28+
attributes:
29+
label: Additional information
30+
description: Is there anything else you think we should know?
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
name: 🚀 Feature Request
2+
description: Suggest an idea, feature, or enhancement
3+
labels: [enhancement]
4+
body:
5+
- type: markdown
6+
attributes:
7+
value: |
8+
Thank you for submitting an idea. It helps make PerfSpect better.
9+
- type: textarea
10+
attributes:
11+
label: What is the problem this feature would solve?
12+
validations:
13+
required: true
14+
- type: textarea
15+
attributes:
16+
label: What is the feature you are proposing to solve the problem?
17+
validations:
18+
required: true
19+
- type: textarea
20+
attributes:
21+
label: What alternatives have you considered?

.github/ISSUE_TEMPLATE/config.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
blank_issues_enabled: true

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
PerfSpect is a system performance characterization tool built on top of Linux perf. Most metrics and events come from [perfmon](https://github.com/intel/perfmon) and [TMA v4.5](https://www.intel.com/content/www/us/en/docs/vtune-profiler/cookbook/2023-1/top-down-microarchitecture-analysis-method.html). It contains two parts:
66

7-
perf-collect: Collects harware events
7+
perf-collect: Collects hardware events at a 5-second output interval with practically zero overhead, since PMUs run in counting mode.
88

99
- Collection mode:
1010
- `sudo ./perf-collect` _default system wide_

_version.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.2.12
1+
1.2.13

perf-collect.py

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -93,20 +93,15 @@ def write_metadata(
9393
cg_path_found = False
9494
for path in cgroup_paths:
9595
try:
96-
cpu_set_file = open(path, "r")
97-
cg_path_found = True
98-
# no need to check other paths
99-
break
96+
with open(path, "r") as cpu_set_file:
97+
cg_path_found = True
98+
cpu_set = cpu_set_file.read()
99+
cpu_set = cpu_set.strip()
100+
cpu_set = cpu_set.replace(",", "+")
101+
break
100102
except FileNotFoundError:
101-
# check next path
102103
continue
103104

104-
if cg_path_found:
105-
cpu_set = cpu_set_file.read()
106-
cpu_set_file.close()
107-
cpu_set = cpu_set.strip()
108-
cpu_set = cpu_set.replace(",", "+")
109-
110105
if not cg_path_found or cpu_set == "":
111106
# A missing path or an empty cpu-set in v2 indicates that the container is running on all CPUs
112107
cpu_set = "0-" + str(
@@ -284,6 +279,18 @@ def validate_file(fname):
284279
else:
285280
crash("Unknown application type")
286281

282+
events, collection_events = prep_events.prepare_perf_events(
283+
eventfile,
284+
(
285+
args.pid is not None
286+
or args.cid is not None
287+
or args.thread
288+
or args.socket
289+
or not have_uncore
290+
),
291+
args.pid is not None or args.cid is not None,
292+
)
293+
287294
if not perf_helpers.validate_outfile(args.outcsv):
288295
crash(
289296
"Output filename not accepted. Filename should be a .csv without special characters"
@@ -299,7 +306,6 @@ def validate_file(fname):
299306
cgroups = perf_helpers.get_cgroups(args.cid)
300307

301308
# get perf events to collect
302-
collection_events = []
303309
sys_devs = perf_helpers.get_sys_devices()
304310
if (
305311
"uncore_cha" not in sys_devs
@@ -318,21 +324,24 @@ def validate_file(fname):
318324
logging.warning(
319325
"Due to lack of vPMU support, TMA L1 & L2 events will not be collected"
320326
)
321-
events, collection_events = prep_events.prepare_perf_events(
322-
eventfile,
323-
(
324-
args.pid is not None
325-
or args.cid is not None
326-
or args.thread
327-
or args.socket
328-
or not have_uncore
329-
),
330-
args.pid is not None or args.cid is not None,
331-
)
332327

333328
if args.thread or args.socket or args.pid is not None or args.cid is not None:
334329
logging.info("Not collecting uncore events in this run mode")
335330

331+
# log some metadata
332+
logging.info("Architecture: " + arch)
333+
logging.info("Model: " + cpuname)
334+
logging.info("Kernel version: " + perf_helpers.get_version())
335+
logging.info("Cores per socket: " + str(perf_helpers.get_cpu_count()))
336+
logging.info("Socket: " + str(perf_helpers.get_socket_count()))
337+
logging.info("Hyperthreading on: " + str(perf_helpers.get_ht_status()))
338+
imc, upi = perf_helpers.get_imc_upi_count()
339+
logging.info("IMC count: " + str(imc))
340+
logging.info("CHA per socket: " + str(perf_helpers.get_cha_count()))
341+
logging.info("UPI count: " + str(upi))
342+
logging.info("PerfSpect version: " + perf_helpers.get_tool_version())
343+
logging.info("/sys/devices/: " + str(sys_devs))
344+
336345
# build perf stat command
337346
collection_type = "-a" if not args.thread and not args.socket else "-a -A"
338347
cmd = f"perf stat -I {interval} -x , {collection_type} -o {args.outcsv}"
@@ -358,13 +367,12 @@ def validate_file(fname):
358367
if args.verbose:
359368
logging.info(cmd)
360369
try:
361-
logging.info("Collecting perf stat for events in : %s" % eventfilename)
362370
start = time.time()
363371
subprocess.call(perfargs) # nosec
364372
end = time.time()
365-
if end - start < 5.2:
373+
if end - start < 7:
366374
logging.warning(
367-
"PerfSpect was run for less than 5 seconds, some events make be zero because they didn't get scheduled"
375+
"PerfSpect was run for a short duration, some events might be zero or blank because they never got scheduled"
368376
)
369377
logging.info("Collection complete! Calculating TSC frequency now")
370378
except KeyboardInterrupt:

perf-postprocess.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,10 @@ def get_args(script_path):
129129
# for socket or thread: add rows for each 2nd hyper thread with same values as 1st thread
130130
def get_fixed_c6_residency_fields(perf_data_lines, perf_mode):
131131
# handle special case events: c6-residency
132+
# if hyperthreading is disabled, no fixing is required
133+
if meta_data["constants"]["HYPERTHREADING_ON"] == 0:
134+
return perf_data_lines
135+
132136
new_perf_data_lines = []
133137
if meta_data["constants"]["CONST_THREAD_COUNT"] == 2:
134138
for fields in perf_data_lines:
@@ -216,8 +220,11 @@ def get_all_data_lines(input_file_path):
216220
fields = line.split(",")
217221
perf_data_lines.append(fields)
218222

219-
infile.close()
220-
return meta_data_lines, perf_events_lines, perf_data_lines
223+
if len(perf_data_lines) == 0:
224+
crash(
225+
"perfstat.csv contains no perf event data, try collecting for a longer time"
226+
)
227+
return meta_data_lines, perf_events_lines, perf_data_lines
221228

222229

223230
# get_metadata
@@ -416,6 +423,7 @@ def get_socket_number(sockets_dict, core):
416423

417424

418425
def extract_dataframe(perf_data_lines, meta_data, perf_mode):
426+
logging.info("Formatting event data")
419427
# parse event data into dataframe and set header names
420428
perf_data_df = pd.DataFrame(perf_data_lines)
421429
if "CGROUPS" in meta_data and meta_data["CGROUPS"] == "enabled":
@@ -657,10 +665,13 @@ def generate_metrics(
657665
}
658666
prev_time_slice = 0
659667
group_to_start_end_indexes = {}
668+
logging.info("processing " + str(time_slice_groups.ngroups) + " samples")
660669
for time_slice, item in time_slice_groups:
661670
time_slice_float = float(time_slice)
662671
if time_slice_float - prev_time_slice < 4.5:
663672
logging.warning("throwing out last sample because it was too short")
673+
if time_slice_groups.ngroups == 1:
674+
crash("no remaining samples")
664675
continue
665676
time_slice_df = time_slice_groups.get_group(time_slice).copy()
666677
# normalize by difference between current time slice and previous time slice

src/base.html

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@
167167
series: [
168168
{
169169
type: 'line',
170-
data: CPUUTIL.map((e, i) => [i, e]),
170+
data: CPUUTIL.map((e, i) => [i * 5, e]),
171171
}
172172
]
173173
}
@@ -177,7 +177,7 @@
177177
series: [
178178
{
179179
type: 'line',
180-
data: CPIDATA.map((e, i) => [i, e]),
180+
data: CPIDATA.map((e, i) => [i * 5, e]),
181181
}
182182
]
183183
}
@@ -187,7 +187,7 @@
187187
series: [
188188
{
189189
type: 'line',
190-
data: CPUFREQ.map((e, i) => [i, e]),
190+
data: CPUFREQ.map((e, i) => [i * 5, e]),
191191
}
192192
]
193193
}
@@ -197,7 +197,7 @@
197197
series: [
198198
{
199199
type: 'line',
200-
data: REMOTENUMA.map((e, i) => [i, e]),
200+
data: REMOTENUMA.map((e, i) => [i * 5, e]),
201201
}
202202
]
203203
}
@@ -208,17 +208,17 @@
208208
{
209209
name: "L1D",
210210
type: 'line',
211-
data: L1DATA.map((e, i) => [i, e]),
211+
data: L1DATA.map((e, i) => [i * 5, e]),
212212
},
213213
{
214214
name: "L2",
215215
type: 'line',
216-
data: L2DATA.map((e, i) => [i, e]),
216+
data: L2DATA.map((e, i) => [i * 5, e]),
217217
},
218218
{
219219
name: "LLC Data",
220220
type: 'line',
221-
data: LLCDATA.map((e, i) => [i, e]),
221+
data: LLCDATA.map((e, i) => [i * 5, e]),
222222
},
223223
]
224224
}
@@ -229,17 +229,17 @@
229229
{
230230
name: "Read",
231231
type: 'line',
232-
data: READDATA.map((e, i) => [i, e]),
232+
data: READDATA.map((e, i) => [i * 5, e]),
233233
},
234234
{
235235
name: "Write",
236236
type: 'line',
237-
data: WRITEDATA.map((e, i) => [i, e]),
237+
data: WRITEDATA.map((e, i) => [i * 5, e]),
238238
},
239239
{
240240
name: "Total",
241241
type: 'line',
242-
data: TOTALDATA.map((e, i) => [i, e]),
242+
data: TOTALDATA.map((e, i) => [i * 5, e]),
243243
},
244244
]
245245
}
@@ -249,7 +249,7 @@
249249
series: [
250250
{
251251
type: 'line',
252-
data: PKGPOWER.map((e, i) => [i, e]),
252+
data: PKGPOWER.map((e, i) => [i * 5, e]),
253253
}
254254
]
255255
}
@@ -259,7 +259,7 @@
259259
series: [
260260
{
261261
type: 'line',
262-
data: DRAMPOWER.map((e, i) => [i, e]),
262+
data: DRAMPOWER.map((e, i) => [i * 5, e]),
263263
}
264264
]
265265
}

src/perf_helpers.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -269,19 +269,17 @@ def get_cpuinfo():
269269
cpuinfo = []
270270
temp_dict = {}
271271
try:
272-
fo = open("/proc/cpuinfo", "r")
272+
with open("/proc/cpuinfo", "r") as fo:
273+
for line in fo:
274+
try:
275+
key, value = list(map(str.strip, line.split(":", 1)))
276+
except ValueError:
277+
cpuinfo.append(temp_dict)
278+
temp_dict = {}
279+
else:
280+
temp_dict[key] = value
273281
except EnvironmentError as e:
274282
logging.warning(str(e), UserWarning)
275-
else:
276-
for line in fo:
277-
try:
278-
key, value = list(map(str.strip, line.split(":", 1)))
279-
except ValueError:
280-
cpuinfo.append(temp_dict)
281-
temp_dict = {}
282-
else:
283-
temp_dict[key] = value
284-
fo.close()
285283
return cpuinfo
286284

287285

0 commit comments

Comments
 (0)