Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions general_setup
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ gs_usage_info()
echo " --sysname: name of the system running, used in determining config files. Defaults to hostname."
echo " --test_verification <test_verify_file>: Runs the test verification. Information is in the test_verify file in the tests github"
# Keep the two-line --tuned_setting description together; --use_pcp follows it.
echo " --tuned_setting: used in naming the tar file, default for RHEL is the current active tuned. For non"
echo " RHEL systems, default is none."
echo " --use_pcp: Enables use of Performance Co-Pilot in wrappers, defaults to 0."
echo " --usage: this usage message."
exit 1
Expand All @@ -91,6 +92,8 @@ to_sysname=`hostname`
to_pstats="default"
to_no_pkg_install=0

to_use_pcp=0

to_tuned_setting=""

i=1
Expand Down Expand Up @@ -195,7 +198,12 @@ do
--usage)
gs_usage_info
;;
--use_pcp)
	# Boolean flag: it takes no value, so only the option itself is consumed.
	i=$((i + 1))
	to_use_pcp=1
	shift 1
	;;
--)
	# End-of-options marker: stop parsing.
	break;
	;;
*)
Expand Down
10 changes: 10 additions & 0 deletions pcp/PCPrecord.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# systemd unit that runs the PCP recorder action script (PCPrecord_actions.sh).
[Unit]
Description=PCP Recorder

[Service]
# Type=notify: the started script reports readiness and status through
# 'systemd-notify' calls.
# NOTE(review): those notifications are sent by the systemd-notify helper
# process rather than directly by the main PID - confirm whether NotifyAccess=
# needs to be set for the systemd versions this is deployed on.
Type=notify
# The action script sources "$PWD/pcp_functions.inc", so the working directory
# must be the directory that holds both files.
WorkingDirectory=/usr/local/src/PCPrecord
ExecStart=/usr/local/src/PCPrecord/PCPrecord_actions.sh

[Install]
WantedBy=multi-user.target
166 changes: 166 additions & 0 deletions pcp/PCPrecord_actions.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
#!/bin/bash
# Executed by systemd service 'PCPrecord.service'
# See: /etc/systemd/system/PCPrecord.service
################################################################

# GLOBALS ###################
# Include the PCP Functions file.
# The path is relative to the current working directory (set through
# WorkingDirectory= in PCPrecord.service); quoted so that a directory name
# containing whitespace or glob characters is not split or expanded.
# shellcheck source=/dev/null
source "$PWD/pcp_functions.inc"

FIFO="/tmp/pcpFIFO"                               # get from cmdline
sample_rate=5                                     # hardcode DEFAULT for now
pmlogger_running="false"                          # Initialize service as OFF
om_workload_file="/tmp/openmetrics_workload.txt"  # workload metrics file

#############################
# Functions #################
update_om_workload() {
  # Removes the existing <openmetrics_workload> file and writes a new one.
  # Called by 'reset_om_metrics()', below
  #
  # Globals:
  #   om_workload_file (read) - path of the file to rewrite
  # Arguments (all six are required):
  #   $1 iteration count   (integer)
  #   $2 running flag      (integer)
  #   $3 number of threads (integer)
  #   $4 runtime           (written verbatim)
  #   $5 throughput        (written verbatim)
  #   $6 latency           (written verbatim)
  # Exits the script with status 2 when the argument count is wrong.
  local v_iter_cnt v_running v_numthreads v_runtime v_throughput v_latency

  # Check for proper number of args
  if [ "$#" -ne 6 ]; then
    echo "ERROR on number of parameters in ${FUNCNAME[0]}"
    exit 2
  fi
  v_iter_cnt=$1
  v_running=$2
  v_numthreads=$3
  v_runtime=$4
  v_throughput=$5
  v_latency=$6

  # Prepare for an update to the $om_workload_file (GLOBAL)
  rm -f "$om_workload_file"
  # Write all metrics through a single redirection so the file is opened once.
  # The 'running' line uses $v_running (the value passed as $2).
  {
    printf "iteration %d\n" "$v_iter_cnt"
    printf "running %d\n" "$v_running"
    printf "numthreads %d\n" "$v_numthreads"
    echo "runtime ${v_runtime}"
    echo "throughput ${v_throughput}"
    echo "latency ${v_latency}"
  } >"$om_workload_file"
}

reset_om_metrics() {
  # Puts every openmetric.workload metric back to its initial value and
  # rewrites the workload file through update_om_workload.
  # Counters start at zero ...
  r_iteration=0
  r_running=0
  r_numthreads=0
  # ... and measurements start as "not a number".
  r_runtime="NaN"
  r_throughput="NaN"
  r_latency="NaN"

  # Push the initial values out to the openmetrics.workload file
  update_om_workload \
    "${r_iteration}" "${r_running}" "${r_numthreads}" \
    "${r_runtime}" "${r_throughput}" "${r_latency}"
}

error_exit() {
  # Terminates the service when the command executed immediately before this
  # call returned a non-zero status; does nothing otherwise.
  #   $1 - text appended to the "ERROR: " status reported to systemd
  # Must be called directly after the command being checked, because the
  # status of that command is read from $? on entry.
  local prev_status=$?

  if [ "$prev_status" = "0" ]; then
    return 0
  fi

  systemd-notify --status="ERROR: $1"
  # Additional error handling logic can be added here
  rm -f "$FIFO"
  # Reset openmetric.workload metric values prior to leaving
  reset_om_metrics
  ## if pmlogger_running = True then attempt forcible STOP?
  exit 1
}
# END Functions #################

# Main #################
# Begin with the openmetric.workload metrics at their initial values
reset_om_metrics

# Verify required files and Packages are available
#----------------------------------
# error_exit inspects the status of the command directly above it, so each
# check is immediately followed by its error_exit call.
[ -f "${om_workload_file}" ]
error_exit "Initialization: ${om_workload_file} not found!"

# Every service 'start' gets a brand-new FIFO
rm -f "$FIFO"
mkfifo "$FIFO"
error_exit "Initialization: mkfifo $FIFO failed"

## DEBUG - measure processing interval: $postaction-$preaction
# Values reported by the first READY status, before any request was handled
interval=0.0
action='NONE'

# Infinite Loop #################
# Read FIFO and perform requested ACTION (start, stop, ...)
# Each request is a single line of whitespace-separated words: the first word
# selects the action, the following words are its arguments / metric value.
# NOTE: 'Start, Stop, Reset' actions have no metrics
while : ; do
  # Required or we get TIMEOUT on 'read action < "$FIFO" '
  # Signal readiness for next $action. SYNC point w/client Workload
  # Report timing interval for most recent ACTION
  systemd-notify --ready --status="READY: last-action - $action = ${interval}ms"
  # Read the Request/'$action' and then process it.
  # Blocks until data is available; -r keeps backslashes in the request literal.
  read -r action < "$FIFO"
  # Signal busy Processing this $action
  systemd-notify --status="$action PMLOGGER Request"
  # Array of 'words' in Request read from FIFO. 'read -a' splits on whitespace
  # without pathname expansion, so a '*' or '?' in a request is never expanded
  # against the filesystem.
  read -r -a action_arr <<< "$action"
  ## DEBUG - measure processing interval for ACTION: $postaction-$preaction
  preaction=$(mark_ms)
  case "${action_arr[0]}" in
    Start)          # 'Start $archive_dir $test_name $conf_file'
      archive_dir="${action_arr[1]}"
      archive_name="${action_arr[2]}"
      conf_file="${action_arr[3]}"
      # Start PMLOGGER to create ARCHIVE
      if [ "$pmlogger_running" = "false" ]; then
        # Signal Processing this $action
        systemd-notify --status="DEBUG: $action PMLOGGER Request"
        # These functions attempt to catch errors and verify success.
        # Arguments are quoted so each one reaches the helper as exactly
        # one word; error_exit must directly follow the call it checks.
        pcp_verify "$conf_file"
        error_exit "pcp_verify: Unable to start PMLOGGER"
        pcp_start "$conf_file" "$sample_rate" "$archive_dir" "$archive_name"
        error_exit "pcp_start: Unable to start PMLOGGER"
        pmlogger_running="true"       # Record this STATE info
      fi
      ;;
    Stop)           # artifacts_dir="${action_arr[1]}"
      # Terminate PMLOGGER
      if [ "$pmlogger_running" = "true" ]; then
        # Will ZATHRAS Store PCP Archive related artifacts ?
        #   - Currently Missing from PCPSTOP logic
        ##pcp_stop "${artifacts_dir}"
        pcp_stop
        error_exit "pcp_stop: Unable to stop PMLOGGER"
        pmlogger_running="false"
      fi
      ;;
    Reset)          # om_workload_file="${action_arr[1]}"
      # RESET the Workload Metrics
      # the only Request that doesn't require $pmlogger_running
      reset_om_metrics
      error_exit "reset_om_metrics: Unable to RESET Workload Metrics"
      ;;
    throughput|latency|numthreads|runtime)     # Workload Metrics
      # metric="${action_arr[1]}"  om_workload_file=$2
      if [ "$pmlogger_running" = "true" ]; then
        # Forward workload metric to openmetrics_workload.txt
        # Change only one metric line at a time
        # Replaces the entire line using sed
        # Should I only print 'action_arr[0] & action_arr[1]'
        # NOTE(review): the request text is used as the sed replacement, so
        # a value containing '/', '&' or '\' would not be written verbatim.
        sed -i "s/^.*${action_arr[0]}.*$/${action}/" "$om_workload_file"
      fi
      ;;
    running|iteration)      # Workload States
      # state="${action_arr[1]}"  om_workload_file=$2
      if [ "$pmlogger_running" = "true" ]; then
        sed -i "s/^.*${action_arr[0]}.*$/${action}/" "$om_workload_file"
      fi
      ;;
    *)
      systemd-notify --status="Unrecognized action - IGNORED"
      ;;
  esac
  ## DEBUG - measure time interval for processing ACTION
  postaction=$(mark_ms)
  interval=$(( 10*(postaction - preaction) ))
done

# Cleanup
# NOTE(review): the 'while :' request loop above has no 'break', so as written
# these statements look unreachable - confirm the intended shutdown path.
printf '%s\n' "Cleaning up"

# Leave the openmetric.workload metrics at their initial values on the way out
reset_om_metrics
1 change: 1 addition & 0 deletions pcp/README
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This directory contains the infrastructure for PCP (Performance Co-Pilot) support in the Zathras family of test wrappers. It is currently at the "proof of concept" stage.
76 changes: 76 additions & 0 deletions pcp/default.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#pmlogconf 2.0
#
## Workload Metrics - hardcoded sampling rate
# This group is logged on a fixed 1 second interval instead of 'default'.
# NOTE(review): openmetrics.workload is presumably backed by the workload file
# maintained by PCPrecord_actions.sh - confirm against the openmetrics setup.
log advisory on 1 second {
openmetrics.workload
openmetrics.control.fetch_time
}

## Intel RAPL & RFchassis metrics
# denki.rapl is commented out below, so only openmetrics.RFchassis is
# requested by this group.
log advisory on default {
# denki.rapl
openmetrics.RFchassis
}

## platform, filesystem and hardware configuration
# Logged with 'once' rather than on a repeating interval.
log advisory on once {
hinv
kernel.uname
filesys.mountdir
filesys.uuid
filesys.type
filesys.blocksize
filesys.capacity
}

# NOTE(review): the '#+ tools/htop:y:default:' line below looks like a
# pmlogconf group tag - confirm before editing it by hand.
#+ tools/htop:y:default:
## metrics used by the htop command
log advisory on default {
# Entries prefixed with '#' inside this group are commented out and are
# therefore not requested from pmlogger.
# disk.all.read_bytes
# disk.all.write_bytes
# disk.all.avactive
# hinv.cpu.clock
kernel.all.load
kernel.all.uptime
kernel.all.cpu.user
kernel.all.cpu.nice
kernel.all.cpu.sys
kernel.all.cpu.idle
kernel.all.cpu.wait.total
kernel.all.cpu.intr
kernel.all.cpu.irq.soft
kernel.all.cpu.steal
kernel.all.cpu.guest
kernel.all.cpu.guest_nice
# kernel.all.pressure.cpu.some.avg
# kernel.all.pressure.io.some.avg
# kernel.all.pressure.io.full.avg
# kernel.all.pressure.memory.some.avg
# kernel.all.pressure.memory.full.avg
# kernel.percpu.cpu.user
# kernel.percpu.cpu.nice
# kernel.percpu.cpu.sys
# kernel.percpu.cpu.idle
# kernel.percpu.cpu.wait.total
# kernel.percpu.cpu.intr
# kernel.percpu.cpu.irq.soft
# kernel.percpu.cpu.steal
# kernel.percpu.cpu.guest
# kernel.percpu.cpu.guest_nice
mem.util.available
mem.util.free
mem.util.bufmem
mem.util.cached
mem.util.shmem
mem.util.slabReclaimable
mem.util.swapCached
mem.util.swapTotal
mem.util.swapFree
network.all.in.bytes
network.all.out.bytes
network.all.in.packets
network.all.out.packets
# zram.capacity
# zram.mm_stat.data_size.original
# zram.mm_stat.data_size.compressed
}
63 changes: 63 additions & 0 deletions pcp/pcp_commands.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
FIFO="/tmp/pcpFIFO"

#Sets up and starts the PCP service
setup_pcp() {
  # Installs the PCPrecord service files and (re)starts the services.
  #
  # Globals:
  #   TOOLS_BIN   (read)    - root of the tools tree; files come from its pcp/ dir
  #   working_dir (written) - install directory of the recorder script
  #                           (left global, as before, in case callers read it)
  # Returns: 1 if a required directory or file could not be installed,
  #          otherwise the status of the final command.
  working_dir="/usr/local/src/PCPrecord"
  local src_dir="${TOOLS_BIN}/pcp"

  # Stop the services BEFORE touching any file: 'cp' overwrites in place, and
  # replacing the script of a still-running service can corrupt that process.
  # Stopping first also means no systemctl call is made on PCPrecord.service
  # between installing its unit file and the daemon-reload further down
  # (possibly the source of the "run 'systemctl daemon-reload'" warning).
  systemctl stop PCPrecord.service
  systemctl stop pmcd
  sleep 1

  # Install the service script, its include file and the unit file
  mkdir -p "${working_dir}" || return 1
  chmod 755 "${src_dir}"/*.sh
  cp "${src_dir}/PCPrecord.service" /etc/systemd/system/. || return 1
  cp "${src_dir}/PCPrecord_actions.sh" "${working_dir}/." || return 1
  cp "${src_dir}/pcp_functions.inc" "${working_dir}/." || return 1

  # Make systemd pick up the (possibly changed) unit file, then start
  systemctl daemon-reload
  sleep 1
  systemctl start PCPrecord.service
  systemctl start pmcd
  sleep 1
}

#Starts PCP
#Resets openmetrics
#Sends "Start" command to PCPrecord_actions
#Takes three args:
#1: Directory for PCP data, should be with the workload's own data
#2: Test name
#3: PMLogger config file to use
start_pcp() {
  # Resets the openmetrics values, then sends the "Start" request.
  #   $1 - directory for PCP data
  #   $2 - test name
  #   $3 - PMLogger config file to use
  # Globals: FIFO (read) - path the requests are written to
  printf 'Reset\n' > "$FIFO"
  echo "PCP metrics reset"
  #Without the sleep the "Start" will be missed
  sleep 2
  # Arguments are passed as printf data, never as the format string, so a
  # '%' or '\' in a path or test name is sent unchanged.
  printf 'Start %s %s %s\n' "${1}" "${2}" "${3}"
  printf 'Start %s %s %s\n' "${1}" "${2}" "${3}" > "$FIFO"
}

#Sends value to PCP archive as "throughput"
#Uses openmetrics
result2pcp() {
  # Sends "throughput <value>" as a request.
  #   $1 - the throughput value to report
  # Globals: FIFO (read) - path the request is written to
  # The value is passed as printf data so a '%' in it cannot be interpreted
  # as a format directive.
  printf 'throughput %s\n' "${1}" > "$FIFO"
}

#Stops PCP
#Sends "Stop" command to PCPrecord_actions
stop_pcp() {
  # Sends the "Stop" request.
  # Globals: FIFO (read) - path the request is written to; quoted so a path
  # containing whitespace is not an "ambiguous redirect".
  printf 'Stop\n' > "$FIFO"
}

#Shut the services down
shutdown_pcp() {
  # Stops the recorder service first, then pmcd.
  local unit
  for unit in PCPrecord.service pmcd; do
    systemctl stop "${unit}"
  done
}

#Reset the openmetrics file
reset_pcp_om () {
  # Sends the "Reset" request, reports it on stdout, then pauses 2 seconds
  # before returning.
  # Globals: FIFO (read) - path the request is written to; quoted so a path
  # containing whitespace is not an "ambiguous redirect".
  printf 'Reset\n' > "$FIFO"
  echo "PCP metrics reset"
  sleep 2
}
Loading