
The collectd cpu plugin and monitor-tools are updated to support diagnosing high cpu usage on a shorter time scale. This includes tools that help SystemEngineering determine where CPU time is coming from. This collectd cpu plugin is updated to support Kubernetes services under system.slice or k8splatform.slice. This changes the frequency of read function sampling to 1 second. We now see logs with instantaneous cpu spikes at the cgroup level. The dispatch of results still occurs at the original plugin interval of 30 seconds. The logging of the 1 second sampling is configurable via /etc/collectd.d/starlingx/python_plugins.conf field 'hires = <true|false>'. The high-resolution samples are always collected and used for a histogram, but it is not always desired to log them due to the volume of output. This adds new logs for occupancy wait. This is similar to cpu occupancy, but instead of realtime used, it measures the aggregate percent of time a given cgroup is waiting to schedule. This is a measure of CPU contention. This adds new logs for occupancy histograms for all cgroups and aggregated groupings based on the 1 second occupancy samples. The histograms are displayed in hirunner order. This displays the histogram, the mean, 95th-percentile, and max value. The histograms are logged at 5 minute intervals. This reduces the collectd cgroup to 256 CPUShares from 1024. This smooths out the behaviour of poorly behaved audits. The 'schedtop' tool is updated to display the 'cgroup' field. This is the systemd cgroup name, or abbreviated pod-name. This also handles Kernel sched output format changes for 6.6. New tool 'portscanner' is added to monitor-tools to diagnose local host processes that are using specific ports. This has been instrumental in discovering gunicorn/keystone API users. New tool 'k8smetrics' is added to monitor-tools to display the delay histogram and percentiles for kube-apiserver and etcd server. 
This gives a way to quantify performance as a result of system load. Partial-Bug: 2084714 TEST PLAN: AIO-SX, AIO-DX, Standard, Storage, DC: PASS: Fresh install ISO PASS: Verify /var/log/collectd.logs for 1 second cpu/wait logs, and contains: etcd, kubelet, and containerd services. PASS: Verify we are dispatching at 30 second granularity. PASS: Verify we are displaying histograms every 5 minutes. PASS: Verify we can enable/disable the display of hiresolution logs with /etc/collectd.d/starlingx/python_plugins.conf field 'hires = <true|false>'. PASS: Verify schedtop contains 'cgroup' output. PASS: Verify output from 'k8smetrics'. Cross check against Prometheus GUI for apiserver percentile. PASS: Verify output from portscanner with port 5000. Verify 1-to-1 mapping against /var/log/keystone/keystone-all.log. Change-Id: I82d4f414afdf1cecbcc99680b360cbad702ba140 Signed-off-by: Jim Gauld <James.Gauld@windriver.com>
1728 lines
56 KiB
Perl
Executable File
1728 lines
56 KiB
Perl
Executable File
#!/usr/bin/perl
|
|
########################################################################
|
|
#
|
|
# Copyright (c) 2015-2024 Wind River Systems, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
#
|
|
########################################################################
|
|
#
|
|
# Description:
|
|
# This displays occupancy and scheduling information per sample period.
|
|
# Output includes total occupancy, per-core occupancy, loadavg, per-task cpu,
|
|
# per-task scheduling, per-task io-wait.
|
|
#
|
|
# Usage: schedtop OPTIONS
|
|
# [--delay=<seconds>] [--repeat=<num>] [--period=<seconds>]
|
|
# [--reset-hwm] [--idle] [--sort=<cpu|io>]
|
|
# [--watch-cmd=tid1,cmd1,cmd2,...] [--watch-cgroup=cgroup1,...]
|
|
# [--watch-only] [--watch-quiet]
|
|
# [--trig-delay=time]
|
|
# [--help]
|
|
|
|
use strict;
|
|
use warnings;
|
|
use Data::Dumper;
|
|
use POSIX qw(uname strftime);
|
|
use Time::HiRes qw(clock_gettime usleep CLOCK_MONOTONIC CLOCK_REALTIME);
|
|
use Benchmark ':hireswallclock';
|
|
use Carp qw(croak carp);
|
|
use Math::BigInt;
|
|
use File::Find ();
|
|
|
|
# Tool identity (name is used in diagnostics such as the root-user check).
our $TOOLNAME = "schedtop";
our $VERSION = "0.1";

# Unit multipliers: SI_* are powers of 1000, Ki/Mi/Gi are powers of 1024.
use constant {
    SI_k => 1.0E3,
    SI_M => 1.0E6,
    SI_G => 1.0E9,
    Ki   => 1024.0,
    Mi   => 1024.0*1024.0,
    Gi   => 1024.0*1024.0*1024.0,
};
|
|
|
|
# Globals
# Option tables: %opt_V records which data sources exist ('sched', 'io'),
# %opt_P holds the print level selected for each output area.
our (%opt_V, %opt_P);

# Sampling state. A "_1" suffix is the current sample and "_0" the previous
# one; %tids_w holds the watched tids, %D_* hold the per-interval deltas.
our (%percpu_0, %percpu_1);
our (%task_0, %task_1);
our (%tids_0, %tids_1, %tids_w);
our (%D_task, %D_percpu);
our %loadavg;

# Timestamps: tm_* come from CLOCK_MONOTONIC, tr_* from CLOCK_REALTIME.
our ($tm_0, $tm_1);
our ($tr_0, $tr_1);
our ($tm_elapsed, $tr_elapsed);
our ($tm_final, $uptime);

# System shape and counters
our $num_cpus      = 1;
our $affinity_mask = Math::BigInt->new('0');
our $w_aff         = 10;
our ($num_tasks, $num_blk, $num_state_D) = (0, 0, 0);
our $is_schedstat  = 1;
our $USER_HZ       = 100; # no easy way to get this
our $CLOCK_NS      = SI_G / $USER_HZ;
our $print_host    = 1;

# cgroup.procs bookkeeping for --watch-cgroup
our (@cgroup_procs_paths, @cgroup_procs_match, @cgroup_tids);

# Print options: levels, then the output areas they apply to
our ($P_none, $P_lite, $P_brief, $P_full) = (0 .. 3);
our ($P_ps, $P_cpu, $P_del, $P_io, $P_id, $P_cmd) = (0 .. 5);
our @P_list = ($P_ps, $P_cpu, $P_del, $P_io, $P_id, $P_cmd);

# Argument list parameters (filled in by the argument parser)
our (
    $arg_debug, $arg_delay, $arg_repeat, $arg_period,
    $arg_reset_hwm, $arg_idle, $arg_sort, $arg_print,
    @arg_watch_cmd, @arg_watch_cgroup,
    $arg_watch_only, $arg_watch_quiet, $arg_trig_delay,
);
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# MAIN Program
|
|
#-------------------------------------------------------------------------------
|
|
my $ONE_BILLION = 1.0E9;

# Bounds (in seconds) applied to the per-iteration sleep in the main loop.
my ($MIN_DELAY, $MAX_DELAY) = (0.001, 0.001);

# benchmark variables
my ($bd, $b0, $b1);

# Two-letter labels indexed by the numeric scheduling policy.
my @policies = qw(OT FF RR BA ID UN UN);

# Per-task counters reported as the difference between two samples.
my @delta_list = qw(
    nr_switches
    nr_migrations
    exec_runtime
    wait_sum
    wait_count
    iowait_sum
    iowait_count
    syscr
    syscw
    read_bytes
    write_bytes
    cancelled_write_bytes
);

# Per-task fields copied verbatim from the current sample.
my @state_list = qw(
    exec_max wait_max block_max
    pid ppid state cgroup comm cmdline wchan affinity
    VmSize VmRSS start_time
    nice policy priority rt_priority task_cpu
);

# Autoflush output on both streams; STDOUT is left as the selected handle.
for my $stream (\*STDERR, \*STDOUT) {
    select($stream);
    $| = 1;
}
|
|
|
|
# Parse input arguments and print tool usage if necessary
parse_schedtop_args(
    \$::arg_debug,
    \$::arg_delay,
    \$::arg_repeat,
    \$::arg_period,
    \$::arg_reset_hwm,
    \$::arg_idle,
    \$::arg_sort,
    \$::arg_print,
    \@::arg_watch_cmd,
    \@::arg_watch_cgroup,
    \$::arg_watch_only,
    \$::arg_watch_quiet,
    \$::arg_trig_delay,
);

# Set default print options: every output area starts at the level named by
# --print ('full', 'brief', anything else disables the area).
{
    my $level = ($::arg_print eq 'full')  ? $::P_full
              : ($::arg_print eq 'brief') ? $::P_brief
              :                             $::P_none;
    $::opt_P{$_} = $level for @::P_list;
}

# Disable some options if data not present
$::opt_V{'sched'} = is_sched();
$::opt_V{'io'}    = is_io();
if ($::opt_V{'sched'} == 0) {
    # No per-task sched file: nothing to show for cpu, delay or io areas,
    # and there are no hi-water marks to reset.
    $::opt_P{$_} = $::P_none for ($::P_cpu, $::P_del, $::P_io);
    undef $::arg_reset_hwm;
}
if ($::opt_V{'io'} == 0) {
    if ($::opt_V{'sched'} == 0) {
        $::opt_P{$::P_io} = $::P_none;
        $::arg_sort = 'cpu';
    }
    elsif ($::opt_P{$::P_io} != $::P_none) {
        # Only the iowait figures from the sched file remain available.
        $::opt_P{$::P_io} = $::P_lite;
    }
}
|
|
|
|
# Check for root user ($> is the effective uid; non-zero means not root)
if ($>) {
    warn "$::TOOLNAME: requires root/sudo.\n";
    exit 1;
}

# Print out some debugging information
if (defined $::arg_debug) {
    $Data::Dumper::Indent = 1;
}

# Check for schedstat support; fallback to stats
$is_schedstat = (-e '/proc/schedstat') ? 1 : 0;

# Print out selected options
printf "selected options: ".
    "delay = %.3fs, repeat = %d, idle=%s, hwm=%s, sort=%s, print=%s\n",
    $::arg_delay, $::arg_repeat,
    (defined $::arg_idle ? 'idle_tasks' : 'no_idle_tasks'),
    (defined $::arg_reset_hwm ? 'reset-hwm' : 'unchanged'),
    $::arg_sort, $::arg_print;

# Report the watch/trigger settings whenever any watch pattern was given;
# previously a run using only --watch-cgroup printed nothing here.
if (@::arg_watch_cmd || @::arg_watch_cgroup) {
    printf "selected watch/trigger options: ".
        "watch-cmd=%s, watch-cgroup=%s, only=%s, quiet=%s, delay=%d ms\n",
        join(',', @::arg_watch_cmd),
        join(',', @::arg_watch_cgroup),
        (defined $::arg_watch_only ? 'true' : 'false'),
        (defined $::arg_watch_quiet ? 'true' : 'false'),
        $::arg_trig_delay;
}
|
|
|
|
# Capture timestamp (direct method call rather than indirect object syntax)
$b0 = Benchmark->new;

# Get number of logical cpus, then build a mask with one bit set per cpu
get_num_logical_cpus(\$::num_cpus);
$::affinity_mask = Math::BigInt->new('0');
for my $cpu (0 .. $::num_cpus - 1) {
    $::affinity_mask->bior(Math::BigInt->new('1')->blsft($cpu));
}

# Width of the AFF column: wide enough for the heading and the full hex mask
$w_aff = max(length('AFF'), length($::affinity_mask->as_hex()));

# Find cgroup.proc paths matching specified cgroup patterns
find_matching_cgroup_procs(\@::cgroup_procs_match, \@::arg_watch_cgroup);
for my $file (@::cgroup_procs_match) {
    print "matched cgroup:", $file, "\n";
}
|
|
|
|
# Reset scheduling hi-water marks
if (defined $::arg_reset_hwm) {
    get_tids(\%::tids_1);
    reset_sched_hwm(\%::tids_1);

    # Pause 1 ms after the reset. The builtin sleep() only takes whole
    # seconds, so sleep(0.001) returned immediately; usleep() (imported from
    # Time::HiRes) takes microseconds.
    usleep(1000);
}
|
|
|
|
# Get current hires epoch timestamps and derive the overall end time
($::tm_1, $::tr_1) = (clock_gettime(CLOCK_MONOTONIC), clock_gettime(CLOCK_REALTIME));
$::tm_final = $::tm_1 + $::arg_delay*$::arg_repeat;

# Set initial delay; the per-iteration sleep may not exceed delay + MIN_DELAY
$::tm_elapsed = $::arg_delay;
$MAX_DELAY = $::arg_delay + $MIN_DELAY;

# Take the baseline sample: per-cpu stats (schedstat if present, else stat),
# the tid -> pid map, and scheduling/io info for every tid.
($is_schedstat ? \&read_schedstat : \&read_stat)->(\%::percpu_1);
get_tids(\%::tids_1);
read_sched(\%::tids_1, \%::task_1);

# Track watched tids for monitoring
track_watched_tids(
    \%::tids_1,
    \%::tids_w,
    \%::task_1,
    \@::arg_watch_cmd,
    \@::arg_watch_cgroup,
);
|
|
|
|
# determine column sort order
# Watched tasks always sort first; the remaining keys depend on --sort.
# Anything other than 'io' uses the cpu ordering.
my $s_keyw = 'watched';
my ($s_key1, $s_key2, $s_key3) =
    ($::arg_sort eq 'io')
        ? ('io', 'ios', 'exec_runtime')
        : ('exec_runtime', 'nr_switches', 'pid');
|
|
|
|
# Main loop
# Each pass: keep the previous sample, sleep, take a new sample, compute the
# per-task and per-cpu deltas, evaluate watch triggers, then print the report.
REPEAT_LOOP: for (my $repeat=1; $repeat <= $::arg_repeat; $repeat++) {

    # copy all state variables: the current sample (_1) becomes previous (_0)
    ($::tm_0, $::tr_0) = ($::tm_1, $::tr_1);
    %::percpu_0 = %::percpu_1;
    %::tids_0   = %::tids_1;
    %::task_0   = ();
    foreach my $tid (keys %::task_1) {
        foreach my $var (keys %{$::task_1{$tid}}) {
            $::task_0{$tid}{$var} = $::task_1{$tid}{$var};
        }
    }

    # estimate sleep delay to achieve desired interarrival by subtracting out
    # the measured cpu runtime of the tool (exec_runtime is divided by SI_k
    # to bring it to seconds).
    my $delay = $::arg_delay;
    if (exists $::D_task{$$} && defined $::D_task{$$}{'exec_runtime'}) {
        $delay -= ($::D_task{$$}{'exec_runtime'}/SI_k);
    }
    $delay = $MIN_DELAY if ($delay < $MIN_DELAY);
    $delay = $MAX_DELAY if ($delay > $MAX_DELAY);
    usleep( SI_M*$delay );

    # Collect current state
    %::percpu_1 = (); %::tids_1 = (); %::task_1 = ();

    # Get current hires epoc timestamp
    $::tm_1 = clock_gettime(CLOCK_MONOTONIC);
    $::tr_1 = clock_gettime(CLOCK_REALTIME);

    # Get overall per-cpu stats
    if ($is_schedstat) {
        read_schedstat(\%::percpu_1);
    } else {
        read_stat(\%::percpu_1);
    }

    # Get list of pids and tids
    get_tids(\%::tids_1);

    if (defined $::arg_watch_only) {
        # This determines a subset of pids and tids
        # based on previous watched tids and matching cgroups.
        # This should reduce cpu impact dramatically.

        # Get array of tids corresponding to matching cgroups
        read_cgroup_procs(\@::cgroup_tids, \@::cgroup_procs_match);
        my %cgroup_tids_h = map { $_ => 1 } @::cgroup_tids;

        # Keep previous watched tids, adopt new matches from cgroup.procs,
        # and prune every other tid from this sample.
        foreach my $tid (keys %::tids_1) {
            next if (exists $::tids_w{$tid});
            if (exists $cgroup_tids_h{$tid}) {
                $::tids_w{$tid} = $::tids_1{$tid};
                printf "ADD watching: tid=%7d\n", $tid;
                next;
            }
            delete $::tids_1{$tid};
        }

        # Prune watched tids that no longer exist
        foreach my $tid (keys %::tids_w) {
            next if (exists $::tids_1{$tid});
            printf "REM watching: tid=%7d\n", $tid;
            delete $::tids_w{$tid};
        }
    }

    # Get current scheduling and io info for all tids
    read_sched(\%::tids_1, \%::task_1);

    # Get current uptime
    get_uptime(\$::uptime);

    # Get current loadavg
    get_loadavg(\%::loadavg, \$::runq, \$::num_tasks);

    # Get current processes blocked
    get_blocked(\$::num_blk);

    # Get current processes in uninterruptible disk sleep.
    $::num_state_D = 0;
    foreach my $tid (keys %::task_1) {
        $::num_state_D++ if ($::task_1{$tid}{'state'} eq 'D');
    }

    # Delta calculation (only for tasks present in both samples)
    %::D_task = (); %::D_percpu = ();
    $::tm_elapsed = $::tm_1 - $::tm_0;
    $::tr_elapsed = $::tr_1 - $::tr_0;
    foreach my $tid (keys %::task_1) {
        next if ( !(exists $::task_0{$tid}) );

        my ($cur, $prev) = ($::task_1{$tid}, $::task_0{$tid});
        my %delta = ();

        # simple delta
        foreach my $var (@delta_list) {
            $delta{$var} = ($cur->{$var} - $prev->{$var});
        }

        # state information
        foreach my $state (@state_list) {
            $delta{$state} = $cur->{$state};
        }
        $delta{'watched'} = (exists $::tids_w{$tid}) ? 1 : 0;

        # derived calculations
        my $exec_runtime = $delta{'exec_runtime'};
        my $nr_switches  = $delta{'nr_switches'};
        my $iowait_sum   = $delta{'iowait_sum'};
        $delta{'tlen'} = ($nr_switches > 0.0) ? $exec_runtime / $nr_switches : 0.0;
        if ($::tm_elapsed > 0.0) {
            $delta{'occ'}    = 100.0*$exec_runtime/1.0E3/$::tm_elapsed;
            $delta{'iowait'} = 100.0*$iowait_sum/1.0E3/$::tm_elapsed;
        } else {
            $delta{'occ'}    = 0.0;
            $delta{'iowait'} = 0.0;
        }
        $delta{'io'} = $delta{'read_bytes'}
                     + $delta{'write_bytes'}
                     + $delta{'cancelled_write_bytes'};
        $delta{'ios'} = $delta{'syscw'}
                      + $delta{'iowait_count'};

        $::D_task{$tid} = \%delta;
    }

    foreach my $cpu (keys %::percpu_1) {
        $::D_percpu{$cpu}{'runtime'} = ($::percpu_1{$cpu} - $::percpu_0{$cpu})/1.0E6;
        if ($::tm_elapsed > 0.0) {
            $::D_percpu{$cpu}{'occ'} = 100.0*$::D_percpu{$cpu}{'runtime'}/1.0E3/$::tm_elapsed;
        } else {
            $::D_percpu{$cpu}{'occ'} = 0.0;
        }
    }
    my $occ_total = 0.0;
    for (my $cpu=0; $cpu < $::num_cpus; $cpu++) {
        $occ_total += $::D_percpu{$cpu}{'occ'};
    }

    # Trigger sysrq and coredump if we exceed watch trigger threshold
    my $trigger = 0;
    if ($::arg_trig_delay > 0) {
        foreach my $tid (keys %::tids_w) {
            # A watched tid has no delta when it is new this interval or has
            # exited. Skip it: reading $::D_task{$tid}{...} here would
            # autovivify an empty entry that the report loop below cannot
            # print (undefined affinity).
            next if (!exists $::D_task{$tid});

            if ($::D_task{$tid}{'wait_max'} > $::arg_trig_delay) {
                $trigger = 1;
                if (!defined $::arg_watch_quiet) {
                    printf "TRIGGER: delay: %.3f > %.3f milliseconds, tid: %d, comm: %s\n",
                        $::D_task{$tid}{'wait_max'}, $::arg_trig_delay, $tid, $::task_1{$tid}{'comm'};
                }
            }
            if ($::D_task{$tid}{'block_max'} > $::arg_trig_delay) {
                $trigger = 1;
                if (!defined $::arg_watch_quiet) {
                    printf "TRIGGER: block: %.3f > %.3f milliseconds, tid: %d, comm: %s\n",
                        $::D_task{$tid}{'block_max'}, $::arg_trig_delay, $tid, $::task_1{$tid}{'comm'};
                }
            }
        }
    }
    if ($trigger) {
        if (!defined $::arg_watch_quiet) {
            printf "TRIGGER sysrq.";
        }
        sysrq_trigger_crash();
    }

    # Suppress all output
    next if (defined $::arg_watch_quiet);

    # Print summary
    schedtop_header(
        \$::tr_1,
        \$::tm_elapsed,
        \$::tr_elapsed,
        \$::uptime,
        \$::loadavg,
        \$::runq,
        \$::num_blk,
        \$::num_state_D,
        \$::num_tasks,
        \$::print_host
    );

    printf "%-5s %7s ", 'core:', 'total';
    for (my $cpu=0; $cpu < $::num_cpus; $cpu++) {
        printf "%5s ", $cpu;
    }
    print "\n";
    printf "%-5s %7.1f ", 'occ:', $occ_total;
    for (my $cpu=0; $cpu < $::num_cpus; $cpu++) {
        printf "%5.1f ", $::D_percpu{$cpu}{'occ'};
    }
    print "\n";
    print "\n";

    # Build up the column heading line by specific area
    my $L = '';
    $L .= sprintf "%7s %7s %7s ", "TID", "PID", "PPID";
    if ($::opt_P{$::P_ps} != $::P_none) {
        $L .= sprintf "%1s %2s %*s %2s %3s %4s ",
            "S", "P", $w_aff, "AFF", "PO", "NI", "PR";
    }
    if ($::opt_P{$::P_cpu} == $::P_brief) {
        $L .= sprintf "%6s %7s ", "ctxt", "occ";
    } elsif ($::opt_P{$::P_cpu} == $::P_full) {
        $L .= sprintf "%6s %6s %7s ", "ctxt", "migr", "occ";
    }
    if ($::opt_P{$::P_del} != $::P_none) {
        $L .= sprintf "%7s %7s %7s %7s %7s ", "tlen", "tmax", "delay", "dmax", "bmax";
    }
    if ($::opt_P{$::P_io} == $::P_lite) {
        $L .= sprintf "%7s %6s ", "iowt", "iocnt";
    } elsif ($::opt_P{$::P_io} == $::P_brief) {
        $L .= sprintf "%7s %8s %8s ", "iowt", "read", "write";
    } elsif ($::opt_P{$::P_io} == $::P_full) {
        $L .= sprintf "%7s %8s %8s %8s %8s %8s ",
            "iowt", "read", "write", "wcncl", "rsysc", "wsysc";
    }
    if ($::opt_P{$::P_id} != $::P_none) {
        $L .= sprintf "%-22s ", "wchan";
    }
    if ($::opt_P{$::P_cmd} == $::P_brief) {
        $L .= sprintf "%s", "cmdline";
    } elsif ($::opt_P{$::P_cmd} == $::P_full) {
        $L .= sprintf "%-16s %-15s %s", "cgroup", "comm", "cmdline";
    }
    print $L, "\n";

    # One row per task: watched tasks first, then by the selected sort keys
    # (all descending).
    foreach my $tid (sort {($::D_task{$b}{$s_keyw} <=> $::D_task{$a}{$s_keyw}) or
                           ($::D_task{$b}{$s_key1} <=> $::D_task{$a}{$s_key1}) or
                           ($::D_task{$b}{$s_key2} <=> $::D_task{$a}{$s_key2}) or
                           ($::D_task{$b}{$s_key3} <=> $::D_task{$a}{$s_key3})} keys %::D_task) {
        my $d = $::D_task{$tid};
        my $exec_runtime = $d->{'exec_runtime'};
        my $nr_switches  = $d->{'nr_switches'};
        my $aff = $d->{'affinity'}->as_hex();

        # skip printing if there is no actual delta
        if ( !(defined $::arg_idle) ) {
            next if (($exec_runtime == 0.0) && ($nr_switches == 0));
        }

        # Build up output line by specific area
        $L = '';
        $L .= sprintf "%7d %7d %7d ",
            $tid, $d->{'pid'}, $d->{'ppid'};
        if ($::opt_P{$::P_ps} != $::P_none) {
            $L .= sprintf "%1s %2d %*s %2s %3d %4d ",
                $d->{'state'}, $d->{'task_cpu'}, $w_aff, $aff,
                $policies[$d->{'policy'}], $d->{'nice'},
                $d->{'priority'};
        }
        if ($::opt_P{$::P_cpu} == $::P_brief) {
            $L .= sprintf "%6d %7.2f ",
                $d->{'nr_switches'}, $d->{'occ'};
        } elsif ($::opt_P{$::P_cpu} == $::P_full) {
            $L .= sprintf "%6d %6d %7.2f ",
                $d->{'nr_switches'}, $d->{'nr_migrations'},
                $d->{'occ'};
        }
        if ($::opt_P{$::P_del} != $::P_none) {
            $L .= sprintf "%7.3f %7.1f %7.3f %7.1f %7.1f ",
                $d->{'tlen'}, $d->{'exec_max'},
                $d->{'wait_sum'}, $d->{'wait_max'},
                $d->{'block_max'};
        }
        if ($::opt_P{$::P_io} == $::P_lite) {
            $L .= sprintf "%7.2f %6d ",
                $d->{'iowait'}, $d->{'iowait_count'};
        } elsif ($::opt_P{$::P_io} == $::P_brief) {
            $L .= sprintf "%7.2f %8s %8s ",
                $d->{'iowait'},
                format_SI($d->{'read_bytes'}),
                format_SI($d->{'write_bytes'});
        } elsif ($::opt_P{$::P_io} == $::P_full) {
            $L .= sprintf "%7.2f %8s %8s %8s %8s %8s ",
                $d->{'iowait'},
                format_SI($d->{'read_bytes'}),
                format_SI($d->{'write_bytes'}),
                format_SI($d->{'cancelled_write_bytes'}),
                format_SI($d->{'syscr'}),
                format_SI($d->{'syscw'});
        }
        if ($::opt_P{$::P_id} != $::P_none) {
            $L .= sprintf "%-22s ", substr($d->{'wchan'}, 0, 22);
        }
        if ($::opt_P{$::P_cmd} == $::P_brief) {
            $L .= sprintf "%s", $d->{'cmdline'};
        } elsif ($::opt_P{$::P_cmd} == $::P_full) {
            $L .= sprintf "%-16s %-15s %s",
                substr($d->{'cgroup'}, 0, 16),
                substr($d->{'comm'}, 0, 15),
                $d->{'cmdline'};
        }
        print $L, "\n";
    }
    print "\n";

    # exit repeat loop if we have exceeded overall time
    last if ($::tm_1 > $::tm_final);

} # REPEAT LOOP
|
|
|
|
# Print that tool has finished
print "done\n";

# Capture the end timestamp and report the wall-clock/cpu time consumed since
# the start timestamp (direct method call rather than indirect object syntax).
$b1 = Benchmark->new;
$bd = Benchmark::timediff($b1, $b0);
printf "processing time: %s\n", timestr($bd);
exit 0;
|
|
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Render a number with an SI suffix (powers of 1000).
# Values of at least 1e9, 1e6 or 1e3 are scaled and shown with three decimals
# followed by G, M or k; anything smaller is shown with no decimals and no
# suffix.
sub format_SI
{
    my ($value) = @_;

    # Largest unit first so the first threshold reached wins.
    my @scales = (
        [ SI_G, "%.3fG" ],
        [ SI_M, "%.3fM" ],
        [ SI_k, "%.3fk" ],
    );
    foreach my $entry (@scales) {
        my ($unit, $fmt) = @{$entry};
        return sprintf($fmt, $value/$unit) if ($value >= $unit);
    }
    return sprintf("%.0f", $value);
}
|
|
|
|
# Render a number with an IEC binary suffix (powers of 1024).
# Gi and Mi values carry three decimals; Ki values carry none because the
# underlying memory units are whole pages. Smaller values are shown as-is
# with no decimals.
sub format_IEC
{
    my ($value) = @_;

    # Largest unit first so the first threshold reached wins.
    my @scales = (
        [ Gi, "%.3fGi" ],
        [ Mi, "%.3fMi" ],
        [ Ki, "%.0fKi" ],
    );
    foreach my $entry (@scales) {
        my ($unit, $fmt) = @{$entry};
        return sprintf($fmt, $value/$unit) if ($value >= $unit);
    }
    return sprintf("%.0f", $value);
}
|
|
|
|
# Report whether per-task scheduler statistics are available.
# Returns 1 when /proc/1/task/1/sched exists, otherwise 0.
sub is_sched
{
    my $probe = '/proc/1/task/1/sched';
    return 1 if (-e $probe);
    return 0;
}
|
|
|
|
# Report whether per-task IO statistics are available.
# Returns 1 when /proc/1/task/1/io exists, otherwise 0.
sub is_io
{
    my $probe = '/proc/1/task/1/io';
    return 1 if (-e $probe);
    return 0;
}
|
|
|
|
# Return the numerically largest argument.
# The first argument seeds the result; with no arguments the result is undef.
sub max {
    my @candidates = @_;
    my $largest = shift @candidates;
    foreach my $value (@candidates) {
        $largest = $value if ($value > $largest);
    }
    return $largest;
}
|
|
|
|
# Build the tid -> pid mapping by walking /proc/<pid>/task/<tid>.
#   $_[0]  hash reference to fill; entries are added, existing ones are kept
# Croaks when /proc itself cannot be opened. A process whose task directory
# cannot be opened is skipped.
sub get_tids
{
    my ($tids_ref) = @_;

    # get pid list: numeric directory names directly under /proc
    my $proc_dir = '/proc';
    opendir(my $proc_dh, $proc_dir) || croak "Cannot open directory: $proc_dir ($!)";
    my @pid_list = grep { /^\d+$/ && -d "$proc_dir/$_" } readdir($proc_dh);
    closedir($proc_dh);

    # get tid list: numeric directory names under each process' task dir
    PID: foreach my $pid (@pid_list) {
        my $task_dir = '/proc/' . $pid . '/task';
        opendir(my $task_dh, $task_dir) || next PID;
        my @tid_list = grep { /^\d+$/ && -d "$task_dir/$_" } readdir($task_dh);
        closedir($task_dh);
        $tids_ref->{$_} = $pid for @tid_list;
    }
}
|
|
|
|
# Reset scheduling hi-water-marks by writing '0' to each task's sched file.
# NOTE: Reset by write 0 to sched is finicky; use brute force, i.e. write to
# every path form: /proc/<pid>/sched, /proc/<tid>/sched and
# /proc/<pid>/task/<tid>/sched.
#   $_[0]  hash reference mapping tid -> pid
# Files that cannot be opened for writing are silently skipped.
sub reset_sched_hwm
{
    my ($tids_ref) = @_;

    # Distinct pids owning the given tids
    my %pids_ = map { $_ => 1 } values %{$tids_ref};

    # Same write order as before: all processes, then all tids at the top
    # level, then all tids under their owning process.
    my @sched_files = (
        (map { '/proc/' . $_ . '/sched' } keys %pids_),
        (map { '/proc/' . $_ . '/sched' } keys %{$tids_ref}),
        (map { '/proc/' . $tids_ref->{$_} . '/task/' . $_ . '/sched' } keys %{$tids_ref}),
    );

    foreach my $file (@sched_files) {
        # Three-argument open: the mode can never be taken from the path.
        open(my $fh, '>', $file) || next;
        print {$fh} "0\n";
        close($fh);
    }
}
|
|
|
|
# Trigger a crash dump via sysrq, result in /var/crash .
# The following requires root privilege:
#   echo 1 > /proc/sys/kernel/sysrq
#   echo c > /proc/sysrq-trigger
# Croaks if either file cannot be opened for writing.
sub sysrq_trigger_crash
{
    # Write "1" to the sysrq control file first.
    my $file1 = '/proc/sys/kernel/sysrq';
    open(my $fh1, '>', $file1) || croak "Cannot open file: $file1 ($!)";
    print {$fh1} "1\n";
    close($fh1);

    # Then write the 'c' command to the trigger file.
    my $file2 = '/proc/sysrq-trigger';
    open(my $fh2, '>', $file2) || croak "Cannot open file: $file2 ($!)";
    print {$fh2} "c\n";
    close($fh2);
}
|
|
|
|
# Track watched tids for monitoring.
#   $_[0]  hash ref: tid -> pid for the current sample
#   $_[1]  hash ref: watched tid -> pid; newly matched tids are added here
#   $_[2]  hash ref: tid -> task info ('comm', 'cgroup', 'cmdline' are read)
#   $_[3]  array ref of --watch-cmd patterns; a purely numeric pattern matches
#          a tid or pid, and any pattern matches as a prefix of comm
#   $_[4]  array ref of --watch-cgroup patterns, matched as a prefix of cgroup
# A tid is added and announced once, however many patterns it matches.
# Missing task info is printed as an empty string, and the caller's task hash
# is not modified.
sub track_watched_tids
{
    my ($tids_ref, $tids_w_ref, $task_ref, $watch_cmd_ref, $watch_cgroup_ref) = @_;

    TID: foreach my $tid (keys %{$tids_ref}) {
        next TID if (exists $tids_w_ref->{$tid});

        my $pid = $tids_ref->{$tid};

        # Look up task info without autovivifying an entry for unknown tids.
        my $info    = (exists $task_ref->{$tid}) ? $task_ref->{$tid} : {};
        my $comm    = $info->{'comm'};
        my $cgroup  = $info->{'cgroup'};
        my $cmdline = $info->{'cmdline'};
        my $watched = 0;

        CMD: foreach my $cmd (@{$watch_cmd_ref}) {
            if (($cmd =~ /^\d+$/) && (($tid == $cmd) || ($pid == $cmd))) {
                $watched = 1;
                last CMD;
            }
            if ((defined $comm) && ($comm =~ /^\Q$cmd\E/)) {
                $watched = 1;
                last CMD;
            }
        }
        if (!$watched) {
            CGROUP: foreach my $cg (@{$watch_cgroup_ref}) {
                if ((defined $cgroup) && ($cgroup =~ /^\Q$cg\E/)) {
                    $watched = 1;
                    last CGROUP;
                }
            }
        }
        next TID if (!$watched);

        $tids_w_ref->{$tid} = $pid;
        printf "watching: tid=%7d, cgroup=%s, comm=%s, cmdline=%.40s\n",
            $tid, map { defined $_ ? $_ : '' } ($cgroup, $comm, $cmdline);
    }
}
|
|
|
|
# File::Find callback. File::Find offers no way to hand results back, so each
# visited entry named 'cgroup.procs' has its full path appended to the global
# @::cgroup_procs_paths.
sub wanted_cgroup_procs {
    return unless ($_ eq 'cgroup.procs');
    push(@::cgroup_procs_paths, $File::Find::name);
}
|
|
|
|
# Find cgroup.proc paths matching specified cgroup patterns.
#   $_[0]  array ref that receives the matching cgroup.procs paths
#   $_[1]  array ref of --watch-cgroup patterns
# A path matches a pattern when the pattern is followed by ".service" or
# ".scope", or when it names an entry two levels below "kubepods/".
# Each path is reported at most once, even when several patterns match it.
# NOTE(review): only the 'pids' controller directory is searched; presumably
# this assumes a cgroup v1 layout -- confirm for unified hierarchies.
sub find_matching_cgroup_procs
{
    my ($match_ref, $watch_cgroup_ref) = @_;

    # Find all cgroup.procs paths for the pids cgroup controller; the
    # callback accumulates them in @::cgroup_procs_paths.
    File::Find::find(\&wanted_cgroup_procs, '/sys/fs/cgroup/pids');

    FILE: foreach my $file (@::cgroup_procs_paths) {
        foreach my $cg (@{$watch_cgroup_ref}) {
            if (   ($file =~ /\Q$cg\E(\.service|\.scope)/)
                || ($file =~ /kubepods\/\w+\/\Q$cg\E/)) {
                push(@{$match_ref}, $file);
                next FILE;    # one entry per path is enough
            }
        }
    }
}
|
|
|
|
# Get array of tids corresponding to matching cgroups.
#   $_[0]  array ref that receives the ids; it is emptied first, so repeated
#          calls (one per sample) do not accumulate stale or duplicate ids
#   $_[1]  array ref of cgroup.procs file paths to read
# Every line consisting only of digits contributes one id. Files that cannot
# be opened (e.g. the cgroup has gone away) are skipped.
sub read_cgroup_procs
{
    my ($tids_ref, $procs_files_ref) = @_;

    @{$tids_ref} = ();

    foreach my $cgroup_procs (@{$procs_files_ref}) {
        open(my $fh, '<', $cgroup_procs) || next;
        while (my $line = <$fh>) {
            push(@{$tids_ref}, $1) if ($line =~ /^(\d+)$/);
        }
        close($fh);
    }
}
|
|
|
|
# Parse cpu and scheduling info for each tid
|
|
# - ignore the specific tid if there is incomplete data,
|
|
# (i.e., cannot obtain info because task has died,
|
|
# eg. missing ./stat, ./status, ./cmdline, ./wchan)
|
|
#
|
|
sub read_sched
|
|
{
|
|
(local *::tids, local *::task) = @_;
|
|
|
|
%::task = ();
|
|
foreach my $tid (keys %::tids) {
|
|
my ($fh, $file, $pid, $comm, $cmdline, $wchan, $id) = ();
|
|
my ($tpid, $tcomm, $state, $ppid, $pgrp, $sid,
|
|
$tty_nr, $tty_pgrp, $flags,
|
|
$min_flt, $cmin_flt, $maj_flt, $cmaj_flt,
|
|
$utime, $stime, $cutime, $cstime,
|
|
$priority, $nice, $num_threads,
|
|
$it_real_value, $start_time,
|
|
$vsize, $rss, $rsslim,
|
|
$start_code, $end_code, $start_stack, $esp, $eip,
|
|
$pending, $blocked, $sigign, $sigcatch, $wchan_addr,
|
|
$dum1, $dum2, $exit_signal, $task_cpu,
|
|
$rt_priority, $policy, $blkio_ticks,
|
|
$gtime, $cgtime,
|
|
$start_data, $end_data, $start_brk, $arg_start, $arg_end,
|
|
$env_start, $env_end, $exit_code) = ();
|
|
my ($cgroup) = ();
|
|
my ($nr_switches, $nr_migrations) = (0,0);
|
|
my ($exec_runtime, $exec_max) = (0.0, 0.0);
|
|
my ($wait_max, $wait_sum, $wait_count) = (0.0, 0.0, 0);
|
|
my ($block_max) = (0.0);
|
|
my ($iowait_sum, $iowait_count) = (0.0, 0);
|
|
my ($VmSize, $VmRSS) = ();
|
|
my $Cpus_allowed = Math::BigInt->new('0');
|
|
my $affinity = Math::BigInt->new('0');
|
|
my ($rchar, $wchar, $syscr, $syscw, $read_bytes, $write_bytes,
|
|
$cancelled_write_bytes) = (0,0,0,0,0,0,0);
|
|
|
|
my ($sched_valid, $io_valid, $status_valid, $cmdline_valid,
|
|
$wchan_valid, $stat_valid, $cgroup_valid) = ();
|
|
|
|
$pid = $::tids{$tid};
|
|
|
|
# NOTE: Format change over time: OLD: se.statistics.X, NEW: se.statistics->X
|
|
#cat /proc/1/sched
|
|
#systemd (1, #threads: 1)
|
|
#-------------------------------------------------------------------
|
|
#se.exec_start : 33792676.285222
|
|
#se.vruntime : 28019997.693224
|
|
#se.sum_exec_runtime : 21918.207287
|
|
#se.nr_migrations : 5413
|
|
#se.statistics->sum_sleep_runtime : 1166561.198533
|
|
#se.statistics->wait_start : 0.000000
|
|
#se.statistics->sleep_start : 33792676.285222
|
|
#se.statistics->block_start : 0.000000
|
|
#se.statistics->sleep_max : 18951.679990
|
|
#se.statistics->block_max : 0.000000
|
|
#se.statistics->exec_max : 0.909747
|
|
#se.statistics->slice_max : 1.790123
|
|
#se.statistics->wait_max : 4.026544
|
|
#se.statistics->wait_sum : 507.245963
|
|
#se.statistics->wait_count : 2540
|
|
#se.statistics->iowait_sum : 0.000000
|
|
#se.statistics->iowait_count : 0
|
|
#se.statistics->nr_migrations_cold : 0
|
|
#se.statistics->nr_failed_migrations_affine : 67
|
|
#se.statistics->nr_failed_migrations_running : 1
|
|
#se.statistics->nr_failed_migrations_hot : 1
|
|
#se.statistics->nr_forced_migrations : 0
|
|
#se.statistics->nr_wakeups : 2472
|
|
#se.statistics->nr_wakeups_sync : 34
|
|
#se.statistics->nr_wakeups_migrate : 176
|
|
#se.statistics->nr_wakeups_local : 1442
|
|
#se.statistics->nr_wakeups_remote : 1030
|
|
#se.statistics->nr_wakeups_affine : 155
|
|
#se.statistics->nr_wakeups_affine_attempts : 969
|
|
#se.statistics->nr_wakeups_passive : 0
|
|
#se.statistics->nr_wakeups_idle : 0
|
|
#avg_atom : 0.286970
|
|
#avg_per_cpu : 4.049179
|
|
#nr_switches : 76378
|
|
#nr_voluntary_switches : 72308
|
|
#nr_involuntary_switches : 4070
|
|
#se.load.weight : 1024
|
|
#policy : 0
|
|
#prio : 120
|
|
#clock-delta : 28
|
|
|
|
# Changes for 6.6.0 kernel
|
|
#cat /proc/1/sched
|
|
#systemd (1, #threads: 1)
|
|
#-------------------------------------------------------------------
|
|
#se.exec_start : 251536392.418317
|
|
#se.vruntime : 542073.435409
|
|
#se.sum_exec_runtime : 1097697.572750
|
|
#se.nr_migrations : 35039
|
|
#sum_sleep_runtime : 249925608.224346
|
|
#sum_block_runtime : 234992.983051
|
|
#wait_start : 0.000000
|
|
#sleep_start : 251536392.418317
|
|
#block_start : 0.000000
|
|
#sleep_max : 11967.794377
|
|
#block_max : 1230.041276
|
|
#exec_max : 147.808142
|
|
#slice_max : 78.070544
|
|
#wait_max : 180.271599
|
|
#wait_sum : 440802.706697
|
|
#wait_count : 1022180
|
|
#iowait_sum : 81.179285
|
|
#iowait_count : 63
|
|
#nr_migrations_cold : 0
|
|
#nr_failed_migrations_affine : 145872
|
|
#nr_failed_migrations_running : 67209
|
|
#nr_failed_migrations_hot : 82715
|
|
#nr_forced_migrations : 12
|
|
#nr_wakeups : 264124
|
|
#nr_wakeups_sync : 41
|
|
#nr_wakeups_migrate : 205
|
|
#nr_wakeups_local : 146458
|
|
#nr_wakeups_remote : 117666
|
|
#nr_wakeups_affine : 204
|
|
#nr_wakeups_affine_attempts : 409
|
|
#nr_wakeups_passive : 0
|
|
#nr_wakeups_idle : 0
|
|
#avg_atom : 1.072258
|
|
#avg_per_cpu : 31.327879
|
|
#nr_switches : 1023725
|
|
#nr_voluntary_switches : 264916
|
|
#nr_involuntary_switches : 758809
|
|
#se.load.weight : 1048576
|
|
#se.avg.load_sum : 1490
|
|
#se.avg.runnable_sum : 1526937
|
|
#se.avg.util_sum : 365568
|
|
#se.avg.load_avg : 32
|
|
#se.avg.runnable_avg : 32
|
|
#se.avg.util_avg : 7
|
|
#se.avg.last_update_time : 251536392418304
|
|
#se.avg.util_est.ewma : 163
|
|
#se.avg.util_est.enqueued : 7
|
|
#policy : 0
|
|
#prio : 120
|
|
#clock-delta : 112
|
|
#mm->numa_scan_seq : 0
|
|
#numa_pages_migrated : 0
|
|
#numa_preferred_nid : -1
|
|
#total_numa_faults : 0
|
|
#current_node=0, numa_group_id=0
|
|
#numa_faults node=0 task_private=0 task_shared=0 group_private=0 group_shared=0
|
|
|
|
# parse /proc/<pid>/task/<tid>/sched
|
|
$file = '/proc/' . $pid . '/task/' . $tid . '/sched';
|
|
open($fh, $file) || goto SKIP_SCHED;
|
|
$_ = <$fh>;
|
|
if (/^(.*)\s+\((\d+),\s+#threads:/) {
|
|
$comm = $1; $id = $2;
|
|
}
|
|
my ($k, $v, $c0);
|
|
LOOP_SCHED: while (<$fh>) {
|
|
if (/^wait_max\s+:\s+(\S+)/ || /^se\.statistics.{1,2}wait_max\s+:\s+(\S+)/) {
|
|
$wait_max = $1;
|
|
} elsif (/^block_max\s+:\s+(\S+)/ || /^se\.statistics.{1,2}block_max\s+:\s+(\S+)/) {
|
|
$block_max = $1;
|
|
} elsif (/^wait_sum\s+:\s+(\S+)/ || /^se\.statistics.{1,2}wait_sum\s+:\s+(\S+)/) {
|
|
$wait_sum = $1;
|
|
} elsif (/^wait_count\s+:\s+(\S+)/ || /^se\.statistics.{1,2}wait_count\s+:\s+(\S+)/) {
|
|
$wait_count = $1;
|
|
} elsif (/^exec_max\s+:\s+(\S+)/ || /^se\.statistics.{1,2}exec_max\s+:\s+(\S+)/) {
|
|
$exec_max = $1;
|
|
} elsif (/^iowait_sum\s+:\s+(\S+)/ || /^se\.statistics.{1,2}iowait_sum\s+:\s+(\S+)/) {
|
|
$iowait_sum = $1;
|
|
} elsif (/^iowait_count\s+:\s+(\S+)/ || /^se\.statistics.{1,2}iowait_count\s+:\s+(\S+)/) {
|
|
$iowait_count = $1;
|
|
} elsif (/^se\.sum_exec_runtime\s+:\s+(\S+)/) {
|
|
$exec_runtime = $1;
|
|
} elsif (/^se\.nr_migrations\s+:\s+(\S+)/) {
|
|
$nr_migrations = $1;
|
|
} elsif (/^nr_switches\s+:\s+(\S+)/) {
|
|
$nr_switches = $1;
|
|
$sched_valid = 1;
|
|
last LOOP_SCHED;
|
|
}
|
|
}
|
|
close($fh);
|
|
SKIP_SCHED:;
|
|
|
|
#cat /proc/1/io
|
|
#rchar: 3432590242
|
|
#wchar: 438665986
|
|
#syscr: 316595
|
|
#syscw: 104722
|
|
#read_bytes: 1586438144
|
|
#write_bytes: 246829056
|
|
#cancelled_write_bytes: 7798784
|
|
|
|
# parse /proc/<pid>/task/<tid>/io
|
|
$file = '/proc/' . $pid . '/task/' . $tid . '/io';
|
|
open($fh, $file) || goto SKIP_IO;
|
|
LOOP_IO: while (<$fh>) {
|
|
if (/^rchar:\s+(\S+)/) {
|
|
$rchar = $1;
|
|
} elsif (/^wchar:\s+(\S+)/) {
|
|
$wchar = $1;
|
|
} elsif (/^syscr:\s+(\S+)/) {
|
|
$syscr = $1;
|
|
} elsif (/^syscw:\s+(\S+)/) {
|
|
$syscw = $1;
|
|
} elsif (/^read_bytes:\s+(\S+)/) {
|
|
$read_bytes = $1;
|
|
} elsif (/^write_bytes:\s+(\S+)/) {
|
|
$write_bytes = $1;
|
|
} elsif (/^cancelled_write_bytes:\s+(\S+)/) {
|
|
$cancelled_write_bytes = $1;
|
|
$io_valid = 1;
|
|
last LOOP_IO;
|
|
}
|
|
}
|
|
close($fh);
|
|
SKIP_IO:;
|
|
|
|
# parse /proc/<pid>/task/<tid>/status
|
|
$file = '/proc/' . $pid . '/task/' . $tid . '/status';
|
|
open($fh, $file) || next;
|
|
LOOP_STATUS: while (<$fh>) {
|
|
if (/^Name:\s+(.*)/) {
|
|
$comm = $1;
|
|
} elsif (/^State:\s+(\S+)/) {
|
|
$state = $1;
|
|
} elsif (/^PPid:\s+(\S+)/) {
|
|
$ppid = $1;
|
|
} elsif (/^VmSize:\s+(\S+)/) {
|
|
$VmSize = $1;
|
|
} elsif (/^VmRSS:\s+(\S+)/) {
|
|
$VmRSS = $1;
|
|
} elsif (/^Cpus_allowed:\s+([0]+,)*(\S+)/) {
|
|
my $h = $2; $h =~ tr/,/_/;
|
|
$Cpus_allowed = Math::BigInt->from_hex($h);
|
|
$affinity = $Cpus_allowed->band($::affinity_mask);
|
|
$status_valid = 1;
|
|
last LOOP_STATUS;
|
|
}
|
|
}
|
|
close($fh);
|
|
|
|
# parse /proc/<pid>/task/<tid>/cmdline
|
|
$file = '/proc/' . $pid . '/task/' . $tid . '/cmdline';
|
|
open($fh, $file) || next;
|
|
LOOP_CMDLINE: while (<$fh>) {
|
|
if (/^(.*)$/) {
|
|
$cmdline = $1;
|
|
$cmdline =~ s/\000/ /g;
|
|
$cmdline_valid = 1;
|
|
last LOOP_CMDLINE;
|
|
}
|
|
}
|
|
if (!$cmdline_valid) {
|
|
$cmdline_valid = 1;
|
|
$cmdline = $comm;
|
|
}
|
|
close($fh);
|
|
|
|
# parse /proc/<pid>/task/<tid>/wchan
|
|
$file = '/proc/' . $pid . '/task/' . $tid . '/wchan';
|
|
open($fh, $file) || next;
|
|
LOOP_WCHAN: while (<$fh>) {
|
|
if (/^(.*)$/) {
|
|
$wchan = $1;
|
|
$wchan_valid = 1;
|
|
last LOOP_WCHAN;
|
|
}
|
|
}
|
|
close($fh);
|
|
|
|
#Table 1-4: Contents of the stat files (as of 2.6.30-rc7)
|
|
#..............................................................................
|
|
# Field Content
|
|
# tpid process id (or tid, if /proc/<pid>/task/<tid>/stat)
|
|
# tcomm filename of the executable
|
|
# state state (R is running, S is sleeping, D is sleeping in an
|
|
# uninterruptible wait, Z is zombie, T is traced or stopped)
|
|
# ppid process id of the parent process
|
|
# pgrp pgrp of the process
|
|
# sid session id
|
|
# tty_nr tty the process uses
|
|
# tty_pgrp pgrp of the tty
|
|
# flags task flags
|
|
# min_flt number of minor faults
|
|
# cmin_flt number of minor faults with child's
|
|
# maj_flt number of major faults
|
|
# cmaj_flt number of major faults with child's
|
|
# utime user mode jiffies
|
|
# stime kernel mode jiffies
|
|
# cutime user mode jiffies with child's
|
|
# cstime kernel mode jiffies with child's
|
|
# priority priority level
|
|
# nice nice level
|
|
# num_threads number of threads
|
|
# it_real_value (obsolete, always 0)
|
|
# start_time time the process started after system boot
|
|
# vsize virtual memory size
|
|
# rss resident set memory size
|
|
# rsslim current limit in bytes on the rss
|
|
# start_code address above which program text can run
|
|
# end_code address below which program text can run
|
|
# start_stack address of the start of the main process stack
|
|
# esp current value of ESP
|
|
# eip current value of EIP
|
|
# pending bitmap of pending signals
|
|
# blocked bitmap of blocked signals
|
|
# sigign bitmap of ignored signals
|
|
# sigcatch bitmap of catched signals
|
|
# wchan address where process went to sleep
|
|
# 0 (place holder)
|
|
# 0 (place holder)
|
|
# exit_signal signal to send to parent thread on exit
|
|
# task_cpu which CPU the task is scheduled on
|
|
# rt_priority realtime priority
|
|
# policy scheduling policy (man sched_setscheduler)
|
|
# blkio_ticks time spent waiting for block IO
|
|
# gtime guest time of the task in jiffies
|
|
# cgtime guest time of the task children in jiffies
|
|
# start_data address above which program data+bss is placed
|
|
# end_data address below which program data+bss is placed
|
|
# start_brk address above which program heap can be expanded with brk()
|
|
# arg_start address above which program command line is placed
|
|
# arg_end address below which program command line is placed
|
|
# env_start address above which program environment is placed
|
|
# env_end address below which program environment is placed
|
|
# exit_code the thread's exit_code in the form reported by the waitpid system call
|
|
|
|
# parse /proc/<pid>/task/<tid>/stat
|
|
$file = '/proc/' . $pid . '/task/' . $tid . '/stat';
|
|
my $dummy;
|
|
open($fh, $file) || next;
|
|
$_ = <$fh>;
|
|
($tpid, $tcomm, $dummy) = /^(\d+)\s+\((.*)\)\s+(.*)/;
|
|
($state, $ppid, $pgrp, $sid,
|
|
$tty_nr, $tty_pgrp, $flags,
|
|
$min_flt, $cmin_flt, $maj_flt, $cmaj_flt,
|
|
$utime, $stime, $cutime, $cstime,
|
|
$priority, $nice, $num_threads,
|
|
$it_real_value, $start_time,
|
|
$vsize, $rss, $rsslim,
|
|
$start_code, $end_code, $start_stack, $esp, $eip,
|
|
$pending, $blocked, $sigign, $sigcatch, $wchan_addr,
|
|
$dum1, $dum2, $exit_signal, $task_cpu,
|
|
$rt_priority, $policy, $blkio_ticks, $gtime, $cgtime,
|
|
$start_data, $end_data, $start_brk, $arg_start, $arg_end,
|
|
$env_start, $env_end, $exit_code) = split(/\s+/, $dummy);
|
|
$stat_valid = 1;
|
|
close($fh);
|
|
|
|
#cat /proc/1/task/1/cgroup
|
|
#12:cpu,cpuacct:/init.scope
|
|
#11:pids:/init.scope
|
|
#10:hugetlb:/
|
|
#9:memory:/init.scope
|
|
#8:rdma:/
|
|
#7:cpuset:/
|
|
#6:net_cls,net_prio:/
|
|
#5:devices:/init.scope
|
|
#4:blkio:/init.scope
|
|
#3:freezer:/
|
|
#2:perf_event:/
|
|
#1:name=systemd:/init.scope
|
|
#0::/init.scope
|
|
|
|
# Extract the pod id:
|
|
# /k8s-infra/kubepods/burstable/pode84531c2-0bb1-45f8-b27f-e779b858552d/fdeaea0e577a525a3d9e41655ee05dd9b4edf17ce4b1bf95803cae1518f43ca2
|
|
# Extract *.service or *.scope name:
|
|
# /system.slice/acpid.service
|
|
# /system.slice/system-ceph.slice/ceph-mds.scope
|
|
|
|
# parse /proc/<pid>/task/<tid>/cgroup
|
|
$file = '/proc/' . $pid . '/task/' . $tid . '/cgroup';
|
|
open($fh, $file) || next;
|
|
LOOP_CGROUP: while (<$fh>) {
|
|
if (/^\d+:(pids|cpu,cpuacct):(.*)/) {
|
|
$_ = $2;
|
|
if (/kubepods\/\w+\/(pod[a-z0-9-]+)\/\w+$/) {
|
|
$cgroup = $1;
|
|
} elsif (/\/([a-zA-Z0-9_-@:]+)\.\w+$/) {
|
|
$cgroup = $1;
|
|
} else {
|
|
$cgroup = '-'; # '-' looks prettier than '/'
|
|
}
|
|
$cgroup_valid = 1;
|
|
last LOOP_CGROUP;
|
|
}
|
|
}
|
|
close($fh);
|
|
|
|
# sched
|
|
if (defined $sched_valid) {
|
|
$::task{$tid}{'exec_runtime'} = $exec_runtime;
|
|
$::task{$tid}{'exec_max'} = $exec_max;
|
|
$::task{$tid}{'block_max'} = $block_max;
|
|
$::task{$tid}{'wait_max'} = $wait_max;
|
|
$::task{$tid}{'wait_sum'} = $wait_sum;
|
|
$::task{$tid}{'wait_count'} = $wait_count;
|
|
$::task{$tid}{'iowait_sum'} = $iowait_sum;
|
|
$::task{$tid}{'iowait_count'} = $iowait_count;
|
|
$::task{$tid}{'nr_migrations'} = $nr_migrations;
|
|
$::task{$tid}{'nr_switches'} = $nr_switches;
|
|
} else {
|
|
$::task{$tid}{'exec_runtime'} = 0;
|
|
$::task{$tid}{'exec_max'} = 0;
|
|
$::task{$tid}{'block_max'} = 0;
|
|
$::task{$tid}{'wait_max'} = 0;
|
|
$::task{$tid}{'wait_sum'} = 0;
|
|
$::task{$tid}{'wait_count'} = 0;
|
|
$::task{$tid}{'iowait_sum'} = 0;
|
|
$::task{$tid}{'iowait_count'} = 0;
|
|
$::task{$tid}{'nr_migrations'} = 0;
|
|
$::task{$tid}{'nr_switches'} = 0;
|
|
}
|
|
|
|
# io
|
|
if (defined $io_valid) {
|
|
$::task{$tid}{'rchar'} = $rchar;
|
|
$::task{$tid}{'wchar'} = $wchar;
|
|
$::task{$tid}{'syscr'} = $syscr;
|
|
$::task{$tid}{'syscw'} = $syscw;
|
|
$::task{$tid}{'read_bytes'} = $read_bytes;
|
|
$::task{$tid}{'write_bytes'} = $write_bytes;
|
|
$::task{$tid}{'cancelled_write_bytes'} = $cancelled_write_bytes;
|
|
} else {
|
|
$::task{$tid}{'rchar'} = 0;
|
|
$::task{$tid}{'wchar'} = 0;
|
|
$::task{$tid}{'syscr'} = 0;
|
|
$::task{$tid}{'syscw'} = 0;
|
|
$::task{$tid}{'read_bytes'} = 0;
|
|
$::task{$tid}{'write_bytes'} = 0;
|
|
$::task{$tid}{'cancelled_write_bytes'} = 0;
|
|
}
|
|
|
|
# status
|
|
if (defined $status_valid) {
|
|
$::task{$tid}{'pid'} = $pid;
|
|
$::task{$tid}{'comm'} = $comm;
|
|
$::task{$tid}{'state'} = $state;
|
|
$::task{$tid}{'ppid'} = $ppid;
|
|
$::task{$tid}{'VmSize'} = $VmSize;
|
|
$::task{$tid}{'VmRSS'} = $VmRSS;
|
|
$::task{$tid}{'affinity'} = $affinity;
|
|
} else {
|
|
$::task{$tid}{'pid'} = 0;
|
|
$::task{$tid}{'comm'} = '-';
|
|
$::task{$tid}{'state'} = '-';
|
|
$::task{$tid}{'ppid'} = 0;
|
|
$::task{$tid}{'VmSize'} = 0;
|
|
$::task{$tid}{'VmRSS'} = 0;
|
|
$::task{$tid}{'affinity'} = Math::BigInt->new('0');
|
|
}
|
|
|
|
# cmdline
|
|
if (defined $cmdline_valid) {
|
|
$::task{$tid}{'cmdline'} = $cmdline;
|
|
} else {
|
|
$::task{$tid}{'cmdline'} = $comm;
|
|
}
|
|
|
|
# wchan
|
|
if (defined $cmdline_valid) {
|
|
$::task{$tid}{'wchan'} = $wchan;
|
|
} else {
|
|
$::task{$tid}{'wchan'} = '-';
|
|
}
|
|
|
|
# stat
|
|
if (defined $stat_valid) {
|
|
$::task{$tid}{'nice'} = $nice;
|
|
$::task{$tid}{'policy'} = $policy;
|
|
$::task{$tid}{'priority'} = $priority;
|
|
$::task{$tid}{'rt_priority'} = $rt_priority;
|
|
$::task{$tid}{'start_time'} = $start_time;
|
|
$::task{$tid}{'task_cpu'} = $task_cpu;
|
|
} else {
|
|
$::task{$tid}{'nice'} = 0;
|
|
$::task{$tid}{'policy'} = '-';
|
|
$::task{$tid}{'priority'} = 0;
|
|
$::task{$tid}{'rt_priority'} = 0;
|
|
$::task{$tid}{'start_time'} = '';
|
|
$::task{$tid}{'task_cpu'} = 0;
|
|
}
|
|
|
|
# cgroup
|
|
if (defined $cgroup_valid) {
|
|
$::task{$tid}{'cgroup'} = $cgroup;
|
|
} else {
|
|
$::task{$tid}{'cgroup'} = '-';
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
# Parse per-cpu hi-resolution scheduling stats from /proc/schedstat.
#
# Arguments:
#   $_[0] - reference to a hash.  It is aliased onto %::percpu for the
#           duration of the call, emptied, and then filled with
#           { <cpu number> => <cputime> }.
#
# Only schedstat version 15 is understood; for that layout the value stored
# is the 7th numeric field following "cpu<N>".
#
# Croaks if the file cannot be opened, or if the version line is missing or
# reports any version other than 15.
sub read_schedstat
{
    (local *::percpu) = @_;

    %::percpu = ();

    # parse /proc/schedstat
    my $file = '/proc/schedstat';
    open(my $fh, '<', $file) || croak "Cannot open file: $file ($!)";

    # First line is "version <N>".  Lines are read into lexicals rather than
    # the global $_ so a caller's $_ (e.g. a foreach alias) is never clobbered.
    my $header = <$fh>;
    my ($version) = defined($header) ? $header =~ /^version\s+(\d+)/ : ();

    if (!defined($version) || $version != 15) {
        close($fh);
        $version = 'unknown' if (!defined $version);
        croak "schedstat version: $version method not implemented.";
    }

    # Lines that do not start with "cpu<N>" (such as the "timestamp" line)
    # simply fail the match below and are skipped.
    while (my $line = <$fh>) {
        # version 15: cputime is 7th field
        if ($line =~ /^cpu(\d+)\s+\d+\s+\d+\s+\d+\s+\d+\s+\d+\s+\d+\s+(\d+)\s+/) {
            $::percpu{$1} = $2;
        }
    }
    close($fh);
}
|
|
|
|
# Parse per-cpu jiffie stats from /proc/stat.
#
# Arguments:
#   $_[0] - reference to a hash.  It is aliased onto %::percpu for the
#           duration of the call, emptied, and then filled with
#           { <cpu number> => <cputime> }.
#
# cputime is $CLOCK_NS multiplied by the sum of every parsed column except
# idle.
# NOTE(review): the previous header comment said "cputime excludes iowait",
# but the sum has always included the iowait column; the computation is kept
# unchanged here -- confirm which one is intended.
#
# Croaks if /proc/stat cannot be opened.
sub read_stat
{
    (local *::percpu) = @_;

    %::percpu = ();

    # parse /proc/stat
    my $file = '/proc/stat';
    open(my $fh, '<', $file) || croak "Cannot open file: $file ($!)";

    # Read into a lexical rather than the global $_ so a caller's $_
    # (e.g. a foreach alias) is never clobbered.
    while (my $line = <$fh>) {
        if ($line =~ /^cpu(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+/) {
            # Column order per proc(5): user nice system idle iowait irq softirq.
            # $idle is captured only to document its position; it is not summed.
            my ($cpu, $user, $nice, $sys, $idle, $iowt, $hirq, $sirq)
                = ($1, $2, $3, $4, $5, $6, $7, $8);

            # presumably $CLOCK_NS is nanoseconds per jiffy -- defined
            # elsewhere in this file; verify there.
            $::percpu{$cpu} = $CLOCK_NS * ($user + $nice + $sys + $iowt + $hirq + $sirq);
        }
    }
    close($fh);
}
|
|
|
|
# Parse load-average from /proc/loadavg.
#
# Arguments (all references; each is aliased onto the matching package
# variable for the duration of the call):
#   $_[0] - hash ref:   receives keys '1', '5' and '15' (first three fields)
#   $_[1] - scalar ref: receives the numerator of the "<n>/<m>" field
#   $_[2] - scalar ref: receives the denominator of the "<n>/<m>" field
#
# All outputs are reset to zero first and stay zero if the line does not
# have the expected shape.  Croaks if /proc/loadavg cannot be opened.
sub get_loadavg
{
    (local *::loadavg, local *::runq, local *::num_tasks) = @_;

    $::loadavg{'1'}  = 0.0;
    $::loadavg{'5'}  = 0.0;
    $::loadavg{'15'} = 0.0;
    $::runq = 0;
    $::num_tasks = 0;

    my $file = '/proc/loadavg';
    open(my $fh, '<', $file) || croak "Cannot open file: $file ($!)";

    # Read into a lexical rather than the global $_ so a caller's $_
    # (e.g. a foreach alias) is never clobbered.
    my $line = <$fh>;
    close($fh);

    if (defined($line)
        && $line =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(\d+)\/(\d+)\s+\d+/) {
        $::loadavg{'1'}  = $1;
        $::loadavg{'5'}  = $2;
        $::loadavg{'15'} = $3;
        $::runq = $4;
        $::num_tasks = $5;
    }
}
|
|
|
|
# Parse blocked from /proc/stat.
#
# Arguments:
#   $_[0] - scalar ref, aliased onto $::num_blk for the duration of the call;
#           receives the number on the "procs_blocked" line, or 0 when no
#           such line is present.
#
# Croaks if /proc/stat cannot be opened.
sub get_blocked
{
    (local *::num_blk) = @_;

    $::num_blk = 0;

    my $file = '/proc/stat';
    open(my $fh, '<', $file) || croak "Cannot open file: $file ($!)";

    # Read into a lexical rather than the global $_ so a caller's $_
    # (e.g. a foreach alias) is never clobbered.
    while (my $line = <$fh>) {
        if ($line =~ /^procs_blocked\s+(\d+)/) {
            $::num_blk = $1;
        }
    }
    close($fh);
}
|
|
|
|
# Parse uptime from /proc/uptime.
#
# Arguments:
#   $_[0] - scalar ref, aliased onto $::uptime for the duration of the call;
#           receives the first whitespace-separated field of the file, or
#           0.0 if the line does not have at least two fields.
#
# Croaks if /proc/uptime cannot be opened.
sub get_uptime
{
    (local *::uptime) = @_;
    $::uptime = 0.0;

    my $file = '/proc/uptime';
    open(my $fh, '<', $file) || croak "Cannot open file: $file ($!)";

    # Read into a lexical rather than the global $_ so a caller's $_
    # (e.g. a foreach alias) is never clobbered.
    my $line = <$fh>;
    close($fh);

    if (defined($line) && $line =~ /^(\S+)\s+\S+/) {
        $::uptime = $1;
    }
}
|
|
|
|
# Get number of online logical cpus.
#
# Arguments:
#   $_[0] - scalar ref, aliased onto $::num_cpus for the duration of the
#           call; receives the count of "processor : <N>" (or
#           "Processor : <N>") lines found in /proc/cpuinfo.
#
# Croaks if /proc/cpuinfo cannot be opened.
sub get_num_logical_cpus {
    (local *::num_cpus) = @_;
    $::num_cpus = 0;

    my $file = "/proc/cpuinfo";
    open(my $fh, '<', $file) || croak "Cannot open file: $file ($!)";

    # Read into a lexical rather than the global $_ so a caller's $_
    # (e.g. a foreach alias) is never clobbered.
    while (my $line = <$fh>) {
        $::num_cpus++ if ($line =~ /^[Pp]rocessor\s+:\s\d+/);
    }
    close($fh);
}
|
|
|
|
# Print header.
#
# Prints the one-line sample header to the currently selected output handle
# and, the first time only, three further lines describing the host.
#
# Arguments (all references; each is aliased onto the matching package
# variable for the duration of the call):
#   tr_1        - scalar: epoch time of the sample; passed to localtime() and
#                 its fractional part is printed as milliseconds
#   tm_elapsed  - scalar: elapsed interval in seconds (printed as "dt" in ms)
#   tr_elapsed  - scalar: elapsed interval compared against tm_elapsed to
#                 compute the skew
#   uptime      - scalar: uptime in seconds
#   loadavg     - hash with keys '1', '5', '15'
#   runq, num_blk, num_state_D, num_tasks - scalars printed as-is
#   print_host  - scalar flag; when true the host lines are printed and the
#                 flag is cleared through the reference so that they are
#                 printed only once
#
# Croaks if /proc/cpuinfo cannot be opened while printing the host lines.
# /etc/platform/platform.conf and /etc/build.info are optional.
sub schedtop_header {
    (local *::tr_1,
     local *::tm_elapsed,
     local *::tr_elapsed,
     local *::uptime,
     local *::loadavg,
     local *::runq,
     local *::num_blk,
     local *::num_state_D,
     local *::num_tasks,
     local *::print_host,
    ) = @_;

    # process epoch to get current timestamp
    my $mm_in_s = 60;
    my $hh_in_s = 60*60;
    my $dd_in_s = 24*60*60;
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime($::tr_1);
    my $msec = 1000.0*($::tr_1 - int($::tr_1));

    # convert uptime to elapsed <d>:<hh>:<mm>:<ss>
    my $up = int($::uptime);
    my $up_dd = int($up/$dd_in_s);
    $up -= $dd_in_s*$up_dd;
    my $up_hh = int($up/$hh_in_s);
    $up -= $hh_in_s*$up_hh;
    my $up_mm = int($up/$mm_in_s);
    $up -= $mm_in_s*$up_mm;
    my $up_ss = $up;

    # Calculate skew of CLOCK_REALTIME vs CLOCK_MONOTONIC,
    # and display skew if > 5% relative difference.
    # (skew is in ms and the interval in seconds, hence the 50.0 threshold.)
    # A zero interval has no meaningful relative skew; guard it so that the
    # division cannot die with "Illegal division by zero".
    my $skew_ms = ($::tr_elapsed - $::tm_elapsed)*1000.0;
    my $skew = "";
    if (($::tm_elapsed > 0) && (abs($skew_ms)/$::tm_elapsed > 50.0)) {
        $skew = sprintf " skew:%.3f ms", $skew_ms;
    }

    #schedtop -- 2014-03-03 02:00:21.357 dt:2050.003 ms ldavg:0.07, 0.09, 0.08 runq:1 blk:0 D:0 nproc:440 up:6:13:00:56 skew:0.001 ms
    printf "%s %s -- ".
        "%4d-%02d-%02d %02d:%02d:%02d.%03d ".
        "dt:%.3f ms ".
        "ldavg:%.2f, %.2f, %.2f runq:%d blk:%d D:%d nproc:%d ".
        "up:%d:%02d:%02d:%02d %s\n",
        $::TOOLNAME, $::VERSION,
        1900+$year, 1+$mon, $mday, $hour, $min, $sec, $msec,
        $::tm_elapsed*1000.0,
        $::loadavg{'1'}, $::loadavg{'5'}, $::loadavg{'15'},
        $::runq, $::num_blk, $::num_state_D, $::num_tasks,
        $up_dd, $up_hh, $up_mm, $up_ss,
        $skew;

    return if (!($::print_host));

    # After first print, disable print host information
    $::print_host = 0;

    # Get host specific information (4th uname field is not used)
    my ($OSTYPE, $NODENAME, $OSRELEASE, undef, $MACHINE) = POSIX::uname();
    my ($NODETYPE, $SUBFUNCTION, $BUILDINFO) = ('-', '-', '-');
    my ($SW_VERSION, $BUILD_ID) = ('-', '-');

    # The loops below read into lexicals and use lexical filehandles so that
    # neither the caller's $_ nor a global FILE handle is disturbed.

    # Get platform nodetype and subfunction (file is optional)
    my $platform_file = "/etc/platform/platform.conf";
    if (open(my $fh, '<', $platform_file)) {
        while (my $line = <$fh>) {
            $line =~ s/[\0\e\f\r\a]//g; chomp($line); # strip control characters if any
            if ($line =~ /^nodetype=(\S+)/) {
                $NODETYPE = $1;
            }
            if ($line =~ /^subfunction=(\S+)/) {
                $SUBFUNCTION = $1;
            }
        }
        close($fh);
    }

    # Get loadbuild info (file is optional)
    my $build_file = "/etc/build.info";
    if (open(my $fh, '<', $build_file)) {
        while (my $line = <$fh>) {
            $line =~ s/[\0\e\f\r\a]//g; chomp($line); # strip control characters if any
            if ($line =~ /^SW_VERSION=\"([^"]+)\"/) {
                $SW_VERSION = $1;
            }
            if ($line =~ /^BUILD_ID=\"([^"]+)\"/) {
                $BUILD_ID = $1;
            }
        }
        close($fh);
    }
    $BUILDINFO = join(' ', $SW_VERSION, $BUILD_ID);

    # Parse /proc/cpuinfo to get specific processor info
    my ($n_cpu, $model_name, $cpu_MHz) = (0, '-', 0);
    my $cpuinfo_file = "/proc/cpuinfo";
    open(my $cpuinfo, '<', $cpuinfo_file)
        || croak "Cannot open file: $cpuinfo_file ($!)";
    while (my $line = <$cpuinfo>) {
        $line =~ s/[\0\e\f\r\a]//g; chomp($line); # strip control characters if any
        if ($line =~ /^[Pp]rocessor\s+:\s+\d+/) {
            $n_cpu++;
        } elsif ($line =~ /^model name\s+:\s+(.*)$/) {
            # collapse runs of whitespace in the model name
            ($model_name = $1) =~ s/\s+/ /g;
        } elsif ($line =~ /^cpu MHz\s+:\s+(\S+)/) {
            $cpu_MHz = $1;
        } elsif ($line =~ /^bogomips\s+:\s+(\S+)/) {
            # fall back to bogomips only when no "cpu MHz" value was seen
            $cpu_MHz = $1 if ($cpu_MHz == 0);
        }
    }
    close($cpuinfo);

    printf " host:%s nodetype:%s subfunction:%s\n",
        $NODENAME, $NODETYPE, $SUBFUNCTION;
    printf " arch:%s processor:%s speed:%.0f #CPUs:%d\n",
        $MACHINE, $model_name, $cpu_MHz, $n_cpu;
    printf " %s %s build:%s\n", $OSTYPE, $OSRELEASE, $BUILDINFO;
}
|
|
|
|
# Parse and validate command line arguments.
#
# Arguments (all references; each is aliased onto the matching package
# variable for the duration of the call and filled in from @ARGV):
#   arg_debug, arg_delay, arg_repeat, arg_period, arg_reset_hwm, arg_idle,
#   arg_sort, arg_print                          - scalar refs
#   arg_watch_cmd, arg_watch_cgroup              - array refs
#   arg_watch_only, arg_watch_quiet, arg_trig_delay - scalar refs
#
# Behaviour:
#   - with no arguments at all: prints usage and exits 0
#   - --help: prints the help text (ListHelp exits)
#   - any invalid option or combination: warns, prints usage and exits 1
#   - comma-separated --watch-cmd / --watch-cgroup values are split into
#     individual list entries
#   - defaults: delay 1.0, repeat 1, sort 'cpu', print 'full', trig-delay 0;
#     repeat is derived from period when --period is given, otherwise period
#     is derived from delay * repeat
sub parse_schedtop_args {
    (local *::arg_debug,
     local *::arg_delay,
     local *::arg_repeat,
     local *::arg_period,
     local *::arg_reset_hwm,
     local *::arg_idle,
     local *::arg_sort,
     local *::arg_print,
     local *::arg_watch_cmd,
     local *::arg_watch_cgroup,
     local *::arg_watch_only,
     local *::arg_watch_quiet,
     local *::arg_trig_delay,
    ) = @_;

    # Local variables
    my ($fail, $arg_help);

    # Use the Argument processing module
    use Getopt::Long;

    # Print usage if no arguments
    if (!@::ARGV) {
        Usage();
        exit 0;
    }

    # Process input arguments
    $fail = 0;
    GetOptions(
        "debug:i",         \$::arg_debug,
        "delay=f",         \$::arg_delay,
        "period=i",        \$::arg_period,
        "repeat=i",        \$::arg_repeat,
        "reset-hwm",       \$::arg_reset_hwm,
        "idle",            \$::arg_idle,
        "sort=s",          \$::arg_sort,
        "print=s",         \$::arg_print,
        "watch-cmd=s@",    \@::arg_watch_cmd,
        "watch-cgroup=s@", \@::arg_watch_cgroup,
        "watch-only",      \$::arg_watch_only,
        "watch-quiet",     \$::arg_watch_quiet,
        "trig-delay=i",    \$::arg_trig_delay,
        "help|h",          \$arg_help
    ) || GetOptionsMessage();

    # Print help documentation if user has selected --help
    ListHelp() if (defined $arg_help);

    # Validate options
    my $have_watch = (@::arg_watch_cmd || @::arg_watch_cgroup);

    if ((defined $::arg_repeat) && (defined $::arg_period)) {
        $fail = 1;
        warn "$::TOOLNAME: Input error: cannot specify both --repeat and --period options.\n";
    }
    if ((defined $::arg_delay) && ($::arg_delay < 0.01)) {
        $fail = 1;
        # warn() only concatenates its arguments, so the value has to be
        # formatted explicitly with sprintf.
        warn sprintf("%s: Input error: --delay %f is less than 0.01.\n",
                     $::TOOLNAME, $::arg_delay);
    }
    if ((defined $::arg_sort) && !(($::arg_sort eq 'cpu') || ($::arg_sort eq 'io'))) {
        $fail = 1;
        warn "$::TOOLNAME: Input error: --sort=$::arg_sort invalid; valid options are: cpu, io.\n";
    }
    if ((defined $::arg_print) && !(($::arg_print eq 'brief') || ($::arg_print eq 'full'))) {
        $fail = 1;
        warn "$::TOOLNAME: Input error: --print=$::arg_print invalid; valid options are: brief, full\n";
    }
    if ((defined $::arg_watch_only) && !$have_watch) {
        $fail = 1;
        warn "$::TOOLNAME: Input error: --watch-only requires --watch-cmd or --watch-cgroup option.\n";
    }
    if ((defined $::arg_watch_quiet) && !$have_watch) {
        $fail = 1;
        warn "$::TOOLNAME: Input error: --watch-quiet requires --watch-cmd or --watch-cgroup option.\n";
    }
    if ((defined $::arg_trig_delay) && !$have_watch) {
        $fail = 1;
        warn "$::TOOLNAME: Input error: --trig-delay requires --watch-cmd or --watch-cgroup option.\n";
    }
    if ((defined $::arg_trig_delay) && ($::arg_trig_delay < 1)) {
        $fail = 1;
        # see note on --delay above: warn() does not interpret %d itself
        warn sprintf("%s: Input error: --trig-delay %d is less than 1.\n",
                     $::TOOLNAME, $::arg_trig_delay);
    }

    # Expand comma-separated lists, e.g. --watch-cmd=a,b --watch-cmd=c
    if (@::arg_watch_cmd) {
        my @cmds = @::arg_watch_cmd;
        @::arg_watch_cmd = ();
        for my $cmd (@cmds) {
            push(@::arg_watch_cmd, split(',', $cmd));
        }
    }
    if (@::arg_watch_cgroup) {
        my @cgroups = @::arg_watch_cgroup;
        @::arg_watch_cgroup = ();
        for my $cgroup (@cgroups) {
            push(@::arg_watch_cgroup, split(',', $cgroup));
        }
    }

    # Anything GetOptions left behind is an unexpected positional argument
    if (@::ARGV) {
        $fail = 1;
        warn "$::TOOLNAME: Input error: not expecting these options: '@::ARGV'.\n";
    }

    # Set reasonable defaults
    $::arg_delay  ||= 1.0;
    $::arg_repeat ||= 1;
    if ($::arg_period) {
        $::arg_repeat = $::arg_period / $::arg_delay;
    } else {
        $::arg_period = $::arg_delay * $::arg_repeat;
    }
    $::arg_sort       ||= 'cpu';
    $::arg_print      ||= 'full';
    $::arg_trig_delay ||= 0;

    # Upon missing or invalid options, print usage
    if ($fail == 1) {
        Usage();
        exit 1;
    }
}
|
|
|
|
# Report an option-parsing failure: emit a warning naming the tool, show the
# usage summary, then terminate the program with exit status 1.
sub GetOptionsMessage {
    my $message = "$::TOOLNAME: Error processing input arguments.\n";
    warn $message;
    Usage();
    exit 1;
}
|
|
|
|
# Print out program usage.
#
# Writes the option summary, followed by a blank line, to the currently
# selected output handle.  The text is emitted with print rather than printf
# because $::TOOLNAME is interpolated into it: used as a printf format, any
# '%' in the tool name would be interpreted as a conversion.
sub Usage {
    my $text = join('',
        "Usage: $::TOOLNAME OPTIONS\n",
        " [--delay=<seconds>] [--repeat=<num>] [--period=<seconds>]\n",
        " [--reset-hwm] [--idle] [--sort=<cpu|io>] [--print=<brief|full>]\n",
        " [--watch-cmd=tid1,cmd1,cmd2,...] [--watch-cgroup=cgroup1,...]\n",
        " [--watch-only] [--watch-quiet]\n",
        " [--trig-delay=time]\n",
        " [--help]\n",
        "\n",
    );
    print $text;
}
|
|
|
|
# Print tool help.
#
# Writes a one-line description, the usage summary (via Usage) and the
# per-option help to the currently selected output handle, then terminates
# the program with exit status 0.  The text is emitted with print rather
# than printf because $::TOOLNAME is interpolated into the first line: used
# as a printf format, any '%' in the tool name would be interpreted as a
# conversion.
sub ListHelp {
    print "$::TOOLNAME -- display per-task scheduling occupancy\n";
    Usage();
    print join('',
        "Options: miscellaneous\n",
        " --delay=<seconds> : output interval (seconds): default: 1.0\n",
        " --repeat=<num> : number of repeat samples: default: 1\n",
        " --period=<seconds> : overall tool duration (seconds): default: --\n",
        " --reset-hwm : reset scheduling delay hi-water marks\n",
        " --idle : specify printing of idle tasks\n",
        " --sort=<cpu|io> : sort order, select from 'cpu' or 'io'\n",
        " --print=<brief|full> : select 'brief' or 'full' fields to display\n",
        "Watch specific tasks or commands:\n",
        " --watch-cmd=tid1,cmd1,... : watch specific tids or 'comm' names\n",
        " (matches from beginning of comm with partial name, eg, --watch-cmd=sirq)\n",
        " --watch-cgroup=cgroup1,... : watch specific cgroup names\n",
        " (matches from beginning of cgroup with partial name, eg, --watch-cgroup=sm)\n",
        " --watch-only : display only watched tasks (reduces impact of tool)\n",
        " --watch-quiet : suppress output after watch starts\n",
        "Trigger crash dump via sysrq:\n",
        " --trig-delay=time : trigger delay threshold (ms)\n",
        " --help : this help\n",
    );
    exit 0;
}
|
|
|
|
1;
|