2 actions – HP Insight Cluster Management Utility User Manual
Page 97
#This is a CMU action and alerts description file
#=============================================================
#
#
ACTIONS
#
#
#
#-------------KERNEL VERSION, RELEASE, BIOS VERSIONS---------#
kernel_version "kernel version" 9999999 string Instantaneous release uname -r
#-------------CPU--------------------------------------------#
#
#- Native
cpuload "% cpu load (raw)" 1 numerical MeanOverTime 100 % awk '/cpu / {printf"%d\n",$2+$3+$4}' /proc/stat
#- Collectl
#cpuload "% cpu load (normalized)" 1 numerical Instantaneous 100 % COLLECTL (cputotals.user) + (cputotals.nice) + (cputotals.sys)
#cpuload "% cpu load (normalized)" 1 numerical Instantaneous 100 % COLLECTL 100 - (cputotals.idle)
#
#-------------MEMORY-----------------------------------------#
#
#- Native
#memory_used "% memory used" 1 numerical Instantaneous 100 % free | awk 'BEGIN { freemem=0; totalmemory=0; } /cache:/ { freemem=$4; } /Mem:/ { totalmemory=$2; } END { printf "%d\n", (((totalmemory-freemem)*100)/totalmemory); }'
#
#
#
ALERTS
#
#
#cpu_freq_alert "CPU frequency is not nominal" 1 24 100 < % sh -c "b=`cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq`;a=`cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq`;echo 100 \* \$b / \$a |bc"
login_alert "Someone is connected" 3 24 0 > login(s) w -h | wc -l
root_fs_used "The / filesystem is above 90% full" 4 24 90 > % df / | awk '{ if ($6=="/") print $5}' | cut -f 1 -d % -
#reboot_alert "Node rebooted" 4 24 5 < rebooted awk '{printf "%.1f\n",$1/60}' /proc/uptime
# The line below enables reporting of MCE errors; beware of possible false positives
#mce_alert "The kernel has logged MCE errors; please check /var/log/mcelog" 5 60 1 > lines wc -l /var/log/mcelog |cut -f 1 -d ' '
#
#
ALERT_REACTIONS
#
#
#login_alert "Sending mail to root" ReactOnRaise echo -e "Alert 'CMU_ALERT_NAME' raised on node(s) CMU_ALERT_NODES. \n\nDetails:\n`/opt/cmu/bin/pdsh -w CMU_ALERT_NODES 'w -h'`" | mailx -s "CMU: Alert 'CMU_ALERT_NAME' raised." root
#
#root_fs_used "Sending mail to root" ReactOnRaise echo -e "Alert 'CMU_ALERT_NAME' raised on node(s) CMU_ALERT_NODES. \n\nDetails:\n`/opt/cmu/bin/pdsh -w CMU_ALERT_NODES 'df /'`" | mailx -s "CMU: Alert 'CMU_ALERT_NAME' raised!" root
#
#reboot_alert "Sending mail to root" ReactOnRaise echo -e "Alert 'CMU_ALERT_NAME' raised on node(s) CMU_ALERT_NODES. \n\nDetails:\n`/opt/cmu/bin/pdsh -w CMU_ALERT_NODES 'uptime'`" | mailx -s "CMU: Alert 'CMU_ALERT_NAME' raised." root
#
Lines prefixed with # are ignored. Lines cannot begin with white space. Each line
corresponds to a sensor, alert, or an alert reaction. Sensors are placed at the beginning of the
file, between the ACTIONS and ALERTS tags. Each alert is in the middle of the file between the
ALERTS and ALERT_REACTIONS tags, and each alert reaction is at the end of the file below the
ALERT_REACTIONS tag.
Most sensors have both a “native” line and a commented “collectl” line. To use collectl for
collecting monitoring data, enable it by removing the leading # from the corresponding collectl sensor line.
NOTE:
Using collectl requires additional steps described in
6.5.2 Actions
Each action contains the following fields:
Name
The name of the sensor as it appears in the Java GUI. It must consist of letters only.
6.5 Customizing HP Insight CMU monitoring, alerting, and reactions
97