Page Menu
Home
Miraheze
Search
Configure Global Search
Log In
Paste
P166
(An Untitled Masterwork)
Active
Public
Actions
Authored by
Paladox
on Jan 4 2019, 16:36.
Edit Paste
Archive Paste
View Raw File
Subscribe
Mute Notifications
Flag For Later
Award Token
Tags
None
Referenced Files
F876826: raw.txt
Jan 4 2019, 16:36
2019-01-04 16:36:48 (UTC+0)
Subscribers
None
#!/usr/bin/ruby
# A simple nagios check that should be run as root and
# can check when the last run was done of puppet.
# It can also check fail counts and skip machines
# that are not enabled
#
# The script uses the Puppet last_run_summary.yaml
# file to determine when Puppet last ran
require
'optparse'
require
'safe_yaml'
# Cap on the number of failed resource names printed in an alert; keep it low.
PRINT_FAILED_RESOURCES_NO = 3

# Default locations of the Puppet agent state files. Each one can be replaced
# through the matching command line switch.
runlockfile   = "/var/lib/puppet/state/agent_catalog_run.lock"
adminlockfile = "/var/lib/puppet/state/agent_disabled.lock"
summaryfile   = "/var/lib/puppet/state/last_run_summary.yaml"
reportfile    = "/var/lib/puppet/state/last_run_report.yaml"

# Check state, filled in further down in the script.
enabled      = true
enabled_only = false
lastrun = failcount = 0

# Staleness thresholds in seconds; 0 stands for "not given".
warn = crit = 0
# Select :safe as the default mode of the safe_yaml gem before any YAML file
# is loaded by this script.
# NOTE(review): presumably this keeps the YAML.load_file calls below from
# instantiating arbitrary Ruby objects found in the Puppet state files --
# confirm against the safe_yaml documentation.
SafeYAML::OPTIONS[:default_mode] = :safe
# Command line handling: each switch overwrites one of the script's defaults.
opt = OptionParser.new

# Staleness thresholds. The value is declared as an Integer and converted with
# to_i once more before it is stored.
opt.on("--critical [CRIT]", "-c", Integer,
       "Critical staleness threshold, time in seconds") { |seconds| crit = seconds.to_i }
opt.on("--warn [WARN]", "-w", Integer,
       "Warning staleness threshold, time in seconds") { |seconds| warn = seconds.to_i }

# Flag without a value: stay quiet while Puppet is disabled.
opt.on("--only-enabled", "-e", "Only alert if Puppet is enabled") { enabled_only = true }

# Overrides for the state file locations; the help text shows the default.
opt.on("--runlock-file [FILE]", "-l",
       "Location of the run lock file, default #{runlockfile}") { |path| runlockfile = path }
opt.on("--adminlock-file [FILE]", "-a",
       "Location of the admin lock file, default #{adminlockfile}") { |path| adminlockfile = path }
opt.on("--summary-file [FILE]", "-s",
       "Location of the summary file, default #{summaryfile}") { |path| summaryfile = path }
opt.on("--report-file [FILE]", "-r",
       "Location of the report file, default #{reportfile}") { |path| reportfile = path }

opt.parse!
# Both thresholds are mandatory. A value of 0 means the threshold was not
# supplied, so print a hint and leave with exit status 3 (the status this
# script also uses for its UNKNOWN result).
if [warn, crit].any?(&:zero?)
  puts "Please specify a warning and critical level"
  exit 3
end
# An existing admin lock file marks Puppet as disabled. The reason is read
# from the "disabled_message" key of that file.
# File.exist? is used because File.exists? was removed in Ruby 3.2.
if File.exist?(adminlockfile)
  enabled = false
  lock_data = YAML.load_file(adminlockfile)
  # An empty or non-mapping lock file does not parse to a Hash. The script
  # still treats Puppet as disabled in that case; there is just no message to
  # show, so disabled_message stays nil instead of raising NoMethodError.
  disabled_message = lock_data["disabled_message"] if lock_data.is_a?(Hash)
end
# Load the last run summary. On any problem failcount is set to a symbol
# instead of a number; the checks further down turn those symbols into a
# CRITICAL or UNKNOWN result.
# File.exist? is used because File.exists? was removed in Ruby 3.2.
if File.exist?(summaryfile)
  begin
    summary = YAML.load_file(summaryfile)

    # While Puppet is disabled, record the time of this check in the summary
    # file itself. The staleness calculation later in the script reads it back
    # once Puppet is enabled again.
    unless enabled
      summary["time"]["last_disabled_check"] = Time.now.to_i
      File.open(summaryfile, 'w') { |f| f.write(YAML.dump(summary)) }
    end

    lastrun = summary["time"]["last_run"]
    if summary["time"].key?("last_disabled_check")
      last_disabled_check = summary["time"]["last_disabled_check"]
    end

    # machines that outright failed to run like on missing dependencies
    # are treated as huge failures. The yaml file will be valid but
    # it won't have anything but last_run in it
    if summary.include?("events")
      failcount = summary["events"]["failure"]
    else
      failcount = :failed
    end
  rescue
    # Any StandardError raised above (loading, rewriting or reading the
    # expected keys of the summary) ends up here.
    failcount = :failed_to_parse_summary_file
    summary = nil
  end
else
  failcount = :no_summary_file
end
# Render a duration as a coarse, human readable string.
#
# Only the largest unit that fits is reported, so for Integer input 90 becomes
# "1 minute" and 7200 becomes "2 hours"; the remainder is dropped.
#
# @param s [Integer] duration in seconds
# @return [String] "0 seconds" for zero, "<count> <unit>" (with a plural "s"
#   when count is greater than 1) for values of at least one second, and a
#   fixed fallback text for anything else, e.g. negative durations
def time_ago(s)
  return "0 seconds" if s.zero?

  # Listed from largest to smallest so the first unit that fits wins; this
  # avoids sorting and reversing a hash on every call.
  units = [
    [24 * 60 * 60, 'day'],
    [60 * 60, 'hour'],
    [60, 'minute'],
    [1, 'second']
  ]

  units.each do |len, unit|
    next unless s >= len

    count = s / len
    return "#{count} #{unit}#{'s' if count > 1}"
  end

  "Indeterminate amount of time (see time_ago)"
end
# Turn the symbolic failure markers into check results. A numeric failcount
# matches neither branch and falls through.
case failcount
when :failed
  puts "CRITICAL: Catalog fetch fail. Either compilation failed or puppetmaster has issues"
  exit 2
when :failed_to_parse_summary_file, :no_summary_file
  puts "UNKNOWN: Failed to check. Reason is: #{failcount}"
  exit 3
end
# Seconds elapsed since the last Puppet run.
time_since_last_run = Time.now.to_i - lastrun

# If puppet has recently been reenabled, measure from the last check that saw
# it disabled instead. The shorter interval mutes some unwanted spam.
time_since_last_run = Time.now.to_i - last_disabled_check if enabled && last_disabled_check

human_time_since_last_run = time_ago(time_since_last_run)
# Puppet is disabled: report OK when only enabled agents should alert,
# otherwise warn and include the reason given for disabling it.
unless enabled
  if enabled_only
    puts "OK: Puppet is currently disabled, not alerting. Last run #{human_time_since_last_run} ago with #{failcount} failures"
    exit 0
  end

  puts "WARNING: Puppet is currently disabled, message: #{disabled_message}, last run #{human_time_since_last_run} ago with #{failcount} failures"
  exit 1
end
# The last run had failures: name some of the failed resources from the run
# report and exit with status 2 (CRITICAL).
if failcount > 0
  report = YAML.load_file(reportfile)
  # first(n) returns at most n entries. The previous inclusive slice
  # [0..PRINT_FAILED_RESOURCES_NO] returned one more than the message promises.
  failed_resources = report["resource_statuses"]
                     .select { |_, r| r["failed"] }
                     .map { |_, r| r["resource"] }
                     .first(PRINT_FAILED_RESOURCES_NO)
  puts "CRITICAL: Puppet has #{failcount} failures. " \
       "Last run #{human_time_since_last_run} ago with #{failcount} failures. " \
       "Failed resources (up to #{PRINT_FAILED_RESOURCES_NO} shown): #{failed_resources.join(',')}"
  exit 2
end
# Final verdict based on how long ago Puppet ran: the critical threshold is
# checked first, then the warning threshold, otherwise everything is fine.
status, message =
  if time_since_last_run >= crit
    [2, "CRITICAL: Puppet last ran #{human_time_since_last_run} ago"]
  elsif time_since_last_run >= warn
    [1, "WARNING: Puppet last ran #{human_time_since_last_run} ago"]
  else
    [0, "OK: Puppet is currently enabled, last run #{human_time_since_last_run} ago with #{failcount} failures"]
  end

puts message
exit status
Event Timeline
Paladox
created this paste.
Jan 4 2019, 16:36
2019-01-04 16:36:48 (UTC+0)
Log In to Comment