Rev 77 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
2 | daniel-mar | 1 | <?php |
2 | |||
3 | /* |
||
4 | * VNag - Nagios Framework for PHP |
||
5 | * Developed by Daniel Marschall, ViaThinkSoft <www.viathinksoft.com> |
||
6 | * Licensed under the terms of the Apache 2.0 license |
||
7 | * |
||
77 | daniel-mar | 8 | * Revision 2023-10-13 |
2 | daniel-mar | 9 | */ |
10 | |||
11 | declare(ticks=1); |
||
12 | |||
/**
 * VNag plugin that inspects Linux software RAID (md) arrays.
 *
 * The arrays are discovered through /dev/md/, their level, state and member
 * disks are read from /sys/block/<array>/md/, and the array state is taken
 * from `mdadm --detail` when that command succeeds.
 */
class MdStatCheck extends VNag {
    /**
     * Registers the accepted standard arguments and the help texts of the plugin.
     */
    public function __construct() {
        parent::__construct();

        if ($this->is_http_mode()) {
            // Don't allow the standard arguments via $_REQUEST
            $this->registerExpectedStandardArguments('');
        } else {
            $this->registerExpectedStandardArguments('Vhtv');
        }

        $help = $this->getHelpManager();
        $help->setPluginName('vnag_mdstat');
        $help->setVersion('2023-10-13');
        $help->setShortDescription('This plugin checks the contents of /proc/mdstat and warns when a harddisk has failed.');
        $help->setCopyright('Copyright (C) 2011-$CURYEAR$ Daniel Marschall, ViaThinkSoft.');
        $help->setSyntax('$SCRIPTNAME$ (no additional arguments expected)');
        $help->setFootNotes('If you encounter bugs, please contact ViaThinkSoft at www.viathinksoft.com');
    }

    /**
     * Lists the member disks of an md array.
     *
     * @param string $device Array name as used below /sys/block (e.g. "md0").
     * @return string[] Member names with the leading "dev-" removed (e.g. "sda1").
     */
    private function getDisks($device) {
        $entries = glob("/sys/block/$device/md/dev-*");
        if ($entries === false) {
            // glob() returns false on error; treat that like "no members"
            return array();
        }

        $disks = array();
        foreach ($entries as $entry) {
            // ".../md/dev-sda1" -> "sda1" (strip the 4 characters of "dev-")
            $disks[] = substr(basename($entry), 4);
        }
        return $disks;
    }

    /**
     * Reads the RAID level of an array from sysfs.
     *
     * @param string $device Array name as used below /sys/block (e.g. "md0").
     * @return string Trimmed contents of /sys/block/<device>/md/level.
     * @throws VNagException If the level file does not exist or cannot be read.
     */
    private function raidLevel($device) {
        $level_file = "/sys/block/$device/md/level";
        if (!file_exists($level_file)) {
            throw new VNagException("Kernel too old to fetch RAID level of array $device");
        }

        // Warnings are suppressed on purpose; the failure is reported as exception below
        $cont = @file_get_contents($level_file);
        if ($cont === false) {
            throw new VNagException("Cannot read $level_file");
        }

        return trim($cont);
    }

    /**
     * Determines the state of an array.
     *
     * `mdadm --detail` is tried first; if it does not deliver a "State :" line,
     * the sysfs file array_state is used instead.
     *
     * @param string $device Array name as used below /sys/block and /dev (e.g. "md0").
     * @return string State text, e.g. "clean, degraded" (mdadm) or "clean" (sysfs).
     * @throws VNagException If the fallback file does not exist or cannot be read.
     */
    private function raidState($device) {
        // mdadm outputs "clean, degraded", but /sys/block/md0/md/array_state only outputs "clean"
        $output = array();
        $ec = -1;
        // $ec is the exit code of the pipeline, i.e. 0 only if grep found a matching line
        exec('mdadm --detail '.escapeshellarg("/dev/$device")." | grep -e '^\s*State : '", $output, $ec);
        if ($ec === 0) {
            $parts = explode(':', trim(implode("\n", $output)), 2);
            if (isset($parts[1])) {
                return trim($parts[1]);
            }
            // Unexpected output without a colon: continue with the fallback
        }

        // Fallback
        $state_file = "/sys/block/$device/md/array_state";
        if (!file_exists($state_file)) {
            throw new VNagException("Kernel too old to fetch state of array $device");
        }

        // Warnings are suppressed on purpose; the failure is reported as exception below
        $cont = @file_get_contents($state_file);
        if ($cont === false) {
            throw new VNagException("Cannot read $state_file");
        }

        return trim($cont);
    }

    /**
     * Evaluates the state flags of one member disk of an array.
     *
     * @param string $array Array name as used below /sys/block (e.g. "md0").
     * @param string $disk Member name without the "dev-" prefix (e.g. "sda1").
     * @return array Numerically indexed: [0] status (VNag::STATUS_*),
     *               [1] verbosity (VNag::VERBOSITY_*),
     *               [2] the trimmed, comma separated state flags as read from sysfs.
     * @throws VNagException If the state file does not exist or cannot be read.
     */
    private function check_disk_state($array, $disk) {
        $disk_state_file = "/sys/block/$array/md/dev-$disk/state";
        if (!file_exists($disk_state_file)) {
            throw new VNagException("Kernel too old to fetch state of disk $array:$disk");
        }

        // Warnings are suppressed on purpose; the failure is reported as exception below
        $cont = @file_get_contents($disk_state_file);
        if ($cont === false) {
            throw new VNagException("Cannot read $disk_state_file");
        }

        $disk_states = trim($cont);
        $disk_state_ary = array_map('trim', explode(',', $disk_states));

        // https://www.kernel.org/doc/html/v4.15/admin-guide/md.html
        // CRIT  faulty: device has been kicked from active use due to a detected fault, or it has unacknowledged bad blocks
        // OK    in_sync: device is a fully in-sync member of the array
        // OK    writemostly: device will only be subject to read requests if there are no other options. This applies only to raid1 arrays.
        // CRIT  blocked: device has failed, and the failure hasn't been acknowledged yet by the metadata handler. Writes that would write to this device if it were not faulty are blocked.
        // WARN  spare: device is working, but not a full member. This includes spares that are in the process of being recovered to
        // WARN  write_error: device has ever seen a write error.
        // WARN  want_replacement: device is (mostly) working but probably should be replaced, either due to errors or due to user request.
        // OK    replacement: device is a replacement for another active device with same raid_disk.
        $critical_flags = array('faulty', 'blocked');
        $warning_flags = array('spare', 'write_error', 'want_replacement');

        $status = VNag::STATUS_OK;
        $verbosity = VNag::VERBOSITY_ADDITIONAL_INFORMATION;

        foreach ($disk_state_ary as $disk_state) {
            if (in_array($disk_state, $critical_flags, true)) {
                $status = max($status, VNag::STATUS_CRITICAL);
                $verbosity = min($verbosity, VNag::VERBOSITY_SUMMARY);
            } else if (in_array($disk_state, $warning_flags, true)) {
                $status = max($status, VNag::STATUS_WARNING);
                $verbosity = min($verbosity, VNag::VERBOSITY_SUMMARY);
            }
        }

        return array($status, $verbosity, $disk_states);
    }

    /**
     * Collects the names of the md arrays listed in /dev/md/.
     *
     * The entries in /dev/md/ are usually symlinks to the real device node
     * (e.g. /dev/md/home -> /dev/md127). The link target is what exists below
     * /sys/block, therefore it is preferred. If an entry is not a resolvable
     * symlink, the name is built as "md" + entry name (e.g. /dev/md/0 -> md0).
     *
     * @return string[] Unique array names as used below /sys/block (e.g. "md0").
     */
    private function get_raid_arrays() {
        $entries = glob('/dev/md/'.'*');
        if ($entries === false) {
            // glob() returns false on error; treat that like "no arrays"
            return array();
        }

        $arrays = array();
        foreach ($entries as $entry) {
            $name = 'md'.basename($entry);
            if (is_link($entry)) {
                $target = realpath($entry);
                if ($target !== false) {
                    $name = basename($target);
                }
            }
            // Several links may point to the same array; count it only once
            if (!in_array($name, $arrays, true)) {
                $arrays[] = $name;
            }
        }
        return $arrays;
    }

    /**
     * Checks every array and every member disk, reports the resulting status
     * via setStatus(), adds one verbose message per array and sets the headline.
     *
     * @throws VNagException If a required sysfs file is missing or unreadable.
     */
    protected function cbRun() {
        $disks_total = 0;
        $disks_critical = 0;
        $disks_warning = 0;

        $arrays = $this->get_raid_arrays();
        foreach ($arrays as $array) {
            $level = $this->raidLevel($array);
            $state = $this->raidState($array);

            // https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/tree/Detail.c#n491
            if (stripos($state, ', FAILED') !== false) $this->setStatus(VNag::STATUS_CRITICAL);
            if (stripos($state, ', degraded') !== false) $this->setStatus(VNag::STATUS_CRITICAL);

            $disk_texts = array();
            $verbosity = VNag::VERBOSITY_ADDITIONAL_INFORMATION;
            foreach ($this->getDisks($array) as $disk) {
                $disks_total++;
                list($status, $verbosity_, $disk_states) = $this->check_disk_state($array, $disk);
                // The message of the array uses the lowest verbosity value of its disks
                $verbosity = min($verbosity, $verbosity_);
                $this->setStatus($status);
                if ($status == VNag::STATUS_WARNING) $disks_warning++;
                if ($status == VNag::STATUS_CRITICAL) $disks_critical++;
                $status_text = VNagLang::status($status, VNag::STATUSMODEL_SERVICE);
                $disk_texts[] = "$disk ($status_text: $disk_states)";
            }

            # Example output:
            # Array md0 (raid1, clean, degraded): sda1 (Critical: faulty,blocked), sdb1 (OK: in_sync)
            $this->addVerboseMessage("Array $array ($level, $state): ".implode(', ', $disk_texts), $verbosity);
        }

        $this->setHeadline(sprintf('%s disks in %s arrays (%s warnings, %s critical)', $disks_total, count($arrays), $disks_warning, $disks_critical));
    }
}