#!/usr/bin/perl -w

# Checks Cool Loop temperatures.
# Copyleft Michael Meier / RRZE 2010

# Usage hints, i.e. what you should put into your Nagios configuration
# define command {
#        command_name    check_coolloop.pl
#        command_line    $USER1$/check_coolloop.pl $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $HOSTADDRESS$
# }
#
# define service {
#        ....
#        service_description             intemp
#        check_command                   check_coolloop.pl!--in!--crit!65.0!--warn!55.0!
# }
#
# define service {
#        ....
#        service_description             outtemp
#        check_command                   check_coolloop.pl!--out!--crit!25.0!--warn!22.0!
# }
#
# define service {
#        ....
#        service_description             watersensor
#        check_command                   check_coolloop.pl!--water!
# }
#
# define service {
#        ....
#        service_description             smokesensor
#        check_command                   check_coolloop.pl!--smoke!
# }
#
# define service {
#        ....
#        service_description             fan1
#        check_command                   check_coolloop.pl!--fan1!
# }
# (fan2 - fan4 are available too)

# Base snmpget invocation (SNMP v2c, community "public", output options "vq").
# Host and OID are appended later.
our $SNMPCMD = "/usr/bin/snmpget -v 2c -c public -O vq";

# Default thresholds: a temperature at or above the limit raises the
# corresponding state. Both can be overridden with --warn / --crit.
our ($warntemp, $crittemp) = (22.0, 30.0);

# Number of SNMP query attempts before giving up.
our $retries = 3;

# Exit codes understood by Nagios.
our ($STATE_OK, $STATE_WARNING, $STATE_CRITICAL, $STATE_UNKNOWN) = (0, 1, 2, 3);
#$STATE_DEPENDENT = 4;

# Print a short usage summary to STDOUT.
# Takes no arguments and returns nothing; the caller decides whether (and
# with which code) to exit afterwards.
sub showhelp {
  # List every mode the argument parser accepts, including the fan checks.
  print("Syntax: $0 --out|--in|--water|--smoke|--fan1|--fan2|--fan3|--fan4 [--warn deg] [--crit deg] hostname\n");
  print("Checks if the coolloop is reachable and if the in resp. out temperatures are in range.\n");
  print("--water, --smoke and --fan1 to --fan4 check the corresponding status sensor instead.\n");
  print("--warn and --crit set the warning resp. crit temperature.\n");
  return;
}

# ---- Command line parsing ----
# Results: $mode (numeric selector of the check to run), $hn (target host)
# and, if given, overridden $warntemp / $crittemp.

# Report a usage error on STDERR, print the help text and exit.
# Exits with UNKNOWN: exit code 1 would be read by Nagios as WARNING, which
# would disguise a broken check definition as a measurement result.
sub usage_error {
  my ($message) = @_;
  print(STDERR "$message\n");
  showhelp();
  exit($STATE_UNKNOWN);
}

# Mode switches and the numeric mode each one selects.
my %mode_for = (
  '--out'   => 0,
  '--in'    => 1,
  '--fan1'  => 2,
  '--fan2'  => 3,
  '--fan3'  => 4,
  '--fan4'  => 5,
  '--water' => 6,
  '--smoke' => 7,
);

# Options taking a value, and the threshold variable each one overrides.
my %threshold_for = (
  '--warn' => \$warntemp,
  '--crit' => \$crittemp,
);

our $hn = undef;
our $mode = undef;
my @pending = @ARGV;
while (@pending) {
  my $arg = shift(@pending);
  if (exists($mode_for{$arg})) {
    $mode = $mode_for{$arg};
  } elsif (exists($threshold_for{$arg})) {
    unless (@pending) {
      usage_error("$arg requires an parameter.");
    }
    my $value = shift(@pending);
    # A non-numeric threshold would compare as 0 and turn every reading
    # into an alarm, so refuse it up front.
    unless ($value =~ m/\A[+-]?(?:\d+(?:\.\d*)?|\.\d+)\z/) {
      usage_error("$arg requires a numeric parameter (got '$value').");
    }
    ${ $threshold_for{$arg} } = $value;
  } elsif ($arg =~ m/\A-/) {
    # Unrecognised option: do not mistake it for the hostname.
    usage_error("Unknown parameter $arg");
  } elsif (defined($hn)) {
    # A hostname was already seen; name the surplus argument itself.
    usage_error("Unknown parameter $arg");
  } else {
    $hn = $arg;
  }
}
unless (defined($hn)) {
  usage_error("no hostname given.");
}
unless (defined($mode)) {
  usage_error("no mode selected.");
}
# ---- Build the snmpget command line for the selected mode ----
# OID queried for each numeric mode chosen on the command line.
my %oid_for_mode = (
  0 => '1.3.6.1.4.1.2769.2.1.1.2.3.0', # Out
  1 => '1.3.6.1.4.1.2769.2.1.1.1.3.0', # In
  2 => '1.3.6.1.4.1.2769.2.1.2.4.1.0', # Status fan 1
  3 => '1.3.6.1.4.1.2769.2.1.2.4.2.0', # Status fan 2
  4 => '1.3.6.1.4.1.2769.2.1.2.4.3.0', # Status fan 3
  5 => '1.3.6.1.4.1.2769.2.1.2.4.4.0', # Status fan 4
  6 => '1.3.6.1.4.1.2769.2.1.2.4.5.0', # Status water detector
  7 => '1.3.6.1.4.1.2769.2.1.2.4.6.0', # Status smoke detector
);
unless (defined($mode) && exists($oid_for_mode{$mode})) {
  # Exit with UNKNOWN; exit code 1 would be reported by Nagios as WARNING.
  print("Internal error: Invalid mode.\n"); exit($STATE_UNKNOWN);
}
# The command is run through the shell (it needs "2>&1"), so the host name
# is single-quoted, with embedded single quotes escaped. Shell
# metacharacters in it then cannot be interpreted as shell syntax.
(my $quoted_hn = $hn) =~ s/'/'\\''/g;
# The trailing "|" marks the string as a command to read from when opened.
our $wholecmdline = "${SNMPCMD} '${quoted_hn}' $oid_for_mode{$mode} 2>&1 |";
# ---- Query the device, retrying on unusable replies ----
# Results: $temp (raw value / 10, for the temperature modes) and $status
# (raw value, for the 0/1 sensor modes). Both stay undefined if no attempt
# produced a purely numeric line.
our ($temp, $status);

# $wholecmdline ends in "|" (two-argument pipe-open notation). Strip that
# marker so the explicit three-argument form can be used; the command still
# runs through the shell because of its "2>&1" redirection.
(my $snmp_command = $wholecmdline) =~ s/\s*\|\s*\z//;

my $attempt = 0;
while ($attempt < $retries) {
  $attempt++;
  my $snmp_fh;
  unless (open($snmp_fh, '-|', $snmp_command)) {
    print("Could not read from snmpget command.\n");
    exit($STATE_UNKNOWN);
  }
  ($temp, $status) = (undef, undef);
  while (my $line = <$snmp_fh>) {
    $line =~ s/[\r\n]//g;
    # Only a line consisting solely of digits counts as a reading; anything
    # else (e.g. error text arriving via 2>&1) is ignored.
    if ($line =~ m/^\d+$/) {
      $temp = $line / 10.0;  # For temperature measurements
      $status = $line; # For OK / Fail values.
    }
  }
  close($snmp_fh);
  if (defined($temp)) {
    last;
  }
  # Random delay before the next attempt. Skipped after the final attempt,
  # where it would only postpone the UNKNOWN report.
  if ($attempt < $retries) {
    sleep(int(rand(10)));
  }
}
unless (defined($temp)) {
  print("UNKNOWN: no valid reply from cool loop (after $attempt retries).\n");
  exit($STATE_UNKNOWN);
}
# ---- Evaluate the reading and report it ----
# Exactly one branch below picks the message and the exit code; the script
# then prints the message (with performance data after the "|") and exits.
my ($exit_code, $report);
if ($mode >= 2) {
  # Status modes: the device answers 0 or 1; anything else is not understood.
  if ($status eq '0') {
    $report = "OK: sensor not triggered (status = $status) | status=$status;\n";
    $exit_code = $STATE_OK;
  } elsif ($status eq '1') {
    $report = "CRITICAL: sensor triggered (status = $status) | status=$status;\n";
    $exit_code = $STATE_CRITICAL;
  } else {
    $report = "UNKNOWN: Cannot interpret status value $status | status=$status;\n";
    $exit_code = $STATE_UNKNOWN;
  }
} elsif ($temp < 5.0 || $temp > 120.0) {
  # Temperature modes: readings outside 5.0 .. 120.0 are treated as invalid.
  $report = "UNKNOWN: no valid reply from cool loop (sanity check failed).\n";
  $exit_code = $STATE_UNKNOWN;
} elsif ($temp >= $crittemp) {
  $report = "CRITICAL: Temperature $temp >= critical limit $crittemp | temp=$temp;\n";
  $exit_code = $STATE_CRITICAL;
} elsif ($temp >= $warntemp) {
  $report = "WARNING: Temperature $temp >= warning limit $warntemp | temp=$temp;\n";
  $exit_code = $STATE_WARNING;
} else {
  $report = "OK: Temperature $temp | temp=$temp;\n";
  $exit_code = $STATE_OK;
}
print($report);
exit($exit_code);
