Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • stustanet/temperature-daemon
  • roman/temperature-daemon
  • 007638/temperature-daemon
3 results
Show changes
......@@ -3,6 +3,8 @@ from email.mime.text import MIMEText
from email.utils import formatdate
import smtplib
from . import Plugin
UNKNOWN_SENSOR_SUBJECT = "WARNING: Unconfigured Sensor ID: {owid}"
UNKNOWN_SENSOR_BODY = """Hello Guys,
......@@ -71,18 +73,29 @@ I will try to fix this issue by reconnecting...
Regards, Temperature
"""
SENSOR_TEMPERATURE_WARNING_SUBJECT = "Temperaturwarnung Serverraum"
SENSOR_TEMPERATURE_WARNING_BODY = """Hi Guys,
Die Temperaturen im Serverraum werden langsam Bedenklich:
def init(monitor):
"""
Plugin initialization method to be called from the outside
"""
return PluginMail(monitor)
{temperatures}
Auslöser: {reason}
Aktuelle Temperaturen:
{alltemperatures}
Bitte haltet die Temperaturen im Auge und fahrt eventuell heiß laufende Server herunter
class PluginMail:
with love,
Temperator"""
class Mail(Plugin):
"""
Handle all the mail sending stuff
"""
def __init__(self, monitor):
self.monitor = monitor
self.config = self.monitor.config
......@@ -108,14 +121,14 @@ class PluginMail:
# Ratelimit the emails
time_since_last_mail = time.time() - self._mail_rate_limit.get(subject, 0)
if time_since_last_mail < int(self.config['mail']['min_delay_between_messages']):
print("Not sending due to ratelimiting")
print("Not sending due to ratelimiting: %i", time_since_last_mail)
return
print("Body: {}".format(body))
self._mail_rate_limit[subject] = time.time()
smtp = smtplib.SMTP("mail.stusta.mhn.de")
#smtp.sendmail(msg['From'], recipients, msg.as_string())
smtp.sendmail(msg['From'], recipients, msg.as_string())
smtp.quit()
async def err_nodata(self, **kwargs):
......@@ -143,26 +156,7 @@ class PluginMail:
SENSOR_MEASUREMENT_MISSED_SUBJECT,
SENSOR_MEASUREMENT_MISSED_BODY.format(**kwargs))
async def temperature_warning(self, source, urgent=False, **kwargs):
subject = "Temperaturwarnung Serverraum"
body = """Hi Guys,
Die Temperaturen im Serverraum werden langsam Bedenklich:
{temperatures}
Auslöser: {reason}
Aktuelle Temperaturen:
{alltemperatures}
Bitte haltet die Temperaturen im Auge und fahrt eventuell heiß laufende Server herunter
with love,
Temperator"""
if source == "tempdiff":
temperatures = "{name1}:{temp1}\n{name2}:{temp2}".format(**kwargs)
reason = "Differenztemperatur: {tempdiff}".format(**kwargs)
......@@ -173,8 +167,13 @@ Temperator"""
alltemperatures = '\n'.join([
"{}: {}".format(sensor.name, sensor.temperature) if sensor.valid
else "{}: INVALID".format(sensor.name)
for sensor in self.monitor.sensors.values() ])
for sensor in self.monitor.sensors.values()])
await self.send_mail(subject, body.format(
temperatures=temperatures, reason=reason, alltemperatures=alltemperatures),
urgent=urgent)
await self.send_mail(
SENSOR_TEMPERATURE_WARNING_SUBJECT,
SENSOR_TEMPERATURE_WARNING_BODY.format(
temperatures=temperatures,
reason=reason,
alltemperatures=alltemperatures),
urgent=urgent
)
import re
import asyncio
from prometheus_client import start_http_server, Gauge
from . import Plugin
stats_name_re = re.compile(r'^temperature-(?P<group>\w+)-(?P<type>\w+)$')
class Prometheus(Plugin):
    """
    Export sensor readings and aggregated statistics as Prometheus gauges.

    Two gauges are registered on construction (their metric names are read
    from the ``prometheus`` config section) and the prometheus_client HTTP
    server is started so the values can be scraped.
    """

    def __init__(self, monitor):
        """
        Register the gauges and start the metrics HTTP server.

        :param monitor: object providing ``config`` (with a ``prometheus``
            section containing ``sensor_metric_name``,
            ``aggregated_metric_name``, ``port`` and optionally ``address``)
            and a ``sensors`` mapping
        """
        self.loop = asyncio.get_event_loop()
        self.config = monitor.config
        self.last_store = 0
        self.monitor = monitor

        # One time series per sensor, distinguished by the "sensor" label
        self.sensor_metrics = Gauge(
            name=self.config["prometheus"]["sensor_metric_name"],
            documentation="Container Temperature Measurements",
            labelnames=["sensor"]
        )

        # One time series per (group, type) pair, e.g. group=floor, type=avg
        self.aggregated_metrics = Gauge(
            name=self.config["prometheus"]["aggregated_metric_name"],
            documentation="Container Temperature Aggregations",
            labelnames=["group", "type"]
        )

        # Bind to localhost unless an address is configured explicitly
        start_http_server(
            addr=self.config["prometheus"].get('address', 'localhost'),
            port=int(self.config["prometheus"]["port"])
        )

        print("started prometheus http server")

    async def send_stats_graph(self, graph, stattype, stattime, statval):
        """
        Plugin callback to export an aggregated measurement.

        ``stattype`` is expected to look like ``temperature-<group>-<type>``;
        names that do not match this pattern are silently ignored.
        ``graph`` and ``stattime`` are accepted for interface compatibility
        but are not used here.
        """
        m = stats_name_re.match(stattype)
        if not m:
            return

        self.aggregated_metrics.labels(group=m.group('group'), type=m.group('type')).set(statval)

    async def sensor_update(self):
        """
        Plugin callback to publish the current temperature of every valid
        sensor to the per-sensor Prometheus gauge.
        """
        for sensor in self.monitor.sensors.values():
            # A sensor is flagged valid from construction on, but carries no
            # temperature until its first reading arrived; setting a gauge to
            # None would raise, so such sensors are skipped.
            if sensor.valid and sensor.temperature is not None:
                self.sensor_metrics.labels(sensor=sensor.name).set(sensor.temperature)
import time
def init(monitor):
""" Plugin interface method """
return PluginWarning(monitor)
from . import Plugin
class PluginWarning:
class Warnings(Plugin):
"""
Generate all kind of warnings whenever needed and observe the sensor
if they see a problematic situation in the container
"""
def __init__(self, monitor):
self.monitor = monitor
self.revmapping = {
sensor.name : sensor
sensor.name: sensor
for sensor in self.monitor.sensors.values()
}
......@@ -44,7 +44,7 @@ class PluginWarning:
return [], 0, 0, 0, 0
avg = sum(sensor.temperature for sensor in sensors) / len(sensors)
var = sum((sensor.temperature - avg)**2 for sensor in sensors) / len(sensors)
var = sum((sensor.temperature - avg) ** 2 for sensor in sensors) / len(sensors)
sensormin = +9999
sensormax = -9999
......@@ -56,7 +56,6 @@ class PluginWarning:
return sensors, sensormin, sensormax, avg, var
async def sensor_update(self):
"""
First generate stats and relay them to the collectd module, then use these stats
......@@ -74,30 +73,30 @@ class PluginWarning:
if floor_sensors:
await self.monitor.call_plugin(
"send_stats_graph", graph="stats",
stattype="temperature-floormin", stattime=now, statval=floor_min)
stattype="temperature-floor-min", stattime=now, statval=floor_min)
await self.monitor.call_plugin(
"send_stats_graph", graph="stats",
stattype="temperature-floormax", stattime=now, statval=floor_max)
stattype="temperature-floor-max", stattime=now, statval=floor_max)
await self.monitor.call_plugin(
"send_stats_graph", graph="stats",
stattype="temperature-flooravg", stattime=now, statval=floor_avg)
stattype="temperature-floor-avg", stattime=now, statval=floor_avg)
await self.monitor.call_plugin(
"send_stats_graph", graph="stats",
stattype="temperature-floorvar", stattime=now, statval=floor_var)
stattype="temperature-floor-var", stattime=now, statval=floor_var)
if ceil_sensors:
await self.monitor.call_plugin(
"send_stats_graph", graph="stats",
stattype="temperature-ceilmin", stattime=now, statval=ceil_min)
stattype="temperature-ceil-min", stattime=now, statval=ceil_min)
await self.monitor.call_plugin(
"send_stats_graph", graph="stats",
stattype="temperature-ceilmax", stattime=now, statval=ceil_max)
stattype="temperature-ceil-max", stattime=now, statval=ceil_max)
await self.monitor.call_plugin(
"send_stats_graph", graph="stats",
stattype="temperature-ceilavg", stattime=now, statval=ceil_avg)
stattype="temperature-ceil-avg", stattime=now, statval=ceil_avg)
await self.monitor.call_plugin(
"send_stats_graph", graph="stats",
stattype="temperature-ceilvar", stattime=now, statval=ceil_var)
stattype="temperature-ceil-var", stattime=now, statval=ceil_var)
if floor_sensors and ceil_sensors:
# Else we already have sent warning messages for broken sensors
......@@ -105,14 +104,13 @@ class PluginWarning:
tempdiff = ceil_avg - floor_avg
await self.monitor.call_plugin(
"send_stats_graph", graph="stats",
stattype="temperature-floor_ceil_diff", stattime=now, statval=tempdiff)
stattype="temperature-floor_ceil-diff", stattime=now, statval=tempdiff)
print("floor: min {:05.2f} max {:05.2f} avg {:05.2f} var {:05.2f}".format(
floor_min, floor_max, floor_avg, floor_var))
print("ceil: min {:05.2f} max {:05.2f} avg {:05.2f} var {:05.2f}".format(
ceil_min, ceil_max, ceil_avg, ceil_var))
# Here comes the warning magic
# Critical: ceiling temperature > threshold (sane default: 45)
......@@ -132,7 +130,7 @@ class PluginWarning:
# Warning: temperature difference > threshold (sane default: 17)
if ceil_max > int(self.warning_conf['min_ceiling_warning']):
if tempdiff > int(self.warning_conf['floor_ceiling_diff']):
if tempdiff > int(self.warning_conf['floor_ceiling_diff']):
await self.monitor.call_plugin("temperature_warning",
source="tempdiff",
name1="floor",
......@@ -140,4 +138,3 @@ class PluginWarning:
temp1=floor_avg,
temp2=ceil_avg,
tempdiff=tempdiff)
......@@ -9,6 +9,7 @@ under the terms as stated in the LICENSE.md file.
Changelog:
2018-08 jotweh: reimplemented using a micropython-esp32
2020-04 milo: added prometheus plugin
Open issues:
......@@ -20,31 +21,34 @@ import asyncio
import configparser
import sys
import time
import importlib
from datetime import datetime
from pathlib import Path
import serial_asyncio
import serial
from .plugins import PLUGINS
class Sensor:
"""
One instance as sensor posing as measurementproxy
One instance as sensor posing as measurement proxy
"""
def __init__(self, config, owid):
self.temperature = None
self.last_update = 0
self.calibration = 0
self.valid = True
try:
if owid in config:
self.name = config[owid]['name']
self.calibration = config[owid]['calibration']
else:
print("Invalid Config: missing section {}".format(owid))
except KeyError as exc:
print("Invalid Config: for {}: {}".format(owid, exc))
raise
if owid not in config:
print(f"Invalid Config: missing section {owid}")
return
if 'name' not in config[owid] or 'calibration' not in config[owid]:
print(f"Invalid Config for: {owid}")
raise RuntimeError(f"Invalid Config for: {owid}")
self.name = config[owid]['name']
self.calibration = config[owid]['calibration']
def update(self, temperature):
"""
......@@ -53,6 +57,7 @@ class Sensor:
self.temperature = float(temperature)
self.last_update = time.time()
class TempMonitor:
"""
Interact with the esp-one-wire interface that sends:
......@@ -80,6 +85,7 @@ class TempMonitor:
# Test if all necessary config fields are set, that are not part of the normal
# startup
configtest = [
self.config['general']['plugins'],
self.config['collectd']['hostname'],
self.config['collectd']['interval'],
self.config['mail']['from'],
......@@ -95,7 +101,7 @@ class TempMonitor:
print("connecting to", self.config['serial']['port'])
for owid in self.config:
# Skip all known and predefined sections
if owid in ['DEFAULT', 'serial', 'collectd', 'mail', 'warning']:
if owid in ['DEFAULT', 'serial', 'collectd', 'mail', 'warning', 'prometheus', 'general']:
continue
self.sensors[owid] = Sensor(self.config, owid)
self._run_task = loop.create_task(self.run())
......@@ -104,10 +110,16 @@ class TempMonitor:
"""
Connect to the ESP chip
"""
self._reader, self._writer = await serial_asyncio.open_serial_connection(
url=self.config['serial']['port'],
baudrate=self.config['serial']['baudrate'],
loop=self.loop)
try:
self._reader, self._writer = await serial_asyncio.open_serial_connection(
url=self.config['serial']['port'],
baudrate=self.config['serial']['baudrate'],
loop=self.loop)
except serial.SerialException:
print("Connection failed!")
self.loop.stop()
raise
# upon startup we only see garbage. (micropython starting up),
# also it will produce warnings if the recording is started in the middle
# of a message, so wait until the end of a message block to start the game
......@@ -127,17 +139,23 @@ class TempMonitor:
await self.reconnect()
last_valid_data_received = time.time()
line = ""
reconnected_on_error = False
while True:
# Wait for the next line
if time.time() - last_valid_data_received > 1800:
self.call_plugin("err_no_valid_data", last_line=line)
if time.time() - last_valid_data_received > 10:
await self.call_plugin("err_no_valid_data", last_line=line)
if not reconnected_on_error:
reconnected_on_error = True
await self.reconnect()
try:
line = await asyncio.wait_for(
self._reader.readline(),
timeout=int(self.config['serial']['timeout']))
print("Received: ", line)
except asyncio.TimeoutError:
print("No Data")
await self.call_plugin("err_nodata")
continue
except serial.SerialException as exc:
......@@ -148,14 +166,17 @@ class TempMonitor:
try:
line = line.decode('ascii').strip()
except UnicodeError:
print("Unicode error")
continue
#print("recv:", line)
# print("recv:", line)
if line == '':
# Block has ended
print("Done block, storing sensors")
await self.store_sensors()
print("Done")
continue
# Try to parse the line
try:
owid, temp = line.split(' ')
......@@ -166,15 +187,18 @@ class TempMonitor:
## we have at least a valid line
last_valid_data_received = time.time()
reconnected_on_error = False
sensor = self.sensors.get(owid, None)
if not sensor:
# If the sensor is new - notify the operators
print("Unknown sensor")
await self.call_plugin("err_unknown_sensor",
config=self._configname,
owid=owid,
temp=temp)
elif temp > 1000 or temp < -1000:
print("Sensor invalid")
sensor.valid = False
# if the sensor is giving bullshit data - notify the operators
await self.call_plugin("err_problem_sensor",
......@@ -230,37 +254,27 @@ class TempMonitor:
self._last_store = time.time()
def setup_plugin(filename, plugin):
    """
    Make sure a loaded plugin carries a usable ``name`` attribute.

    If ``plugin`` has no ``name`` attribute, or the attribute is falsy
    (``None``, empty string, ...), it is set to ``filename``. An existing
    non-empty name is left untouched.

    :param filename: value used as fallback name
    :param plugin: plugin object to fix up in place
    """
    if not getattr(plugin, "name", None):
        plugin.name = filename
def main():
"""
Start the tempmonitor
"""
loop = asyncio.get_event_loop()
configfile = "/etc/temperature/tempermon.ini"
configfile = "/etc/tempermonitor.ini"
if len(sys.argv) == 2:
configfile = sys.argv[1]
print("Configuring temperature monitoring system from {}.".format(configfile))
print(f"Configuring temperature monitoring system from {configfile}.")
monitor = TempMonitor(loop, configfile)
plugin_path = Path(__file__).resolve().parent / "plugins"
print("Loading plugins from {}".format(plugin_path))
for filename in plugin_path.glob("*.py"):
if (plugin_path / filename).exists():
print("loading {}".format(filename.name))
modname = "plugins." + filename.name.split('.')[0]
module = importlib.import_module(modname)
plugin = module.init(monitor)
setup_plugin(filename, plugin)
monitor.plugins.append(plugin)
print("Loaded: {}".format(plugin.name))
active_plugins = monitor.config["general"]["plugins"].split(",")
print(f"Active plugins: {active_plugins}")
for plugin in active_plugins:
if plugin in PLUGINS:
p = PLUGINS[plugin](monitor)
monitor.plugins.append(p)
print(f"Loaded plugin: {plugin}")
try:
loop.run_forever()
......@@ -269,5 +283,3 @@ def main():
finally:
loop.run_until_complete(monitor.teardown())
if __name__ == "__main__":
main()