Guard registration of process-wide metrics by existence of the requisite /proc entries

This commit is contained in:
Paul "LeoNerd" Evans 2016-10-19 15:34:38 +01:00
parent 981f852d54
commit 1b179455fc
1 changed files with 45 additions and 40 deletions

View File

@ -115,6 +115,10 @@ def render_all():
TICKS_PER_SEC = 100 TICKS_PER_SEC = 100
BYTES_PER_PAGE = 4096 BYTES_PER_PAGE = 4096
HAVE_PROC_STAT = os.path.exists("/proc/stat")
HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat")
HAVE_PROC_SELF_LIMITS = os.path.exists("/proc/self/limits")
rusage = None rusage = None
stats = None stats = None
fd_counts = None fd_counts = None
@ -122,13 +126,11 @@ fd_counts = None
# In order to report process_start_time_seconds we need to know the machine's # In order to report process_start_time_seconds we need to know the machine's
# boot time, because the value in /proc/self/stat is relative to this # boot time, because the value in /proc/self/stat is relative to this
boot_time = None boot_time = None
try: if HAVE_PROC_STAT:
with open("/proc/stat") as _procstat: with open("/proc/stat") as _procstat:
for line in _procstat: for line in _procstat:
if line.startswith("btime "): if line.startswith("btime "):
boot_time = int(line.split()[1]) boot_time = int(line.split()[1])
except IOError:
pass
TYPES = { TYPES = {
stat.S_IFSOCK: "SOCK", stat.S_IFSOCK: "SOCK",
@ -144,6 +146,7 @@ def update_resource_metrics():
global rusage global rusage
rusage = getrusage(RUSAGE_SELF) rusage = getrusage(RUSAGE_SELF)
if HAVE_PROC_SELF_STAT:
global stats global stats
with open("/proc/self/stat") as s: with open("/proc/self/stat") as s:
line = s.read() line = s.read()
@ -195,28 +198,34 @@ get_metrics_for("process").register_callback("fds", _process_fds, labels=["type"
## New prometheus-standard metric names ## New prometheus-standard metric names
process_metrics = get_metrics_for("process"); process_metrics = get_metrics_for("process");
process_metrics.register_callback( if HAVE_PROC_SELF_STAT:
process_metrics.register_callback(
"cpu_user_seconds_total", lambda: float(stats[11]) / TICKS_PER_SEC "cpu_user_seconds_total", lambda: float(stats[11]) / TICKS_PER_SEC
) )
process_metrics.register_callback( process_metrics.register_callback(
"cpu_system_seconds_total", lambda: float(stats[12]) / TICKS_PER_SEC "cpu_system_seconds_total", lambda: float(stats[12]) / TICKS_PER_SEC
) )
process_metrics.register_callback( process_metrics.register_callback(
"cpu_seconds_total", lambda: (float(stats[11]) + float(stats[12])) / TICKS_PER_SEC "cpu_seconds_total", lambda: (float(stats[11]) + float(stats[12])) / TICKS_PER_SEC
) )
process_metrics.register_callback( process_metrics.register_callback(
"virtual_memory_bytes", lambda: int(stats[20]) "virtual_memory_bytes", lambda: int(stats[20])
) )
process_metrics.register_callback( process_metrics.register_callback(
"resident_memory_bytes", lambda: int(stats[21]) * BYTES_PER_PAGE "resident_memory_bytes", lambda: int(stats[21]) * BYTES_PER_PAGE
) )
process_metrics.register_callback( process_metrics.register_callback(
"open_fds", lambda: sum(fd_counts.values()) "open_fds", lambda: sum(fd_counts.values())
) )
def _get_max_fds(): process_metrics.register_callback(
"start_time_seconds", lambda: boot_time + int(stats[19]) / TICKS_PER_SEC
)
if HAVE_PROC_SELF_LIMITS:
def _get_max_fds():
with open("/proc/self/limits") as limits: with open("/proc/self/limits") as limits:
for line in limits: for line in limits:
if not line.startswith("Max open files "): if not line.startswith("Max open files "):
@ -225,13 +234,9 @@ def _get_max_fds():
return int(line.split()[3]) return int(line.split()[3])
return None return None
process_metrics.register_callback( process_metrics.register_callback(
"max_fds", lambda: _get_max_fds() "max_fds", lambda: _get_max_fds()
) )
process_metrics.register_callback(
"start_time_seconds", lambda: boot_time + int(stats[19]) / TICKS_PER_SEC
)
reactor_metrics = get_metrics_for("reactor") reactor_metrics = get_metrics_for("reactor")
tick_time = reactor_metrics.register_distribution("tick_time") tick_time = reactor_metrics.register_distribution("tick_time")