When daemonizing, restart synapse process if it dies

This commit is contained in:
Erik Johnston 2015-09-29 11:30:19 +01:00
parent 301141515a
commit ce7051df61
1 changed file with 93 additions and 49 deletions

View File

@ -71,6 +71,8 @@ from synapse import events
from daemonize import Daemonize from daemonize import Daemonize
import twisted.manhole.telnet import twisted.manhole.telnet
from multiprocessing import Process
import synapse import synapse
import contextlib import contextlib
@ -78,6 +80,7 @@ import logging
import os import os
import re import re
import resource import resource
import signal
import subprocess import subprocess
import time import time
@ -368,15 +371,16 @@ def change_resource_limit(soft_file_no):
logger.warn("Failed to set file limit: %s", e) logger.warn("Failed to set file limit: %s", e)
def setup(config_options): def load_config(config_options):
""" """
Args: Args:
config_options_options: The options passed to Synapse. Usually config_options_options: The options passed to Synapse. Usually
`sys.argv[1:]`. `sys.argv[1:]`.
Returns: Returns:
HomeServer HomeServerConfig
""" """
config = HomeServerConfig.load_config( config = HomeServerConfig.load_config(
"Synapse Homeserver", "Synapse Homeserver",
config_options, config_options,
@ -385,9 +389,17 @@ def setup(config_options):
config.setup_logging() config.setup_logging()
# check any extra requirements we have now we have a config return config
check_requirements(config)
def setup(config):
"""
Args:
config (Homeserver)
Returns:
HomeServer
"""
version_string = get_version_string() version_string = get_version_string()
logger.info("Server hostname: %s", config.server_name) logger.info("Server hostname: %s", config.server_name)
@ -441,6 +453,42 @@ def setup(config_options):
hs.get_datastore().start_profiling() hs.get_datastore().start_profiling()
hs.get_replication_layer().start_get_pdu_cache() hs.get_replication_layer().start_get_pdu_cache()
start_time = time.time()
@defer.inlineCallbacks
def phone_stats_home():
now = int(time.time())
uptime = int(now - start_time)
if uptime < 0:
uptime = 0
stats = {}
stats["homeserver"] = config.server_name
stats["timestamp"] = now
stats["uptime_seconds"] = uptime
stats["total_users"] = yield hs.get_datastore().count_all_users()
all_rooms = yield hs.get_datastore().get_rooms(False)
stats["total_room_count"] = len(all_rooms)
stats["daily_active_users"] = yield hs.get_datastore().count_daily_users()
daily_messages = yield hs.get_datastore().count_daily_messages()
if daily_messages is not None:
stats["daily_messages"] = daily_messages
logger.info("Reporting stats to matrix.org: %s" % (stats,))
try:
yield hs.get_simple_http_client().put_json(
"https://matrix.org/report-usage-stats/push",
stats
)
except Exception as e:
logger.warn("Error reporting stats: %s", e)
if hs.config.report_stats:
phone_home_task = task.LoopingCall(phone_stats_home)
phone_home_task.start(60 * 60 * 24, now=False)
return hs return hs
@ -649,7 +697,7 @@ def _resource_id(resource, path_seg):
return "%s-%s" % (resource, path_seg) return "%s-%s" % (resource, path_seg)
def run(hs): def run(config):
PROFILE_SYNAPSE = False PROFILE_SYNAPSE = False
if PROFILE_SYNAPSE: if PROFILE_SYNAPSE:
def profile(func): def profile(func):
@ -663,7 +711,7 @@ def run(hs):
profile.disable() profile.disable()
ident = current_thread().ident ident = current_thread().ident
profile.dump_stats("/tmp/%s.%s.%i.pstat" % ( profile.dump_stats("/tmp/%s.%s.%i.pstat" % (
hs.hostname, func.__name__, ident config.server_name, func.__name__, ident
)) ))
return profiled return profiled
@ -672,56 +720,52 @@ def run(hs):
ThreadPool._worker = profile(ThreadPool._worker) ThreadPool._worker = profile(ThreadPool._worker)
reactor.run = profile(reactor.run) reactor.run = profile(reactor.run)
start_time = hs.get_clock().time()
@defer.inlineCallbacks
def phone_stats_home():
now = int(hs.get_clock().time())
uptime = int(now - start_time)
if uptime < 0:
uptime = 0
stats = {}
stats["homeserver"] = hs.config.server_name
stats["timestamp"] = now
stats["uptime_seconds"] = uptime
stats["total_users"] = yield hs.get_datastore().count_all_users()
all_rooms = yield hs.get_datastore().get_rooms(False)
stats["total_room_count"] = len(all_rooms)
stats["daily_active_users"] = yield hs.get_datastore().count_daily_users()
daily_messages = yield hs.get_datastore().count_daily_messages()
if daily_messages is not None:
stats["daily_messages"] = daily_messages
logger.info("Reporting stats to matrix.org: %s" % (stats,))
try:
yield hs.get_simple_http_client().put_json(
"https://matrix.org/report-usage-stats/push",
stats
)
except Exception as e:
logger.warn("Error reporting stats: %s", e)
if hs.config.report_stats:
phone_home_task = task.LoopingCall(phone_stats_home)
phone_home_task.start(60 * 60 * 24, now=False)
def in_thread(): def in_thread():
hs = setup(config)
with LoggingContext("run"): with LoggingContext("run"):
change_resource_limit(hs.config.soft_file_limit) change_resource_limit(hs.config.soft_file_limit)
reactor.run() reactor.run()
if hs.config.daemonize: def start_in_process_checker():
p = None
should_restart = [True]
if hs.config.print_pidfile: def proxy_signal(signum, stack):
print hs.config.pid_file logger.info("Got signal: %r", signum)
if p is not None:
os.kill(p.pid, signum)
if signum == signal.SIGTERM:
should_restart[0] = False
if getattr(signal, "SIGHUP"):
signal.signal(signal.SIGHUP, proxy_signal)
signal.signal(signal.SIGTERM, proxy_signal)
last_start = 0
next_delay = 1
while should_restart[0]:
last_start = time.time()
p = Process(target=in_thread, args=())
p.start()
p.join()
if time.time() - last_start < 120:
next_delay = min(next_delay * 5, 5 * 60)
else:
next_delay = 1
time.sleep(next_delay)
if config.daemonize:
if config.print_pidfile:
print config.pid_file
daemon = Daemonize( daemon = Daemonize(
app="synapse-homeserver", app="synapse-homeserver",
pid=hs.config.pid_file, pid=config.pid_file,
action=lambda: in_thread(), action=lambda: start_in_process_checker(),
auto_close_fds=False, auto_close_fds=False,
verbose=True, verbose=True,
logger=logger, logger=logger,
@ -736,8 +780,8 @@ def main():
with LoggingContext("main"): with LoggingContext("main"):
# check base requirements # check base requirements
check_requirements() check_requirements()
hs = setup(sys.argv[1:]) config = load_config(sys.argv[1:])
run(hs) run(config)
if __name__ == '__main__': if __name__ == '__main__':