Merge pull request #3044 from matrix-org/michaelk/performance_stats
Add basic performance statistics to phone home
This commit is contained in:
commit
a89f9f830c
12
UPGRADE.rst
12
UPGRADE.rst
|
@ -48,6 +48,18 @@ returned by the Client-Server API:
|
||||||
# configured on port 443.
|
# configured on port 443.
|
||||||
curl -kv https://<host.name>/_matrix/client/versions 2>&1 | grep "Server:"
|
curl -kv https://<host.name>/_matrix/client/versions 2>&1 | grep "Server:"
|
||||||
|
|
||||||
|
Upgrading to $NEXT_VERSION
|
||||||
|
====================
|
||||||
|
|
||||||
|
This release expands the anonymous usage stats sent if the opt-in
|
||||||
|
``report_stats`` configuration is set to ``true``. We now capture RSS memory
|
||||||
|
and cpu use at a very coarse level. This requires administrators to install
|
||||||
|
the optional ``psutil`` python module.
|
||||||
|
|
||||||
|
We would appreciate it if you could assist by ensuring this module is available
|
||||||
|
and ``report_stats`` is enabled. This will let us see if performance changes to
|
||||||
|
synapse are having an impact to the general community.
|
||||||
|
|
||||||
Upgrading to v0.15.0
|
Upgrading to v0.15.0
|
||||||
====================
|
====================
|
||||||
|
|
||||||
|
|
|
@ -402,6 +402,10 @@ def run(hs):
|
||||||
|
|
||||||
stats = {}
|
stats = {}
|
||||||
|
|
||||||
|
# Contains the list of processes we will be monitoring
|
||||||
|
# currently either 0 or 1
|
||||||
|
stats_process = []
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def phone_stats_home():
|
def phone_stats_home():
|
||||||
logger.info("Gathering stats for reporting")
|
logger.info("Gathering stats for reporting")
|
||||||
|
@ -428,6 +432,13 @@ def run(hs):
|
||||||
daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages()
|
daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages()
|
||||||
stats["daily_sent_messages"] = daily_sent_messages
|
stats["daily_sent_messages"] = daily_sent_messages
|
||||||
|
|
||||||
|
if len(stats_process) > 0:
|
||||||
|
stats["memory_rss"] = 0
|
||||||
|
stats["cpu_average"] = 0
|
||||||
|
for process in stats_process:
|
||||||
|
stats["memory_rss"] += process.memory_info().rss
|
||||||
|
stats["cpu_average"] += int(process.cpu_percent(interval=None))
|
||||||
|
|
||||||
logger.info("Reporting stats to matrix.org: %s" % (stats,))
|
logger.info("Reporting stats to matrix.org: %s" % (stats,))
|
||||||
try:
|
try:
|
||||||
yield hs.get_simple_http_client().put_json(
|
yield hs.get_simple_http_client().put_json(
|
||||||
|
@ -437,10 +448,32 @@ def run(hs):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warn("Error reporting stats: %s", e)
|
logger.warn("Error reporting stats: %s", e)
|
||||||
|
|
||||||
|
def performance_stats_init():
|
||||||
|
try:
|
||||||
|
import psutil
|
||||||
|
process = psutil.Process()
|
||||||
|
# Ensure we can fetch both, and make the initial request for cpu_percent
|
||||||
|
# so the next request will use this as the initial point.
|
||||||
|
process.memory_info().rss
|
||||||
|
process.cpu_percent(interval=None)
|
||||||
|
logger.info("report_stats can use psutil")
|
||||||
|
stats_process.append(process)
|
||||||
|
except (ImportError, AttributeError):
|
||||||
|
logger.warn(
|
||||||
|
"report_stats enabled but psutil is not installed or incorrect version."
|
||||||
|
" Disabling reporting of memory/cpu stats."
|
||||||
|
" Ensuring psutil is available will help matrix.org track performance"
|
||||||
|
" changes across releases."
|
||||||
|
)
|
||||||
|
|
||||||
if hs.config.report_stats:
|
if hs.config.report_stats:
|
||||||
logger.info("Scheduling stats reporting for 3 hour intervals")
|
logger.info("Scheduling stats reporting for 3 hour intervals")
|
||||||
clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000)
|
clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000)
|
||||||
|
|
||||||
|
# We need to defer this init for the cases that we daemonize
|
||||||
|
# otherwise the process ID we get is that of the non-daemon process
|
||||||
|
clock.call_later(0, performance_stats_init)
|
||||||
|
|
||||||
# We wait 5 minutes to send the first set of stats as the server can
|
# We wait 5 minutes to send the first set of stats as the server can
|
||||||
# be quite busy the first few minutes
|
# be quite busy the first few minutes
|
||||||
clock.call_later(5 * 60, phone_stats_home)
|
clock.call_later(5 * 60, phone_stats_home)
|
||||||
|
|
Loading…
Reference in New Issue