Merge pull request #3722 from matrix-org/erikj/bg_process_iteration

LaterGauge needs to call thread safe functions
This commit is contained in:
Erik Johnston 2018-08-21 16:47:34 +01:00 committed by GitHub
commit 2b1a4b2596
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 32 additions and 8 deletions

1
changelog.d/3722.bugfix Normal file
View File

@ -0,0 +1 @@
Fix error collecting prometheus metrics when run on dedicated thread due to threading concurrency issues

View File

@ -15,6 +15,7 @@
# limitations under the License.
import logging
import threading
from prometheus_client.core import Counter, Histogram
@ -111,6 +112,9 @@ in_flight_requests_db_sched_duration = Counter(
# The set of all in flight requests, set[RequestMetrics]
_in_flight_requests = set()
# Protects the _in_flight_requests set from concurrent accesss
_in_flight_requests_lock = threading.Lock()
def _get_in_flight_counts():
"""Returns a count of all in flight requests by (method, server_name)
@ -120,7 +124,8 @@ def _get_in_flight_counts():
"""
# Cast to a list to prevent it changing while the Prometheus
# thread is collecting metrics
reqs = list(_in_flight_requests)
with _in_flight_requests_lock:
reqs = list(_in_flight_requests)
for rm in reqs:
rm.update_metrics()
@ -154,10 +159,12 @@ class RequestMetrics(object):
# to the "in flight" metrics.
self._request_stats = self.start_context.get_resource_usage()
_in_flight_requests.add(self)
with _in_flight_requests_lock:
_in_flight_requests.add(self)
def stop(self, time_sec, request):
_in_flight_requests.discard(self)
with _in_flight_requests_lock:
_in_flight_requests.discard(self)
context = LoggingContext.current_context()

View File

@ -13,6 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import threading
import six
from prometheus_client.core import REGISTRY, Counter, GaugeMetricFamily
@ -78,6 +80,9 @@ _background_process_counts = dict() # type: dict[str, int]
# of process descriptions that no longer have any active processes.
_background_processes = dict() # type: dict[str, set[_BackgroundProcess]]
# A lock that covers the above dicts
_bg_metrics_lock = threading.Lock()
class _Collector(object):
"""A custom metrics collector for the background process metrics.
@ -92,7 +97,11 @@ class _Collector(object):
labels=["name"],
)
for desc, processes in six.iteritems(_background_processes):
# We copy the dict so that it doesn't change from underneath us
with _bg_metrics_lock:
_background_processes_copy = dict(_background_processes)
for desc, processes in six.iteritems(_background_processes_copy):
background_process_in_flight_count.add_metric(
(desc,), len(processes),
)
@ -167,19 +176,26 @@ def run_as_background_process(desc, func, *args, **kwargs):
"""
@defer.inlineCallbacks
def run():
count = _background_process_counts.get(desc, 0)
_background_process_counts[desc] = count + 1
with _bg_metrics_lock:
count = _background_process_counts.get(desc, 0)
_background_process_counts[desc] = count + 1
_background_process_start_count.labels(desc).inc()
with LoggingContext(desc) as context:
context.request = "%s-%i" % (desc, count)
proc = _BackgroundProcess(desc, context)
_background_processes.setdefault(desc, set()).add(proc)
with _bg_metrics_lock:
_background_processes.setdefault(desc, set()).add(proc)
try:
yield func(*args, **kwargs)
finally:
proc.update_metrics()
_background_processes[desc].remove(proc)
with _bg_metrics_lock:
_background_processes[desc].remove(proc)
with PreserveLoggingContext():
return run()