Reduce set building in get_entities_changed

This line shows up as about 5% of cpu time on a synchrotron:

    not_known_entities = set(entities) - set(self._entity_to_key)

Presumably the problem here is that _entity_to_key can be largeish, and
building a set for its keys every time this function is called is slow.

Here we rewrite the logic to avoid building so many sets.
This commit is contained in:
Richard van der Hoff 2018-07-12 11:37:44 +01:00
parent 0456e05977
commit fa5c2bc082
1 changed files with 11 additions and 7 deletions

View File

@ -74,14 +74,18 @@ class StreamChangeCache(object):
assert type(stream_pos) is int
if stream_pos >= self._earliest_known_stream_pos:
not_known_entities = set(entities) - set(self._entity_to_key)
result = (
{self._cache[k] for k in self._cache.islice(
start=self._cache.bisect_right(stream_pos))}
.intersection(entities)
.union(not_known_entities)
changed_entities = {
self._cache[k] for k in self._cache.islice(
start=self._cache.bisect_right(stream_pos),
)
}
# we need to include entities which we don't know about, as well as
# those which are known to have changed since the stream pos.
result = {
e for e in entities
if e in changed_entities or e not in self._entity_to_key
}
self.metrics.inc_hits()
else: