Merge pull request #4627 from matrix-org/erikj/user_ips_analyze

Analyze user_ips before running deduplication
This commit is contained in:
Erik Johnston 2019-02-12 13:05:09 +00:00 committed by GitHub
commit cf82338930
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 32 additions and 3 deletions

1
changelog.d/4627.misc Normal file
View file

@ -0,0 +1 @@
Improve 'user_ips' table deduplication background update

View file

@ -65,6 +65,11 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
columns=["last_seen"], columns=["last_seen"],
) )
self.register_background_update_handler(
"user_ips_analyze",
self._analyze_user_ip,
)
self.register_background_update_handler( self.register_background_update_handler(
"user_ips_remove_dupes", "user_ips_remove_dupes",
self._remove_user_ip_dupes, self._remove_user_ip_dupes,
@ -108,6 +113,25 @@ class ClientIpStore(background_updates.BackgroundUpdateStore):
yield self._end_background_update("user_ips_drop_nonunique_index") yield self._end_background_update("user_ips_drop_nonunique_index")
defer.returnValue(1) defer.returnValue(1)
@defer.inlineCallbacks
def _analyze_user_ip(self, progress, batch_size):
    """Background update handler that runs ``ANALYZE`` on the user_ips table.

    This is scheduled ahead of the deduplication background update, because
    the table may not have been analyzed for ages due to the table locks.

    Running the analyze locks out the naive upserts to user_ips for its
    duration, but it should be quick (a 28GB table takes ~10s).

    Args:
        progress: progress state for this update; not used here.
        batch_size: requested batch size; not used here, the whole job is
            done in a single pass.

    Returns:
        Deferred resolving to 1 (presumably the count of work items
        reported back to the background updater -- confirm against caller).
    """
    update_name = "user_ips_analyze"

    def run_analyze(txn):
        # Single statement; refreshes the table's planner statistics.
        txn.execute("ANALYZE user_ips")

    yield self.runInteraction(update_name, run_analyze)

    # Nothing left to do: mark this background update as finished.
    yield self._end_background_update(update_name)

    defer.returnValue(1)
@defer.inlineCallbacks @defer.inlineCallbacks
def _remove_user_ip_dupes(self, progress, batch_size): def _remove_user_ip_dupes(self, progress, batch_size):
# This function works by scanning the user_ips table in batches # This function works by scanning the user_ips table in batches

View file

@ -13,9 +13,13 @@
* limitations under the License. * limitations under the License.
*/ */
-- delete duplicates -- analyze user_ips, to help ensure the correct indices are used
INSERT INTO background_updates (update_name, progress_json) VALUES INSERT INTO background_updates (update_name, progress_json) VALUES
('user_ips_remove_dupes', '{}'); ('user_ips_analyze', '{}');
-- delete duplicates
INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
('user_ips_remove_dupes', '{}', 'user_ips_analyze');
-- add a new unique index to user_ips table -- add a new unique index to user_ips table
INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES