forked from MirrorHub/synapse
Fix user_threepids schema delta
The delta `37/user_threepids.sql` aimed to update all the email addresses to be lower case, however duplicate emails may exist in the table already. This commit adds a step where the delta moves the duplicate emails to a new `medium` `email_old`. Only the most recently used account keeps the binding intact. We move rather than delete so that we retain some record of which emails were associated with which account.
This commit is contained in:
parent
db0609f1ec
commit
1fc1bc2a51
1 changed files with 38 additions and 1 deletions
|
@ -17,7 +17,44 @@
|
||||||
* Update any email addresses that were stored with mixed case into all
|
* Update any email addresses that were stored with mixed case into all
|
||||||
* lowercase
|
* lowercase
|
||||||
*/
|
*/
|
||||||
UPDATE user_threepids SET address = LOWER(address) where medium = 'email';
|
|
||||||
|
-- There may be "duplicate" emails (with different case) already in the table,
|
||||||
|
-- so we find them and move all but the most recently used account.
|
||||||
|
UPDATE user_threepids
|
||||||
|
SET medium = 'email_old'
|
||||||
|
WHERE medium = 'email'
|
||||||
|
AND address IN (
|
||||||
|
-- `user_last_seen` maps user_ids to the last time we saw them
|
||||||
|
WITH user_last_seen AS (
|
||||||
|
SELECT user_id, max(last_seen) AS ts FROM user_ips GROUP BY user_id
|
||||||
|
),
|
||||||
|
-- `duplicate_addresses` is a table of all the email addresses that
|
||||||
|
-- appear multiple times and the most recently we saw any of their users
|
||||||
|
duplicate_addresses AS (
|
||||||
|
SELECT lower(u1.address) AS address, max(ts.ts) AS max_ts
|
||||||
|
FROM user_threepids AS u1
|
||||||
|
INNER JOIN user_threepids AS u2 ON u1.medium = u2.medium AND lower(u1.address) = lower(u2.address) AND u1.address != u2.address
|
||||||
|
INNER JOIN user_last_seen as ts ON ts.user_id = u1.user_id
|
||||||
|
WHERE u1.medium = 'email' AND u2.medium = 'email'
|
||||||
|
GROUP BY lower(u1.address)
|
||||||
|
)
|
||||||
|
-- We select all the addresses that are linked to the user_id that is NOT
|
||||||
|
-- the most recently seen.
|
||||||
|
SELECT u.address
|
||||||
|
FROM
|
||||||
|
user_threepids AS u,
|
||||||
|
duplicate_addresses,
|
||||||
|
user_last_seen AS ts
|
||||||
|
WHERE
|
||||||
|
lower(u.address) = duplicate_addresses.address
|
||||||
|
AND u.user_id = ts.user_id
|
||||||
|
AND ts.ts != max_ts -- NOT the most recently used
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
-- This update is now safe since we've removed the duplicate addresses.
|
||||||
|
UPDATE user_threepids SET address = LOWER(address) WHERE medium = 'email';
|
||||||
|
|
||||||
|
|
||||||
/* Add an index for the select we do on passwored reset */
|
/* Add an index for the select we do on passwored reset */
|
||||||
CREATE INDEX user_threepids_medium_address on user_threepids (medium, address);
|
CREATE INDEX user_threepids_medium_address on user_threepids (medium, address);
|
||||||
|
|
Loading…
Reference in a new issue