Fix user_threepids schema delta

The delta `37/user_threepids.sql` aimed to update all the email
addresses to be lower case, however duplicate emails may exist in the
table already.

This commit adds a step where the delta moves the duplicate emails to a
new `medium` `email_old`. Only the most recently used account keeps the
binding intact. We move rather than delete so that we retain some record
of which emails were associated with which account.
This commit is contained in:
Erik Johnston 2016-10-27 14:14:44 +01:00
parent db0609f1ec
commit 1fc1bc2a51

View file

@ -17,7 +17,44 @@
* Update any email addresses that were stored with mixed case into all * Update any email addresses that were stored with mixed case into all
* lowercase * lowercase
*/ */
UPDATE user_threepids SET address = LOWER(address) where medium = 'email';
-- There may be "duplicate" emails (with different case) already in the table,
-- so we find them and move all but the most recently used account.
UPDATE user_threepids
SET medium = 'email_old'
WHERE medium = 'email'
AND address IN (
-- `user_last_seen` maps user_ids to the last time we saw them
WITH user_last_seen AS (
SELECT user_id, max(last_seen) AS ts FROM user_ips GROUP BY user_id
),
-- `duplicate_addresses` is a table of all the email addresses that
-- appear multiple times and the most recently we saw any of their users
duplicate_addresses AS (
SELECT lower(u1.address) AS address, max(ts.ts) AS max_ts
FROM user_threepids AS u1
INNER JOIN user_threepids AS u2 ON u1.medium = u2.medium AND lower(u1.address) = lower(u2.address) AND u1.address != u2.address
INNER JOIN user_last_seen as ts ON ts.user_id = u1.user_id
WHERE u1.medium = 'email' AND u2.medium = 'email'
GROUP BY lower(u1.address)
)
-- We select all the addresses that are linked to the user_id that is NOT
-- the most recently seen.
SELECT u.address
FROM
user_threepids AS u,
duplicate_addresses,
user_last_seen AS ts
WHERE
lower(u.address) = duplicate_addresses.address
AND u.user_id = ts.user_id
AND ts.ts != max_ts -- NOT the most recently used
);
-- This update is now safe since we've removed the duplicate addresses.
UPDATE user_threepids SET address = LOWER(address) WHERE medium = 'email';
/* Add an index for the select we do on passwored reset */ /* Add an index for the select we do on passwored reset */
CREATE INDEX user_threepids_medium_address on user_threepids (medium, address); CREATE INDEX user_threepids_medium_address on user_threepids (medium, address);