From 9a20811575f62670b43c2e7d861592855aed6dd2 Mon Sep 17 00:00:00 2001 From: Laura Hausmann Date: Sat, 9 Sep 2023 16:51:27 +0200 Subject: [PATCH] [e2net] Add user dedupe functionality --- packages/backend/src/boot/master.ts | 15 +++ packages/backend/src/db/postgre.ts | 2 +- packages/backend/src/misc/dedupe-users.ts | 115 ++++++++++++++++++++++ 3 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 packages/backend/src/misc/dedupe-users.ts diff --git a/packages/backend/src/boot/master.ts b/packages/backend/src/boot/master.ts index 61cf95012..61921d589 100644 --- a/packages/backend/src/boot/master.ts +++ b/packages/backend/src/boot/master.ts @@ -14,6 +14,7 @@ import { lessThan } from "@/prelude/array.js"; import { envOption } from "../env.js"; import { showMachineInfo } from "@/misc/show-machine-info.js"; import { db, initDb } from "../db/postgre.js"; +import dedupeUsers from "@/misc/dedupe-users.js" const _filename = fileURLToPath(import.meta.url); const _dirname = dirname(_filename); @@ -135,6 +136,7 @@ async function connectDb(): Promise { .query("SHOW server_version") .then((x) => x[0].server_version); dbLogger.succ(`Connected: v${v}`); + await runDbStartupTasks(); } catch (e) { dbLogger.error("Cannot connect", null, true); dbLogger.error(e); @@ -142,6 +144,19 @@ async function connectDb(): Promise { } } +async function runDbStartupTasks(): Promise { + const dbLogger = bootLogger.createSubLogger("db"); + try { + await dedupeUsers(); + process.exit(0); + } + catch (e) { + dbLogger.error("Failed to run startup tasks:"); + dbLogger.error(e); + process.exit(0); + } +} + async function spawnWorkers(limit = 1) { const workers = Math.min(limit, os.cpus().length); bootLogger.info(`Starting ${workers} worker${workers === 1 ? "" : "s"}...`); diff --git a/packages/backend/src/db/postgre.ts b/packages/backend/src/db/postgre.ts index 2e24bf670..e95aef83e 100644 --- a/packages/backend/src/db/postgre.ts +++ b/packages/backend/src/db/postgre.ts @@ -191,7 +191,7 @@ export const db = new DataSource({ password: config.db.pass, database: config.db.db, extra: { - statement_timeout: 1000 * 10, + statement_timeout: 1000 * 1000, ...config.db.extra, }, synchronize: process.env.NODE_ENV === "test", diff --git a/packages/backend/src/misc/dedupe-users.ts b/packages/backend/src/misc/dedupe-users.ts new file mode 100644 index 000000000..9ca6d71b1 --- /dev/null +++ b/packages/backend/src/misc/dedupe-users.ts @@ -0,0 +1,115 @@ +import { CacheableUser } from "@/models/entities/user.js"; +import {Notes, Users, Followings} from "@/models/index.js"; +import { dbLogger } from "@/db/logger.js"; +import { updatePerson } from "@/remote/activitypub/models/person.js"; + +const logger = dbLogger.createSubLogger('dedupe-users'); + +export default async function dedupeUsers() { + logger.info('starting user dedupe task'); + const users = await getDuplicateUsers(); + logger.info(`Found ${users.length} duplicate users`); + if (users.length == 0) return; + for (const user of users) { + await dedupeUser(user); + } +} + +async function dedupeUser(user: {usernameLower: string, host: string, count: number}){ + // get all dupe users + let dupes = await Users.findBy({usernameLower: user.usernameLower, host: user.host}); + + // decide the final one + const finalUser = dupes.sort((a,b) => { + if ((a.updatedAt ?? a.createdAt) < (b.updatedAt ?? b.createdAt)) { + return 1; + } + if ((a.updatedAt ?? a.createdAt) > (b.updatedAt ?? b.createdAt)) { + return -1; + } + + return 0; + })[0]; + + logger.info(`Processing user: ${user.usernameLower}@${user.host}`); + + // remove the final user from the dupe array + dupes = dupes.filter(p => p.id !== finalUser.id); + + // get a list of all the dupe users for sql purposes + const ids = `'${dupes.map(p => p.id).join("', '")}'`; + + // make all notes be owned by the final user + const noteRes = await Notes.createQueryBuilder("note") + .update() + .set({ + userId: finalUser.id + }) + .where(`"note"."userId" IN (${ids})`) + .execute(); + + logger.info(`Updated ${noteRes.affected ?? 0} notes`); + + // update all the following relationships + // get all user followers + const followers = (await Followings.createQueryBuilder("following") + .select('id') + .where(`"following"."followeeId" IN (${ids})`) + .execute()) as string[]; + + // get all user followings + const followings = (await Followings.createQueryBuilder("following") + .select('id') + .where(`"following"."followerId" IN (${ids})`) + .execute()) as string[]; + + // update all user followers, delete them if constraint fails + let followerUpdated = 0; + let followerDeleted = 0; + for (const follower of followers) { + try { + await Followings.update(follower, { followeeId: finalUser.id }); + followerUpdated++; + } + catch { + await Followings.delete(follower); + followerDeleted++; + } + } + logger.info(`Updated ${followerUpdated} follower relationships, deleted ${followerDeleted} duplicates`); + + // update all user followings, delete them if constraint fails + let followingUpdated = 0; + let followingDeleted = 0; + for (const following of followings) { + try { + await Followings.update(following, { followerId: finalUser.id }); + followerUpdated++; + } + catch { + await Followings.delete(following); + followerDeleted++; + } + } + logger.info(`Updated ${followingUpdated} following relationships, deleted ${followingDeleted} duplicates`); + + // delete the rest + const delRes = await Users.createQueryBuilder("user") + .delete() + .where(`"user"."id" IN (${ids})`) + .execute(); + logger.info(`Deleted ${delRes.affected ?? 0} users`); +} + +async function getDuplicateUsers(): Promise> { + const query = Users.createQueryBuilder("user") + .select("user.usernameLower", "usernameLower") + .addSelect("user.host", "host") + .addSelect("COUNT(*)") + .groupBy("user.usernameLower") + .addGroupBy("user.host") + .having("COUNT(*) > 1") + .orderBy("count", "DESC"); + + return query.execute(); +}