Compare commits

...

1 commit

Author SHA1 Message Date
Laura Hausmann 9a20811575 [e2net] Add user dedupe functionality 2023-11-10 19:41:59 +01:00
3 changed files with 131 additions and 1 deletions

View file

@ -14,6 +14,7 @@ import { lessThan } from "@/prelude/array.js";
import { envOption } from "../env.js";
import { showMachineInfo } from "@/misc/show-machine-info.js";
import { db, initDb } from "../db/postgre.js";
import dedupeUsers from "@/misc/dedupe-users.js"
const _filename = fileURLToPath(import.meta.url);
const _dirname = dirname(_filename);
@ -135,6 +136,7 @@ async function connectDb(): Promise<void> {
.query("SHOW server_version")
.then((x) => x[0].server_version);
dbLogger.succ(`Connected: v${v}`);
await runDbStartupTasks();
} catch (e) {
dbLogger.error("Cannot connect", null, true);
dbLogger.error(e);
@ -142,6 +144,19 @@ async function connectDb(): Promise<void> {
}
}
async function runDbStartupTasks(): Promise<void> {
const dbLogger = bootLogger.createSubLogger("db");
try {
await dedupeUsers();
process.exit(0);
}
catch (e) {
dbLogger.error("Failed to run startup tasks:");
dbLogger.error(e);
process.exit(0);
}
}
async function spawnWorkers(limit = 1) {
const workers = Math.min(limit, os.cpus().length);
bootLogger.info(`Starting ${workers} worker${workers === 1 ? "" : "s"}...`);

View file

@ -191,7 +191,7 @@ export const db = new DataSource({
password: config.db.pass,
database: config.db.db,
extra: {
statement_timeout: 1000 * 10,
statement_timeout: 1000 * 1000,
...config.db.extra,
},
synchronize: process.env.NODE_ENV === "test",

View file

@ -0,0 +1,115 @@
import { CacheableUser } from "@/models/entities/user.js";
import {Notes, Users, Followings} from "@/models/index.js";
import { dbLogger } from "@/db/logger.js";
import { updatePerson } from "@/remote/activitypub/models/person.js";
const logger = dbLogger.createSubLogger('dedupe-users');
export default async function dedupeUsers() {
logger.info('starting user dedupe task');
const users = await getDuplicateUsers();
logger.info(`Found ${users.length} duplicate users`);
if (users.length == 0) return;
for (const user of users) {
await dedupeUser(user);
}
}
async function dedupeUser(user: {usernameLower: string, host: string, count: number}){
// get all dupe users
let dupes = await Users.findBy({usernameLower: user.usernameLower, host: user.host});
// decide the final one
const finalUser = dupes.sort((a,b) => {
if ((a.updatedAt ?? a.createdAt) < (b.updatedAt ?? b.createdAt)) {
return 1;
}
if ((a.updatedAt ?? a.createdAt) > (b.updatedAt ?? b.createdAt)) {
return -1;
}
return 0;
})[0];
logger.info(`Processing user: ${user.usernameLower}@${user.host}`);
// remove the final user from the dupe array
dupes = dupes.filter(p => p.id !== finalUser.id);
// get a list of all the dupe users for sql purposes
const ids = `'${dupes.map(p => p.id).join("', '")}'`;
// make all notes be owned by the final user
const noteRes = await Notes.createQueryBuilder("note")
.update()
.set({
userId: finalUser.id
})
.where(`"note"."userId" IN (${ids})`)
.execute();
logger.info(`Updated ${noteRes.affected ?? 0} notes`);
// update all the following relationships
// get all user followers
const followers = (await Followings.createQueryBuilder("following")
.select('id')
.where(`"following"."followeeId" IN (${ids})`)
.execute()) as string[];
// get all user followings
const followings = (await Followings.createQueryBuilder("following")
.select('id')
.where(`"following"."followerId" IN (${ids})`)
.execute()) as string[];
// update all user followers, delete them if constraint fails
let followerUpdated = 0;
let followerDeleted = 0;
for (const follower of followers) {
try {
await Followings.update(follower, { followeeId: finalUser.id });
followerUpdated++;
}
catch {
await Followings.delete(follower);
followerDeleted++;
}
}
logger.info(`Updated ${followerUpdated} follower relationships, deleted ${followerDeleted} duplicates`);
// update all user followings, delete them if constraint fails
let followingUpdated = 0;
let followingDeleted = 0;
for (const following of followings) {
try {
await Followings.update(following, { followerId: finalUser.id });
followerUpdated++;
}
catch {
await Followings.delete(following);
followerDeleted++;
}
}
logger.info(`Updated ${followingUpdated} following relationships, deleted ${followingDeleted} duplicates`);
// delete the rest
const delRes = await Users.createQueryBuilder("user")
.delete()
.where(`"user"."id" IN (${ids})`)
.execute();
logger.info(`Deleted ${delRes.affected ?? 0} users`);
}
async function getDuplicateUsers(): Promise<Array<{usernameLower: string, host: string, count: number}>> {
const query = Users.createQueryBuilder("user")
.select("user.usernameLower", "usernameLower")
.addSelect("user.host", "host")
.addSelect("COUNT(*)")
.groupBy("user.usernameLower")
.addGroupBy("user.host")
.having("COUNT(*) > 1")
.orderBy("count", "DESC");
return query.execute();
}