Add high-performance batch imports

This commit is contained in:
PrivateGER 2023-05-25 23:49:52 +02:00 committed by PrivateGER
parent 8d08bf3ee4
commit 5ae89a69bc
3 changed files with 50 additions and 32 deletions

View file

@ -97,36 +97,48 @@ export default hasConfig ? {
filter: constructedFilters
});
},
ingestNote: (note : Note) => {
logger.info("Indexing note in MeiliSearch: " + note.id);
let attachmentType = "";
if (note.attachedFileTypes.length > 0) {
attachmentType = note.attachedFileTypes[0].split("/")[0];
switch (attachmentType) {
case "image":
case "video":
case "audio":
case "text":
break;
default:
attachmentType = "file"
break
}
ingestNote: (note: Note | Note[]) => {
if (note instanceof Note) {
note = [note];
}
return posts.addDocuments([
{
id: note.id.toString(),
text: note.text,
userId: note.userId,
userHost: note.userHost,
channelId: note.channelId,
mediaAttachment: attachmentType,
userName: note.user?.username,
createdAt: note.createdAt.getTime() / 1000 // division by 1000 is necessary because Node returns in ms-accuracy
let indexingBatch: MeilisearchNote[] = [];
note.forEach(note => {
let attachmentType = "";
if (note.attachedFileTypes.length > 0) {
attachmentType = note.attachedFileTypes[0].split("/")[0];
switch (attachmentType) {
case "image":
case "video":
case "audio":
case "text":
break;
default:
attachmentType = "file"
break
}
}
]);
indexingBatch.push({
id: note.id.toString(),
text: note.text ? note.text : "",
userId: note.userId,
userHost: note.userHost ? note.userHost : "",
channelId: note.channelId ? note.channelId : "",
mediaAttachment: attachmentType,
userName: note.user?.username ? note.user.username : "",
createdAt: note.createdAt.getTime() / 1000 // division by 1000 is necessary because Node returns in ms-accuracy
}
)
});
let indexingIDs = indexingBatch.map(note => note.id);
logger.info("Indexing notes in MeiliSearch: " + indexingIDs.join(","));
return posts.addDocuments(indexingBatch);
},
serverStats: async () => {
let health : Health = await client.health();

View file

@ -4,7 +4,8 @@ import { queueLogger } from "../../logger.js";
import { Notes } from "@/models/index.js";
import { MoreThan } from "typeorm";
import { index } from "@/services/note/create.js";
import { Note } from "@/models/entities/note.js";
import {Note} from "@/models/entities/note.js";
import meilisearch from "../../../db/meilisearch.js";
const logger = queueLogger.createSubLogger("index-all-notes");
@ -58,11 +59,16 @@ export default async function indexAllNotes(
for (let i = 0; i < notes.length; i += batch) {
const chunk = notes.slice(i, i + batch);
await Promise.all(chunk.map((note) => index(note)));
if (meilisearch) {
await meilisearch.ingestNote(chunk)
}
await Promise.all(chunk.map((note) => index(note, true)));
indexedCount += chunk.length;
const pct = (indexedCount / total) * 100;
job.update({ indexedCount, cursor, total });
job.update({indexedCount, cursor, total});
job.progress(+pct.toFixed(1));
logger.info(`Indexed notes ${indexedCount}/${total ? total : "?"}`);
}

View file

@ -749,7 +749,7 @@ async function insertNote(
}
}
export async function index(note: Note): Promise<void> {
export async function index(note: Note, reindexing: boolean): Promise<void> {
if (!note.text) return;
if (config.elasticsearch && es) {
@ -778,7 +778,7 @@ export async function index(note: Note): Promise<void> {
);
}
if (meilisearch) {
if (meilisearch && !reindexing) {
await meilisearch.ingestNote(note);
}
}