feat(backend): Add ability to use ElasticSearch as search backend

Co-authored-by: 皐月なふ (Nafu Satsuki) <satsuki@nafusoft.dev>
This commit is contained in:
mattyatea 2024-07-13 04:52:40 +09:00 committed by 丈槍由紀
parent 60be692a0a
commit 12d2a9a5eb
6 changed files with 220 additions and 9 deletions

View file

@ -69,6 +69,7 @@
"@bull-board/fastify": "6.0.0",
"@bull-board/ui": "6.0.0",
"@discordapp/twemoji": "15.1.0",
"@elastic/elasticsearch": "^8.14.0",
"@fastify/accepts": "5.0.0",
"@fastify/cookie": "10.0.0",
"@fastify/cors": "10.0.0",

View file

@ -7,6 +7,7 @@ import { Global, Inject, Module } from '@nestjs/common';
import * as Redis from 'ioredis';
import { DataSource } from 'typeorm';
import { MeiliSearch } from 'meilisearch';
import { Client as ElasticSearch } from '@elastic/elasticsearch';
import { DI } from './di-symbols.js';
import { Config, loadConfig } from './config.js';
import { createPostgresDataSource } from './postgres.js';
@ -45,6 +46,30 @@ const $meilisearch: Provider = {
inject: [DI.config],
};
const $elasticsearch: Provider = {
provide: DI.elasticsearch,
useFactory: (config: Config) => {
if (config.elasticsearch) {
return new ElasticSearch({
nodes: {
url: new URL(`${config.elasticsearch.ssl ? 'https' : 'http'}://${config.elasticsearch.host}:${config.elasticsearch.port}`),
ssl: {
rejectUnauthorized: config.elasticsearch.rejectUnauthorized,
},
},
auth: (config.elasticsearch.user && config.elasticsearch.pass) ? {
username: config.elasticsearch.user,
password: config.elasticsearch.pass,
} : undefined,
pingTimeout: 30000,
});
} else {
return null;
}
},
inject: [DI.config],
};
const $redis: Provider = {
provide: DI.redis,
useFactory: (config: Config) => {
@ -148,8 +173,8 @@ const $meta: Provider = {
@Global()
@Module({
imports: [RepositoryModule],
providers: [$config, $db, $meta, $meilisearch, $redis, $redisForPub, $redisForSub, $redisForTimelines, $redisForReactions],
exports: [$config, $db, $meta, $meilisearch, $redis, $redisForPub, $redisForSub, $redisForTimelines, $redisForReactions, RepositoryModule],
providers: [$config, $db, $meta, $meilisearch, $elasticsearch, $redis, $redisForPub, $redisForSub, $redisForTimelines, $redisForReactions],
exports: [$config, $db, $meta, $meilisearch, $elasticsearch, $redis, $redisForPub, $redisForSub, $redisForTimelines, $redisForReactions, RepositoryModule],
})
export class GlobalModule implements OnApplicationShutdown {
constructor(

View file

@ -64,6 +64,15 @@ type Source = {
sentryForFrontend?: { options: Partial<Sentry.NodeOptions> };
publishTarballInsteadOfProvideRepositoryUrl?: boolean;
elasticsearch?: {
host: string;
port: string;
user: string;
pass: string;
ssl?: boolean;
rejectUnauthorized?: boolean;
index: string;
};
proxy?: string;
proxySmtp?: string;
@ -143,6 +152,15 @@ export type Config = {
index: string;
scope?: 'local' | 'global' | string[];
} | undefined;
elasticsearch: {
host: string;
port: string;
user: string;
pass: string;
ssl?: boolean;
rejectUnauthorized?: boolean;
index: string;
} | undefined;
proxy: string | undefined;
proxySmtp: string | undefined;
proxyBypassHosts: string[] | undefined;
@ -287,6 +305,7 @@ export function loadConfig(): Config {
dbReplications: config.dbReplications,
dbSlaves: config.dbSlaves,
meilisearch: config.meilisearch,
elasticsearch: config.elasticsearch,
redis,
redisForPubsub: config.redisForPubsub ? convertRedisOptions(config.redisForPubsub, host) : redis,
redisForJobQueue: config.redisForJobQueue ? convertRedisOptions(config.redisForJobQueue, host) : redis,

View file

@ -17,6 +17,7 @@ import { CacheService } from '@/core/CacheService.js';
import { QueryService } from '@/core/QueryService.js';
import { IdService } from '@/core/IdService.js';
import type { Index, MeiliSearch } from 'meilisearch';
import type { Client as ElasticSearch } from '@elastic/elasticsearch';
type K = string;
type V = string | number | boolean;
@ -65,7 +66,7 @@ function compileQuery(q: Q): string {
export class SearchService {
private readonly meilisearchIndexScope: 'local' | 'global' | string[] = 'local';
private meilisearchNoteIndex: Index | null = null;
private elasticsearchNoteIndex: string | null = null;
constructor(
@Inject(DI.config)
private config: Config,
@ -73,6 +74,9 @@ export class SearchService {
@Inject(DI.meilisearch)
private meilisearch: MeiliSearch | null,
@Inject(DI.elasticsearch)
private elasticsearch: ElasticSearch | null,
@Inject(DI.notesRepository)
private notesRepository: NotesRepository,
@ -106,9 +110,56 @@ export class SearchService {
},
});
}
if (this.config.meilisearch?.scope) {
this.meilisearchIndexScope = this.config.meilisearch.scope;
});
} else if (this.elasticsearch) {
this.elasticsearchNoteIndex = `${config.elasticsearch!.index}---notes`;
this.elasticsearch.indices.exists({
index: this.elasticsearchNoteIndex,
}).then((indexExists) => {
if (!indexExists) {
this.elasticsearch?.indices.create(
{
index: this.elasticsearchNoteIndex + `-${new Date().toISOString().slice(0, 7).replace(/-/g, '')}`,
mappings: {
properties: {
text: { type: 'text' },
cw: { type: 'text' },
createdAt: { type: 'long' },
userId: { type: 'keyword' },
userHost: { type: 'keyword' },
channelId: { type: 'keyword' },
tags: { type: 'keyword' },
},
},
settings: {
index: {
analysis: {
tokenizer: {
kuromoji: {
type: 'kuromoji_tokenizer',
mode: 'search',
},
},
analyzer: {
kuromoji_analyzer: {
type: 'custom',
tokenizer: 'kuromoji',
},
},
},
},
},
},
).catch((error) => {
console.error(error);
});
}
}).catch((error) => {
console.error(error);
});
}
if (config.meilisearch?.scope) {
this.meilisearchIndexScope = config.meilisearch.scope;
}
}
@ -146,6 +197,23 @@ export class SearchService {
}], {
primaryKey: 'id',
});
} else if (this.elasticsearch) {
const body = {
createdAt: this.idService.parse(note.id).date.getTime(),
userId: note.userId,
userHost: note.userHost,
channelId: note.channelId,
cw: note.cw,
text: note.text,
tags: note.tags,
};
await this.elasticsearch.index({
index: this.elasticsearchNoteIndex + `-${new Date().toISOString().slice(0, 7).replace(/-/g, '')}` as string,
id: note.id,
body: body,
}).catch((error) => {
console.error(error);
});
}
}
@ -190,7 +258,7 @@ export class SearchService {
if (opts.filetype) {
if (opts.filetype === 'image') {
filter.qs.push({ op: 'or', qs: [
{ op: '=', k: 'attachedFileTypes', v: 'image/webp' },
{ op: '=', k: 'attachedFileTypes', v: 'image/webp' },
{ op: '=', k: 'attachedFileTypes', v: 'image/png' },
{ op: '=', k: 'attachedFileTypes', v: 'image/jpeg' },
{ op: '=', k: 'attachedFileTypes', v: 'image/avif' },
@ -199,14 +267,14 @@ export class SearchService {
] });
} else if (opts.filetype === 'video') {
filter.qs.push({ op: 'or', qs: [
{ op: '=', k: 'attachedFileTypes', v: 'video/mp4' },
{ op: '=', k: 'attachedFileTypes', v: 'video/mp4' },
{ op: '=', k: 'attachedFileTypes', v: 'video/webm' },
{ op: '=', k: 'attachedFileTypes', v: 'video/mpeg' },
{ op: '=', k: 'attachedFileTypes', v: 'video/x-m4v' },
] });
} else if (opts.filetype === 'audio') {
filter.qs.push({ op: 'or', qs: [
{ op: '=', k: 'attachedFileTypes', v: 'audio/mpeg' },
{ op: '=', k: 'attachedFileTypes', v: 'audio/mpeg' },
{ op: '=', k: 'attachedFileTypes', v: 'audio/flac' },
{ op: '=', k: 'attachedFileTypes', v: 'audio/wav' },
{ op: '=', k: 'attachedFileTypes', v: 'audio/aac' },
@ -247,6 +315,67 @@ export class SearchService {
if (me && isUserRelated(note, userIdsWhoMeMuting)) return false;
return true;
});
return notes.sort((a, b) => a.id > b.id ? -1 : 1);
} else if (this.elasticsearch) {
const esFilter: any = {
bool: {
must: [],
},
};
if (pagination.untilId) esFilter.bool.must.push({ range: { createdAt: { lt: this.idService.parse(pagination.untilId).date.getTime() } } });
if (pagination.sinceId) esFilter.bool.must.push({ range: { createdAt: { gt: this.idService.parse(pagination.sinceId).date.getTime() } } });
if (opts.userId) esFilter.bool.must.push({ term: { userId: opts.userId } });
if (opts.channelId) esFilter.bool.must.push({ term: { channelId: opts.channelId } });
if (opts.host) {
if (opts.host === '.') {
esFilter.bool.must.push({ bool: { must_not: [{ exists: { field: 'userHost' } }] } });
} else {
esFilter.bool.must.push({ term: { userHost: opts.host } });
}
}
if (q !== '') {
esFilter.bool.must.push({
bool: {
should: [
{ wildcard: { 'text': { value: q } } },
{ simple_query_string: { fields: ['text'], 'query': q, default_operator: 'and' } },
{ wildcard: { 'cw': { value: q } } },
{ simple_query_string: { fields: ['cw'], 'query': q, default_operator: 'and' } },
],
minimum_should_match: 1,
},
});
}
const res = await (this.elasticsearch.search)({
index: this.elasticsearchNoteIndex + '*' as string,
body: {
query: esFilter,
sort: [{ createdAt: { order: 'desc' } }],
},
_source: ['id', 'createdAt'],
size: pagination.limit,
});
const noteIds = res.hits.hits.map((hit: any) => hit._id);
if (noteIds.length === 0) return [];
const [
userIdsWhoMeMuting,
userIdsWhoBlockingMe,
] = me ? await Promise.all([
this.cacheService.userMutingsCache.fetch(me.id),
this.cacheService.userBlockedCache.fetch(me.id),
]) : [new Set<string>(), new Set<string>()];
const notes = (await this.notesRepository.findBy({
id: In(noteIds),
})).filter(note => {
if (me && isUserRelated(note, userIdsWhoBlockingMe)) return false;
if (me && isUserRelated(note, userIdsWhoMeMuting)) return false;
return true;
});
return notes.sort((a, b) => a.id > b.id ? -1 : 1);
} else {
const query = this.queryService.makePaginationQuery(this.notesRepository.createQueryBuilder('note'), pagination.sinceId, pagination.untilId);

View file

@ -8,6 +8,7 @@ export const DI = {
db: Symbol('db'),
meta: Symbol('meta'),
meilisearch: Symbol('meilisearch'),
elasticsearch: Symbol('elasticsearch'),
redis: Symbol('redis'),
redisForPub: Symbol('redisForPub'),
redisForSub: Symbol('redisForSub'),

36
pnpm-lock.yaml generated
View file

@ -97,6 +97,9 @@ importers:
'@discordapp/twemoji':
specifier: 15.1.0
version: 15.1.0
'@elastic/elasticsearch':
specifier: ^8.14.0
version: 8.14.0
'@fastify/accepts':
specifier: 5.0.0
version: 5.0.0
@ -2056,6 +2059,18 @@ packages:
'@emnapi/runtime@1.3.0':
resolution: {integrity: sha512-XMBySMuNZs3DM96xcJmLW4EfGnf+uGmFNjzpehMjuX5PLB5j87ar2Zc4e3PVeZ3I5g3tYtAqskB28manlF69Zw==}
'@discoveryjs/json-ext@0.5.7':
resolution: {integrity: sha512-dBVuXR082gk3jsFp7Rd/JI4kytwGHecnCoTtXFb7DB6CNHp4rg5k1bhg0nWdLGLnOV71lmDzGQaLMy8iPLY0pw==}
engines: {node: '>=10.0.0'}
'@elastic/elasticsearch@8.14.0':
resolution: {integrity: sha512-MGrgCI4y+Ozssf5Q2IkVJlqt5bUMnKIICG2qxeOfrJNrVugMCBCAQypyesmSSocAtNm8IX3LxfJ3jQlFHmKe2w==}
engines: {node: '>=18'}
'@elastic/transport@8.7.0':
resolution: {integrity: sha512-IqXT7a8DZPJtqP2qmX1I2QKmxYyN27kvSW4g6pInESE1SuGwZDp2FxHJ6W2kwmYOJwQdAt+2aWwzXO5jHo9l4A==}
engines: {node: '>=18'}
'@esbuild/aix-ppc64@0.19.11':
resolution: {integrity: sha512-FnzU0LyE3ySQk7UntJO4+qIiQgI7KoODnZg5xzXIrFJlKd2P2gwHsHY4927xj9y5PJmJSzULiUCWmv7iWnNa7g==}
engines: {node: '>=12'}
@ -12724,6 +12739,27 @@ snapshots:
jsonfile: 5.0.0
universalify: 0.1.2
'@discoveryjs/json-ext@0.5.7': {}
'@elastic/elasticsearch@8.14.0':
dependencies:
'@elastic/transport': 8.7.0
tslib: 2.6.2
transitivePeerDependencies:
- supports-color
'@elastic/transport@8.7.0':
dependencies:
'@opentelemetry/api': 1.9.0
debug: 4.3.4(supports-color@8.1.1)
hpagent: 1.2.0
ms: 2.1.3
secure-json-parse: 2.7.0
tslib: 2.6.2
undici: 6.19.2
transitivePeerDependencies:
- supports-color
'@emnapi/runtime@1.3.0':
dependencies:
tslib: 2.6.3