|
|
@@ -0,0 +1,72 @@
|
|
|
+import { Injectable, OnModuleInit } from '@nestjs/common';
|
|
|
+import { pipeline, FeatureExtractionPipeline } from '@xenova/transformers';
|
|
|
+import { MongoCoreService } from 'src/mongo/mongo-core.service';
|
|
|
+import { FFBProductionRepository } from 'src/mongo/mongo-ffb-production.repository';
|
|
|
+import { FFBProduction } from './ffb-production.schema';
|
|
|
+
|
|
|
/**
 * Embeds FFB production records as vectors and runs similarity searches
 * over them through {@link FFBProductionRepository}.
 *
 * The embedding model and the repository are created in
 * {@link FFBVectorService.onModuleInit}; every other method requires that
 * hook to have completed and throws a descriptive error otherwise.
 */
@Injectable()
export class FFBVectorService implements OnModuleInit {
  /** Feature-extraction pipeline; assigned in `onModuleInit`. */
  private embedder?: FeatureExtractionPipeline;

  /** Repository backing inserts and vector search; assigned in `onModuleInit`. */
  private repo?: FFBProductionRepository;

  private readonly VECTOR_DIM = 384; // must match your index

  /** Lower bound for the candidate pool; equals the previously hard-coded value. */
  private readonly MIN_NUM_CANDIDATES = 50;

  /**
   * Upper bound for the candidate pool.
   * NOTE(review): 10000 is the documented Atlas `$vectorSearch` maximum —
   * confirm the repository actually uses that stage.
   */
  private readonly MAX_NUM_CANDIDATES = 10000;

  constructor(private readonly mongoCore: MongoCoreService) {}

  /**
   * Loads the embedding model and prepares the repository at module startup.
   *
   * The model name is read from `EMBEDDING_MODEL`; `||` (not `??`) is used on
   * purpose so that an empty environment variable also falls back to the
   * default model.
   */
  async onModuleInit(): Promise<void> {
    const modelName = process.env.EMBEDDING_MODEL || 'Xenova/all-MiniLM-L6-v2';
    console.log(`🔹 Loading embedding model: ${modelName}...`);

    this.embedder = (await pipeline('feature-extraction', modelName)) as unknown as FeatureExtractionPipeline;

    const db = await this.mongoCore.getDb();
    const repo = new FFBProductionRepository(db);
    await repo.init();
    // Publish the repository only after init() succeeded, so a failed startup
    // never leaves a half-initialised repository reachable.
    this.repo = repo;

    console.log(`✅ Embedding model loaded and repository ready.`);
  }

  /**
   * Returns the initialised repository.
   *
   * @throws Error if `onModuleInit` has not completed yet.
   */
  private requireRepo(): FFBProductionRepository {
    if (!this.repo) {
      throw new Error('FFBVectorService is not initialized: repository is not ready');
    }
    return this.repo;
  }

  /**
   * Converts an FFBProduction record into the sentence that gets embedded.
   *
   * @throws RangeError (from `Date.prototype.toISOString`) when
   *   `record.productionDate` cannot be parsed into a valid date.
   */
  private recordToText(record: FFBProduction): string {
    return `Production on ${new Date(record.productionDate).toISOString()} at ${record.site} in ${record.phase} ${record.block} produced ${record.quantity} ${record.quantityUom} with a total weight of ${record.weight} ${record.weightUom}.`;
  }

  /**
   * Generates a mean-pooled, normalized embedding vector for `text`.
   *
   * @throws Error if the model has not been loaded yet, or if the produced
   *   vector does not have `VECTOR_DIM` components.
   */
  private async embedText(text: string): Promise<number[]> {
    if (!this.embedder) {
      throw new Error('FFBVectorService is not initialized: embedding model is not loaded');
    }

    const output = await this.embedder(text, { pooling: 'mean', normalize: true });
    const vector = Array.from(output.data);
    if (vector.length !== this.VECTOR_DIM) {
      throw new Error(`Embedding dimension mismatch. Expected ${this.VECTOR_DIM}, got ${vector.length}`);
    }
    return vector;
  }

  /**
   * Inserts a single record together with its embedding vector.
   *
   * @param record Production record to embed and store.
   * @returns Whatever the repository's `create` returns.
   * @throws Error if the service has not finished initialising.
   */
  async insertWithVector(record: FFBProduction) {
    // Fail fast before spending time on the embedding.
    const repo = this.requireRepo();

    const text = this.recordToText(record);
    const vector = await this.embedText(text);

    // Explicitly tell TypeScript this object matches the repository type
    const data: FFBProduction & { vector: number[] } = { ...record, vector };
    return repo.create(data);
  }

  /**
   * Searches for the top-k records most similar to a text query.
   *
   * @param query Free-text query; must contain at least one non-whitespace character.
   * @param k Number of results to return; a positive integer no larger than
   *   `MAX_NUM_CANDIDATES`. Defaults to 5.
   * @returns The repository results with `_id` converted to a string.
   * @throws Error if `query` is empty/blank or the service is not initialised.
   * @throws RangeError if `k` is not a positive integer within the allowed range.
   */
  async vectorSearch(query: string, k = 5) {
    if (typeof query !== 'string' || query.trim() === '') {
      throw new Error('Query string cannot be empty');
    }
    if (!Number.isInteger(k) || k < 1 || k > this.MAX_NUM_CANDIDATES) {
      throw new RangeError(
        `k must be an integer between 1 and ${this.MAX_NUM_CANDIDATES}, got ${k}`,
      );
    }
    const repo = this.requireRepo();

    // Step 1: Embed the query text
    const vector = await this.embedText(query);

    // Step 2: Use repository aggregation for vector search.
    // The candidate pool must never be smaller than the requested result
    // count, so it scales with k (10x) instead of being fixed at 50; for the
    // default k = 5 this still yields exactly 50.
    const numCandidates = Math.min(
      this.MAX_NUM_CANDIDATES,
      Math.max(this.MIN_NUM_CANDIDATES, k * 10),
    );
    const results = await repo.vectorSearch(vector, k, numCandidates);

    // Step 3: Return results directly (they include the full document + score)
    return results.map(r => ({
      ...r, // all FFBProduction fields
      _id: r._id.toString(), // convert ObjectId to string if needed
      score: r.score, // similarity score
    }));
  }
}
|