import { Injectable, OnModuleInit } from '@nestjs/common';
import { pipeline, FeatureExtractionPipeline } from '@xenova/transformers';
import { MongoCoreService } from 'src/mongo/mongo-core.service';
import { FFBProductionRepository } from 'src/mongo/mongo-ffb-production.repository';
import { FFBProduction } from './ffb-production.schema';

/**
 * Embeds FFB production records as text vectors and exposes insert / similarity
 * search on top of {@link FFBProductionRepository}.
 *
 * The embedding model and the repository are created in {@link onModuleInit},
 * so the other methods must only be called after module initialization.
 */
@Injectable()
export class FFBVectorService implements OnModuleInit {
  // Both fields are assigned in onModuleInit (not the constructor), hence the
  // definite-assignment assertions.
  private embedder!: FeatureExtractionPipeline;
  private repo!: FFBProductionRepository;

  /** Expected embedding length; must match the dimension of the vector index. */
  private readonly VECTOR_DIM = 384;

  /** Lower bound for the candidate count handed to the repository search. */
  private readonly MIN_NUM_CANDIDATES = 50;

  constructor(private readonly mongoCore: MongoCoreService) {}

  /**
   * Loads the embedding model and initializes the repository at module startup.
   *
   * The model name is read from `EMBEDDING_MODEL`, falling back to
   * `Xenova/all-MiniLM-L6-v2` when the variable is unset or empty.
   */
  async onModuleInit(): Promise<void> {
    const modelName = process.env.EMBEDDING_MODEL || 'Xenova/all-MiniLM-L6-v2';
    console.log(`🔹 Loading embedding model: ${modelName}...`);

    this.embedder = (await pipeline('feature-extraction', modelName)) as unknown as FeatureExtractionPipeline;

    const db = await this.mongoCore.getDb();
    this.repo = new FFBProductionRepository(db);
    await this.repo.init();

    console.log(`✅ Embedding model loaded and repository ready.`);
  }

  /**
   * Renders a production record as a single English sentence for embedding.
   *
   * @param record - The production record to describe.
   * @returns The sentence fed to the embedding model.
   */
  private recordToText(record: FFBProduction): string {
    return `Production on ${new Date(record.productionDate).toISOString()} at ${record.site} in ${record.phase} ${record.block} produced ${record.quantity} ${record.quantityUom} with a total weight of ${record.weight} ${record.weightUom}.`;
  }

  /**
   * Generates a mean-pooled, normalized embedding vector for `text`.
   *
   * @param text - The text to embed.
   * @returns The embedding as a plain array of length `VECTOR_DIM`.
   * @throws Error when the model output length differs from `VECTOR_DIM`.
   */
  private async embedText(text: string): Promise<number[]> {
    const output = await this.embedder(text, { pooling: 'mean', normalize: true });
    // Copy the model output into a plain array so it can be stored and passed around.
    const vector = Array.from(output.data);

    if (vector.length !== this.VECTOR_DIM) {
      throw new Error(`Embedding dimension mismatch. Expected ${this.VECTOR_DIM}, got ${vector.length}`);
    }
    return vector;
  }

  /**
   * Embeds a single record and stores it together with its vector.
   *
   * @param record - The production record to insert.
   * @returns Whatever the repository's `create` call resolves to.
   */
  async insertWithVector(record: FFBProduction) {
    const text = this.recordToText(record);
    const vector = await this.embedText(text);

    // Explicitly typed so the object is checked against the shape the repository stores.
    const data: FFBProduction & { vector: number[] } = { ...record, vector };
    return this.repo.create(data);
  }

  /**
   * Searches for the top-k records most similar to a text query.
   *
   * @param query - Free-text query; must contain at least one non-whitespace character.
   * @param k - Number of results requested from the repository (defaults to 5).
   * @returns The repository results with `_id` converted to a string and `score` preserved.
   * @throws Error when `query` is empty or whitespace-only.
   */
  async vectorSearch(query: string, k = 5) {
    // A whitespace-only query carries no content to embed, so treat it as empty.
    if (!query || !query.trim()) throw new Error('Query string cannot be empty');

    // Step 1: embed the query text.
    const vector = await this.embedText(query);

    // Step 2: run the repository vector search. The candidate pool must not be
    // smaller than the number of results requested, so scale it up for large k.
    // NOTE(review): assumes the third argument is numCandidates — confirm against the repository.
    const numCandidates = Math.max(k, this.MIN_NUM_CANDIDATES);
    const results = await this.repo.vectorSearch(vector, k, numCandidates);

    // Step 3: return the documents with a string id and their similarity score.
    return results.map(r => ({
      ...r,
      _id: r._id.toString(),
      score: r.score,
    }));
  }
}