| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071 |
- import { Injectable, OnModuleInit } from '@nestjs/common';
- import { pipeline, FeatureExtractionPipeline } from '@xenova/transformers';
- import { MongoCoreService } from 'src/mongo/mongo-core.service';
- import { FFBProductionRepository } from 'src/mongo/mongo-ffb-production.repository';
- import { FFBProduction } from './ffb-production.schema';
/**
 * Turns FFB production records into sentence embeddings and stores / searches
 * them through {@link FFBProductionRepository}.
 *
 * The embedding model and the repository are created in `onModuleInit`, so the
 * public methods throw a descriptive error if they are called before module
 * initialization has completed.
 */
@Injectable()
export class FFBVectorService implements OnModuleInit {
  // Both are assigned in onModuleInit(); optional so that early use is caught
  // by an explicit guard instead of an opaque "undefined is not a function".
  private embedder?: FeatureExtractionPipeline;
  private repo?: FFBProductionRepository;
  private readonly VECTOR_DIM = 384; // must match your index

  constructor(private readonly mongoCore: MongoCoreService) {}

  /** Initialize model and repository at module startup. */
  async onModuleInit(): Promise<void> {
    // `||` (not `??`) on purpose: an empty EMBEDDING_MODEL also falls back to the default.
    const modelName = process.env.EMBEDDING_MODEL || 'Xenova/all-MiniLM-L6-v2';
    console.log(`🔹 Loading embedding model: ${modelName}...`);

    // NOTE(review): double cast kept from the original; presumably it works
    // around the declared return type of pipeline() — confirm it is still needed.
    this.embedder = (await pipeline('feature-extraction', modelName)) as unknown as FeatureExtractionPipeline;

    const db = await this.mongoCore.getDb();
    const repo = new FFBProductionRepository(db);
    await repo.init();
    // Publish the repository only after init() succeeded.
    this.repo = repo;

    console.log(`✅ Embedding model loaded and repository ready.`);
  }

  /**
   * Return the initialized repository.
   *
   * @throws Error if `onModuleInit` has not completed yet.
   */
  private requireRepo(): FFBProductionRepository {
    if (!this.repo) {
      throw new Error('FFBVectorService is not initialized: repository is not ready');
    }
    return this.repo;
  }

  /**
   * Convert an FFBProduction record into a sentence for embedding.
   *
   * @throws RangeError if `record.productionDate` cannot be parsed as a date.
   */
  private recordToText(record: FFBProduction): string {
    const producedAt = new Date(record.productionDate);
    // toISOString() would throw a context-free "Invalid time value"; fail with
    // the offending value instead (same error class as before).
    if (Number.isNaN(producedAt.getTime())) {
      throw new RangeError(`Invalid productionDate: ${String(record.productionDate)}`);
    }
    return `Production on ${producedAt.toISOString()} at ${record.site} in ${record.phase} ${record.block} produced ${record.quantity} ${record.quantityUom} with a total weight of ${record.weight} ${record.weightUom}.`;
  }

  /**
   * Generate a mean-pooled, normalized embedding vector from text.
   *
   * @throws Error if the model is not loaded yet, or if the produced vector
   *   does not have `VECTOR_DIM` entries.
   */
  private async embedText(text: string): Promise<number[]> {
    const embedder = this.embedder;
    if (!embedder) {
      throw new Error('FFBVectorService is not initialized: embedding model is not loaded');
    }

    const output = await embedder(text, { pooling: 'mean', normalize: true });
    const vector = Array.from(output.data);
    if (vector.length !== this.VECTOR_DIM) {
      throw new Error(`Embedding dimension mismatch. Expected ${this.VECTOR_DIM}, got ${vector.length}`);
    }
    return vector;
  }

  /**
   * Insert a single record together with its embedding vector.
   *
   * @param record Production record to embed and store.
   * @returns Whatever `repo.create` returns for the stored document.
   */
  async insertWithVector(record: FFBProduction) {
    const repo = this.requireRepo();
    const vector = await this.embedText(this.recordToText(record));
    // Explicitly tell TypeScript this object matches the repository type.
    const data: FFBProduction & { vector: number[] } = { ...record, vector };
    return repo.create(data);
  }

  /**
   * Search for the top-k most similar records using a text query.
   *
   * @param query Free-text query; must contain at least one non-whitespace character.
   * @param k Number of results to return; must be a positive integer.
   * @param numCandidates Candidate pool size handed to the repository
   *   (default 50, the previously hard-coded value). It is raised to `k` when
   *   smaller, because a vector search cannot return more results than it has
   *   candidates (assuming the repository forwards both values to Atlas
   *   `$vectorSearch`, which rejects `limit > numCandidates`).
   * @returns The repository results with `_id` converted to a string.
   * @throws Error if the query is empty or blank.
   * @throws RangeError if `k` or `numCandidates` is not a positive integer.
   */
  async vectorSearch(query: string, k = 5, numCandidates = 50) {
    if (!query || query.trim().length === 0) {
      throw new Error('Query string cannot be empty');
    }
    if (!Number.isInteger(k) || k < 1) {
      throw new RangeError(`k must be a positive integer, got ${k}`);
    }
    if (!Number.isInteger(numCandidates) || numCandidates < 1) {
      throw new RangeError(`numCandidates must be a positive integer, got ${numCandidates}`);
    }

    const repo = this.requireRepo();

    // Step 1: Embed the query text (unmodified, so results match earlier behavior).
    const vector = await this.embedText(query);

    // Step 2: Use repository aggregation for vector search.
    const candidatePool = Math.max(numCandidates, k);
    const results = await repo.vectorSearch(vector, k, candidatePool);

    // Step 3: Return results directly (they include the full document + score).
    return results.map((r) => ({
      ...r, // all FFBProduction fields
      _id: r._id.toString(), // convert ObjectId to string if needed
      score: r.score, // similarity score
    }));
  }
}
|