// ffb-vector.service.ts
  1. import { Injectable, OnModuleInit } from '@nestjs/common';
  2. import { pipeline, FeatureExtractionPipeline } from '@xenova/transformers';
  3. import { MongoCoreService } from 'src/mongo/mongo-core.service';
  4. import { FFBProductionRepository } from 'src/mongo/mongo-ffb-production.repository';
  5. import { FFBProduction } from './ffb-production.schema';
  /**
   * Turns FFB production records into text embeddings and stores / queries
   * them through `FFBProductionRepository`.
   *
   * The embedding pipeline and the repository are created in `onModuleInit`.
   * Every other method throws a descriptive `Error` when it is called before
   * that hook has finished, instead of failing on an undefined field.
   */
  @Injectable()
  export class FFBVectorService implements OnModuleInit {
    // Both fields are populated by onModuleInit(), hence `| undefined`.
    private embedder: FeatureExtractionPipeline | undefined;
    private repo: FFBProductionRepository | undefined;
    private readonly VECTOR_DIM = 384; // must match your index

    /** Smallest candidate pool ever requested; this is the value that used to be hard-coded. */
    private readonly MIN_NUM_CANDIDATES = 50;
    /** Candidates requested per returned result (k = 5 -> 50, i.e. the previous default). */
    private readonly CANDIDATES_PER_RESULT = 10;
    /**
     * Soft ceiling for the scaled candidate pool.
     * NOTE(review): 10000 is the documented `numCandidates` maximum of MongoDB
     * Atlas `$vectorSearch`; this assumes the repository forwards to it — confirm.
     */
    private readonly MAX_NUM_CANDIDATES = 10000;

    constructor(private readonly mongoCore: MongoCoreService) { }

    /** Initialize model and repository at module startup */
    async onModuleInit(): Promise<void> {
      // `||` (not `??`) on purpose: an empty EMBEDDING_MODEL falls back to the default too.
      const modelName = process.env.EMBEDDING_MODEL || 'Xenova/all-MiniLM-L6-v2';
      console.log(`🔹 Loading embedding model: ${modelName}...`);
      // NOTE(review): double assertion kept from the original; it may be unnecessary
      // if the installed library version types `pipeline` per task — confirm.
      this.embedder = (await pipeline('feature-extraction', modelName)) as unknown as FeatureExtractionPipeline;

      const db = await this.mongoCore.getDb();
      this.repo = new FFBProductionRepository(db);
      await this.repo.init();
      console.log(`✅ Embedding model loaded and repository ready.`);
    }

    /**
     * Returns the loaded embedding pipeline.
     * @throws Error when `onModuleInit` has not loaded the model yet.
     */
    private requireEmbedder(): FeatureExtractionPipeline {
      if (!this.embedder) {
        throw new Error('FFBVectorService is not initialized: embedding model has not been loaded');
      }
      return this.embedder;
    }

    /**
     * Returns the initialized repository.
     * @throws Error when `onModuleInit` has not created the repository yet.
     */
    private requireRepo(): FFBProductionRepository {
      if (!this.repo) {
        throw new Error('FFBVectorService is not initialized: repository is not ready');
      }
      return this.repo;
    }

    /**
     * Size of the candidate pool to request for `k` results.
     *
     * Scales with `k` so the pool is never smaller than the number of results
     * asked for, never drops below `MIN_NUM_CANDIDATES`, and only exceeds
     * `MAX_NUM_CANDIDATES` when `k` itself does.
     */
    private numCandidatesFor(k: number): number {
      const scaled = Math.max(k * this.CANDIDATES_PER_RESULT, this.MIN_NUM_CANDIDATES);
      return Math.max(k, Math.min(scaled, this.MAX_NUM_CANDIDATES));
    }

    /**
     * Convert an FFBProduction record into text for embedding
     * @throws RangeError when `record.productionDate` cannot be parsed as a date.
     */
    private recordToText(record: FFBProduction): string {
      const producedOn = new Date(record.productionDate);
      // toISOString() would throw a bare "Invalid time value"; name the field instead.
      if (Number.isNaN(producedOn.getTime())) {
        throw new RangeError(`Invalid productionDate: ${String(record.productionDate)}`);
      }
      return `Production on ${producedOn.toISOString()} at ${record.site} in ${record.phase} ${record.block} produced ${record.quantity} ${record.quantityUom} with a total weight of ${record.weight} ${record.weightUom}.`;
    }

    /**
     * Generate embedding vector from text
     * @throws Error when the service is not initialized, or when the model
     *   returns a vector whose length differs from `VECTOR_DIM`.
     */
    private async embedText(text: string): Promise<number[]> {
      const embedder = this.requireEmbedder();
      const output = await embedder(text, { pooling: 'mean', normalize: true });
      const vector = Array.from(output.data);
      if (vector.length !== this.VECTOR_DIM) {
        throw new Error(`Embedding dimension mismatch. Expected ${this.VECTOR_DIM}, got ${vector.length}`);
      }
      return vector;
    }

    /**
     * Insert a single record with embedding vector
     * @param record Production record to describe as text, embed and store.
     * @returns Whatever `FFBProductionRepository.create` returns.
     * @throws Error when the service is not initialized or embedding fails.
     */
    async insertWithVector(record: FFBProduction) {
      // Fail before doing the embedding work if the repository is not ready.
      const repo = this.requireRepo();
      const text = this.recordToText(record);
      const vector = await this.embedText(text);
      // Explicitly tell TypeScript this object matches the repository type
      const data: FFBProduction & { vector: number[] } = { ...record, vector };
      return repo.create(data);
    }

    /**
     * Search for top-k similar records using a text query
     * @param query Free-text query; must contain at least one non-whitespace character.
     * @param k Number of results to return; must be a positive integer (default 5).
     * @returns The repository's results with `_id` converted to a string.
     * @throws Error when `query` is empty or blank, when `k` is not a positive
     *   integer, or when the service is not initialized.
     */
    async vectorSearch(query: string, k = 5) {
      if (typeof query !== 'string' || query.trim().length === 0) {
        throw new Error('Query string cannot be empty');
      }
      if (!Number.isInteger(k) || k < 1) {
        throw new Error(`k must be a positive integer, got ${k}`);
      }
      const repo = this.requireRepo();

      // Step 1: Embed the query text
      const vector = await this.embedText(query);

      // Step 2: Use repository aggregation for vector search.
      // The pool grows with k; a fixed 50 was smaller than k for any k > 50.
      const results = await repo.vectorSearch(vector, k, this.numCandidatesFor(k));

      // Step 3: Return results directly (they now include the full document + score)
      return results.map(r => ({
        ...r, // all FFBProduction fields
        _id: r._id.toString(), // convert ObjectId to string if needed
        score: r.score // similarity score
      }));
    }
  }