import { Injectable, OnModuleInit, Inject, forwardRef, Logger } from '@nestjs/common';
import { MongoCoreService } from 'src/mongo/mongo-core.service';
import { FFBProductionRepository } from 'src/FFB/mongo-ffb-production.repository';
import { FFBProduction } from '../ffb-production.schema';
import { GoogleGenerativeAIEmbeddings } from '@langchain/google-genai';
import { SiteService } from 'src/site/services/site.service';
import { PhaseService } from 'src/site/services/phase.service';
import { BlockService } from 'src/site/services/block.service';
- @Injectable()
- export class FFBVectorService implements OnModuleInit {
- private repo: FFBProductionRepository;
- private embeddings: GoogleGenerativeAIEmbeddings;
- constructor(
- private readonly mongoCore: MongoCoreService,
- @Inject(forwardRef(() => SiteService))
- private readonly siteService: SiteService,
- @Inject(forwardRef(() => PhaseService))
- private readonly phaseService: PhaseService,
- @Inject(forwardRef(() => BlockService))
- private readonly blockService: BlockService,
- ) { }
- async onModuleInit() {
- // Initialize Mongo repository
- const db = await this.mongoCore.getDb();
- this.repo = new FFBProductionRepository(db);
- await this.repo.init();
- // Initialize LangChain embeddings
- this.embeddings = new GoogleGenerativeAIEmbeddings({
- apiKey: process.env.GOOGLE_API_KEY,
- modelName: process.env.EMBEDDING_MODEL || 'text-embedding-004', // Modern model
- });
- console.log('✅ FFB Vector Service initialized with LangChain embeddings.');
- }
- /** Get a string representation of the schema based on a sample document */
- async getSchemaContext(): Promise<string> {
- const sample = await this.repo.findOne({});
- if (!sample) return "No data available.";
- const keys = Object.keys(sample);
- // Simple naive schema inference
- return `Fields available: ${keys.join(', ')}. \nSample record: ${JSON.stringify(sample)}`;
- }
- /** Get distinct values for a field */
- async getDistinct(field: string, filter: Record<string, any> = {}): Promise<any[]> {
- return this.repo.distinct(field, filter);
- }
- /** Convert a record to a string suitable for embedding with enriched context */
- private async recordToTextEnriched(record: FFBProduction): Promise<string> {
- const siteId = record.site.id;
- const phaseId = record.phase.id;
- const blockId = record.block.id;
- // Fetch descriptions for enrichment
- const [site, phase, block] = await Promise.all([
- this.siteService.findById(siteId),
- this.phaseService.findById(phaseId),
- this.blockService.findById(blockId),
- ]);
- let text = `FFB Production Record:
- Date: ${new Date(record.productionDate).toLocaleDateString()}
- Location: Site "${record.site.name}", Phase "${record.phase.name}", Block "${record.block.name}".
- Metrics: Produced ${record.quantity} ${record.quantityUom} with a total weight of ${record.weight} ${record.weightUom}.
- Remarks: ${record.remarks || 'No remarks provided.'}
- Issues: ${record.issues || 'No issues reported.'}
- `;
- if (site?.description) text += `Site Context: ${site.description}\n`;
- if (phase?.description) text += `Phase Context: ${phase.description}\n`;
- if (block?.description) text += `Block Context: ${block.description}\n`;
- return text.trim();
- }
- /** Insert a single record with embedding vector */
- async insertWithVector(record: FFBProduction) {
- const text = await this.recordToTextEnriched(record);
- // Use LangChain embeddings with RETRIEVAL_DOCUMENT task type
- const vector = await this.embeddings.embedDocuments([text]);
- const data: FFBProduction & { vector: number[] } = { ...record, vector: vector[0] };
- return this.repo.create(data);
- }
- /** Search for top-k similar records using a text query */
- async vectorSearch(query: string, k = 5, filter: Record<string, any> = {}) {
- if (!query) throw new Error('Query string cannot be empty');
- // Use LangChain embeddings with RETRIEVAL_QUERY task type (internally handled or explicit)
- // LangChain embedQuery uses query task type by default for Google Generative AI
- const vector = await this.embeddings.embedQuery(query);
- const results = await this.repo.vectorSearch(vector, k, 50, filter);
- return results.map((r) => ({
- ...r,
- _id: r._id.toString(),
- score: r.score,
- }));
- }
- /* For traditional operation that requires arithematic operations. */
- async aggregate(pipeline: Array<Record<string, any>>): Promise<any[]> {
- return this.repo.aggregate(pipeline);
- }
- }