/**
 * Seeds FFBProductionData_processed.json into the 'FFB Production' collection.
 *
 * For each record:
 *   - Verifies Phase (by locId) and Block (by locId) exist in Atlas
 *   - Skips the record if either is missing
 *   - Resolves their MongoDB _id and embeds into phase.id / block.id
 *   - Generates an embedding vector via Ollama (bge-large by default, 1024-dim)
 *   - Inserts into 'FFB Production'
 *
 * NOTE: Site collection not yet seeded — site.siteId kept as plain string for now.
 *
 * Run: npx ts-node --transpile-only scripts/seed-ffb-production.ts
 */

import * as dotenv from 'dotenv';
import * as path from 'path';
import * as fs from 'fs';
import { MongoClient, ObjectId, Db } from 'mongodb';
import { OllamaEmbeddings } from '@langchain/ollama';

dotenv.config({ path: path.resolve(__dirname, '../.env') });

// Presence of the two Mongo settings is checked at the top of seed(), so no
// non-null assertion is needed here.
const MONGO_URI = process.env.MONGO_URI;
const MONGO_DB_NAME = process.env.MONGO_DB_NAME;
const EMBEDDING_MODEL = process.env.EMBEDDING_MODEL || 'bge-large';
// Overridable via the environment like EMBEDDING_MODEL; defaults to the local Ollama daemon.
const OLLAMA_BASE_URL = process.env.OLLAMA_BASE_URL || 'http://localhost:11434';
const BATCH_SIZE = 20;
const MONGO_STUFF = path.resolve(__dirname, '../../mongo stuff');

// ─── Processed record shape (matches FFBProductionData_processed.json) ────────

interface ProcessedFFB {
  activityId: number;
  productionDate: string;
  site: { siteId: string };
  phase: { phaseId: number };
  block: { blockId: number };
  weight: number;
  weightUom: string;
  quantity: number;
  quantityUom: string;
  remarks: string;
  vector: number[];
}

/** A source record paired with the ObjectIds resolved for its Phase and Block. */
interface ResolvedFFB {
  record: ProcessedFFB;
  phaseId: ObjectId;
  blockId: ObjectId;
}

// ─── Retry wrapper for Ollama (handles transient connection drops) ────────────

/**
 * Embeds `texts`, retrying when the embedder throws.
 *
 * @param embedder Ollama embeddings client.
 * @param texts    Documents to embed, one vector is expected per entry.
 * @param retries  Maximum number of attempts (not additional retries).
 * @param delayMs  Fixed pause between attempts, in milliseconds.
 * @returns The vectors returned by `embedder.embedDocuments`.
 * @throws The last embedder error once `retries` attempts have failed, or a
 *         generic error when `retries` is less than 1 and no attempt is made.
 */
async function embedWithRetry(
  embedder: OllamaEmbeddings,
  texts: string[],
  retries = 3,
  delayMs = 3000,
): Promise<number[][]> {
  for (let attempt = 1; attempt <= retries; attempt++) {
    try {
      return await embedder.embedDocuments(texts);
    } catch (err: unknown) {
      if (attempt === retries) throw err;
      const message = err instanceof Error ? err.message : String(err);
      console.log(`\n ⚠️ Ollama error (attempt ${attempt}/${retries}): ${message} — retrying in ${delayMs / 1000}s...`);
      await new Promise<void>((resolve) => setTimeout(resolve, delayMs));
    }
  }
  // Only reachable when the loop body never ran (retries < 1).
  throw new Error('embedWithRetry exhausted');
}

// ─── Embedding text builder ───────────────────────────────────────────────────

/**
 * Builds the text that is sent to the embedding model for one record.
 *
 * @param r Record to describe.
 * @returns Trimmed description containing date, site, phase, block, harvest
 *          quantity and weight, plus the remarks when they are non-empty.
 * @throws RangeError if `r.productionDate` is not parseable as a date
 *         (`toISOString` rejects invalid dates).
 */
function buildEmbeddingText(r: ProcessedFFB): string {
  // Keep only the YYYY-MM-DD part of the ISO timestamp.
  const date = new Date(r.productionDate).toISOString().split('T')[0];
  // NOTE(review): the base text is a single line while the remarks suffix starts
  // with a newline — confirm this is the intended layout for the embedding input.
  let text = `FFB Production Entry Date: ${date} Site: ${r.site.siteId} | Phase ID: ${r.phase.phaseId} | Block ID: ${r.block.blockId} Harvest: ${r.quantity} ${r.quantityUom}, Weight: ${r.weight} ${r.weightUom}`;
  if (r.remarks) text += `\n Remarks: ${r.remarks}`;
  return text.trim();
}

// ─── Reference map loader ─────────────────────────────────────────────────────

/** Loads every document of `collectionName` and maps its `locId` to its `_id`. */
async function loadLocIdMap(db: Db, collectionName: string): Promise<Map<number, ObjectId>> {
  const docs = await db
    .collection(collectionName)
    .find({}, { projection: { _id: 1, locId: 1 } })
    .toArray();
  return new Map<number, ObjectId>(
    docs.map((d): [number, ObjectId] => [d.locId as number, d._id as ObjectId]),
  );
}

// ─── Main ─────────────────────────────────────────────────────────────────────

/**
 * Runs the seed: loads the processed JSON, resolves Phase/Block references,
 * clears the 'FFB Production' collection and re-inserts the records in batches
 * of BATCH_SIZE with freshly generated embeddings.
 *
 * Records whose Phase or Block locId is not present in the database are
 * skipped and counted exactly once in the final summary.
 *
 * @throws If required env vars are missing, the JSON file is not an array, the
 *         embedder returns a different number of vectors than texts, or any
 *         MongoDB / Ollama call fails. The client is closed in every case once
 *         it has been created.
 */
async function seed(): Promise<void> {
  if (!MONGO_URI) throw new Error('MONGO_URI not set in .env');
  if (!MONGO_DB_NAME) throw new Error('MONGO_DB_NAME not set in .env');

  const rule = '═══════════════════════════════════════════════════════';

  console.log(`\n${rule}`);
  console.log(' Seed: FFB Production');
  console.log(` DB: ${MONGO_DB_NAME}`);
  console.log(` Model: ${EMBEDDING_MODEL} @ ${OLLAMA_BASE_URL}`);
  console.log(`${rule}\n`);

  // Load processed JSON
  console.log('📂 Loading FFBProductionData_processed.json...');
  const parsed: unknown = JSON.parse(
    fs.readFileSync(path.join(MONGO_STUFF, 'FFBProductionData_processed.json'), 'utf-8'),
  );
  if (!Array.isArray(parsed)) {
    throw new Error('FFBProductionData_processed.json must contain a JSON array of records');
  }
  // Element shape is trusted to match ProcessedFFB; only the container is checked.
  const records = parsed as ProcessedFFB[];
  console.log(` ${records.length} records loaded.\n`);

  // Connect to Atlas
  console.log('🔗 Connecting to MongoDB Atlas...');
  const client = new MongoClient(MONGO_URI);

  // Everything that touches the client lives inside the try so the connection
  // is released even when connecting or loading the reference maps fails.
  try {
    await client.connect();
    const db: Db = client.db(MONGO_DB_NAME);
    console.log(' Connected.\n');

    // Load Phase and Block reference maps from Atlas (locId → _id)
    console.log('📋 Loading Phase and Block reference maps from Atlas...');
    const phaseLocIdToId = await loadLocIdMap(db, 'Phase');
    const blockLocIdToId = await loadLocIdMap(db, 'Block');
    console.log(` Phases in Atlas : ${phaseLocIdToId.size}`);
    console.log(` Blocks in Atlas : ${blockLocIdToId.size}\n`);

    // Initialize embedder
    console.log('🤖 Initializing Ollama embedder...');
    const embedder = new OllamaEmbeddings({ model: EMBEDDING_MODEL, baseUrl: OLLAMA_BASE_URL });
    console.log(' Ready.\n');

    // Wipe existing FFB Production collection
    console.log('🗑️ Clearing FFB Production collection...');
    const target = db.collection('FFB Production');
    await target.deleteMany({});
    console.log(' Cleared.\n');

    const totalBatches = Math.ceil(records.length / BATCH_SIZE);
    let inserted = 0;
    let skipped = 0;

    console.log(`📦 Processing ${records.length} records in ${totalBatches} batches of ${BATCH_SIZE}...\n`);

    for (let i = 0; i < records.length; i += BATCH_SIZE) {
      const batch = records.slice(i, i + BATCH_SIZE);
      const batchNum = Math.floor(i / BATCH_SIZE) + 1;
      process.stdout.write(` [${batchNum}/${totalBatches}] validating...`);

      // Validate Phase and Block existence; resolve ObjectIds
      const valid: ResolvedFFB[] = [];
      for (const r of batch) {
        const phaseObjectId = phaseLocIdToId.get(r.phase.phaseId);
        const blockObjectId = blockLocIdToId.get(r.block.blockId);
        if (!phaseObjectId || !blockObjectId) continue;
        valid.push({ record: r, phaseId: phaseObjectId, blockId: blockObjectId });
      }

      // Count skips once per batch, here, so the summary is not inflated.
      const skippedInBatch = batch.length - valid.length;
      skipped += skippedInBatch;

      if (valid.length === 0) {
        console.log(` all ${skippedInBatch} skipped (missing Phase/Block).`);
        continue;
      }

      // Generate embeddings for the valid batch
      process.stdout.write(` embedding ${valid.length}...`);
      const texts = valid.map((v) => buildEmbeddingText(v.record));
      const vectors = await embedWithRetry(embedder, texts);
      if (vectors.length !== texts.length) {
        // Refuse to insert documents that would end up without a vector.
        throw new Error(`Ollama returned ${vectors.length} embeddings for ${texts.length} texts`);
      }

      // Build final documents with resolved ObjectIds
      const docs = valid.map((v, idx) => ({
        activityId: v.record.activityId,
        productionDate: new Date(v.record.productionDate),
        site: {
          siteId: v.record.site.siteId,
        },
        phase: {
          id: v.phaseId, // ObjectId ref to Phase._id
          phaseId: v.record.phase.phaseId,
        },
        block: {
          id: v.blockId, // ObjectId ref to Block._id
          blockId: v.record.block.blockId,
        },
        weight: v.record.weight,
        weightUom: v.record.weightUom,
        quantity: v.record.quantity,
        quantityUom: v.record.quantityUom,
        remarks: v.record.remarks,
        vector: vectors[idx],
      }));

      await target.insertMany(docs);
      inserted += docs.length;

      const skipNote = skippedInBatch > 0 ? ` (${skippedInBatch} skipped)` : '';
      console.log(` ✅ ${docs.length} inserted.${skipNote}`);
    }

    console.log(`\n${rule}`);
    console.log(' SEED COMPLETE');
    console.log(` Inserted : ${inserted}`);
    console.log(` Skipped : ${skipped} (Phase or Block not found in Atlas)`);
    console.log(`${rule}\n`);
  } finally {
    await client.close();
  }
}

seed().catch((err: unknown) => {
  // Prefer the message of a real Error; otherwise print whatever was thrown.
  const detail = err instanceof Error && err.message ? err.message : err;
  console.error('\n❌ Seed failed:', detail);
  process.exit(1);
});