/**
 * Reads FFBProductionData.json and writes FFBProductionData_processed.json
 * conforming to the updated Data Manual schema.
 *
 * - No MongoDB connection (no _id refs in output)
 * - Generates remarks via Ollama qwen3:0.6b (one per unique phaseId+blockId pair)
 * - vector field left as [] — to be filled at seed time
 *
 * Run: npx ts-node --transpile-only scripts/build-ffb-processed-json.ts
 */
import * as path from 'path';
import * as fs from 'fs';
import axios from 'axios';

const OLLAMA_BASE_URL = 'http://localhost:11434';
const REMARK_MODEL = 'qwen3:0.6b';
const MONGO_STUFF = path.resolve(__dirname, '../../mongo stuff');

/** Remark used when the Ollama request for a (phase, block) pair fails. */
const GENERATION_FALLBACK_REMARK = 'Field conditions were normal during harvesting operations.';
/** Remark used when no cached remark exists for a record's (phase, block) pair. */
const MISSING_REMARK = 'No field observations recorded.';
/** Matches a complete <think>…</think> block, tags included. */
const THINK_BLOCK = /<think>[\s\S]*?<\/think>/gi;

// ─── Interfaces ───────────────────────────────────────────────────────────────
interface RawPhase {
  phaseID: number;
  phaseCode: string;
  phaseName: string;
  phaseDesc: string;
}

interface RawBlock {
  blockID: number;
  blockCode: string;
  blockDesc: string;
  loc_soil_condition: string;
  numOfTreesPlanted: string | number | null;
}

interface RawFFB {
  activityId: number;
  productionDate: string;
  siteId: string;
  phaseId: number;
  blockId: number;
  net_weight: string;
  act_uom: string;
  no_of_bunches: number;
  qty_uom: string;
}

/** Identifies one (phase, block) combination that receives its own remark. */
interface PhaseBlockPair {
  phaseId: number;
  blockId: number;
}

/** Shape of one record written to FFBProductionData_processed.json. */
interface ProcessedFFB {
  activityId: number;
  productionDate: string;
  site: { siteId: string };
  phase: { phaseId: number };
  block: { blockId: number };
  weight: number;
  weightUom: string;
  quantity: number;
  quantityUom: string;
  remarks: string;
  vector: number[];
}

// ─── Remark topics — cycle through for variety across blocks ─────────────────
const TOPICS = [
  'soil or ground conditions',
  'weather during harvesting',
  'worker performance or fatigue',
  'harvesting equipment condition',
  'pest or disease observation on fronds',
];

/**
 * Asks the Ollama model for a one-sentence harvest-log remark.
 *
 * @param blockCode     Block code inserted into the prompt.
 * @param soilCondition Soil description; an empty string is replaced by 'mineral'.
 * @param phaseName     Phase name inserted into the prompt.
 * @param topicIndex    Index into TOPICS, wrapped with modulo so callers can pass a running counter.
 * @returns The model's reply with any <think>…</think> block removed and whitespace trimmed.
 * @throws If the HTTP request fails or the cleaned reply is empty, so the caller can substitute a fallback.
 */
async function generateRemark(
  blockCode: string,
  soilCondition: string,
  phaseName: string,
  topicIndex: number,
): Promise<string> {
  const topic = TOPICS[topicIndex % TOPICS.length];

  // Built by concatenation so the exact bytes of the prompt (including the
  // space and line break before /no_think) are explicit.
  const prompt =
    'You are an oil palm plantation field supervisor writing a brief harvest log entry. ' +
    `Write ONE sentence (max 25 words) about ${topic} observed during FFB harvesting. ` +
    `Context: Block ${blockCode}, Phase: ${phaseName}, Soil: ${soilCondition || 'mineral'}. ` +
    'Reply with ONLY the sentence. No quotes, no labels. \n' +
    '/no_think';

  const res = await axios.post<{ response?: string }>(`${OLLAMA_BASE_URL}/api/generate`, {
    model: REMARK_MODEL,
    prompt,
    stream: false,
  });

  // Strip any residual <think>…</think> block qwen3 may emit despite /no_think.
  // The pattern is anchored on the opening tag so text preceding the block is kept.
  const raw: string = res.data.response ?? '';
  const remark = raw.replace(THINK_BLOCK, '').trim();
  if (remark === '') {
    throw new Error('model returned an empty remark');
  }
  return remark;
}

/**
 * Reads and parses a JSON file from the MONGO_STUFF directory.
 * No schema validation is performed; the requested type is trusted.
 */
function loadJson<T>(fileName: string): T {
  return JSON.parse(fs.readFileSync(path.join(MONGO_STUFF, fileName), 'utf-8')) as T;
}

// ─── Main ─────────────────────────────────────────────────────────────────────
/**
 * Loads the phase, block and FFB source files, generates one remark per unique
 * (phaseId, blockId) pair, transforms every FFB record into the processed
 * shape and writes the result to FFBProductionData_processed.json.
 *
 * Records whose productionDate cannot be parsed are skipped (with a warning)
 * instead of aborting the whole run.
 */
async function main(): Promise<void> {
  const RULE = '═══════════════════════════════════════════════════════';

  console.log(`\n${RULE}`);
  console.log(' Build FFBProductionData_processed.json');
  console.log(`${RULE}\n`);

  // Load source files
  const rawPhases = loadJson<RawPhase[]>('phaseData.json');
  const rawBlocks = loadJson<RawBlock[]>('blockData.json');
  const rawFFBs = loadJson<RawFFB[]>('FFBProductionData.json');
  console.log(`📂 Loaded: ${rawPhases.length} phases, ${rawBlocks.length} blocks, ${rawFFBs.length} FFB records\n`);

  // Lookup maps
  const phaseById = new Map<number, RawPhase>(rawPhases.map(p => [p.phaseID, p]));
  const blockById = new Map<number, RawBlock>(rawBlocks.map(b => [b.blockID, b]));

  // Collect unique (phaseId, blockId) pairs
  const uniquePairs = new Map<string, PhaseBlockPair>();
  for (const r of rawFFBs) {
    const key = `${r.phaseId}:${r.blockId}`;
    if (!uniquePairs.has(key)) {
      uniquePairs.set(key, { phaseId: r.phaseId, blockId: r.blockId });
    }
  }
  console.log(`🔑 Unique (phaseId, blockId) pairs: ${uniquePairs.size}`);
  console.log(`🤖 Generating one remark per pair via ${REMARK_MODEL}...\n`);

  // Generate one remark per unique pair. Requests are issued one at a time.
  const remarkCache = new Map<string, string>();
  let pairIndex = 0;
  for (const [key, { phaseId, blockId }] of uniquePairs) {
    const phase = phaseById.get(phaseId);
    const block = blockById.get(blockId);
    const blockCode = block?.blockCode ?? `BLK-${blockId}`;
    const soilCondition = block?.loc_soil_condition ?? '';
    const phaseName = phase?.phaseName ?? `Phase ${phaseId}`;

    process.stdout.write(` [${pairIndex + 1}/${uniquePairs.size}] ${blockCode} (${phaseName})...`);
    try {
      const remark = await generateRemark(blockCode, soilCondition, phaseName, pairIndex);
      remarkCache.set(key, remark);
      console.log(` ✅`);
    } catch (err: unknown) {
      // Best-effort: a failed generation must not stop the build, but say why it failed.
      const reason = err instanceof Error ? err.message : String(err);
      console.log(` ⚠️ fallback (${reason})`);
      remarkCache.set(key, GENERATION_FALLBACK_REMARK);
    }
    pairIndex++;
  }

  console.log('\n🔄 Transforming all FFB records...');

  // Transform all records
  let skipped = 0;
  const output: ProcessedFFB[] = [];
  for (const raw of rawFFBs) {
    // toISOString() throws RangeError on an invalid Date, so validate first.
    const productionDate = new Date(raw.productionDate);
    if (Number.isNaN(productionDate.getTime())) {
      skipped++;
      console.warn(` ⚠️ skipping activityId ${raw.activityId}: invalid productionDate "${raw.productionDate}"`);
      continue;
    }

    const key = `${raw.phaseId}:${raw.blockId}`;
    const remark = remarkCache.get(key) ?? MISSING_REMARK;

    output.push({
      activityId: raw.activityId,
      productionDate: productionDate.toISOString(),
      site: {
        siteId: raw.siteId,
      },
      phase: {
        phaseId: raw.phaseId,
      },
      block: {
        blockId: raw.blockId,
      },
      // Unparseable weights (NaN) are written as 0.
      weight: parseFloat(raw.net_weight) || 0,
      weightUom: raw.act_uom,
      quantity: raw.no_of_bunches,
      quantityUom: raw.qty_uom,
      remarks: remark,
      vector: [],
    });
  }

  const outPath = path.join(MONGO_STUFF, 'FFBProductionData_processed.json');
  fs.writeFileSync(outPath, JSON.stringify(output, null, 2), 'utf-8');

  console.log(`\n${RULE}`);
  console.log(' DONE');
  console.log(` Records written : ${output.length}`);
  console.log(` Records skipped : ${skipped}`);
  console.log(` Output file : mongo stuff/FFBProductionData_processed.json`);
  console.log(`${RULE}\n`);

  // Print first 2 records as a preview
  console.log('── Sample (first 2 records) ──────────────────────────\n');
  console.log(JSON.stringify(output.slice(0, 2), null, 2));
}

main().catch((err: unknown) => {
  console.error('\n❌ Failed:', err instanceof Error && err.message ? err.message : err);
  process.exit(1);
});