// build-ffb-processed-json.ts
  1. /**
  2. * Reads FFBProductionData.json and writes FFBProductionData_processed.json
  3. * conforming to the updated Data Manual schema.
  4. *
  5. * - No MongoDB connection (no _id refs in output)
  6. * - Generates remarks via Ollama qwen3:0.6b (one per unique phaseId+blockId pair)
  7. * - vector field left as [] — to be filled at seed time
  8. *
  9. * Run: npx ts-node --transpile-only scripts/build-ffb-processed-json.ts
  10. */
  11. import * as path from 'path';
  12. import * as fs from 'fs';
  13. import axios from 'axios';
  /**
   * Base URL of the Ollama HTTP API (no trailing slash).
   * Defaults to the local daemon; set the OLLAMA_BASE_URL environment variable
   * to target another host. `||` (not `??`) is deliberate so that an empty or
   * whitespace-only variable falls back to the default.
   */
  const OLLAMA_BASE_URL = (process.env.OLLAMA_BASE_URL?.trim() || 'http://localhost:11434').replace(/\/+$/, '');

  /**
   * Ollama model used to generate remarks.
   * Defaults to qwen3:0.6b; override with the REMARK_MODEL environment variable.
   */
  const REMARK_MODEL = process.env.REMARK_MODEL?.trim() || 'qwen3:0.6b';

  /** Directory that holds the input JSON files and receives the processed output. */
  const MONGO_STUFF = path.resolve(__dirname, '../../mongo stuff');
  17. // ─── Interfaces ───────────────────────────────────────────────────────────────
  /** Shape of one entry in phaseData.json as read by this script. */
  interface RawPhase {
    /** Numeric phase identifier; FFB records refer to it through `phaseId`. */
    phaseID: number;
    phaseCode: string;
    /** Human-readable phase name, passed to the remark prompt. */
    phaseName: string;
    phaseDesc: string;
  }
  /** Shape of one entry in blockData.json as read by this script. */
  interface RawBlock {
    /** Numeric block identifier; FFB records refer to it through `blockId`. */
    blockID: number;
    /** Short block code, passed to the remark prompt. */
    blockCode: string;
    blockDesc: string;
    /** Soil description, passed to the remark prompt. */
    loc_soil_condition: string;
    numOfTreesPlanted: string | number | null;
  }
  /** Shape of one record in FFBProductionData.json as read by this script. */
  interface RawFFB {
    activityId: number;
    /** Date string; parsed with `new Date(...)` during transformation. */
    productionDate: string;
    siteId: string;
    phaseId: number;
    blockId: number;
    /** Weight as a numeric string; parsed with `parseFloat` during transformation. */
    net_weight: string;
    act_uom: string;
    no_of_bunches: number;
    qty_uom: string;
  }
  28. // ─── Remark topics — cycle through for variety across blocks ─────────────────
  /**
   * Subjects a generated remark may be about. `generateRemark` picks one by
   * index (modulo the list length) so consecutive blocks get different subjects.
   */
  const TOPICS: string[] = [
    'soil or ground conditions',
    'weather during harvesting',
    'worker performance or fatigue',
    'harvesting equipment condition',
    'pest or disease observation on fronds',
  ];
  /**
   * Upper bound for a single Ollama generate call, in milliseconds. Without it a
   * stalled daemon would hang the whole script; on timeout the call rejects.
   */
  const REMARK_TIMEOUT_MS = 120000;

  /**
   * Asks the Ollama model for a one-sentence harvest-log remark about one of the
   * TOPICS, using the block, phase and soil as prompt context.
   *
   * @param blockCode     Block code inserted into the prompt.
   * @param soilCondition Soil description; an empty value is sent as 'mineral'.
   * @param phaseName     Phase name inserted into the prompt.
   * @param topicIndex    Selects the topic; wraps around the TOPICS list.
   * @returns The trimmed remark with any `<think>` section removed. Never empty.
   * @throws If the HTTP request fails or times out, or if the model returns no
   *         usable text — so callers can substitute their own fallback remark.
   */
  async function generateRemark(
    blockCode: string,
    soilCondition: string,
    phaseName: string,
    topicIndex: number,
  ): Promise<string> {
    // Double modulo keeps the index in range for negative inputs as well.
    const topicCount = TOPICS.length;
    const topic = TOPICS[((topicIndex % topicCount) + topicCount) % topicCount];

    const prompt = `You are an oil palm plantation field supervisor writing a brief harvest log entry.
Write ONE sentence (max 25 words) about ${topic} observed during FFB harvesting.
Context: Block ${blockCode}, Phase: ${phaseName}, Soil: ${soilCondition || 'mineral'}.
Reply with ONLY the sentence. No quotes, no labels. /no_think`;

    const res = await axios.post(
      `${OLLAMA_BASE_URL}/api/generate`,
      { model: REMARK_MODEL, prompt, stream: false },
      { timeout: REMARK_TIMEOUT_MS },
    );

    const raw: unknown = res.data?.response;
    if (typeof raw !== 'string') {
      throw new Error('Ollama response did not contain a text "response" field');
    }

    // qwen3 may emit <think>...</think> despite /no_think. Strip closed blocks
    // first, then anything after an unterminated <think> (truncated output).
    const remark = raw
      .replace(/<think>[\s\S]*?<\/think>/gi, '')
      .replace(/<think>[\s\S]*$/i, '')
      .trim();

    if (remark === '') {
      throw new Error('Ollama returned an empty remark');
    }
    return remark;
  }
  56. // ─── Main ─────────────────────────────────────────────────────────────────────
  /**
   * Builds FFBProductionData_processed.json from the raw phase, block and FFB
   * JSON files in MONGO_STUFF.
   *
   * Steps:
   *  1. Load the three source files and index phases/blocks by id.
   *  2. Generate one remark per unique (phaseId, blockId) pair, sequentially;
   *     a failed generation falls back to a generic remark.
   *  3. Transform every FFB record into the output shape. Records whose
   *     productionDate cannot be parsed are skipped and counted instead of
   *     aborting the run after the remarks were already generated.
   *  4. Write the output file and print a summary plus a two-record preview.
   *
   * @throws If a source file cannot be read or parsed, or the output cannot be written.
   */
  async function main(): Promise<void> {
    console.log('\n═══════════════════════════════════════════════════════');
    console.log(' Build FFBProductionData_processed.json');
    console.log('═══════════════════════════════════════════════════════\n');

    // Load source files
    const rawPhases: RawPhase[] = JSON.parse(fs.readFileSync(path.join(MONGO_STUFF, 'phaseData.json'), 'utf-8'));
    const rawBlocks: RawBlock[] = JSON.parse(fs.readFileSync(path.join(MONGO_STUFF, 'blockData.json'), 'utf-8'));
    const rawFFBs: RawFFB[] = JSON.parse(fs.readFileSync(path.join(MONGO_STUFF, 'FFBProductionData.json'), 'utf-8'));
    console.log(`📂 Loaded: ${rawPhases.length} phases, ${rawBlocks.length} blocks, ${rawFFBs.length} FFB records\n`);

    // Lookup maps
    const phaseById = new Map<number, RawPhase>(rawPhases.map(p => [p.phaseID, p]));
    const blockById = new Map<number, RawBlock>(rawBlocks.map(b => [b.blockID, b]));

    // Collect unique (phaseId, blockId) pairs
    const uniquePairs = new Map<string, { phaseId: number; blockId: number }>();
    for (const r of rawFFBs) {
      const key = `${r.phaseId}:${r.blockId}`;
      if (!uniquePairs.has(key)) uniquePairs.set(key, { phaseId: r.phaseId, blockId: r.blockId });
    }
    console.log(`🔑 Unique (phaseId, blockId) pairs: ${uniquePairs.size}`);
    console.log(`🤖 Generating one remark per pair via ${REMARK_MODEL}...\n`);

    // Generate one remark per unique pair. Requests run one at a time.
    const remarkCache = new Map<string, string>();
    let pairIndex = 0;
    for (const [key, { phaseId, blockId }] of uniquePairs) {
      const phase = phaseById.get(phaseId);
      const block = blockById.get(blockId);
      // Fall back to synthetic labels when an FFB record references an unknown id.
      const blockCode = block?.blockCode ?? `BLK-${blockId}`;
      const soilCondition = block?.loc_soil_condition ?? '';
      const phaseName = phase?.phaseName ?? `Phase ${phaseId}`;
      process.stdout.write(` [${pairIndex + 1}/${uniquePairs.size}] ${blockCode} (${phaseName})...`);
      try {
        const remark = await generateRemark(blockCode, soilCondition, phaseName, pairIndex);
        remarkCache.set(key, remark);
        console.log(` ✅`);
      } catch (err: unknown) {
        // Best-effort: keep going with a generic remark, but say why it failed.
        const reason = err instanceof Error ? err.message : String(err);
        console.log(` ⚠️ fallback (${reason})`);
        remarkCache.set(key, 'Field conditions were normal during harvesting operations.');
      }
      pairIndex++;
    }

    console.log('\n🔄 Transforming all FFB records...');

    // Transform all records
    let skipped = 0;
    const output: object[] = [];
    for (const raw of rawFFBs) {
      // toISOString() throws RangeError on an invalid date, so validate first.
      const productionDate = new Date(raw.productionDate);
      if (Number.isNaN(productionDate.getTime())) {
        skipped++;
        console.warn(` ⚠️ Skipping activityId ${raw.activityId}: invalid productionDate ${JSON.stringify(raw.productionDate)}`);
        continue;
      }

      const key = `${raw.phaseId}:${raw.blockId}`;
      const remark = remarkCache.get(key) ?? 'No field observations recorded.';
      output.push({
        activityId: raw.activityId,
        productionDate: productionDate.toISOString(),
        site: {
          siteId: raw.siteId,
        },
        phase: {
          phaseId: raw.phaseId,
        },
        block: {
          blockId: raw.blockId,
        },
        // Unparseable weights become 0.
        weight: parseFloat(raw.net_weight) || 0,
        weightUom: raw.act_uom,
        quantity: raw.no_of_bunches,
        quantityUom: raw.qty_uom,
        remarks: remark,
        // Left empty here; filled at seed time.
        vector: [],
      });
    }

    const outPath = path.join(MONGO_STUFF, 'FFBProductionData_processed.json');
    fs.writeFileSync(outPath, JSON.stringify(output, null, 2), 'utf-8');

    console.log('\n═══════════════════════════════════════════════════════');
    console.log(' DONE');
    console.log(` Records written : ${output.length}`);
    console.log(` Records skipped : ${skipped}`);
    console.log(` Output file : mongo stuff/FFBProductionData_processed.json`);
    console.log('═══════════════════════════════════════════════════════\n');

    // Print first 2 records as a preview
    console.log('── Sample (first 2 records) ──────────────────────────\n');
    console.log(JSON.stringify(output.slice(0, 2), null, 2));
  }
  // Entry point: run the build; on any unhandled failure print it and exit with status 1.
  main().catch(function onFatal(err) {
    // Prefer the error's message; fall back to the raw value when it has none.
    const detail = err.message || err;
    console.error('\n❌ Failed:', detail);
    process.exit(1);
  });