sample-ffb-processed.ts 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. /**
  2. * Generates a sample of 3 processed FFB Production records to validate:
  3. * - New nested schema shape (site/phase/block objects)
  4. * - Ollama qwen3:0.6b remark generation quality
  5. *
  6. * Run: npx ts-node --transpile-only scripts/sample-ffb-processed.ts
  7. */
  8. import * as dotenv from 'dotenv';
  9. import * as path from 'path';
  10. import * as fs from 'fs';
  11. import { MongoClient, ObjectId } from 'mongodb';
  // Load environment variables from the .env file one directory above this script.
  dotenv.config({ path: path.resolve(__dirname, '../.env') });

  /**
   * Reads a required environment variable and fails fast when it is absent.
   *
   * Without this check a missing MONGO_DB_NAME would be passed to `client.db()`
   * as `undefined`, which makes the driver use the connection string's database
   * instead of reporting the misconfiguration.
   *
   * @param name - Name of the environment variable to read.
   * @returns The variable's value.
   * @throws Error when the variable is unset or blank.
   */
  function requireEnv(name: string): string {
    const value = process.env[name];
    if (value === undefined || value.trim() === '') {
      throw new Error(`Missing required environment variable ${name} (set it in ../.env or the process environment)`);
    }
    return value;
  }

  // MongoDB connection settings (required).
  const MONGO_URI = requireEnv('MONGO_URI');
  const MONGO_DB_NAME = requireEnv('MONGO_DB_NAME');
  // Ollama endpoint and the model used to generate remarks.
  const OLLAMA_BASE_URL = 'http://localhost:11434';
  const REMARK_MODEL = 'qwen3:0.6b';
  // Directory holding the source JSON files; the sample output is written here too.
  const MONGO_STUFF = path.resolve(__dirname, '../../mongo stuff');
  // Number of FFB records to include in the sample.
  const SAMPLE_SIZE = 3;
  19. // ─── Source interfaces ────────────────────────────────────────────────────────
  /** One phase entry as parsed from phaseData.json. */
  interface RawPhase {
    /** Numeric phase identifier; FFB records refer to it through `phaseId`. */
    phaseID: number;
    phaseCode: string;
    /** Phase name; passed to the remark prompt as context. */
    phaseName: string;
    phaseDesc: string;
  }
  /** One block entry as parsed from blockData.json. */
  interface RawBlock {
    /** Numeric block identifier; FFB records refer to it through `blockId`. */
    blockID: number;
    /** Block code; passed to the remark prompt as context. */
    blockCode: string;
    blockDesc: string;
    loc_type: string;
    /** May arrive as a string, a number, or null in the source data. */
    numOfTreesPlanted: string | number | null;
    /** May arrive as a string, a number, or null in the source data. */
    totalPlantedArea: string | number | null;
    /** Soil condition; passed to the remark prompt as context. */
    loc_soil_condition: string;
    plantedLocUOM: string;
  }
  /** One FFB production entry as parsed from FFBProductionData.json. */
  interface RawFFB {
    activityId: number;
    /** Date string; converted with `new Date(...)` when the record is processed. */
    productionDate: string;
    siteId: string;
    /** Refers to `RawPhase.phaseID`. */
    phaseId: number;
    /** Refers to `RawBlock.blockID`. */
    blockId: number;
    /** Numeric string; parsed with `parseFloat` when the record is processed. */
    net_weight: string;
    act_uom: string;
    no_of_bunches: number;
    qty_uom: string;
  }
  31. // ─── Ollama generate (non-streaming) ─────────────────────────────────────────
  /**
   * Removes any `<think>…</think>` block from model output.
   * Qwen3 can still emit such a block (possibly empty) when `/no_think` is used.
   */
  function stripThinkBlocks(text: string): string {
    return text.replace(/<think>[\s\S]*?<\/think>/g, '');
  }

  /**
   * Asks the Ollama model for a one-sentence harvest observation.
   *
   * Sends a single non-streaming POST to `${OLLAMA_BASE_URL}/api/generate`
   * using REMARK_MODEL and returns the cleaned-up reply.
   *
   * @param blockCode - Block code interpolated into the prompt context.
   * @param soilCondition - Soil type for the prompt; an empty value falls back to 'mineral'.
   * @param phaseName - Phase name interpolated into the prompt context.
   * @returns The model's reply with `<think>` blocks removed and surrounding whitespace trimmed.
   * @throws Error when the HTTP status is not OK or the payload has no string `response` field.
   */
  async function generateRemark(blockCode: string, soilCondition: string, phaseName: string): Promise<string> {
    // Joined with '\n' so the prompt text does not depend on source indentation.
    const prompt = [
      'You are an oil palm plantation field supervisor writing a brief harvest observation note.',
      'Write ONE short sentence (max 25 words) about field conditions observed during FFB harvesting today.',
      `Context: Block ${blockCode}, Phase: ${phaseName}, Soil type: ${soilCondition || 'mineral'}.`,
      'Your sentence must mention one of: soil/ground conditions, weather, worker performance, equipment, or pest/disease observation.',
      'Reply with ONLY the observation sentence. No quotes, no labels, no preamble. /no_think',
    ].join('\n');

    const res = await fetch(`${OLLAMA_BASE_URL}/api/generate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ model: REMARK_MODEL, prompt, stream: false }),
    });
    if (!res.ok) throw new Error(`Ollama generate failed: ${res.status} ${res.statusText}`);

    // Validate the payload instead of trusting a cast: a body without a string
    // `response` would otherwise surface as an opaque TypeError on `.trim()`.
    const payload: unknown = await res.json();
    const text = typeof payload === 'object' && payload !== null
      ? (payload as { response?: unknown }).response
      : undefined;
    if (typeof text !== 'string') {
      throw new Error('Ollama generate returned a payload without a "response" string');
    }

    return stripThinkBlocks(text).trim();
  }
  47. // ─── Main ─────────────────────────────────────────────────────────────────────
  /** An FFB record together with every lookup needed to build its processed form. */
  interface ResolvedFFB {
    raw: RawFFB;
    phase: RawPhase;
    block: RawBlock;
    phaseMongoId: ObjectId;
    blockMongoId: ObjectId;
  }

  /**
   * Reads the Phase and Block collections and maps each document's locId to its _id.
   * The client is closed in a `finally` so a failed connect or query does not leak the connection.
   */
  async function fetchLocIdMaps(): Promise<{
    phaseLocIdToMongoId: Map<number, ObjectId>;
    blockLocIdToMongoId: Map<number, ObjectId>;
  }> {
    const client = new MongoClient(MONGO_URI);
    try {
      await client.connect();
      const db = client.db(MONGO_DB_NAME);

      // Fetch all Phase and Block docs (small collections — 13 phases, 598 blocks)
      const phaseDocs = await db.collection('Phase').find({}, { projection: { _id: 1, locId: 1, phaseCode: 1 } }).toArray();
      const blockDocs = await db.collection('Block').find({}, { projection: { _id: 1, locId: 1, blockCode: 1 } }).toArray();

      // Map locId (== original phaseID / blockID) → MongoDB _id
      return {
        phaseLocIdToMongoId: new Map<number, ObjectId>(phaseDocs.map(d => [d.locId as number, d._id as ObjectId])),
        blockLocIdToMongoId: new Map<number, ObjectId>(blockDocs.map(d => [d.locId as number, d._id as ObjectId])),
      };
    } finally {
      await client.close();
    }
  }

  /**
   * Builds the nested processed record for one resolved FFB entry.
   * Throws an Error naming the record when its productionDate cannot be parsed.
   */
  function toProcessedRecord(entry: ResolvedFFB, remark: string): object {
    const { raw, phaseMongoId, blockMongoId } = entry;

    const producedAt = new Date(raw.productionDate);
    if (Number.isNaN(producedAt.getTime())) {
      throw new Error(`Invalid productionDate "${raw.productionDate}" for activityId=${raw.activityId}`);
    }

    return {
      activityId: raw.activityId,
      productionDate: producedAt.toISOString(),
      site: {
        _id: null, // Site collection not yet seeded; placeholder
        siteId: raw.siteId,
      },
      phase: {
        id: phaseMongoId.toHexString(), // actual ObjectId from Phase collection
        phaseId: raw.phaseId,
      },
      block: {
        id: blockMongoId.toHexString(), // actual ObjectId from Block collection
        blockId: raw.blockId,
      },
      weight: parseFloat(raw.net_weight) || 0, // unparseable weight is recorded as 0
      weightUom: raw.act_uom,
      quantity: raw.no_of_bunches,
      quantityUom: raw.qty_uom,
      remarks: remark,
      vector: [], // to be filled during full seed run
    };
  }

  /**
   * Generates the sample file of processed FFB Production records.
   *
   * Steps: load the raw phase/block/FFB JSON files, resolve Phase and Block
   * ObjectIds from MongoDB, pick the first SAMPLE_SIZE FFB records whose phase
   * and block resolve in both sources, generate a remark for each one, then
   * print the result and write it to FFBProductionData_sample.json.
   *
   * @throws Error when a source file, the database, or remark generation fails.
   */
  async function main(): Promise<void> {
    console.log('\n═══════════════════════════════════════════════');
    console.log(` FFB Processed JSON — Sample Preview (${SAMPLE_SIZE} recs)`);
    console.log('═══════════════════════════════════════════════\n');

    // Load source files
    const rawPhases: RawPhase[] = JSON.parse(fs.readFileSync(path.join(MONGO_STUFF, 'phaseData.json'), 'utf-8'));
    const rawBlocks: RawBlock[] = JSON.parse(fs.readFileSync(path.join(MONGO_STUFF, 'blockData.json'), 'utf-8'));
    const rawFFBs: RawFFB[] = JSON.parse(fs.readFileSync(path.join(MONGO_STUFF, 'FFBProductionData.json'), 'utf-8'));

    // In-memory lookup maps (integer ID → raw data)
    const phaseById = new Map<number, RawPhase>(rawPhases.map(p => [p.phaseID, p]));
    const blockById = new Map<number, RawBlock>(rawBlocks.map(b => [b.blockID, b]));

    // Connect to Atlas to get actual _id ObjectIds from Phase and Block collections
    console.log('🔗 Connecting to Atlas to resolve ObjectIds...');
    const { phaseLocIdToMongoId, blockLocIdToMongoId } = await fetchLocIdMaps();
    console.log(` Phase ObjectIds resolved: ${phaseLocIdToMongoId.size}`);
    console.log(` Block ObjectIds resolved: ${blockLocIdToMongoId.size}\n`);

    // Take first SAMPLE_SIZE records that are fully resolvable, keeping the
    // resolved lookups alongside each record so no later step has to re-assert them.
    const sample: ResolvedFFB[] = [];
    for (const raw of rawFFBs) {
      if (sample.length >= SAMPLE_SIZE) break;
      const phase = phaseById.get(raw.phaseId);
      const block = blockById.get(raw.blockId);
      const phaseMongoId = phaseLocIdToMongoId.get(raw.phaseId);
      const blockMongoId = blockLocIdToMongoId.get(raw.blockId);
      if (phase !== undefined && block !== undefined && phaseMongoId !== undefined && blockMongoId !== undefined) {
        sample.push({ raw, phase, block, phaseMongoId, blockMongoId });
      }
    }
    if (sample.length < SAMPLE_SIZE) {
      // Surface a short (or empty) sample instead of silently writing it.
      console.warn(`⚠️ Only ${sample.length} of ${SAMPLE_SIZE} requested records could be fully resolved.`);
    }

    console.log(`📋 Generating remarks for ${sample.length} sample records via ${REMARK_MODEL}...\n`);
    const output: object[] = [];
    // Sequential on purpose: progress is printed per record as each remark arrives.
    for (const [index, entry] of sample.entries()) {
      process.stdout.write(` [${index + 1}/${sample.length}] activityId=${entry.raw.activityId} → generating remark...`);
      const remark = await generateRemark(entry.block.blockCode, entry.block.loc_soil_condition, entry.phase.phaseName);
      console.log(` ✅`);
      console.log(` "${remark}"\n`);
      output.push(toProcessedRecord(entry, remark));
    }

    // Pretty-print to console and write sample file
    const outPath = path.join(MONGO_STUFF, 'FFBProductionData_sample.json');
    const pretty = JSON.stringify(output, null, 2);
    fs.writeFileSync(outPath, pretty, 'utf-8');
    console.log('═══════════════════════════════════════════════');
    console.log(' SAMPLE OUTPUT');
    console.log('═══════════════════════════════════════════════\n');
    console.log(pretty);
    console.log(`\n✅ Written to: mongo stuff/FFBProductionData_sample.json`);
  }
  // Script entry point: run main() and report any rejection before exiting with status 1.
  main().catch(failure => {
    // Prefer the error's message; fall back to the rejected value itself.
    const detail = failure.message || failure;
    console.error('\n❌ Sample failed:', detail);
    process.exit(1);
  });