Преглед на файлове

added comments to the logic

Dr-Swopt преди 13 часа
родител
ревизия
39bd5f56f1
променени са 1 файла, в които са добавени 16 реда и са изтрити 2 реда
  1. 16 2
      src/api/main.py

+ 16 - 2
src/api/main.py

@@ -72,11 +72,19 @@ class ModelManager:
         self.benchmark_class_names = self.benchmark_model.names
 
     def preprocess_onnx(self, img: Image.Image):
+        """ Color Standardization: img.convert("RGB") ensures the image has three color channels (Red, Green, Blue), removing any 
+        alpha/transparency channels that would break the math """
         img = img.convert("RGB")
         orig_w, orig_h = img.size
+        """ Resolution Scaling: img.resize((640, 640)) forces the image into the $640 \times 640$ resolution the model was trained on """
         img_resized = img.resize((640, 640))
+        """ Normalization: np.array(...) / 255.0 converts pixel values (0–255) into a decimal range ($0.0$ to $1.0$), which is the 
+        standard input for deep learning models. """
         img_array = np.array(img_resized) / 255.0
+        """ Channel Transposition: img_array.transpose(2, 0, 1) changes the memory layout from HWC (Height, Width, Channels) to CHW
+        (Channels, Height, Width) to match the ONNX model's requirements """
         img_array = img_array.transpose(2, 0, 1)
+        """ Batch Dimension: reshape(1, 3, 640, 640) adds a "batch" dimension, telling the model it is processing exactly one image. """
         img_array = img_array.reshape(1, 3, 640, 640).astype(np.float32)
         return img_array, orig_w, orig_h
 
@@ -92,7 +100,7 @@ class ModelManager:
         # ONNX Output: [batch, num_boxes, 6] (Where 6: x1, y1, x2, y2, conf, cls)
         # Note: YOLOv8 endpoints often produce normalized coordinates (0.0 to 1.0)
         detections_batch = outputs[0]
-        
+        # Post-processing: Filter by confidence and convert coordinates back to original image scale
         detections = []
         valid_count = 0
         for i in range(detections_batch.shape[1]):
@@ -100,6 +108,9 @@ class ModelManager:
             conf = float(det[4])
             if conf >= conf_threshold:
                 valid_count += 1
+                """ Coordinate remapping: this ONNX export emits normalized coordinates (0.0 to 1.0) relative to the
+                640x640 input, so the lines below multiply them by orig_w and orig_h to map the bounding boxes back
+                onto the original full-resolution image. """
                 # 1. Coordinate Scaling: Convert normalized (0.0-1.0) to absolute pixels
                 x1, y1, x2, y2 = det[:4]
                 abs_x1 = x1 * orig_w
@@ -109,7 +120,8 @@ class ModelManager:
                 
                 class_id = int(det[5])
                 class_name = self.class_names.get(class_id, "Unknown")
-                
+                """ Industrial logic: the numeric class_id is mapped to its configured name via self.class_names
+                (e.g. "Ripe", "Abnormal"); downstream, an is_health_alert flag is expected for "Abnormal" /
+                "Empty_Bunch" classes to trigger management warnings — confirm against the appended fields. """
                 detections.append({
                     "bunch_id": valid_count,
                     "class": class_name,
@@ -134,7 +146,9 @@ class ModelManager:
         end_inf = time.perf_counter()
         inference_ms = (end_inf - start_inf) * 1000
 
+        # Post-processing: Extract detections and apply industrial logic
         detections = []
+        """ Bounding Boxes: It iterates through results[0].boxes, extracting the absolute pixel coordinates via box.xyxy. """
         for i, box in enumerate(results[0].boxes):
             class_id = int(box.cls)
             class_name = names.get(class_id, "Unknown")