|
|
@@ -72,11 +72,19 @@ class ModelManager:
|
|
|
self.benchmark_class_names = self.benchmark_model.names
|
|
|
|
|
|
def preprocess_onnx(self, img: "Image.Image"):
    """Convert a PIL image into the (1, 3, 640, 640) float32 tensor the ONNX model expects.

    Args:
        img: input image (PIL.Image.Image); any mode is accepted.

    Returns:
        Tuple ``(img_array, orig_w, orig_h)`` where ``img_array`` is a
        float32 NCHW array with values normalized to [0.0, 1.0], and
        ``orig_w`` / ``orig_h`` are the original image dimensions
        (needed later to rescale detections back to full resolution).
    """
    # Color standardization: force three RGB channels, dropping any
    # alpha/transparency channel that would break the fixed reshape below.
    img = img.convert("RGB")
    orig_w, orig_h = img.size
    # Resolution scaling: the model was trained on 640x640 inputs.
    img_resized = img.resize((640, 640))
    # Normalization: map pixel values from 0-255 into the 0.0-1.0 range.
    img_array = np.array(img_resized) / 255.0
    # Channel transposition: HWC (Height, Width, Channels) -> CHW, the
    # layout the ONNX model requires.
    img_array = img_array.transpose(2, 0, 1)
    # Batch dimension: reshape to (1, 3, 640, 640) — exactly one image per
    # call — and cast to float32 for the ONNX runtime.
    img_array = img_array.reshape(1, 3, 640, 640).astype(np.float32)
    return img_array, orig_w, orig_h
|
|
|
|
|
|
@@ -92,7 +100,7 @@ class ModelManager:
|
|
|
# ONNX Output: [batch, num_boxes, 6] (Where 6: x1, y1, x2, y2, conf, cls)
|
|
|
# Note: YOLOv8 endpoints often produce normalized coordinates (0.0 to 1.0)
|
|
|
detections_batch = outputs[0]
|
|
|
-
|
|
|
+ # Post-processing: Filter by confidence and convert coordinates back to original image scale
|
|
|
detections = []
|
|
|
valid_count = 0
|
|
|
for i in range(detections_batch.shape[1]):
|
|
|
@@ -100,6 +108,9 @@ class ModelManager:
|
|
|
conf = float(det[4])
|
|
|
if conf >= conf_threshold:
|
|
|
valid_count += 1
|
|
|
+                    # Coordinate remapping: the model ran on a 640x640 copy of the image,
+                    # so its box outputs are normalized (0.0-1.0). Multiplying by orig_w
+                    # and orig_h maps them back onto the original full-resolution image.
|
|
|
# 1. Coordinate Scaling: Convert normalized (0.0-1.0) to absolute pixels
|
|
|
x1, y1, x2, y2 = det[:4]
|
|
|
abs_x1 = x1 * orig_w
|
|
|
@@ -109,7 +120,8 @@ class ModelManager:
|
|
|
|
|
|
class_id = int(det[5])
|
|
|
class_name = self.class_names.get(class_id, "Unknown")
|
|
|
-
|
|
|
+                    # Industrial logic: class_id is mapped to the standard MPOB class name
+                    # (e.g. "Ripe", "Abnormal"); "Abnormal"/"Empty_Bunch" should raise is_health_alert.
|
|
|
detections.append({
|
|
|
"bunch_id": valid_count,
|
|
|
"class": class_name,
|
|
|
@@ -134,7 +146,9 @@ class ModelManager:
|
|
|
end_inf = time.perf_counter()
|
|
|
inference_ms = (end_inf - start_inf) * 1000
|
|
|
|
|
|
+ # Post-processing: Extract detections and apply industrial logic
|
|
|
detections = []
|
|
|
+        # Bounding boxes: iterate results[0].boxes; box.xyxy holds absolute pixel coordinates.
|
|
|
for i, box in enumerate(results[0].boxes):
|
|
|
class_id = int(box.cls)
|
|
|
class_name = names.get(class_id, "Unknown")
|