|
|
@@ -110,7 +110,6 @@ class TfliteService {
|
|
|
}
|
|
|
|
|
|
static List<DetectionResult> _inferenceStreamTaskWrapper(Map<String, dynamic> args) {
|
|
|
- // This is a simplified wrapper for stream inference in isolate
|
|
|
final modelBytes = args['modelBytes'] as Uint8List;
|
|
|
final labelData = args['labelData'] as String;
|
|
|
final planes = args['planes'] as List<dynamic>;
|
|
|
@@ -121,16 +120,22 @@ class TfliteService {
|
|
|
final labels = labelData.split('\n').where((l) => l.trim().isNotEmpty).map((l) => l.trim()).toList();
|
|
|
|
|
|
try {
|
|
|
- // Manual YUV to RGB conversion if needed, or use image package if possible
|
|
|
- // For speed in stream, we might want a more optimized conversion.
|
|
|
- // But for now, let's use a basic one or the image package.
|
|
|
-
|
|
|
+ final size = width < height ? width : height;
|
|
|
+ final offsetX = (width - size) ~/ 2;
|
|
|
+ final offsetY = (height - size) ~/ 2;
|
|
|
+
|
|
|
img.Image? image;
|
|
|
if (args['format'] == ImageFormatGroup.yuv420) {
|
|
|
- // Simple YUV420 to RGB (this is slow in Dart, but better in isolate)
|
|
|
- image = _convertYUV420ToImage(planes, width, height);
|
|
|
+ image = _convertYUV420ToImage(
|
|
|
+ planes: planes,
|
|
|
+ width: width,
|
|
|
+ height: height,
|
|
|
+ cropSize: size,
|
|
|
+ offsetX: offsetX,
|
|
|
+ offsetY: offsetY,
|
|
|
+ );
|
|
|
} else if (args['format'] == ImageFormatGroup.bgra8888) {
|
|
|
- image = img.Image.fromBytes(
|
|
|
+ final fullImage = img.Image.fromBytes(
|
|
|
width: width,
|
|
|
height: height,
|
|
|
bytes: planes[0]['bytes'].buffer,
|
|
|
@@ -138,6 +143,7 @@ class TfliteService {
|
|
|
numChannels: 4,
|
|
|
order: img.ChannelOrder.bgra,
|
|
|
);
|
|
|
+ image = img.copyCrop(fullImage, x: offsetX, y: offsetY, width: size, height: size);
|
|
|
}
|
|
|
|
|
|
if (image == null) return [];
|
|
|
@@ -162,13 +168,30 @@ class TfliteService {
|
|
|
);
|
|
|
|
|
|
interpreter.run(inputTensor, outputTensor);
|
|
|
- return _decodeDetections(outputTensor[0], labels);
|
|
|
+
|
|
|
+ // Map detections back to full frame
|
|
|
+ return _decodeDetections(
|
|
|
+ outputTensor[0],
|
|
|
+ labels,
|
|
|
+ cropSize: size,
|
|
|
+ offsetX: offsetX,
|
|
|
+ offsetY: offsetY,
|
|
|
+ fullWidth: width,
|
|
|
+ fullHeight: height
|
|
|
+ );
|
|
|
} finally {
|
|
|
interpreter.close();
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- static img.Image _convertYUV420ToImage(List<dynamic> planes, int width, int height) {
|
|
|
+ static img.Image _convertYUV420ToImage({
|
|
|
+ required List<dynamic> planes,
|
|
|
+ required int width,
|
|
|
+ required int height,
|
|
|
+ required int cropSize,
|
|
|
+ required int offsetX,
|
|
|
+ required int offsetY,
|
|
|
+ }) {
|
|
|
final yPlane = planes[0];
|
|
|
final uPlane = planes[1];
|
|
|
final vPlane = planes[2];
|
|
|
@@ -181,12 +204,18 @@ class TfliteService {
|
|
|
final uvRowStride = uPlane['bytesPerRow'] as int;
|
|
|
final uvPixelStride = uPlane['bytesPerPixel'] as int;
|
|
|
|
|
|
- final image = img.Image(width: width, height: height);
|
|
|
+ final image = img.Image(width: cropSize, height: cropSize);
|
|
|
+
|
|
|
+ for (int y = 0; y < cropSize; y++) {
|
|
|
+ for (int x = 0; x < cropSize; x++) {
|
|
|
+ final int actualX = x + offsetX;
|
|
|
+ final int actualY = y + offsetY;
|
|
|
|
|
|
- for (int y = 0; y < height; y++) {
|
|
|
- for (int x = 0; x < width; x++) {
|
|
|
- final int uvIndex = (uvRowStride * (y / 2).floor()) + (uvPixelStride * (x / 2).floor());
|
|
|
- final int yIndex = (y * yRowStride) + x;
|
|
|
+ final int uvIndex = (uvRowStride * (actualY / 2).floor()) + (uvPixelStride * (actualX / 2).floor());
|
|
|
+ final int yIndex = (actualY * yRowStride) + actualX;
|
|
|
+
|
|
|
+ // Ensure we don't go out of bounds
|
|
|
+ if (yIndex >= yBytes.length || uvIndex >= uBytes.length || uvIndex >= vBytes.length) continue;
|
|
|
|
|
|
final int yp = yBytes[yIndex];
|
|
|
final int up = uBytes[uvIndex];
|
|
|
@@ -203,17 +232,34 @@ class TfliteService {
|
|
|
return image;
|
|
|
}
|
|
|
|
|
|
- static List<DetectionResult> _decodeDetections(List<List<double>> rawDetections, List<String> labels) {
|
|
|
+ static List<DetectionResult> _decodeDetections(
|
|
|
+ List<List<double>> rawDetections,
|
|
|
+ List<String> labels, {
|
|
|
+ int? cropSize,
|
|
|
+ int? offsetX,
|
|
|
+ int? offsetY,
|
|
|
+ int? fullWidth,
|
|
|
+ int? fullHeight,
|
|
|
+ }) {
|
|
|
final detections = <DetectionResult>[];
|
|
|
for (final det in rawDetections) {
|
|
|
if (det.length < 6) continue;
|
|
|
final conf = det[4];
|
|
|
if (conf < _confidenceThreshold) continue;
|
|
|
|
|
|
- final x1 = det[0].clamp(0.0, 1.0);
|
|
|
- final y1 = det[1].clamp(0.0, 1.0);
|
|
|
- final x2 = det[2].clamp(0.0, 1.0);
|
|
|
- final y2 = det[3].clamp(0.0, 1.0);
|
|
|
+ double x1 = det[0].clamp(0.0, 1.0);
|
|
|
+ double y1 = det[1].clamp(0.0, 1.0);
|
|
|
+ double x2 = det[2].clamp(0.0, 1.0);
|
|
|
+ double y2 = det[3].clamp(0.0, 1.0);
|
|
|
+
|
|
|
+ // If crop info is provided, map back to full frame
|
|
|
+ if (cropSize != null && offsetX != null && offsetY != null && fullWidth != null && fullHeight != null) {
|
|
|
+ x1 = (x1 * cropSize + offsetX) / fullWidth;
|
|
|
+ x2 = (x2 * cropSize + offsetX) / fullWidth;
|
|
|
+ y1 = (y1 * cropSize + offsetY) / fullHeight;
|
|
|
+ y2 = (y2 * cropSize + offsetY) / fullHeight;
|
|
|
+ }
|
|
|
+
|
|
|
final classId = det[5].round();
|
|
|
|
|
|
if (x2 <= x1 || y2 <= y1) continue;
|
|
|
@@ -263,7 +309,15 @@ class TfliteService {
|
|
|
final decoded = img.decodeImage(imageBytes);
|
|
|
if (decoded == null) throw Exception('Could not decode image');
|
|
|
|
|
|
- final resized = img.copyResize(decoded, width: _inputSize, height: _inputSize, interpolation: img.Interpolation.linear);
|
|
|
+ // Center-Square Crop
|
|
|
+ final int width = decoded.width;
|
|
|
+ final int height = decoded.height;
|
|
|
+ final int size = width < height ? width : height;
|
|
|
+ final int offsetX = (width - size) ~/ 2;
|
|
|
+ final int offsetY = (height - size) ~/ 2;
|
|
|
+
|
|
|
+ final cropped = img.copyCrop(decoded, x: offsetX, y: offsetY, width: size, height: size);
|
|
|
+ final resized = img.copyResize(cropped, width: _inputSize, height: _inputSize, interpolation: img.Interpolation.linear);
|
|
|
|
|
|
final inputTensor = List.generate(1, (_) =>
|
|
|
List.generate(_inputSize, (y) =>
|
|
|
@@ -276,18 +330,25 @@ class TfliteService {
|
|
|
|
|
|
// 3. Prepare output
|
|
|
final outputShape = interpreter.getOutputTensors()[0].shape;
|
|
|
- final numDetections = outputShape[1];
|
|
|
- final numFields = outputShape[2];
|
|
|
final outputTensor = List.generate(1, (_) =>
|
|
|
- List.generate(numDetections, (_) =>
|
|
|
- List<double>.filled(numFields, 0.0)
|
|
|
+ List.generate(outputShape[1], (_) =>
|
|
|
+ List<double>.filled(outputShape[2], 0.0)
|
|
|
)
|
|
|
);
|
|
|
|
|
|
// 4. Run
|
|
|
interpreter.run(inputTensor, outputTensor);
|
|
|
|
|
|
- return _decodeDetections(outputTensor[0], labels);
|
|
|
+ // Map detections back to full frame
|
|
|
+ return _decodeDetections(
|
|
|
+ outputTensor[0],
|
|
|
+ labels,
|
|
|
+ cropSize: size,
|
|
|
+ offsetX: offsetX,
|
|
|
+ offsetY: offsetY,
|
|
|
+ fullWidth: width,
|
|
|
+ fullHeight: height
|
|
|
+ );
|
|
|
} finally {
|
|
|
interpreter.close();
|
|
|
}
|