|
|
@@ -6,6 +6,7 @@ import json
|
|
|
from openai import AsyncOpenAI
|
|
|
from dotenv import load_dotenv
|
|
|
from PIL import Image
|
|
|
+import fitz # PyMuPDF
|
|
|
from backend.schemas import ExtractionResponse, V2TemplateResponse
|
|
|
|
|
|
load_dotenv()
|
|
|
@@ -35,9 +36,37 @@ def compress_image(image_content: bytes) -> bytes:
|
|
|
img.save(output_buffer, format="JPEG", quality=IMAGE_QUALITY, optimize=True)
|
|
|
return output_buffer.getvalue()
|
|
|
|
|
|
-async def extract_receipt_data(image_content: bytes, user_name: str, department: str) -> ExtractionResponse:
|
|
|
- # 1. Compress Image
|
|
|
- compressed_content = compress_image(image_content)
|
|
|
def convert_to_image_bytes(content: bytes, content_type: str) -> bytes:
    """Return image bytes for *content*, rasterizing the first page of a PDF.

    Non-PDF uploads are returned unchanged. For PDFs, only the first page is
    rendered (with PyMuPDF) and returned as JPEG bytes.

    Args:
        content: Raw bytes of the uploaded file.
        content_type: MIME type reported for the upload. Media-type
            parameters (e.g. ``"application/pdf; charset=binary"``), case
            differences and ``None`` are tolerated.

    Returns:
        JPEG bytes of the first page when *content* is a readable PDF with
        at least one page; otherwise *content* unchanged.

    Note:
        Conversion is best-effort: on any failure the error is logged and the
        original bytes are returned, so callers may still receive PDF bytes.
        Relies on ``fitz`` and ``logger`` from the enclosing module
        (``logger`` is not visible in this chunk -- confirm it is defined).
    """
    # Compare only the bare media type so parameters or casing supplied by
    # the client do not cause a PDF to skip conversion.
    media_type = (content_type or "").split(";", 1)[0].strip().lower()
    if media_type != "application/pdf":
        return content

    try:
        # The context manager closes the document on every exit path,
        # including the zero-page early return and any rendering error.
        with fitz.open(stream=content, filetype="pdf") as doc:
            if doc.page_count == 0:
                return content

            # PDF user space is 72 DPI, so a 2x zoom renders at ~144 DPI
            # (300 DPI would need a factor of 300/72 ~= 4.17). 2x-3x is
            # usually sufficient for OCR while keeping the image small.
            zoom = fitz.Matrix(2.0, 2.0)
            pixmap = doc[0].get_pixmap(matrix=zoom)

            # Encode while the document is still open.
            return pixmap.tobytes("jpeg")
    except Exception as e:
        # Deliberate best-effort boundary: PyMuPDF can raise several
        # exception types for corrupt or encrypted input.
        logger.error(f"PDF conversion failed: {str(e)}")
        return content
|
|
|
+
|
|
|
+async def extract_receipt_data(image_content: bytes, content_type: str, user_name: str, department: str) -> ExtractionResponse:
|
|
|
+ # 1. Convert if PDF
|
|
|
+ raw_image = convert_to_image_bytes(image_content, content_type)
|
|
|
+ # 2. Compress Image
|
|
|
+ compressed_content = compress_image(raw_image)
|
|
|
base64_image = base64.b64encode(compressed_content).decode("utf-8")
|
|
|
|
|
|
# 2. Refined Prompt
|
|
|
@@ -89,9 +118,11 @@ async def extract_receipt_data(image_content: bytes, user_name: str, department:
|
|
|
|
|
|
return result
|
|
|
|
|
|
-async def fill_form_with_template_v2(image_content: bytes, template_fields: dict, user_name: str, department: str) -> V2TemplateResponse:
|
|
|
- # 1. Compress Image
|
|
|
- compressed_content = compress_image(image_content)
|
|
|
+async def fill_form_with_template_v2(image_content: bytes, content_type: str, template_fields: dict, user_name: str, department: str) -> V2TemplateResponse:
|
|
|
+ # 1. Convert if PDF
|
|
|
+ raw_image = convert_to_image_bytes(image_content, content_type)
|
|
|
+ # 2. Compress Image
|
|
|
+ compressed_content = compress_image(raw_image)
|
|
|
base64_image = base64.b64encode(compressed_content).decode("utf-8")
|
|
|
|
|
|
# 2. V2 Prompt
|