2 месяцев назад · 767dfdbfc9
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ openai
 
															 python-dotenv
														
 
															 python-multipart
														
 
															 pydantic
														
 
															+Pillow
														
--- a/src/services/openai_service.py
+++ b/src/services/openai_service.py
@@ -1,19 +1,55 @@
 
															 import os
														
 
															 import base64
														
 
															-from openai import OpenAI
														
 
															+import io
														
 
															+import logging
														
 
															+from openai import AsyncOpenAI
														
 
															 from dotenv import load_dotenv
														
 
															+from PIL import Image
														
 
															 from src.schemas import ExtractionResponse
														
 
															 load_dotenv()
														
 
															-client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
														
 
															+# Configure logging at INFO level when this module is imported, and create the
															+# module-level logger used below.
														
 
															+logging.basicConfig(level=logging.INFO)
														
 
															+logger = logging.getLogger(__name__)
														
 
															+
														
 
															+# Shared asynchronous OpenAI client; the API key is read from the
															+# OPENAI_API_KEY environment variable.
															+client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
														
 
															+
														
 
															+# Bounding box (width, height) in pixels that compress_image passes to
															+# Image.thumbnail.
															+MAX_IMAGE_SIZE = (2000, 2000)
														
 
															+# JPEG quality setting that compress_image passes to Image.save.
															+IMAGE_QUALITY = 85
														
 
															+
														
 
															+def compress_image(image_content: bytes) -> bytes:
															+    """Resize and re-encode an image as JPEG to reduce upload size.
															+
															+    Args:
															+        image_content: Raw bytes of an image in any format Pillow can decode.
															+
															+    Returns:
															+        JPEG-encoded bytes of the image, rotated upright according to its
															+        EXIF orientation and scaled down (aspect ratio preserved) to fit
															+        within MAX_IMAGE_SIZE.
															+
															+    Raises:
															+        PIL.UnidentifiedImageError: If the bytes are not a decodable image.
															+    """
															+    # Local import: ImageOps is only needed by this function.
															+    from PIL import ImageOps
															+
															+    img = Image.open(io.BytesIO(image_content))
															+
															+    # Re-encoding drops EXIF metadata, so bake the orientation into the
															+    # pixels first; otherwise rotated phone photos come out sideways.
															+    img = ImageOps.exif_transpose(img)
															+
															+    # The JPEG writer rejects alpha and palette modes (e.g. "RGBA", "P",
															+    # "LA", "PA"). Normalize everything except plain RGB / greyscale to RGB
															+    # so that save() cannot fail on the image mode.
															+    if img.mode not in ("RGB", "L"):
															+        img = img.convert("RGB")
															+
															+    # Shrink in place to fit the bounding box; thumbnail() keeps the aspect
															+    # ratio and never enlarges a smaller image.
															+    img.thumbnail(MAX_IMAGE_SIZE, Image.Resampling.LANCZOS)
															+
															+    output_buffer = io.BytesIO()
															+    img.save(output_buffer, format="JPEG", quality=IMAGE_QUALITY, optimize=True)
															+    return output_buffer.getvalue()
														
 
															 async def extract_receipt_data(image_content: bytes, user_name: str, department: str) -> ExtractionResponse:
														
 
															-    base64_image = base64.b64encode(image_content).decode("utf-8")
														
 
															+    # 1. Compress Image
														
 
															+    compressed_content = compress_image(image_content)
														
 
															+    base64_image = base64.b64encode(compressed_content).decode("utf-8")
														
 
															-    prompt = f"You are an HR data entry assistant. Extract the requested fields from the provided medical receipt image. The employee submitting this is {user_name} from {department}. If the date is missing, look for a 'Payment Date' as a fallback."
														
 
															+    # 2. Refined Prompt
														
 
															+    prompt = (
														
 
															+        f"You are an HR data entry assistant. Extract the requested fields from the provided medical receipt image. "
														
 
															+        f"The employee submitting this is {user_name} from {department}. "
														
 
															+        f"If the date is missing, look for a 'Payment Date' as a fallback. "
														
 
															+        f"Analyze the receipt for authenticity. If the total amount appears altered or if the provider name is missing, "
														
 
															+        f"set `needs_manual_review` to `true` and provide a low `confidence_score`."
														
 
															+    )
														
 
															-    completion = client.beta.chat.completions.parse(
														
 
															+    # 3. Async Extraction
														
 
															+    completion = await client.beta.chat.completions.parse(
														
 
															         model="gpt-4o",
														
 
															         messages=[
														
 
															             {
														
@@ -30,4 +66,12 @@ async def extract_receipt_data(image_content: bytes, user_name: str, department:
 
															         response_format=ExtractionResponse,
														
 
															     )
														
 
															-    return completion.choices[0].message.parsed
														
 
															+    result = completion.choices[0].message.parsed
														
 
															+    
														
 
															+    # 4. Logging for Demo
														
 
															+    if result:
														
 
															+        logger.info(f"Extraction complete for {user_name}. Confidence Score: {result.confidence_score}")
														
 
															+        if result.needs_manual_review:
														
 
															+            logger.warning(f"Manual review required for receipt submitted by {user_name}")
														
 
															+            
														
 
															+    return result