import os
import base64
import io
import logging
import json

from openai import AsyncOpenAI
from dotenv import load_dotenv
from PIL import Image
import fitz  # PyMuPDF

from backend.schemas import ExtractionResponse, V2TemplateResponse

load_dotenv()

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Images larger than this are downscaled before upload to keep payloads small.
MAX_IMAGE_SIZE = (2000, 2000)
# JPEG quality used when re-encoding the (possibly downscaled) image.
IMAGE_QUALITY = 85


def compress_image(image_content: bytes) -> bytes:
    """Resize and re-encode an image as JPEG to reduce upload size.

    Args:
        image_content: Raw image bytes in any format Pillow can open.

    Returns:
        JPEG-encoded bytes, downscaled (aspect-ratio preserved) to fit
        within MAX_IMAGE_SIZE.
    """
    img = Image.open(io.BytesIO(image_content))
    # JPEG cannot store alpha or palette data; convert such modes to RGB.
    # "LA" (grayscale + alpha) is included — saving an LA image as JPEG
    # raises OSError, which the original mode list did not guard against.
    if img.mode in ("RGBA", "LA", "P"):
        img = img.convert("RGB")
    # Resize in place if larger than max dimensions (no-op otherwise).
    img.thumbnail(MAX_IMAGE_SIZE, Image.Resampling.LANCZOS)
    # Save to bytes
    output_buffer = io.BytesIO()
    img.save(output_buffer, format="JPEG", quality=IMAGE_QUALITY, optimize=True)
    return output_buffer.getvalue()


def convert_to_image_bytes(content: bytes, content_type: str) -> bytes:
    """If PDF, convert the first page to JPEG bytes using PyMuPDF.

    Non-PDF content — and PDFs that fail to convert — are returned
    unchanged, so callers can treat the result as best-effort image bytes.

    Args:
        content: Raw upload bytes.
        content_type: MIME type of the upload.

    Returns:
        JPEG bytes of the first PDF page, or the original bytes.
    """
    if content_type != "application/pdf":
        return content
    try:
        # Open PDF from bytes
        doc = fitz.open(stream=content, filetype="pdf")
        try:
            if len(doc) == 0:
                return content
            # Get the first page
            page = doc[0]
            # Render page to a pixmap. A scale of 2.0 (~144 DPI) is usually
            # sufficient for OCR quality; 300 DPI would be scale=300/72.
            matrix = fitz.Matrix(2.0, 2.0)
            pix = page.get_pixmap(matrix=matrix)
            # Convert pixmap to JPEG bytes
            return pix.tobytes("jpeg")
        finally:
            # Close on every path — the original leaked the document when
            # the PDF had zero pages (early return before doc.close()).
            doc.close()
    except Exception as e:
        logger.error("PDF conversion failed: %s", str(e))
        return content


async def extract_receipt_data(
    image_content: bytes, content_type: str, user_name: str, department: str
) -> ExtractionResponse:
    """Extract structured claim data from a medical receipt image/PDF.

    Converts PDFs to an image, compresses, then asks gpt-4o-mini for a
    structured `ExtractionResponse` via the parse API.

    Args:
        image_content: Raw upload bytes (image or PDF).
        content_type: MIME type of the upload.
        user_name: Submitting employee's name (embedded in the prompt).
        department: Submitting employee's department.

    Returns:
        The parsed ExtractionResponse (may be None if parsing failed).
    """
    # 1. Convert if PDF
    raw_image = convert_to_image_bytes(image_content, content_type)
    # 2. Compress Image
    compressed_content = compress_image(raw_image)
    base64_image = base64.b64encode(compressed_content).decode("utf-8")
    # 3. Refined Prompt (was mislabeled "# 2." — duplicate step number)
    prompt = (
        f"You are a cautious auditor helping an HR department in Malaysia. "
        f"Extract the requested fields from the provided medical receipt image. "
        f"The employee submitting this is {user_name} from {department}. "
        f"IMPORTANT: The context is Malaysia (MYR). "
        f"For the fields `receipt_ref_no` and `clinic_reg_no`, only provide a value if you can read it clearly without any guessing or inference. If the text is smudged, handwritten, or ambiguous, return `null`. "
        f"Map the clinic/services to a `claim_category` from: [General, Dental, Optical, Specialist] based on the clinic name or invoice items. "
        f"Provide a 1-sentence `diagnosis_brief` summarizing the services seen (e.g. 'Fever consultation and medicine'). "
        f"Set `needs_manual_review` to `true` and provide a low `confidence_score` if: "
        f"1. The 'Total' does not match the sum of the individual items. "
        f"2. The receipt looks hand-written and lacks an official stamp. "
        f"3. The provider name is missing or the amount looks altered. "
        f"4. The user's name ({user_name}) is not clearly visible on the receipt. "
        f"IMPORTANT: Fill the `ai_reasoning` field with a 1-sentence explanation of how you identified the clinic and category."
    )
    # 4. Async Extraction
    completion = await client.beta.chat.completions.parse(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": "You are an HR data entry assistant. Extract medical receipt data accurately into structured JSON.",
            },
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                    },
                ],
            },
        ],
        response_format=ExtractionResponse,
    )
    result = completion.choices[0].message.parsed
    # 5. Logging for Demo (lazy %-args avoid formatting when level is off)
    if result:
        logger.info(
            "Extraction complete for %s. Confidence Score: %s",
            user_name,
            result.confidence_score,
        )
        if result.needs_manual_review:
            logger.warning("Manual review required for receipt submitted by %s", user_name)
    return result


async def fill_form_with_template_v2(
    image_content: bytes,
    content_type: str,
    template_fields: dict,
    user_name: str,
    department: str,
) -> V2TemplateResponse:
    """Fill a caller-supplied form template from a receipt image/PDF.

    Args:
        image_content: Raw upload bytes (image or PDF).
        content_type: MIME type of the upload.
        template_fields: Mapping of field names to descriptions; serialized
            into the prompt as JSON.
        user_name: Submitting employee's name (embedded in the prompt).
        department: Submitting employee's department.

    Returns:
        The parsed V2TemplateResponse (may be None if parsing failed).
    """
    # 1. Convert if PDF
    raw_image = convert_to_image_bytes(image_content, content_type)
    # 2. Compress Image
    compressed_content = compress_image(raw_image)
    base64_image = base64.b64encode(compressed_content).decode("utf-8")
    # 3. V2 Prompt (was mislabeled "# 2." — duplicate step number)
    template_json = json.dumps(template_fields, indent=2)
    prompt = (
        f"You are a professional Data Entry Clerk helping an HR department in Malaysia. "
        f"You will receive a medical receipt image and a Form Template consisting of specific field names and descriptions. "
        f"Your task is to fill the form values based ONLY on the evidence in the image. "
        f"The employee is {user_name} from {department}. "
        f"FORM TEMPLATE (JSON): {template_json}\n\n"
        f"STRICT RULES:\n"
        f"1. If a field in the template is not explicitly visible or is ambiguous, you MUST return `null`. Do not guess.\n"
        f"2. For currency, assume MYR unless stated otherwise.\n"
        f"3. If the user's name ({user_name}) is not on the receipt, leave any name-related fields `null`.\n"
        f"4. For any field identified, provide a clean value (e.g. string or float).\n"
        f"5. Return the result as a structured object with `filled_data` (a list of objects each containing `key` and `value`) "
        f"and `unfilled_fields` (a list of keys from the template for which no evidence was found)."
    )
    # 4. Async Extraction
    completion = await client.beta.chat.completions.parse(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": "You are a professional Data Entry Clerk. Extract data accurately based on a provided template.",
            },
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                    },
                ],
            },
        ],
        response_format=V2TemplateResponse,
    )
    result = completion.choices[0].message.parsed
    # 5. Logging for Demo
    if result:
        logger.info(
            "V2 Extraction complete for %s. Fields filled: %d",
            user_name,
            len(result.filled_data),
        )
    return result