openai_service.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
import base64
import io
import json
import logging
import os
from typing import Optional

from dotenv import load_dotenv
from openai import AsyncOpenAI
from PIL import Image, ImageOps

from backend.schemas import ExtractionResponse, V2TemplateResponse
  10. load_dotenv()
  11. # Setup logging
  12. logging.basicConfig(level=logging.INFO)
  13. logger = logging.getLogger(__name__)
  14. client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
  15. MAX_IMAGE_SIZE = (2000, 2000)
  16. IMAGE_QUALITY = 85
  17. def compress_image(image_content: bytes) -> bytes:
  18. """Resizes and compresses the image to reduce upload size."""
  19. img = Image.open(io.BytesIO(image_content))
  20. # Convert to RGB if necessary (to save as JPEG)
  21. if img.mode in ("RGBA", "P"):
  22. img = img.convert("RGB")
  23. # Resize if larger than max dimensions
  24. img.thumbnail(MAX_IMAGE_SIZE, Image.Resampling.LANCZOS)
  25. # Save to bytes
  26. output_buffer = io.BytesIO()
  27. img.save(output_buffer, format="JPEG", quality=IMAGE_QUALITY, optimize=True)
  28. return output_buffer.getvalue()
  29. async def extract_receipt_data(image_content: bytes, user_name: str, department: str) -> ExtractionResponse:
  30. # 1. Compress Image
  31. compressed_content = compress_image(image_content)
  32. base64_image = base64.b64encode(compressed_content).decode("utf-8")
  33. # 2. Refined Prompt
  34. prompt = (
  35. f"You are a cautious auditor helping an HR department in Malaysia. "
  36. f"Extract the requested fields from the provided medical receipt image. "
  37. f"The employee submitting this is {user_name} from {department}. "
  38. f"IMPORTANT: The context is Malaysia (MYR). "
  39. f"For the fields `receipt_ref_no` and `clinic_reg_no`, only provide a value if you can read it clearly without any guessing or inference. If the text is smudged, handwritten, or ambiguous, return `null`. "
  40. f"Map the clinic/services to a `claim_category` from: [General, Dental, Optical, Specialist] based on the clinic name or invoice items. "
  41. f"Provide a 1-sentence `diagnosis_brief` summarizing the services seen (e.g. 'Fever consultation and medicine'). "
  42. f"Set `needs_manual_review` to `true` and provide a low `confidence_score` if: "
  43. f"1. The 'Total' does not match the sum of the individual items. "
  44. f"2. The receipt looks hand-written and lacks an official stamp. "
  45. f"3. The provider name is missing or the amount looks altered. "
  46. f"4. The user's name ({user_name}) is not clearly visible on the receipt. "
  47. f"IMPORTANT: Fill the `ai_reasoning` field with a 1-sentence explanation of how you identified the clinic and category."
  48. )
  49. # 3. Async Extraction
  50. completion = await client.beta.chat.completions.parse(
  51. model="gpt-4o-mini",
  52. messages=[
  53. {
  54. "role": "system",
  55. "content": "You are an HR data entry assistant. Extract medical receipt data accurately into structured JSON."
  56. },
  57. {
  58. "role": "user",
  59. "content": [
  60. {"type": "text", "text": prompt},
  61. {
  62. "type": "image_url",
  63. "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
  64. },
  65. ],
  66. }
  67. ],
  68. response_format=ExtractionResponse,
  69. )
  70. result = completion.choices[0].message.parsed
  71. # 4. Logging for Demo
  72. if result:
  73. logger.info(f"Extraction complete for {user_name}. Confidence Score: {result.confidence_score}")
  74. if result.needs_manual_review:
  75. logger.warning(f"Manual review required for receipt submitted by {user_name}")
  76. return result
  77. async def fill_form_with_template_v2(image_content: bytes, template_fields: dict, user_name: str, department: str) -> V2TemplateResponse:
  78. # 1. Compress Image
  79. compressed_content = compress_image(image_content)
  80. base64_image = base64.b64encode(compressed_content).decode("utf-8")
  81. # 2. V2 Prompt
  82. template_json = json.dumps(template_fields, indent=2)
  83. prompt = (
  84. f"You are a professional Data Entry Clerk helping an HR department in Malaysia. "
  85. f"You will receive a medical receipt image and a Form Template consisting of specific field names and descriptions. "
  86. f"Your task is to fill the form values based ONLY on the evidence in the image. "
  87. f"The employee is {user_name} from {department}. "
  88. f"FORM TEMPLATE (JSON): {template_json}\n\n"
  89. f"STRICT RULES:\n"
  90. f"1. If a field in the template is not explicitly visible or is ambiguous, you MUST return `null`. Do not guess.\n"
  91. f"2. For currency, assume MYR unless stated otherwise.\n"
  92. f"3. If the user's name ({user_name}) is not on the receipt, leave any name-related fields `null`.\n"
  93. f"4. For any field identified, provide a clean value (e.g. string or float).\n"
  94. f"5. Return the result as a structured object with `filled_data` (a list of objects each containing `key` and `value`) "
  95. f"and `unfilled_fields` (a list of keys from the template for which no evidence was found)."
  96. )
  97. # 3. Async Extraction
  98. completion = await client.beta.chat.completions.parse(
  99. model="gpt-4o-mini",
  100. messages=[
  101. {
  102. "role": "system",
  103. "content": "You are a professional Data Entry Clerk. Extract data accurately based on a provided template."
  104. },
  105. {
  106. "role": "user",
  107. "content": [
  108. {"type": "text", "text": prompt},
  109. {
  110. "type": "image_url",
  111. "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
  112. },
  113. ],
  114. }
  115. ],
  116. response_format=V2TemplateResponse,
  117. )
  118. result = completion.choices[0].message.parsed
  119. # 4. Logging for Demo
  120. if result:
  121. logger.info(f"V2 Extraction complete for {user_name}. Fields filled: {len(result.filled_data)}")
  122. return result