# openai_service.py
  1. import os
  2. import base64
  3. import io
  4. import logging
  5. import json
  6. from openai import AsyncOpenAI
  7. from dotenv import load_dotenv
  8. from PIL import Image
  9. import fitz # PyMuPDF
  10. from backend.schemas import ExtractionResponse, V2TemplateResponse
# Load environment variables (e.g. OPENAI_API_KEY) from a local .env file.
load_dotenv()

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Shared async OpenAI client; the API key is read from the environment.
# NOTE(review): os.getenv returns None if the key is missing — the failure
# then surfaces on the first API call, not at import time.
client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Upload-size controls: images are downscaled to fit within MAX_IMAGE_SIZE
# and re-encoded as JPEG at IMAGE_QUALITY before being sent to the model.
MAX_IMAGE_SIZE = (2000, 2000)
IMAGE_QUALITY = 85
  18. def compress_image(image_content: bytes) -> bytes:
  19. """Resizes and compresses the image to reduce upload size."""
  20. img = Image.open(io.BytesIO(image_content))
  21. # Convert to RGB if necessary (to save as JPEG)
  22. if img.mode in ("RGBA", "P"):
  23. img = img.convert("RGB")
  24. # Resize if larger than max dimensions
  25. img.thumbnail(MAX_IMAGE_SIZE, Image.Resampling.LANCZOS)
  26. # Save to bytes
  27. output_buffer = io.BytesIO()
  28. img.save(output_buffer, format="JPEG", quality=IMAGE_QUALITY, optimize=True)
  29. return output_buffer.getvalue()
  30. def convert_to_image_bytes(content: bytes, content_type: str) -> bytes:
  31. """If PDF, convert first page to image using PyMuPDF. Otherwise return original."""
  32. if content_type == "application/pdf":
  33. try:
  34. # Open PDF from bytes
  35. doc = fitz.open(stream=content, filetype="pdf")
  36. if len(doc) == 0:
  37. return content
  38. # Get the first page
  39. page = doc[0]
  40. # Render page to a pixmap (300 DPI: scale=300/72 = 4.166...)
  41. # For high quality OCR, a scale of 2.0 or 3.0 is usually sufficient
  42. matrix = fitz.Matrix(2.0, 2.0)
  43. pix = page.get_pixmap(matrix=matrix)
  44. # Convert pixmap to JPEG bytes
  45. img_data = pix.tobytes("jpeg")
  46. doc.close()
  47. return img_data
  48. except Exception as e:
  49. logger.error(f"PDF conversion failed: {str(e)}")
  50. return content
  51. return content
  52. async def extract_receipt_data(image_content: bytes, content_type: str, user_name: str, department: str) -> ExtractionResponse:
  53. # 1. Convert if PDF
  54. raw_image = convert_to_image_bytes(image_content, content_type)
  55. # 2. Compress Image
  56. compressed_content = compress_image(raw_image)
  57. base64_image = base64.b64encode(compressed_content).decode("utf-8")
  58. # 2. Refined Prompt
  59. prompt = (
  60. f"You are a cautious auditor helping an HR department in Malaysia. "
  61. f"Extract the requested fields from the provided medical receipt image. "
  62. f"The employee submitting this is {user_name} from {department}. "
  63. f"IMPORTANT: The context is Malaysia (MYR). "
  64. f"For the fields `receipt_ref_no` and `clinic_reg_no`, only provide a value if you can read it clearly without any guessing or inference. If the text is smudged, handwritten, or ambiguous, return `null`. "
  65. f"Map the clinic/services to a `claim_category` from: [General, Dental, Optical, Specialist] based on the clinic name or invoice items. "
  66. f"Provide a 1-sentence `diagnosis_brief` summarizing the services seen (e.g. 'Fever consultation and medicine'). "
  67. f"Set `needs_manual_review` to `true` and provide a low `confidence_score` if: "
  68. f"1. The 'Total' does not match the sum of the individual items. "
  69. f"2. The receipt looks hand-written and lacks an official stamp. "
  70. f"3. The provider name is missing or the amount looks altered. "
  71. f"4. The user's name ({user_name}) is not clearly visible on the receipt. "
  72. f"IMPORTANT: Fill the `ai_reasoning` field with a 1-sentence explanation of how you identified the clinic and category."
  73. )
  74. # 3. Async Extraction
  75. completion = await client.beta.chat.completions.parse(
  76. model="gpt-4o-mini",
  77. messages=[
  78. {
  79. "role": "system",
  80. "content": "You are an HR data entry assistant. Extract medical receipt data accurately into structured JSON."
  81. },
  82. {
  83. "role": "user",
  84. "content": [
  85. {"type": "text", "text": prompt},
  86. {
  87. "type": "image_url",
  88. "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
  89. },
  90. ],
  91. }
  92. ],
  93. response_format=ExtractionResponse,
  94. )
  95. result = completion.choices[0].message.parsed
  96. # 4. Logging for Demo
  97. if result:
  98. logger.info(f"Extraction complete for {user_name}. Confidence Score: {result.confidence_score}")
  99. if result.needs_manual_review:
  100. logger.warning(f"Manual review required for receipt submitted by {user_name}")
  101. return result
  102. async def fill_form_with_template_v2(image_content: bytes, content_type: str, template_fields: dict, user_name: str, department: str) -> V2TemplateResponse:
  103. # 1. Convert if PDF
  104. raw_image = convert_to_image_bytes(image_content, content_type)
  105. # 2. Compress Image
  106. compressed_content = compress_image(raw_image)
  107. base64_image = base64.b64encode(compressed_content).decode("utf-8")
  108. # 2. V2 Prompt
  109. template_json = json.dumps(template_fields, indent=2)
  110. prompt = (
  111. f"You are a professional Data Entry Clerk helping an HR department in Malaysia. "
  112. f"You will receive a medical receipt image and a Form Template consisting of specific field names and descriptions. "
  113. f"Your task is to fill the form values based ONLY on the evidence in the image. "
  114. f"The employee is {user_name} from {department}. "
  115. f"FORM TEMPLATE (JSON): {template_json}\n\n"
  116. f"STRICT RULES:\n"
  117. f"1. If a field in the template is not explicitly visible or is ambiguous, you MUST return `null`. Do not guess.\n"
  118. f"2. For currency, assume MYR unless stated otherwise.\n"
  119. f"3. If the user's name ({user_name}) is not on the receipt, leave any name-related fields `null`.\n"
  120. f"4. For any field identified, provide a clean value (e.g. string or float).\n"
  121. f"5. Return the result as a structured object with `filled_data` (a list of objects each containing `key` and `value`) "
  122. f"and `unfilled_fields` (a list of keys from the template for which no evidence was found)."
  123. )
  124. # 3. Async Extraction
  125. completion = await client.beta.chat.completions.parse(
  126. model="gpt-4o-mini",
  127. messages=[
  128. {
  129. "role": "system",
  130. "content": "You are a professional Data Entry Clerk. Extract data accurately based on a provided template."
  131. },
  132. {
  133. "role": "user",
  134. "content": [
  135. {"type": "text", "text": prompt},
  136. {
  137. "type": "image_url",
  138. "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
  139. },
  140. ],
  141. }
  142. ],
  143. response_format=V2TemplateResponse,
  144. )
  145. result = completion.choices[0].message.parsed
  146. # 4. Logging for Demo
  147. if result:
  148. logger.info(f"V2 Extraction complete for {user_name}. Fields filled: {len(result.filled_data)}")
  149. return result