structured well
This commit is contained in:
0
apps/PaymentOCRService/app/__init__.py
Normal file
0
apps/PaymentOCRService/app/__init__.py
Normal file
81
apps/PaymentOCRService/app/main.py
Normal file
81
apps/PaymentOCRService/app/main.py
Normal file
@@ -0,0 +1,81 @@
|
||||
from fastapi import FastAPI, UploadFile, File, HTTPException
|
||||
from fastapi.responses import StreamingResponse, JSONResponse, PlainTextResponse
|
||||
from typing import List, Optional
|
||||
import io
|
||||
import os
|
||||
|
||||
from app.pipeline_adapter import (
|
||||
process_images_to_rows,
|
||||
rows_to_csv_bytes,
|
||||
)
|
||||
|
||||
# Application object: wraps the full OCR pipeline behind an HTTP API.
app = FastAPI(
    title="Medical Billing OCR API",
    description="FastAPI wrapper around the complete OCR pipeline (Google Vision + deskew + line clustering + extraction).",
    version="1.0.0",
)

# Image extensions the pipeline is known to handle; uploads are screened
# against this set before any processing starts.
ALLOWED_EXTS = set((".jpg", ".jpeg", ".png", ".tif", ".tiff", ".bmp"))
|
||||
|
||||
@app.get("/health", response_class=PlainTextResponse)
def health():
    """Liveness probe that also reports whether the GCP credentials env var is set."""
    has_creds = bool(os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""))
    return f"OK | GOOGLE_APPLICATION_CREDENTIALS set: {has_creds}"
|
||||
|
||||
@app.post("/extract/json")
async def extract_json(files: List[UploadFile] = File(...)):
    """
    Run the OCR pipeline on uploaded images and return the extracted rows as JSON.

    Args:
        files: one or more image uploads; extensions must be in ALLOWED_EXTS.

    Returns:
        JSONResponse with {"rows": [...]}, one dict per extracted record.

    Raises:
        HTTPException: 400 when no files are sent, 415 for unsupported
            extensions, 500 when the pipeline itself fails.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files provided.")

    # Validate extensions early (not bulletproof, but helpful).
    # FIX: substitute a placeholder for a missing filename — the original
    # collected raw f.filename values, so a None filename made the
    # ', '.join(bad) below raise TypeError instead of returning 415.
    bad = [
        f.filename or "<unnamed>"
        for f in files
        if os.path.splitext(f.filename or "")[1].lower() not in ALLOWED_EXTS
    ]
    if bad:
        raise HTTPException(
            status_code=415,
            detail=f"Unsupported file types: {', '.join(bad)}. Allowed: {', '.join(sorted(ALLOWED_EXTS))}"
        )

    # Read blobs in-memory; the pipeline adapter deals with temp files.
    blobs = []
    filenames = []
    for f in files:
        blobs.append(await f.read())
        filenames.append(f.filename or "upload.bin")

    try:
        rows = process_images_to_rows(blobs, filenames)
        # rows is a list[dict] containing the emitted columns (Patient Name, etc.)
        return JSONResponse(content={"rows": rows})
    except Exception as e:
        # FIX: chain with `from e` so the original traceback survives into logs.
        raise HTTPException(status_code=500, detail=f"Processing error: {e}") from e
|
||||
|
||||
@app.post("/extract/csv")
async def extract_csv(files: List[UploadFile] = File(...), filename: Optional[str] = None):
    """
    Run the OCR pipeline on uploaded images and stream the rows back as a CSV file.

    Args:
        files: one or more image uploads; extensions must be in ALLOWED_EXTS.
        filename: optional download name for the generated CSV.

    Returns:
        StreamingResponse with media type text/csv and an attachment header.

    Raises:
        HTTPException: 400 when no files are sent, 415 for unsupported
            extensions, 500 when the pipeline itself fails.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files provided.")

    # Validate extensions early (not bulletproof, but helpful).
    # FIX: substitute a placeholder for a missing filename — the original
    # collected raw f.filename values, so a None filename made the
    # ', '.join(bad) below raise TypeError instead of returning 415.
    bad = [
        f.filename or "<unnamed>"
        for f in files
        if os.path.splitext(f.filename or "")[1].lower() not in ALLOWED_EXTS
    ]
    if bad:
        raise HTTPException(
            status_code=415,
            detail=f"Unsupported file types: {', '.join(bad)}. Allowed: {', '.join(sorted(ALLOWED_EXTS))}"
        )

    # Read blobs in-memory; the pipeline adapter deals with temp files.
    blobs = []
    filenames = []
    for f in files:
        blobs.append(await f.read())
        filenames.append(f.filename or "upload.bin")

    try:
        rows = process_images_to_rows(blobs, filenames)
        csv_bytes = rows_to_csv_bytes(rows)
        # FIX: `filename` is caller-controlled and interpolated into a
        # response header — strip quotes/CR/LF to prevent breaking out of
        # the quoted Content-Disposition value (header injection).
        raw_name = filename or "medical_billing_extract.csv"
        out_name = raw_name.replace('"', "").replace("\r", "").replace("\n", "")
        return StreamingResponse(
            io.BytesIO(csv_bytes),
            media_type="text/csv",
            headers={"Content-Disposition": f'attachment; filename="{out_name}"'}
        )
    except Exception as e:
        # FIX: chain with `from e` so the original traceback survives into logs.
        raise HTTPException(status_code=500, detail=f"Processing error: {e}") from e
|
||||
77
apps/PaymentOCRService/app/pipeline_adapter.py
Normal file
77
apps/PaymentOCRService/app/pipeline_adapter.py
Normal file
@@ -0,0 +1,77 @@
|
||||
import os
|
||||
import tempfile
|
||||
from typing import List, Dict
|
||||
import pandas as pd
|
||||
|
||||
# Import your existing functions directly from complete_pipeline.py
|
||||
from complete_pipeline import (
|
||||
smart_deskew_with_lines,
|
||||
extract_all_clients_from_lines,
|
||||
)
|
||||
|
||||
def _process_single_image_bytes(blob: bytes, display_name: str) -> List[Dict]:
    """
    Write *blob* to a temporary file (OpenCV and Google Vision read from a
    path), run the existing pipeline functions on it, and return the
    extracted row dicts.
    """
    # Keep the original extension so downstream decoders pick the right codec.
    suffix = os.path.splitext(display_name)[1] or ".jpg"
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as handle:
            handle.write(blob)
            tmp_path = handle.name

        # Deskew + Google Vision OCR + post-OCR line grouping.
        info = smart_deskew_with_lines(tmp_path, None, clamp_deg=30.0, use_vision=True)
        post_lines = info.get("post_lines", []) if info else []
        rows = extract_all_clients_from_lines(post_lines) if post_lines else []

        # Tag every row with the originating upload (mirrors the Streamlit app).
        for row in rows:
            row["Source File"] = display_name

        # When nothing parsed, emit a sentinel row so callers can see the failure.
        if not rows:
            rows.append({
                'Patient Name': "", 'Patient ID': "", 'ICN': "", 'CDT Code': "",
                'Tooth': "", 'Date SVC': "",
                'Billed Amount': "", 'Allowed Amount': "", 'Paid Amount': "",
                'Extraction Success': False, 'Source File': display_name,
            })

        return rows

    finally:
        # Best-effort cleanup of the temp file; never mask pipeline errors.
        if tmp_path:
            try:
                os.remove(tmp_path)
            except Exception:
                pass
|
||||
|
||||
def process_images_to_rows(blobs: List[bytes], filenames: List[str]) -> List[Dict]:
    """
    Public API used by the FastAPI routes.

    Args:
        blobs: raw image bytes, one entry per upload.
        filenames: display names matching *blobs* (used for the
            "Source File" column).

    Returns:
        Concatenated row dicts extracted from every image.

    Raises:
        ValueError: if *blobs* and *filenames* differ in length. The
            original relied on zip(), which silently dropped the unmatched
            tail — images could vanish from the output without any error.
    """
    if len(blobs) != len(filenames):
        raise ValueError(
            f"blobs and filenames length mismatch: {len(blobs)} != {len(filenames)}"
        )

    all_rows: List[Dict] = []
    for blob, name in zip(blobs, filenames):
        all_rows.extend(_process_single_image_bytes(blob, name))

    return all_rows
|
||||
|
||||
def rows_to_csv_bytes(rows: List[Dict]) -> bytes:
    """
    Serialize pipeline rows to UTF-8 CSV bytes for the frontend table view.
    """
    frame = pd.DataFrame(rows)

    # Preferred column order, mirroring the existing Excel export.
    preferred = [
        'Patient Name', 'Patient ID', 'ICN', 'CDT Code', 'Tooth', 'Date SVC',
        'Billed Amount', 'Allowed Amount', 'Paid Amount',
        'Extraction Success', 'Source File'
    ]

    # Known columns first (in preferred order), then any extras as-is.
    ordered = [name for name in preferred if name in frame.columns]
    ordered += [name for name in frame.columns if name not in preferred]

    return frame[ordered].to_csv(index=False).encode("utf-8")
|
||||
Reference in New Issue
Block a user