Files
Gitead dd0df4a435 feat: payment PDF extraction, import, and remittance tracking
- Add Upload Payment Documents section with Extract & Download (Excel)
  and Extract & Import (database) buttons
- PDF extractor (pdfplumber) parses MassHealth RA PDFs: two-pass
  strategy joins summary-page ICN/patient map with detail-page
  procedure data (CDT code, paid code, tooth, date, allowed amount)
- RA cover-page summary (Payee ID, RA #, Payment Amount, etc.)
  included as separate Excel sheet; numeric values written as numbers
- Backend PDF import route groups rows by Member #, finds/creates
  patient, creates Payment + ServiceLines with ICN per procedure
- Add icn, paidCode, allowedAmount fields to ServiceLine schema
- Payments table: status simplified to Paid in Full / Balance;
  adjustment auto-computed on mhPaidAmount/copayment change;
  Paid in Full and Revert buttons with confirmation dialogs
- Edit Payment modal: shows ICN, Paid Code, Allowed Amount per line
- PDF Import badge distinguishes from OCR imports in payments table

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-07 12:53:50 -04:00

197 lines
6.6 KiB
Python
Executable File

from fastapi import FastAPI, UploadFile, File, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse, JSONResponse, PlainTextResponse
from typing import List, Optional
import io
import os
import asyncio
import uvicorn
from dotenv import load_dotenv
load_dotenv()
from complete_pipeline_adapter import process_images_to_rows,rows_to_csv_bytes
from pdf_extractor import extract_ra_pdf
# Application instance shared by every route below.
app = FastAPI(
    title="Payment OCR Services API",
    description="FastAPI wrapper around the OCR pipeline (Google Vision + deskew + line grouping + extraction).",
    version="1.0.0",
)

# Concurrency control: the semaphore caps how many extraction requests run at
# once; the two counters feed the /status endpoint and are guarded by `lock`.
MAX_CONCURRENCY = int(os.getenv("MAX_CONCURRENCY", "2"))
semaphore = asyncio.Semaphore(MAX_CONCURRENCY)
lock = asyncio.Lock()
active_jobs = 0
waiting_jobs = 0

# CORS: CORS_ORIGINS is either "*" or a comma-separated list of origins.
cors_origins = os.getenv("CORS_ORIGINS", "*")
if cors_origins.strip() == "*":
    allow_origins = ["*"]
else:
    # Trim each entry and drop the empty ones left by stray commas.
    allow_origins = [entry for entry in map(str.strip, cors_origins.split(",")) if entry]

app.add_middleware(
    CORSMiddleware,
    allow_origins=allow_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)

# File extensions (lower-case, with leading dot) accepted by the endpoints.
ALLOWED_EXTS = {
    ".jpg",
    ".jpeg",
    ".png",
    ".tif",
    ".tiff",
    ".bmp",
}
ALLOWED_PDF_EXTS = {".pdf"}
# -------------------------------------------------
# Health + status
# -------------------------------------------------
@app.get("/health", response_class=PlainTextResponse)
def health():
    """Liveness probe.

    Returns a plain-text line that also reports whether the
    GOOGLE_APPLICATION_CREDENTIALS environment variable is set to a
    non-empty value.
    """
    credentials_configured = bool(os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""))
    return f"OK | GOOGLE_APPLICATION_CREDENTIALS set: {credentials_configured}"
@app.get("/status")
async def get_status():
    """Report the current job counters.

    Returns a dict with the number of active and queued jobs, the configured
    concurrency limit, and a "busy"/"idle" status string ("busy" when either
    counter is above zero).
    """
    # Snapshot both counters under the lock so they are read consistently.
    async with lock:
        running, queued = active_jobs, waiting_jobs

    if running > 0 or queued > 0:
        state = "busy"
    else:
        state = "idle"

    return {
        "active_jobs": running,
        "queued_jobs": queued,
        "max_concurrency": MAX_CONCURRENCY,
        "status": state,
    }
# -------------------------------------------------
# Helpers
# -------------------------------------------------
def _validate_files(files: List[UploadFile]):
    """Reject a request whose uploads are missing or not a supported image type.

    Args:
        files: Uploaded files from the multipart request.

    Raises:
        HTTPException: 400 when ``files`` is empty; 415 when any file's
            extension (compared case-insensitively) is not in ``ALLOWED_EXTS``.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files provided.")

    # A file without a filename has no extension and is therefore rejected.
    # Use a placeholder for it so the join below cannot raise TypeError on None.
    rejected = [
        f.filename or "<unnamed>"
        for f in files
        if os.path.splitext(f.filename or "")[1].lower() not in ALLOWED_EXTS
    ]
    if rejected:
        raise HTTPException(
            status_code=415,
            detail=f"Unsupported file types: {', '.join(rejected)}. Allowed: {', '.join(sorted(ALLOWED_EXTS))}",
        )
# -------------------------------------------------
# Endpoints
# -------------------------------------------------
@app.post("/extract/json")
async def extract_json(files: List[UploadFile] = File(...)):
    """Run the OCR pipeline on the uploaded images and return the rows as JSON.

    Args:
        files: Uploaded image files; validated by ``_validate_files``.

    Returns:
        JSONResponse with body ``{"rows": rows}`` where ``rows`` is whatever
        ``process_images_to_rows`` produced.

    Raises:
        HTTPException: 400/415 from validation; 500 when processing fails.
    """
    global waiting_jobs, active_jobs

    _validate_files(files)

    # Count the request as queued until it obtains a semaphore slot.
    async with lock:
        waiting_jobs += 1

    async with semaphore:
        async with lock:
            waiting_jobs -= 1
            active_jobs += 1
        try:
            blobs = [await f.read() for f in files]
            names = [f.filename or "upload.bin" for f in files]
            # process_images_to_rows is a synchronous call; running it on the
            # default executor keeps the event loop free to serve other
            # requests (e.g. /status, /health) while the pipeline runs.
            loop = asyncio.get_running_loop()
            rows = await loop.run_in_executor(None, process_images_to_rows, blobs, names)
            return JSONResponse(content={"rows": rows})
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Processing error: {e}") from e
        finally:
            async with lock:
                active_jobs -= 1
@app.post("/extract/csvtext", response_class=PlainTextResponse)
async def extract_csvtext(files: List[UploadFile] = File(...)):
    """Run the OCR pipeline on the uploaded images and return CSV as text.

    Args:
        files: Uploaded image files; validated by ``_validate_files``.

    Returns:
        PlainTextResponse carrying the UTF-8 decoded output of
        ``rows_to_csv_bytes`` with media type ``text/csv``.

    Raises:
        HTTPException: 400/415 from validation; 500 when processing fails.
    """
    global waiting_jobs, active_jobs

    _validate_files(files)

    # Count the request as queued until it obtains a semaphore slot.
    async with lock:
        waiting_jobs += 1

    async with semaphore:
        async with lock:
            waiting_jobs -= 1
            active_jobs += 1
        try:
            blobs = [await f.read() for f in files]
            names = [f.filename or "upload.bin" for f in files]
            # process_images_to_rows is a synchronous call; running it on the
            # default executor keeps the event loop free to serve other
            # requests while the pipeline runs.
            loop = asyncio.get_running_loop()
            rows = await loop.run_in_executor(None, process_images_to_rows, blobs, names)
            csv_bytes = rows_to_csv_bytes(rows)
            return PlainTextResponse(csv_bytes.decode("utf-8"), media_type="text/csv")
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Processing error: {e}") from e
        finally:
            async with lock:
                active_jobs -= 1
@app.post("/extract/pdf/json")
async def extract_pdf_json(files: List[UploadFile] = File(...)):
    """Extract rows and header data from uploaded PDFs and return them as JSON.

    Each file is passed to ``extract_ra_pdf``; the ``"rows"`` of every result
    are concatenated and the ``"header"`` of every result is collected in
    upload order.

    Args:
        files: Uploaded files; every one must have a ``.pdf`` extension.

    Returns:
        JSONResponse with body ``{"rows": [...], "headers": [...]}``.

    Raises:
        HTTPException: 400 when no files are supplied; 415 when any file is
            not a PDF; 500 when extraction fails.
    """
    global waiting_jobs, active_jobs

    # Same empty-upload response as the image endpoints.
    if not files:
        raise HTTPException(status_code=400, detail="No files provided.")

    # A file without a filename has no extension and is rejected; use a
    # placeholder so the join below cannot raise TypeError on None.
    rejected = [
        f.filename or "<unnamed>"
        for f in files
        if os.path.splitext(f.filename or "")[1].lower() not in ALLOWED_PDF_EXTS
    ]
    if rejected:
        raise HTTPException(status_code=415, detail=f"Only PDF files allowed. Got: {', '.join(rejected)}")

    # Count the request as queued until it obtains a semaphore slot.
    async with lock:
        waiting_jobs += 1

    async with semaphore:
        async with lock:
            waiting_jobs -= 1
            active_jobs += 1
        try:
            loop = asyncio.get_running_loop()
            all_rows = []
            all_headers = []
            for f in files:
                blob = await f.read()
                # extract_ra_pdf is a synchronous call; run it on the default
                # executor so the event loop stays responsive meanwhile.
                result = await loop.run_in_executor(
                    None, extract_ra_pdf, blob, f.filename or "upload.pdf"
                )
                all_rows.extend(result["rows"])
                all_headers.append(result["header"])
            return JSONResponse(content={"rows": all_rows, "headers": all_headers})
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"PDF extraction error: {e}") from e
        finally:
            async with lock:
                active_jobs -= 1
@app.post("/extract/csv")
async def extract_csv(files: List[UploadFile] = File(...), filename: Optional[str] = None):
    """Run the OCR pipeline on the uploaded images and return a CSV download.

    Args:
        files: Uploaded image files; validated by ``_validate_files``.
        filename: Optional name for the downloaded file. Defaults to
            ``medical_billing_extract.csv``.

    Returns:
        StreamingResponse with media type ``text/csv`` and a
        ``Content-Disposition: attachment`` header.

    Raises:
        HTTPException: 400/415 from validation; 500 when processing fails.
    """
    global waiting_jobs, active_jobs

    _validate_files(files)

    # The name is caller-supplied and ends up inside a quoted header value:
    # keep only the last path component and drop quotes, backslashes and
    # non-printable characters (CR/LF included) so it cannot break the header.
    default_name = "medical_billing_extract.csv"
    requested = os.path.basename(filename or default_name)
    out_name = "".join(
        ch for ch in requested if ch.isprintable() and ch not in '"\\'
    ) or default_name

    # Count the request as queued until it obtains a semaphore slot.
    async with lock:
        waiting_jobs += 1

    async with semaphore:
        async with lock:
            waiting_jobs -= 1
            active_jobs += 1
        try:
            blobs = [await f.read() for f in files]
            names = [f.filename or "upload.bin" for f in files]
            # process_images_to_rows is a synchronous call; running it on the
            # default executor keeps the event loop free to serve other
            # requests while the pipeline runs.
            loop = asyncio.get_running_loop()
            rows = await loop.run_in_executor(None, process_images_to_rows, blobs, names)
            csv_bytes = rows_to_csv_bytes(rows)
            return StreamingResponse(
                io.BytesIO(csv_bytes),
                media_type="text/csv",
                headers={"Content-Disposition": f'attachment; filename="{out_name}"'},
            )
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Processing error: {e}") from e
        finally:
            async with lock:
                active_jobs -= 1
# -------------------------------------------------
# Entrypoint (same pattern as your selenium app)
# -------------------------------------------------
if __name__ == "__main__":
    # HOST is passed through unchanged (None when unset).
    # NOTE(review): confirm how uvicorn treats host=None, or set an explicit default.
    host = os.getenv("HOST")
    # int(None) raises TypeError, so fall back to port 8000 when PORT is
    # unset or empty instead of crashing at startup.
    port = int(os.getenv("PORT") or "8000")
    uvicorn.run(app, host=host, port=port)