feat(ocr payment page) - added backend routes on app
This commit is contained in:
@@ -0,0 +1,3 @@
|
||||
GOOGLE_APPLICATION_CREDENTIALS=google-credentials.json
|
||||
HOST="0.0.0.0"
|
||||
PORT="5003"
|
||||
1
apps/PaymentOCRService/.gitignore
vendored
Normal file
1
apps/PaymentOCRService/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
# Google service-account key. The environment file in this commit sets
# GOOGLE_APPLICATION_CREDENTIALS=google-credentials.json (hyphen), so ignore
# both spellings to keep the key out of version control.
google-credentials.json
google_credentials.json
|
||||
Binary file not shown.
Binary file not shown.
@@ -1,81 +0,0 @@
|
||||
from fastapi import FastAPI, UploadFile, File, HTTPException
|
||||
from fastapi.responses import StreamingResponse, JSONResponse, PlainTextResponse
|
||||
from typing import List, Optional
|
||||
import io
|
||||
import os
|
||||
|
||||
from app.pipeline_adapter import (
|
||||
process_images_to_rows,
|
||||
rows_to_csv_bytes,
|
||||
)
|
||||
|
||||
# FastAPI application object; the metadata below is what FastAPI is given for this service.
app = FastAPI(
    title="Medical Billing OCR API",
    description=(
        "FastAPI wrapper around the complete OCR pipeline "
        "(Google Vision + deskew + line clustering + extraction)."
    ),
    version="1.0.0",
)

# File extensions (lower-case, with leading dot) accepted by the extract endpoints.
ALLOWED_EXTS = {
    ".jpg",
    ".jpeg",
    ".png",
    ".tif",
    ".tiff",
    ".bmp",
}
|
||||
|
||||
@app.get("/health", response_class=PlainTextResponse)
def health():
    """Liveness probe that also reports whether GOOGLE_APPLICATION_CREDENTIALS is set.

    Returns:
        A plain-text line of the form ``OK | GOOGLE_APPLICATION_CREDENTIALS set: <bool>``.
    """
    has_creds = bool(os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""))
    return f"OK | GOOGLE_APPLICATION_CREDENTIALS set: {has_creds}"
|
||||
|
||||
@app.post("/extract/json")
async def extract_json(files: List[UploadFile] = File(...)):
    """Run the OCR pipeline on the uploaded images and return the rows as JSON.

    Args:
        files: Uploaded image files (multipart form field ``files``).

    Returns:
        JSONResponse with body ``{"rows": rows}``, where ``rows`` is whatever
        ``process_images_to_rows`` returns.

    Raises:
        HTTPException: 400 if no files were sent, 415 if any file has an
            extension outside ``ALLOWED_EXTS``, 500 if reading or processing fails.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files provided.")

    # Validate extensions early (not bulletproof, but helpful).
    # An upload may have no filename (None); use a placeholder so the join
    # below cannot raise TypeError and mask the 415 with a 500.
    bad = [
        f.filename or "<unnamed>"
        for f in files
        if os.path.splitext(f.filename or "")[1].lower() not in ALLOWED_EXTS
    ]
    if bad:
        raise HTTPException(
            status_code=415,
            detail=f"Unsupported file types: {', '.join(bad)}. Allowed: {', '.join(sorted(ALLOWED_EXTS))}",
        )

    # Read blobs in-memory
    blobs = []
    filenames = []
    for f in files:
        blobs.append(await f.read())
        filenames.append(f.filename or "upload.bin")

    try:
        rows = process_images_to_rows(blobs, filenames)
        return JSONResponse(content={"rows": rows})
    except Exception as e:
        # Top-level boundary: surface any pipeline failure as a 500, keeping the cause chained.
        raise HTTPException(status_code=500, detail=f"Processing error: {e}") from e
|
||||
|
||||
@app.post("/extract/csv")
async def extract_csv(files: List[UploadFile] = File(...), filename: Optional[str] = None):
    """Run the OCR pipeline on the uploaded images and return the rows as a CSV download.

    Args:
        files: Uploaded image files (multipart form field ``files``).
        filename: Optional name for the downloaded file; defaults to
            ``medical_billing_extract.csv``.

    Returns:
        StreamingResponse with media type ``text/csv`` and an ``attachment``
        Content-Disposition header.

    Raises:
        HTTPException: 400 if no files were sent, 415 if any file has an
            extension outside ``ALLOWED_EXTS``, 500 if reading or processing fails.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files provided.")

    # An upload may have no filename (None); use a placeholder so the join
    # below cannot raise TypeError and mask the 415 with a 500.
    bad = [
        f.filename or "<unnamed>"
        for f in files
        if os.path.splitext(f.filename or "")[1].lower() not in ALLOWED_EXTS
    ]
    if bad:
        raise HTTPException(
            status_code=415,
            detail=f"Unsupported file types: {', '.join(bad)}. Allowed: {', '.join(sorted(ALLOWED_EXTS))}",
        )

    default_name = "medical_billing_extract.csv"
    # The name comes from the caller and ends up inside a quoted header value:
    # drop quotes, backslashes, control characters (CR/LF included) and anything
    # outside latin-1 so it cannot break or inject into the header.
    out_name = "".join(
        ch
        for ch in (filename or default_name)
        if ch.isprintable() and ch not in '"\\' and ord(ch) < 256
    ) or default_name

    blobs = []
    filenames = []
    for f in files:
        blobs.append(await f.read())
        filenames.append(f.filename or "upload.bin")

    try:
        rows = process_images_to_rows(blobs, filenames)
        csv_bytes = rows_to_csv_bytes(rows)
        return StreamingResponse(
            io.BytesIO(csv_bytes),
            media_type="text/csv",
            headers={"Content-Disposition": f'attachment; filename="{out_name}"'},
        )
    except Exception as e:
        # Top-level boundary: surface any pipeline failure as a 500, keeping the cause chained.
        raise HTTPException(status_code=500, detail=f"Processing error: {e}") from e
|
||||
@@ -2,6 +2,8 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
ALL IS GENERATED BY REPLIT:
|
||||
|
||||
End-to-end local pipeline (single script)
|
||||
|
||||
- One Google Vision pass per image (DOCUMENT_TEXT_DETECTION)
|
||||
@@ -4,10 +4,7 @@ from typing import List, Dict
|
||||
import pandas as pd
|
||||
|
||||
# Import your existing functions directly from complete_pipeline.py
|
||||
from complete_pipeline import (
|
||||
smart_deskew_with_lines,
|
||||
extract_all_clients_from_lines,
|
||||
)
|
||||
from complete_pipeline import smart_deskew_with_lines, extract_all_clients_from_lines
|
||||
|
||||
def _process_single_image_bytes(blob: bytes, display_name: str) -> List[Dict]:
|
||||
"""
|
||||
168
apps/PaymentOCRService/main.py
Normal file
168
apps/PaymentOCRService/main.py
Normal file
@@ -0,0 +1,168 @@
|
||||
from fastapi import FastAPI, UploadFile, File, HTTPException, Request
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import StreamingResponse, JSONResponse, PlainTextResponse
|
||||
from typing import List, Optional
|
||||
import io
|
||||
import os
|
||||
import asyncio
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv() # loads .env (GOOGLE_APPLICATION_CREDENTIALS, HOST, PORT, etc.)
|
||||
|
||||
# Your adapter that calls the pipeline
|
||||
from complete_pipeline_adapter import process_images_to_rows,rows_to_csv_bytes
|
||||
|
||||
# -------------------------------------------------
|
||||
# App + concurrency controls (similar to your other app)
|
||||
# -------------------------------------------------
|
||||
app = FastAPI(
    title="Payment OCR Services API",
    description="FastAPI wrapper around the OCR pipeline (Google Vision + deskew + line grouping + extraction).",
    version="1.0.0",
)

# Concurrency/semaphore (optional but useful for OCR).
# Clamped to at least 1: a value of 0 would build a semaphore that can never be
# acquired (every request would wait forever), and a negative value makes
# asyncio.Semaphore raise ValueError at import time.
MAX_CONCURRENCY = max(1, int(os.getenv("MAX_CONCURRENCY", "2")))
semaphore = asyncio.Semaphore(MAX_CONCURRENCY)

# Job counters reported by /status; `lock` guards every read and write of them.
active_jobs = 0
waiting_jobs = 0
lock = asyncio.Lock()

# CORS: CORS_ORIGINS is either "*" (the default, any origin) or a
# comma-separated list of allowed origins; blank entries are ignored.
cors_origins = os.getenv("CORS_ORIGINS", "*")
allow_origins = ["*"] if cors_origins.strip() == "*" else [o.strip() for o in cors_origins.split(",") if o.strip()]
app.add_middleware(
    CORSMiddleware,
    allow_origins=allow_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)

# File extensions (lower-case, with leading dot) accepted by the extract endpoints.
ALLOWED_EXTS = {".jpg", ".jpeg", ".png", ".tif", ".tiff", ".bmp"}
|
||||
|
||||
# -------------------------------------------------
|
||||
# Health + status
|
||||
# -------------------------------------------------
|
||||
@app.get("/health", response_class=PlainTextResponse)
def health():
    """Liveness probe that also reports whether GOOGLE_APPLICATION_CREDENTIALS is set.

    Returns:
        A plain-text line of the form ``OK | GOOGLE_APPLICATION_CREDENTIALS set: <bool>``.
    """
    has_creds = bool(os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""))
    return f"OK | GOOGLE_APPLICATION_CREDENTIALS set: {has_creds}"
|
||||
|
||||
@app.get("/status")
async def get_status():
    """Report the current job counters and an overall busy/idle flag.

    Returns:
        A dict with ``active_jobs``, ``queued_jobs``, ``max_concurrency`` and
        ``status`` ("busy" when either counter is above zero, otherwise "idle").
    """
    async with lock:
        # Snapshot both counters under the lock so the reported values are consistent.
        running, queued = active_jobs, waiting_jobs
        state = "idle" if running <= 0 and queued <= 0 else "busy"
        snapshot = {
            "active_jobs": running,
            "queued_jobs": queued,
            "max_concurrency": MAX_CONCURRENCY,
            "status": state,
        }
        return snapshot
|
||||
|
||||
# -------------------------------------------------
|
||||
# Helpers
|
||||
# -------------------------------------------------
|
||||
def _validate_files(files: List[UploadFile]):
    """Reject the request unless every upload has an allowed file extension.

    Only the extension of the client-supplied filename is checked
    (case-insensitively); file contents are not inspected.

    Args:
        files: The uploaded files of the current request.

    Raises:
        HTTPException: 400 if ``files`` is empty, 415 if any file has an
            extension outside ``ALLOWED_EXTS``.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files provided.")

    # An upload may have no filename (None). Report it with a placeholder:
    # str.join raises TypeError on None, which would turn this 415 into a 500.
    rejected = [
        f.filename or "<unnamed>"
        for f in files
        if os.path.splitext(f.filename or "")[1].lower() not in ALLOWED_EXTS
    ]
    if rejected:
        raise HTTPException(
            status_code=415,
            detail=f"Unsupported file types: {', '.join(rejected)}. Allowed: {', '.join(sorted(ALLOWED_EXTS))}",
        )
|
||||
|
||||
# -------------------------------------------------
|
||||
# Endpoints
|
||||
# -------------------------------------------------
|
||||
async def _run_ocr_job(files: List[UploadFile], build_response):
    """Run the OCR pipeline for one request inside a concurrency slot.

    Keeps ``waiting_jobs`` / ``active_jobs`` up to date while the request is
    queued on, and then holding, the module-level semaphore.

    Args:
        files: Already-validated uploads of the current request.
        build_response: Callable that receives the rows returned by
            ``process_images_to_rows`` and returns the response object.

    Returns:
        Whatever ``build_response`` returns.

    Raises:
        HTTPException: 500 if reading the uploads, running the pipeline or
            building the response fails.
    """
    global waiting_jobs, active_jobs

    async with lock:
        waiting_jobs += 1
    queued = True
    try:
        async with semaphore:
            async with lock:
                waiting_jobs -= 1
                active_jobs += 1
                queued = False
            try:
                blobs = [await f.read() for f in files]
                names = [f.filename or "upload.bin" for f in files]
                # The pipeline is a blocking call. Run it in a worker thread so the
                # event loop keeps serving /status and other requests, and so the
                # semaphore really allows MAX_CONCURRENCY jobs side by side.
                # NOTE(review): assumes process_images_to_rows is safe to call from
                # several threads at once - confirm against the pipeline module.
                rows = await asyncio.to_thread(process_images_to_rows, blobs, names)
                return build_response(rows)
            except Exception as e:
                # Top-level boundary: surface any failure as a 500, keeping the cause chained.
                raise HTTPException(status_code=500, detail=f"Processing error: {e}") from e
            finally:
                async with lock:
                    active_jobs -= 1
    finally:
        # If the request was cancelled while still queued (e.g. on client
        # disconnect), undo the queue count so /status does not drift upwards.
        if queued:
            async with lock:
                waiting_jobs -= 1


@app.post("/extract/json")
async def extract_json(files: List[UploadFile] = File(...)):
    """Run the OCR pipeline on the uploaded images and return the rows as JSON.

    Args:
        files: Uploaded image files (multipart form field ``files``).

    Returns:
        JSONResponse with body ``{"rows": rows}``.

    Raises:
        HTTPException: 400/415 from ``_validate_files``; 500 on processing errors.
    """
    _validate_files(files)
    return await _run_ocr_job(files, lambda rows: JSONResponse(content={"rows": rows}))


@app.post("/extract/csvtext", response_class=PlainTextResponse)
async def extract_csvtext(files: List[UploadFile] = File(...)):
    """Run the OCR pipeline on the uploaded images and return the CSV inline as text.

    Args:
        files: Uploaded image files (multipart form field ``files``).

    Returns:
        PlainTextResponse carrying the UTF-8 decoded CSV with media type ``text/csv``.

    Raises:
        HTTPException: 400/415 from ``_validate_files``; 500 on processing errors.
    """
    _validate_files(files)

    def as_csv_text(rows):
        csv_bytes = rows_to_csv_bytes(rows)
        return PlainTextResponse(csv_bytes.decode("utf-8"), media_type="text/csv")

    return await _run_ocr_job(files, as_csv_text)


@app.post("/extract/csv")
async def extract_csv(files: List[UploadFile] = File(...), filename: Optional[str] = None):
    """Run the OCR pipeline on the uploaded images and return the rows as a CSV download.

    Args:
        files: Uploaded image files (multipart form field ``files``).
        filename: Optional name for the downloaded file; defaults to
            ``medical_billing_extract.csv``.

    Returns:
        StreamingResponse with media type ``text/csv`` and an ``attachment``
        Content-Disposition header.

    Raises:
        HTTPException: 400/415 from ``_validate_files``; 500 on processing errors.
    """
    _validate_files(files)

    default_name = "medical_billing_extract.csv"
    # The name comes from the caller and ends up inside a quoted header value:
    # drop quotes, backslashes, control characters (CR/LF included) and anything
    # outside latin-1 so it cannot break or inject into the header.
    out_name = "".join(
        ch
        for ch in (filename or default_name)
        if ch.isprintable() and ch not in '"\\' and ord(ch) < 256
    ) or default_name

    def as_csv_download(rows):
        csv_bytes = rows_to_csv_bytes(rows)
        return StreamingResponse(
            io.BytesIO(csv_bytes),
            media_type="text/csv",
            headers={"Content-Disposition": f'attachment; filename="{out_name}"'},
        )

    return await _run_ocr_job(files, as_csv_download)
|
||||
|
||||
# -------------------------------------------------
|
||||
# Entrypoint (same pattern as your selenium app)
|
||||
# -------------------------------------------------
|
||||
if __name__ == "__main__":
    import uvicorn

    # Fall back to sane defaults when the variables are missing from the
    # environment; int(None) would otherwise raise TypeError before the server
    # starts. 5003 matches the PORT value shipped in this service's env file.
    host = os.getenv("HOST", "127.0.0.1")
    port = int(os.getenv("PORT", "5003"))
    reload_flag = os.getenv("RELOAD", "false").lower() == "true"

    # uvicorn only supports reload when the app is given as an import string,
    # so hand it "main:app" in that case and the app object otherwise.
    # NOTE(review): assumes this module is importable as `main` - confirm.
    target = "main:app" if reload_flag else app
    uvicorn.run(target, host=host, port=port, reload=reload_flag)
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"name": "pdfservice",
|
||||
"name": "paymentocrservice",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"postinstall": "pip install -r requirements.txt",
|
||||
|
||||
@@ -1,10 +1,26 @@
|
||||
fastapi
|
||||
uvicorn[standard]
|
||||
google-cloud-vision
|
||||
opencv-python-headless
|
||||
pytesseract
|
||||
pillow
|
||||
pandas
|
||||
openpyxl
|
||||
numpy
|
||||
python-multipart
|
||||
annotated-types==0.7.0
|
||||
anyio==4.10.0
|
||||
click==8.2.1
|
||||
colorama==0.4.6
|
||||
et_xmlfile==2.0.0
|
||||
fastapi==0.116.1
|
||||
h11==0.16.0
|
||||
idna==3.10
|
||||
numpy==2.2.6
|
||||
google-cloud-vision>=3.10.2
|
||||
opencv-python==4.12.0.88
|
||||
openpyxl==3.1.5
|
||||
pandas==2.3.2
|
||||
pydantic==2.11.7
|
||||
pydantic_core==2.33.2
|
||||
python-dateutil==2.9.0.post0
|
||||
python-dotenv==1.1.1
|
||||
pytz==2025.2
|
||||
six==1.17.0
|
||||
sniffio==1.3.1
|
||||
starlette==0.47.3
|
||||
typing-inspection==0.4.1
|
||||
typing_extensions==4.15.0
|
||||
tzdata==2025.2
|
||||
uvicorn==0.35.0
|
||||
python-multipart==0.0.20
|
||||
|
||||
Reference in New Issue
Block a user