feat(ocr payment page) - added backend routes on app

This commit is contained in:
2025-08-30 01:11:59 +05:30
parent e5271d6979
commit c5edac2ab8
19 changed files with 314 additions and 109 deletions

View File

@@ -0,0 +1,3 @@
GOOGLE_APPLICATION_CREDENTIALS=google-credentials.json
HOST="0.0.0.0"
PORT="5003"

1
apps/PaymentOCRService/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
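# Google service-account key file. Both spellings are listed because the
# service's .env sets GOOGLE_APPLICATION_CREDENTIALS=google-credentials.json.
google_credentials.json
google-credentials.json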

View File

@@ -1,81 +0,0 @@
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import StreamingResponse, JSONResponse, PlainTextResponse
from typing import List, Optional
import io
import os
from app.pipeline_adapter import (
process_images_to_rows,
rows_to_csv_bytes,
)
# ASGI application object; the route decorators below register their handlers on it.
app = FastAPI(
title="Medical Billing OCR API",
description="FastAPI wrapper around the complete OCR pipeline (Google Vision + deskew + line clustering + extraction).",
version="1.0.0",
)
# Lower-case filename extensions accepted by the upload endpoints; each upload's
# extension is lower-cased and checked against this set before any processing.
ALLOWED_EXTS = {".jpg", ".jpeg", ".png", ".tif", ".tiff", ".bmp"}
@app.get("/health", response_class=PlainTextResponse)
def health():
    """Liveness probe that also reports whether the GCP credentials variable is set.

    Returns:
        A plain-text line ``OK | GOOGLE_APPLICATION_CREDENTIALS set: <bool>``.
        Only the presence of a non-empty value is reported; the value itself
        is never echoed back.
    """
    has_credentials = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", "") != ""
    return f"OK | GOOGLE_APPLICATION_CREDENTIALS set: {has_credentials}"
@app.post("/extract/json")
async def extract_json(files: List[UploadFile] = File(...)):
    """Run the OCR pipeline over the uploaded images and return the rows as JSON.

    Args:
        files: Uploaded image files (multipart form field ``files``).

    Returns:
        A JSON body ``{"rows": rows}`` where ``rows`` is whatever
        ``process_images_to_rows`` produced for the uploads.

    Raises:
        HTTPException: 400 when no files are sent, 415 when any filename has an
            extension outside ``ALLOWED_EXTS``, 500 when the pipeline or the
            JSON encoding raises.
    """
    # Local import: FastAPI re-exports Starlette's thread-pool helper, so no new
    # dependency is needed and the file's import block stays untouched.
    from fastapi.concurrency import run_in_threadpool

    if not files:
        raise HTTPException(status_code=400, detail="No files provided.")

    # Validate extensions early (not bulletproof, but helpful).
    # A missing filename is reported under a placeholder: joining ``None`` into
    # the message would raise TypeError and turn this 415 into a 500.
    bad = [
        f.filename or "<unnamed>"
        for f in files
        if os.path.splitext(f.filename or "")[1].lower() not in ALLOWED_EXTS
    ]
    if bad:
        raise HTTPException(
            status_code=415,
            detail=f"Unsupported file types: {', '.join(bad)}. Allowed: {', '.join(sorted(ALLOWED_EXTS))}"
        )

    # Read blobs in-memory.
    blobs = [await f.read() for f in files]
    filenames = [f.filename or "upload.bin" for f in files]

    try:
        # The pipeline is a blocking call; running it in a worker thread keeps
        # the event loop free to serve other requests meanwhile.
        # NOTE(review): assumes the pipeline tolerates being called off the
        # main thread - confirm.
        rows = await run_in_threadpool(process_images_to_rows, blobs, filenames)
        return JSONResponse(content={"rows": rows})
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Processing error: {e}") from e
@app.post("/extract/csv")
async def extract_csv(files: List[UploadFile] = File(...), filename: Optional[str] = None):
    """Run the OCR pipeline over the uploaded images and return a CSV download.

    Args:
        files: Uploaded image files (multipart form field ``files``).
        filename: Optional download name for the CSV (query parameter).
            Defaults to ``medical_billing_extract.csv``.

    Returns:
        A ``text/csv`` streaming response carrying the bytes from
        ``rows_to_csv_bytes`` with an ``attachment`` Content-Disposition.

    Raises:
        HTTPException: 400 when no files are sent, 415 when any filename has an
            extension outside ``ALLOWED_EXTS``, 500 when processing raises.
    """
    # Local import: FastAPI re-exports Starlette's thread-pool helper, so no new
    # dependency is needed and the file's import block stays untouched.
    from fastapi.concurrency import run_in_threadpool

    if not files:
        raise HTTPException(status_code=400, detail="No files provided.")

    # A missing filename is reported under a placeholder: joining ``None`` into
    # the message would raise TypeError and turn this 415 into a 500.
    bad = [
        f.filename or "<unnamed>"
        for f in files
        if os.path.splitext(f.filename or "")[1].lower() not in ALLOWED_EXTS
    ]
    if bad:
        raise HTTPException(
            status_code=415,
            detail=f"Unsupported file types: {', '.join(bad)}. Allowed: {', '.join(sorted(ALLOWED_EXTS))}"
        )

    blobs = [await f.read() for f in files]
    filenames = [f.filename or "upload.bin" for f in files]

    # ``filename`` is caller-controlled and ends up inside a quoted header
    # value: keep only the base name and replace quotes, control characters and
    # non-ASCII characters so the header cannot be broken or injected into.
    requested = os.path.basename((filename or "").replace("\\", "/"))
    out_name = "".join(
        ch if " " <= ch <= "~" and ch != '"' else "_" for ch in requested
    ).strip() or "medical_billing_extract.csv"

    try:
        # Blocking pipeline call goes to a worker thread so the event loop
        # stays responsive.
        # NOTE(review): assumes the pipeline tolerates being called off the
        # main thread - confirm.
        rows = await run_in_threadpool(process_images_to_rows, blobs, filenames)
        csv_bytes = rows_to_csv_bytes(rows)
        return StreamingResponse(
            io.BytesIO(csv_bytes),
            media_type="text/csv",
            headers={"Content-Disposition": f'attachment; filename="{out_name}"'}
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Processing error: {e}") from e

View File

@@ -2,6 +2,8 @@
# -*- coding: utf-8 -*-
"""
ALL IS GENERATED BY REPLIT:
End-to-end local pipeline (single script)
- One Google Vision pass per image (DOCUMENT_TEXT_DETECTION)

View File

@@ -4,10 +4,7 @@ from typing import List, Dict
import pandas as pd
# Import your existing functions directly from complete_pipeline.py
from complete_pipeline import (
smart_deskew_with_lines,
extract_all_clients_from_lines,
)
from complete_pipeline import smart_deskew_with_lines, extract_all_clients_from_lines
def _process_single_image_bytes(blob: bytes, display_name: str) -> List[Dict]:
"""

View File

@@ -0,0 +1,168 @@
from fastapi import FastAPI, UploadFile, File, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse, JSONResponse, PlainTextResponse
from typing import List, Optional
import io
import os
import asyncio
from dotenv import load_dotenv
load_dotenv() # loads .env (GOOGLE_APPLICATION_CREDENTIALS, HOST, PORT, etc.)
# Your adapter that calls the pipeline
from complete_pipeline_adapter import process_images_to_rows,rows_to_csv_bytes
# -------------------------------------------------
# App + concurrency controls (similar to your other app)
# -------------------------------------------------
# ASGI application object; middleware and the route decorators below attach to it.
app = FastAPI(
title="Payment OCR Services API",
description="FastAPI wrapper around the OCR pipeline (Google Vision + deskew + line grouping + extraction).",
version="1.0.0",
)
# Concurrency/semaphore (optional but useful for OCR)
# MAX_CONCURRENCY is read from the environment (default 2) and clamped to at
# least 1: a semaphore created with 0 can never be acquired, so every request
# would queue forever, and a negative value makes asyncio.Semaphore raise
# ValueError at import time.
MAX_CONCURRENCY = max(1, int(os.getenv("MAX_CONCURRENCY", "2")))
semaphore = asyncio.Semaphore(MAX_CONCURRENCY)  # bounds the OCR jobs in flight
active_jobs = 0  # jobs currently holding a semaphore slot
waiting_jobs = 0  # jobs queued for a semaphore slot
lock = asyncio.Lock()  # guards reads/writes of the two counters above
# CORS: CORS_ORIGINS is either "*" (the default, any origin) or a
# comma-separated list of origins; blank entries in the list are dropped.
cors_origins = os.getenv("CORS_ORIGINS", "*")
if cors_origins.strip() == "*":
    allow_origins = ["*"]
else:
    allow_origins = [
        origin
        for origin in (piece.strip() for piece in cors_origins.split(","))
        if origin
    ]
app.add_middleware(
    CORSMiddleware,
    allow_origins=allow_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Lower-case filename extensions accepted by the upload endpoints.
ALLOWED_EXTS = {".jpg", ".jpeg", ".png", ".tif", ".tiff", ".bmp"}
# -------------------------------------------------
# Health + status
# -------------------------------------------------
@app.get("/health", response_class=PlainTextResponse)
def health():
    """Liveness probe; also reports whether the GCP credentials variable is set.

    Returns:
        The plain-text line ``OK | GOOGLE_APPLICATION_CREDENTIALS set: <bool>``,
        where the flag is true only for a non-empty environment value.
    """
    credentials_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", "")
    is_configured = len(credentials_path) > 0
    return f"OK | GOOGLE_APPLICATION_CREDENTIALS set: {is_configured}"
@app.get("/status")
async def get_status():
    """Report the current OCR workload.

    Returns:
        A dict with the number of running jobs (``active_jobs``), jobs waiting
        for a slot (``queued_jobs``), the configured limit
        (``max_concurrency``) and ``status``, which is ``"busy"`` while any job
        is running or queued and ``"idle"`` otherwise.
    """
    async with lock:
        # Snapshot both counters under the lock so they are read together.
        running, queued = active_jobs, waiting_jobs
        state = "idle" if (running <= 0 and queued <= 0) else "busy"
        return {
            "active_jobs": running,
            "queued_jobs": queued,
            "max_concurrency": MAX_CONCURRENCY,
            "status": state,
        }
# -------------------------------------------------
# Helpers
# -------------------------------------------------
def _validate_files(files: List[UploadFile]):
    """Reject a request whose uploads are missing or have unsupported extensions.

    Args:
        files: The uploads received by an endpoint.

    Raises:
        HTTPException: 400 when ``files`` is empty; 415 when any upload's
            filename extension (lower-cased) is not in ``ALLOWED_EXTS``. The
            415 detail lists the offending names and the allowed extensions.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files provided.")

    # An upload without a filename has no extension and is therefore rejected;
    # it is listed under a placeholder because joining ``None`` into the
    # message would raise TypeError and surface as a 500 instead of a 415.
    bad = [
        f.filename or "<unnamed>"
        for f in files
        if os.path.splitext(f.filename or "")[1].lower() not in ALLOWED_EXTS
    ]
    if bad:
        raise HTTPException(
            status_code=415,
            detail=f"Unsupported file types: {', '.join(bad)}. Allowed: {', '.join(sorted(ALLOWED_EXTS))}"
        )
# -------------------------------------------------
# Endpoints
# -------------------------------------------------
async def _run_ocr_job(files: List[UploadFile]):
    """Read the uploads and run the OCR pipeline under the shared concurrency limit.

    Keeps the module-level ``waiting_jobs`` / ``active_jobs`` counters accurate:
    the job counts as waiting until it obtains a semaphore slot and as active
    until the pipeline call finishes or fails.

    Args:
        files: Already-validated uploads.

    Returns:
        Whatever ``process_images_to_rows`` returns for the uploads.
    """
    global active_jobs, waiting_jobs

    async with lock:
        waiting_jobs += 1
    still_queued = True
    try:
        async with semaphore:
            async with lock:
                waiting_jobs -= 1
                active_jobs += 1
                still_queued = False
            try:
                blobs = [await f.read() for f in files]
                names = [f.filename or "upload.bin" for f in files]
                # The pipeline is a blocking call. Running it on the event loop
                # would stall every other request (including /status) and make
                # the semaphore pointless, so it is handed to a worker thread.
                # NOTE(review): assumes the pipeline tolerates up to
                # MAX_CONCURRENCY concurrent calls from worker threads - confirm.
                # NOTE(review): if this await is cancelled the worker thread
                # keeps running after the slot is released.
                return await asyncio.to_thread(process_images_to_rows, blobs, names)
            finally:
                async with lock:
                    active_jobs -= 1
    finally:
        # Reached with the flag still set only when the request failed or was
        # cancelled before getting a slot; undo the queue count so /status
        # does not report a phantom waiting job forever.
        if still_queued:
            async with lock:
                waiting_jobs -= 1


def _download_name(filename: Optional[str]) -> str:
    """Return a header-safe download name, or the default when none is usable.

    ``filename`` is caller-controlled and is placed inside a quoted
    Content-Disposition value, so only the base name is kept and quotes,
    control characters and non-ASCII characters are replaced with ``_``.
    """
    requested = os.path.basename((filename or "").replace("\\", "/"))
    cleaned = "".join(
        ch if " " <= ch <= "~" and ch != '"' else "_" for ch in requested
    ).strip()
    return cleaned or "medical_billing_extract.csv"


@app.post("/extract/json")
async def extract_json(files: List[UploadFile] = File(...)):
    """Run the OCR pipeline over the uploads and return the rows as JSON.

    Args:
        files: Uploaded image files (multipart form field ``files``).

    Returns:
        A JSON body ``{"rows": rows}``.

    Raises:
        HTTPException: 400/415 from ``_validate_files``; 500 with a
            ``Processing error: ...`` detail when processing raises.
    """
    _validate_files(files)
    try:
        rows = await _run_ocr_job(files)  # calls your pipeline
        return JSONResponse(content={"rows": rows})
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Processing error: {e}") from e


@app.post("/extract/csvtext", response_class=PlainTextResponse)
async def extract_csvtext(files: List[UploadFile] = File(...)):
    """Run the OCR pipeline over the uploads and return the CSV inline as text.

    Args:
        files: Uploaded image files (multipart form field ``files``).

    Returns:
        The CSV produced by ``rows_to_csv_bytes``, decoded as UTF-8 and served
        with media type ``text/csv``.

    Raises:
        HTTPException: 400/415 from ``_validate_files``; 500 with a
            ``Processing error: ...`` detail when processing raises.
    """
    _validate_files(files)
    try:
        rows = await _run_ocr_job(files)
        csv_bytes = rows_to_csv_bytes(rows)
        return PlainTextResponse(csv_bytes.decode("utf-8"), media_type="text/csv")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Processing error: {e}") from e


@app.post("/extract/csv")
async def extract_csv(files: List[UploadFile] = File(...), filename: Optional[str] = None):
    """Run the OCR pipeline over the uploads and return the CSV as a download.

    Args:
        files: Uploaded image files (multipart form field ``files``).
        filename: Optional download name (query parameter). Sanitised before
            use; defaults to ``medical_billing_extract.csv``.

    Returns:
        A ``text/csv`` streaming response with an ``attachment``
        Content-Disposition header.

    Raises:
        HTTPException: 400/415 from ``_validate_files``; 500 with a
            ``Processing error: ...`` detail when processing raises.
    """
    _validate_files(files)
    out_name = _download_name(filename)
    try:
        rows = await _run_ocr_job(files)
        csv_bytes = rows_to_csv_bytes(rows)
        return StreamingResponse(
            io.BytesIO(csv_bytes),
            media_type="text/csv",
            headers={"Content-Disposition": f'attachment; filename="{out_name}"'}
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Processing error: {e}") from e
# -------------------------------------------------
# Entrypoint (same pattern as your selenium app)
# -------------------------------------------------
if __name__ == "__main__":
    import uvicorn

    # Fall back to the values the service's .env ships with (0.0.0.0:5003) when
    # the variables are absent: int(None) would otherwise raise TypeError and a
    # None host would be handed to uvicorn.
    host = os.getenv("HOST") or "0.0.0.0"
    port = int(os.getenv("PORT") or "5003")
    reload_flag = os.getenv("RELOAD", "false").lower() == "true"

    if reload_flag:
        # Uvicorn only supports auto-reload when the app is given as an import
        # string, so point it at "<this module>:app" instead of the object.
        # NOTE(review): assumes the script is started from a location where
        # this module is importable by its bare name - confirm.
        module_name = os.path.splitext(os.path.basename(__file__))[0]
        uvicorn.run(f"{module_name}:app", host=host, port=port, reload=True)
    else:
        uvicorn.run(app, host=host, port=port, reload=False)

View File

@@ -1,5 +1,5 @@
{
"name": "pdfservice",
"name": "paymentocrservice",
"private": true,
"scripts": {
"postinstall": "pip install -r requirements.txt",

View File

@@ -1,10 +1,26 @@
fastapi
uvicorn[standard]
google-cloud-vision
opencv-python-headless
pytesseract
pillow
pandas
openpyxl
numpy
python-multipart
annotated-types==0.7.0
anyio==4.10.0
click==8.2.1
colorama==0.4.6
et_xmlfile==2.0.0
fastapi==0.116.1
h11==0.16.0
idna==3.10
numpy==2.2.6
google-cloud-vision>=3.10.2
opencv-python==4.12.0.88
openpyxl==3.1.5
pandas==2.3.2
pydantic==2.11.7
pydantic_core==2.33.2
python-dateutil==2.9.0.post0
python-dotenv==1.1.1
pytz==2025.2
six==1.17.0
sniffio==1.3.1
starlette==0.47.3
typing-inspection==0.4.1
typing_extensions==4.15.0
tzdata==2025.2
uvicorn==0.35.0
python-multipart==0.0.20