diff --git a/apps/Backend/src/routes/index.ts b/apps/Backend/src/routes/index.ts index e177dfb..ffe1e93 100644 --- a/apps/Backend/src/routes/index.ts +++ b/apps/Backend/src/routes/index.ts @@ -3,14 +3,15 @@ import patientsRoutes from "./patients"; import appointmentsRoutes from "./appointments"; import usersRoutes from "./users"; import staffsRoutes from "./staffs"; -import pdfExtractionRoutes from "./pdfExtraction"; import claimsRoutes from "./claims"; +import patientDataExtractionRoutes from "./patientdataExtraction"; import insuranceCredsRoutes from "./insuranceCreds"; import documentsRoutes from "./documents"; import insuranceEligibilityRoutes from "./insuranceEligibility"; import paymentsRoutes from "./payments"; import databaseManagementRoutes from "./database-management"; import notificationsRoutes from "./notifications"; +import paymentOcrRoutes from "./paymentOcrExtraction"; const router = Router(); @@ -18,7 +19,7 @@ router.use("/patients", patientsRoutes); router.use("/appointments", appointmentsRoutes); router.use("/users", usersRoutes); router.use("/staffs", staffsRoutes); -router.use("/pdfExtraction", pdfExtractionRoutes); +router.use("/patientDataExtraction", patientDataExtractionRoutes); router.use("/claims", claimsRoutes); router.use("/insuranceCreds", insuranceCredsRoutes); router.use("/documents", documentsRoutes); @@ -26,5 +27,6 @@ router.use("/insuranceEligibility", insuranceEligibilityRoutes); router.use("/payments", paymentsRoutes); router.use("/database-management", databaseManagementRoutes); router.use("/notifications", notificationsRoutes); +router.use("/payment-ocr", paymentOcrRoutes); export default router; diff --git a/apps/Backend/src/routes/pdfExtraction.ts b/apps/Backend/src/routes/patientDataExtraction.ts similarity index 62% rename from apps/Backend/src/routes/pdfExtraction.ts rename to apps/Backend/src/routes/patientDataExtraction.ts index 8b01282..92fc227 100644 --- a/apps/Backend/src/routes/pdfExtraction.ts +++ b/apps/Backend/src/routes/patientDataExtraction.ts @@ -2,17 +2,17 @@ import { Router } from "express"; import type { Request, Response } from "express"; const router = Router(); import multer from "multer"; -import forwardToPdfService from "../services/pdfClient"; +import forwardToPatientDataExtractorService from "../services/patientDataExtractorService"; const upload = multer({ storage: multer.memoryStorage() }); -router.post("/extract", upload.single("pdf"), async (req: Request, res: Response): Promise=> { +router.post("/patientdataextract", upload.single("pdf"), async (req: Request, res: Response): Promise=> { if (!req.file) { return res.status(400).json({ error: "No PDF file uploaded." }); } try { - const result = await forwardToPdfService(req.file); + const result = await forwardToPatientDataExtractorService(req.file); res.json(result); } catch (err) { console.error(err); diff --git a/apps/Backend/src/routes/paymentOcrExtraction.ts b/apps/Backend/src/routes/paymentOcrExtraction.ts new file mode 100644 index 0000000..34e2244 --- /dev/null +++ b/apps/Backend/src/routes/paymentOcrExtraction.ts @@ -0,0 +1,50 @@ +import { Router, Request, Response } from "express"; +import multer from "multer"; +import { forwardToPaymentOCRService } from "../services/paymentOCRService"; + +const router = Router(); + +// keep files in memory; FastAPI accepts them as multipart bytes +const upload = multer({ storage: multer.memoryStorage() }); + +// POST /payment-ocr/extract (field name: "files") +router.post( + "/extract", + upload.array("files"), // allow multiple images + async (req: Request, res: Response): Promise => { + try { + const files = req.files as Express.Multer.File[] | undefined; + + if (!files || files.length === 0) { + return res + .status(400) + .json({ error: "No image files uploaded. Use field name 'files'." }); + } + + // (optional) basic client-side MIME guard + const allowed = new Set([ + "image/jpeg", + "image/png", + "image/tiff", + "image/bmp", + "image/jpg", + ]); + const bad = files.filter((f) => !allowed.has(f.mimetype.toLowerCase())); + if (bad.length) { + return res.status(415).json({ + error: `Unsupported file types: ${bad + .map((b) => b.originalname) + .join(", ")}`, + }); + } + + const rows = await forwardToPaymentOCRService(files); + return res.json({ rows }); + } catch (err) { + console.error(err); + return res.status(500).json({ error: "Payment OCR extraction failed" }); + } + } +); + +export default router; diff --git a/apps/Backend/src/services/pdfClient.ts b/apps/Backend/src/services/patientDataExtractorService.ts similarity index 89% rename from apps/Backend/src/services/pdfClient.ts rename to apps/Backend/src/services/patientDataExtractorService.ts index 101239b..a99c270 100644 --- a/apps/Backend/src/services/pdfClient.ts +++ b/apps/Backend/src/services/patientDataExtractorService.ts @@ -9,7 +9,7 @@ export interface ExtractedData { [key: string]: any; } -export default async function forwardToPdfService( +export default async function forwardToPatientDataExtractorService( file: Express.Multer.File ): Promise { const form = new FormData(); diff --git a/apps/Backend/src/services/paymentOCRService.ts b/apps/Backend/src/services/paymentOCRService.ts new file mode 100644 index 0000000..e8608b7 --- /dev/null +++ b/apps/Backend/src/services/paymentOCRService.ts @@ -0,0 +1,34 @@ +import axios from "axios"; +import FormData from "form-data"; + +export async function forwardToPaymentOCRService( + files: Express.Multer.File | Express.Multer.File[] +): Promise { + const arr = Array.isArray(files) ? files : [files]; + + const form = new FormData(); + for (const f of arr) { + form.append("files", f.buffer, { + filename: f.originalname, + contentType: f.mimetype, // image/jpeg, image/png, image/tiff, etc. + knownLength: f.size, + }); + } + + const url = `http://localhost:5003/extract/json`; + + try { + const resp = await axios.post<{ rows: any }>(url, form, { + headers: form.getHeaders(), + maxBodyLength: Infinity, + maxContentLength: Infinity, + timeout: 120000, // OCR can be heavy; adjust as needed + }); + return resp.data?.rows ?? []; + } catch (err: any) { + // Bubble up a useful error message + const status = err?.response?.status; + const detail = err?.response?.data?.detail || err?.message || "Unknown error"; + throw new Error(`Payment OCR request failed${status ? ` (${status})` : ""}: ${detail}`); + } +} diff --git a/apps/Frontend/src/hooks/use-extractPdfData.ts b/apps/Frontend/src/hooks/use-extractPdfData.ts index 02d07b9..17d8a1e 100644 --- a/apps/Frontend/src/hooks/use-extractPdfData.ts +++ b/apps/Frontend/src/hooks/use-extractPdfData.ts @@ -16,7 +16,7 @@ export default function useExtractPdfData() { const formData = new FormData(); formData.append("pdf", pdfFile); - const res = await apiRequest("POST", "/api/pdfExtraction/extract", formData); + const res = await apiRequest("POST", "/api/patientDataExtraction/patientdataextract", formData); if (!res.ok) throw new Error("Failed to extract PDF"); return res.json(); }, diff --git a/apps/PaymentOCRService/.env.example b/apps/PaymentOCRService/.env.example index e69de29..2b436fd 100644 --- a/apps/PaymentOCRService/.env.example +++ b/apps/PaymentOCRService/.env.example @@ -0,0 +1,3 @@ +GOOGLE_APPLICATION_CREDENTIALS=google-credentials.json +HOST="0.0.0.0" +PORT="5003" \ No newline at end of file diff --git a/apps/PaymentOCRService/.gitignore b/apps/PaymentOCRService/.gitignore new file mode 100644 index 0000000..a79e064 --- /dev/null +++ b/apps/PaymentOCRService/.gitignore @@ -0,0 +1 @@ +google_credentials.json \ No newline at end of file diff --git a/apps/PaymentOCRService/__pycache__/complete_pipeline.cpython-313.pyc b/apps/PaymentOCRService/__pycache__/complete_pipeline.cpython-313.pyc new file mode 100644 index 0000000..6440d43 Binary files /dev/null and b/apps/PaymentOCRService/__pycache__/complete_pipeline.cpython-313.pyc differ diff --git a/apps/PaymentOCRService/__pycache__/complete_pipeline_adapter.cpython-313.pyc b/apps/PaymentOCRService/__pycache__/complete_pipeline_adapter.cpython-313.pyc new file mode 100644 index 0000000..10d6eb8 Binary files /dev/null and b/apps/PaymentOCRService/__pycache__/complete_pipeline_adapter.cpython-313.pyc differ diff --git a/apps/PaymentOCRService/app/init.py b/apps/PaymentOCRService/app/init.py deleted file mode 100644 index e69de29..0000000 diff --git a/apps/PaymentOCRService/app/main.py b/apps/PaymentOCRService/app/main.py deleted file mode 100644 index 044937d..0000000 --- a/apps/PaymentOCRService/app/main.py +++ /dev/null @@ -1,81 +0,0 @@ -from fastapi import FastAPI, UploadFile, File, HTTPException -from fastapi.responses import StreamingResponse, JSONResponse, PlainTextResponse -from typing import List, Optional -import io -import os - -from app.pipeline_adapter import ( - process_images_to_rows, - rows_to_csv_bytes, -) - -app = FastAPI( - title="Medical Billing OCR API", - description="FastAPI wrapper around the complete OCR pipeline (Google Vision + deskew + line clustering + extraction).", - version="1.0.0", -) - -ALLOWED_EXTS = {".jpg", ".jpeg", ".png", ".tif", ".tiff", ".bmp"} - -@app.get("/health", response_class=PlainTextResponse) -def health(): - # Simple sanity check (also ensures GCP creds var visibility) - creds = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", "") - return f"OK | GOOGLE_APPLICATION_CREDENTIALS set: {bool(creds)}" - -@app.post("/extract/json") -async def extract_json(files: List[UploadFile] = File(...)): - if not files: - raise HTTPException(status_code=400, detail="No files provided.") - - # Validate extensions early (not bulletproof, but helpful) - bad = [f.filename for f in files if os.path.splitext(f.filename or "")[1].lower() not in ALLOWED_EXTS] - if bad: - raise HTTPException( - status_code=415, - detail=f"Unsupported file types: {', '.join(bad)}. Allowed: {', '.join(sorted(ALLOWED_EXTS))}" - ) - - # Read blobs in-memory - blobs = [] - filenames = [] - for f in files: - blobs.append(await f.read()) - filenames.append(f.filename or "upload.bin") - - try: - rows = process_images_to_rows(blobs, filenames) - # rows is a list[dict] where each dict contains the columns you already emit (Patient Name, etc.) - return JSONResponse(content={"rows": rows}) - except Exception as e: - raise HTTPException(status_code=500, detail=f"Processing error: {e}") - -@app.post("/extract/csv") -async def extract_csv(files: List[UploadFile] = File(...), filename: Optional[str] = None): - if not files: - raise HTTPException(status_code=400, detail="No files provided.") - - bad = [f.filename for f in files if os.path.splitext(f.filename or "")[1].lower() not in ALLOWED_EXTS] - if bad: - raise HTTPException( - status_code=415, - detail=f"Unsupported file types: {', '.join(bad)}. Allowed: {', '.join(sorted(ALLOWED_EXTS))}" - ) - - blobs = [] - filenames = [] - for f in files: - blobs.append(await f.read()) - filenames.append(f.filename or "upload.bin") - - try: - rows = process_images_to_rows(blobs, filenames) - csv_bytes = rows_to_csv_bytes(rows) - out_name = filename or "medical_billing_extract.csv" - return StreamingResponse( - io.BytesIO(csv_bytes), - media_type="text/csv", - headers={"Content-Disposition": f'attachment; filename="{out_name}"'} - ) - except Exception as e: - raise HTTPException(status_code=500, detail=f"Processing error: {e}") diff --git a/apps/PaymentOCRService/complete-pipeline.py b/apps/PaymentOCRService/complete_pipeline.py similarity index 99% rename from apps/PaymentOCRService/complete-pipeline.py rename to apps/PaymentOCRService/complete_pipeline.py index d713127..63036ef 100644 --- a/apps/PaymentOCRService/complete-pipeline.py +++ b/apps/PaymentOCRService/complete_pipeline.py @@ -2,6 +2,8 @@ # -*- coding: utf-8 -*- """ +ALL IS GENERATED BY REPLIT: + End-to-end local pipeline (single script) - One Google Vision pass per image (DOCUMENT_TEXT_DETECTION) diff --git a/apps/PaymentOCRService/app/pipeline-adaptor.py b/apps/PaymentOCRService/complete_pipeline_adapter.py similarity index 96% rename from apps/PaymentOCRService/app/pipeline-adaptor.py rename to apps/PaymentOCRService/complete_pipeline_adapter.py index c52fb04..252f74c 100644 --- a/apps/PaymentOCRService/app/pipeline-adaptor.py +++ b/apps/PaymentOCRService/complete_pipeline_adapter.py @@ -4,10 +4,7 @@ from typing import List, Dict import pandas as pd # Import your existing functions directly from complete_pipeline.py -from complete_pipeline import ( - smart_deskew_with_lines, - extract_all_clients_from_lines, -) +from complete_pipeline import smart_deskew_with_lines, extract_all_clients_from_lines def _process_single_image_bytes(blob: bytes, display_name: str) -> List[Dict]: """ diff --git a/apps/PaymentOCRService/main.py b/apps/PaymentOCRService/main.py new file mode 100644 index 0000000..502232e --- /dev/null +++ b/apps/PaymentOCRService/main.py @@ -0,0 +1,168 @@ +from fastapi import FastAPI, UploadFile, File, HTTPException, Request +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import StreamingResponse, JSONResponse, PlainTextResponse +from typing import List, Optional +import io +import os +import asyncio + +from dotenv import load_dotenv +load_dotenv() # loads .env (GOOGLE_APPLICATION_CREDENTIALS, HOST, PORT, etc.) + +# Your adapter that calls the pipeline +from complete_pipeline_adapter import process_images_to_rows,rows_to_csv_bytes + +# ------------------------------------------------- +# App + concurrency controls (similar to your other app) +# ------------------------------------------------- +app = FastAPI( + title="Payment OCR Services API", + description="FastAPI wrapper around the OCR pipeline (Google Vision + deskew + line grouping + extraction).", + version="1.0.0", +) + +# Concurrency/semaphore (optional but useful for OCR) +MAX_CONCURRENCY = int(os.getenv("MAX_CONCURRENCY", "2")) +semaphore = asyncio.Semaphore(MAX_CONCURRENCY) + +active_jobs = 0 +waiting_jobs = 0 +lock = asyncio.Lock() + +# CORS +cors_origins = os.getenv("CORS_ORIGINS", "*") +allow_origins = ["*"] if cors_origins.strip() == "*" else [o.strip() for o in cors_origins.split(",") if o.strip()] +app.add_middleware( + CORSMiddleware, + allow_origins=allow_origins, + allow_methods=["*"], + allow_headers=["*"], +) + +ALLOWED_EXTS = {".jpg", ".jpeg", ".png", ".tif", ".tiff", ".bmp"} + +# ------------------------------------------------- +# Health + status +# ------------------------------------------------- +@app.get("/health", response_class=PlainTextResponse) +def health(): + creds = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", "") + return f"OK | GOOGLE_APPLICATION_CREDENTIALS set: {bool(creds)}" + +@app.get("/status") +async def get_status(): + async with lock: + return { + "active_jobs": active_jobs, + "queued_jobs": waiting_jobs, + "max_concurrency": MAX_CONCURRENCY, + "status": "busy" if active_jobs > 0 or waiting_jobs > 0 else "idle", + } + +# ------------------------------------------------- +# Helpers +# ------------------------------------------------- +def _validate_files(files: List[UploadFile]): + if not files: + raise HTTPException(status_code=400, detail="No files provided.") + bad = [f.filename for f in files if os.path.splitext(f.filename or "")[1].lower() not in ALLOWED_EXTS] + if bad: + raise HTTPException( + status_code=415, + detail=f"Unsupported file types: {', '.join(bad)}. Allowed: {', '.join(sorted(ALLOWED_EXTS))}" + ) + +# ------------------------------------------------- +# Endpoints +# ------------------------------------------------- +@app.post("/extract/json") +async def extract_json(files: List[UploadFile] = File(...)): + _validate_files(files) + + async with lock: + global waiting_jobs + waiting_jobs += 1 + + async with semaphore: + async with lock: + waiting_jobs -= 1 + global active_jobs + active_jobs += 1 + + try: + blobs = [await f.read() for f in files] + names = [f.filename or "upload.bin" for f in files] + rows = process_images_to_rows(blobs, names) # calls your pipeline + return JSONResponse(content={"rows": rows}) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Processing error: {e}") + finally: + async with lock: + active_jobs -= 1 + +@app.post("/extract/csvtext", response_class=PlainTextResponse) +async def extract_csvtext(files: List[UploadFile] = File(...)): + _validate_files(files) + + async with lock: + global waiting_jobs + waiting_jobs += 1 + + async with semaphore: + async with lock: + waiting_jobs -= 1 + global active_jobs + active_jobs += 1 + + try: + blobs = [await f.read() for f in files] + names = [f.filename or "upload.bin" for f in files] + rows = process_images_to_rows(blobs, names) + csv_bytes = rows_to_csv_bytes(rows) + return PlainTextResponse(csv_bytes.decode("utf-8"), media_type="text/csv") + except Exception as e: + raise HTTPException(status_code=500, detail=f"Processing error: {e}") + finally: + async with lock: + active_jobs -= 1 + +@app.post("/extract/csv") +async def extract_csv(files: List[UploadFile] = File(...), filename: Optional[str] = None): + _validate_files(files) + + async with lock: + global waiting_jobs + waiting_jobs += 1 + + async with semaphore: + async with lock: + waiting_jobs -= 1 + global active_jobs + active_jobs += 1 + + try: + blobs = [await f.read() for f in files] + names = [f.filename or "upload.bin" for f in files] + rows = process_images_to_rows(blobs, names) + csv_bytes = rows_to_csv_bytes(rows) + out_name = filename or "medical_billing_extract.csv" + return StreamingResponse( + io.BytesIO(csv_bytes), + media_type="text/csv", + headers={"Content-Disposition": f'attachment; filename="{out_name}"'} + ) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Processing error: {e}") + finally: + async with lock: + active_jobs -= 1 + +# ------------------------------------------------- +# Entrypoint (same pattern as your selenium app) +# ------------------------------------------------- +if __name__ == "__main__": + import uvicorn + host = os.getenv("HOST") + port = int(os.getenv("PORT")) + reload_flag = os.getenv("RELOAD", "false").lower() == "true" + uvicorn.run(app, host=host, port=port, reload=reload_flag) diff --git a/apps/PaymentOCRService/package.json b/apps/PaymentOCRService/package.json index a4416d1..4674a63 100644 --- a/apps/PaymentOCRService/package.json +++ b/apps/PaymentOCRService/package.json @@ -1,5 +1,5 @@ { - "name": "pdfservice", + "name": "paymentocrservice", "private": true, "scripts": { "postinstall": "pip install -r requirements.txt", diff --git a/apps/PaymentOCRService/requirements.txt b/apps/PaymentOCRService/requirements.txt index 7320dce..9c33a3f 100644 --- a/apps/PaymentOCRService/requirements.txt +++ b/apps/PaymentOCRService/requirements.txt @@ -1,10 +1,26 @@ -fastapi -uvicorn[standard] -google-cloud-vision -opencv-python-headless -pytesseract -pillow -pandas -openpyxl -numpy -python-multipart +annotated-types==0.7.0 +anyio==4.10.0 +click==8.2.1 +colorama==0.4.6 +et_xmlfile==2.0.0 +fastapi==0.116.1 +h11==0.16.0 +idna==3.10 +numpy==2.2.6 +google-cloud-vision>=3.10.2 +opencv-python==4.12.0.88 +openpyxl==3.1.5 +pandas==2.3.2 +pydantic==2.11.7 +pydantic_core==2.33.2 +python-dateutil==2.9.0.post0 +python-dotenv==1.1.1 +pytz==2025.2 +six==1.17.0 +sniffio==1.3.1 +starlette==0.47.3 +typing-inspection==0.4.1 +typing_extensions==4.15.0 +tzdata==2025.2 +uvicorn==0.35.0 +python-multipart==0.0.20 diff --git a/package-lock.json b/package-lock.json index f653d58..43e67c3 100644 --- a/package-lock.json +++ b/package-lock.json @@ -163,8 +163,17 @@ "vite": "^6.3.5" } }, + "apps/PatientDataExtractorService": { + "name": "patientdataextractorservice", + "hasInstallScript": true + }, + "apps/PaymentOCRService": { + "name": "paymentocrservice", + "hasInstallScript": true + }, "apps/PdfService": { "name": "pdfservice", + "extraneous": true, "hasInstallScript": true }, "apps/SeleniumService": { @@ -9983,11 +9992,19 @@ "devOptional": true, "license": "MIT" }, + "node_modules/patientdataextractorservice": { + "resolved": "apps/PatientDataExtractorService", + "link": true + }, "node_modules/pause": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/pause/-/pause-0.0.1.tgz", "integrity": "sha512-KG8UEiEVkR3wGEb4m5yZkVCzigAD+cVEJck2CzYZO37ZGJfctvVptVO192MwrtPhzONn6go8ylnOdMhKqi4nfg==" }, + "node_modules/paymentocrservice": { + "resolved": "apps/PaymentOCRService", + "link": true + }, "node_modules/pdfjs-dist": { "version": "3.11.174", "resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-3.11.174.tgz", @@ -10001,10 +10018,6 @@ "path2d-polyfill": "^2.0.1" } }, - "node_modules/pdfservice": { - "resolved": "apps/PdfService", - "link": true - }, "node_modules/perfect-debounce": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/perfect-debounce/-/perfect-debounce-1.0.0.tgz", diff --git a/package.json b/package.json index d1b5783..7735671 100644 --- a/package.json +++ b/package.json @@ -11,8 +11,8 @@ "db:generate": "prisma generate --schema=packages/db/prisma/schema.prisma && ts-node packages/db/scripts/patch-zod-buffer.ts", "db:migrate": "dotenv -e packages/db/.env -- prisma migrate dev --schema=packages/db/prisma/schema.prisma", "db:seed": "prisma db seed --schema=packages/db/prisma/schema.prisma", - "setup:env": "shx cp packages/db/prisma/.env.example packages/db/prisma/.env && shx cp apps/Frontend/.env.example apps/Frontend/.env && shx cp apps/Backend/.env.example apps/Backend/.env", - "postinstall": "cd apps/PdfService && npm run postinstall" + "setup:env": "shx cp packages/db/prisma/.env.example packages/db/prisma/.env && shx cp apps/Frontend/.env.example apps/Frontend/.env && shx cp apps/Backend/.env.example apps/Backend/.env && shx cp apps/PaymentOCRService/.env.example apps/PaymentOCRService/.env", + "postinstall": "npm --prefix apps/PatientDataExtractorService run postinstall && npm --prefix apps/PaymentOCRService run postinstall" }, "prisma": { "seed": "ts-node packages/db/prisma/seed.ts"