initial commit
This commit is contained in:
2
apps/PatientDataExtractorService/.env
Executable file
2
apps/PatientDataExtractorService/.env
Executable file
@@ -0,0 +1,2 @@
|
||||
HOST=localhost
|
||||
PORT=5001
|
||||
2
apps/PatientDataExtractorService/.env.example
Executable file
2
apps/PatientDataExtractorService/.env.example
Executable file
@@ -0,0 +1,2 @@
|
||||
HOST=localhost
|
||||
PORT=5001
|
||||
8
apps/PatientDataExtractorService/.turbo/turbo-dev.log
Executable file
8
apps/PatientDataExtractorService/.turbo/turbo-dev.log
Executable file
@@ -0,0 +1,8 @@
|
||||
|
||||
> dev
|
||||
> python3 main.py
|
||||
|
||||
[32mINFO[0m: Started server process [[36m6318[0m]
|
||||
[32mINFO[0m: Waiting for application startup.
|
||||
[32mINFO[0m: Application startup complete.
|
||||
[32mINFO[0m: Uvicorn running on [1mhttp://localhost:5001[0m (Press CTRL+C to quit)
|
||||
95
apps/PatientDataExtractorService/main.py
Executable file
95
apps/PatientDataExtractorService/main.py
Executable file
@@ -0,0 +1,95 @@
|
||||
from fastapi import FastAPI, UploadFile, File, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
import uvicorn
|
||||
import fitz # PyMuPDF
|
||||
import re
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
# Optional: allow CORS for development
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"], # change in production
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
DOB_RE = re.compile(r'(?<!\d)(\d{1,2})/(\d{1,2})/(\d{4})(?!\d)')
|
||||
ID_RE = re.compile(r'^\d{8,14}$') # 8–14 digits, whole line
|
||||
|
||||
# lines that tell us we've moved past the name/DOB area
|
||||
STOP_WORDS = {
|
||||
'eligibility', 'coverage', 'age band', 'date of', 'service',
|
||||
'tooth', 'number', 'surface', 'procedure', 'code', 'description',
|
||||
'provider', 'printed on', 'member id', 'name', 'date of birth'
|
||||
}
|
||||
|
||||
@app.post("/extract")
|
||||
async def extract(pdf: UploadFile = File(...)):
|
||||
if not pdf:
|
||||
raise HTTPException(status_code=400, detail="Missing 'pdf' file")
|
||||
|
||||
content = await pdf.read()
|
||||
try:
|
||||
doc = fitz.open(stream=content, filetype="pdf")
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=400, detail=f"Unable to open PDF: {e}")
|
||||
|
||||
# Extract text from all pages
|
||||
text = "\n".join(page.get_text("text") for page in doc)
|
||||
lines = [line.strip() for line in text.splitlines() if line.strip()]
|
||||
|
||||
member_id = ""
|
||||
name = ""
|
||||
dob = ""
|
||||
|
||||
# 1) Find the first plausible member ID (8–14 digits)
|
||||
id_idx = -1
|
||||
for i, line in enumerate(lines):
|
||||
if ID_RE.match(line):
|
||||
member_id = line
|
||||
id_idx = i
|
||||
break
|
||||
|
||||
if id_idx == -1:
|
||||
return {"memberId": "", "name": "", "dob": ""}
|
||||
|
||||
# 2) Scan forward to collect name + DOB; handle both same-line and next-line cases
|
||||
collected = []
|
||||
j = id_idx + 1
|
||||
while j < len(lines):
|
||||
low = lines[j].lower()
|
||||
if any(sw in low for sw in STOP_WORDS):
|
||||
break
|
||||
collected.append(lines[j])
|
||||
# If we already found a DOB, we can stop early
|
||||
if DOB_RE.search(lines[j]):
|
||||
break
|
||||
j += 1
|
||||
|
||||
# Flatten the collected chunk to search for a date (works if DOB is on same line or next)
|
||||
blob = " ".join(collected).strip()
|
||||
|
||||
m = DOB_RE.search(blob)
|
||||
if m:
|
||||
dob = m.group(0)
|
||||
# name is everything before the date within the same blob
|
||||
name = blob[:m.start()].strip()
|
||||
else:
|
||||
# fallback: if we didn't find a date, assume first collected line(s) are name
|
||||
name = blob
|
||||
|
||||
return {
|
||||
"memberId": member_id,
|
||||
"name": name,
|
||||
"dob": dob
|
||||
}
|
||||
|
||||
if __name__ == "__main__":
|
||||
host = os.getenv("HOST")
|
||||
port = int(os.getenv("PORT"))
|
||||
uvicorn.run(app, host=host, port=port)
|
||||
8
apps/PatientDataExtractorService/package.json
Executable file
8
apps/PatientDataExtractorService/package.json
Executable file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"name": "patientdataextractorservice",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"postinstall": "pip install -r requirements.txt",
|
||||
"dev": "python3 main.py"
|
||||
}
|
||||
}
|
||||
2
apps/PatientDataExtractorService/requirements.txt
Executable file
2
apps/PatientDataExtractorService/requirements.txt
Executable file
@@ -0,0 +1,2 @@
|
||||
flask
|
||||
pymupdf
|
||||
Reference in New Issue
Block a user