structured well
This commit is contained in:
0
apps/PaymentOCRService/.env.example
Normal file
0
apps/PaymentOCRService/.env.example
Normal file
13
apps/PaymentOCRService/README.md
Normal file
13
apps/PaymentOCRService/README.md
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# Medical Billing OCR API (FastAPI)
|
||||||
|
|
||||||
|
## 1) Prereqs
|
||||||
|
- Google Cloud Vision service-account JSON.
|
||||||
|
- `GOOGLE_APPLICATION_CREDENTIALS` env var pointing to that JSON.
|
||||||
|
- Tesseract installed (for fallback OCR), and on PATH.
|
||||||
|
|
||||||
|
## 2) Install & run (local)
|
||||||
|
```bash
|
||||||
|
python -m venv .venv && source .venv/bin/activate
|
||||||
|
pip install -r requirements.txt
|
||||||
|
export GOOGLE_APPLICATION_CREDENTIALS=/absolute/path/to/service-account.json
|
||||||
|
uvicorn app.main:app --reload --port 8080
|
||||||
0
apps/PaymentOCRService/app/__init__.py
Normal file
0
apps/PaymentOCRService/app/__init__.py
Normal file
81
apps/PaymentOCRService/app/main.py
Normal file
81
apps/PaymentOCRService/app/main.py
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
from fastapi import FastAPI, UploadFile, File, HTTPException
|
||||||
|
from fastapi.responses import StreamingResponse, JSONResponse, PlainTextResponse
|
||||||
|
from typing import List, Optional
|
||||||
|
import io
|
||||||
|
import os
|
||||||
|
|
||||||
|
from app.pipeline_adapter import (
|
||||||
|
process_images_to_rows,
|
||||||
|
rows_to_csv_bytes,
|
||||||
|
)
|
||||||
|
|
||||||
|
# FastAPI application instance; title/description/version appear in the
# auto-generated OpenAPI docs (/docs, /redoc).
app = FastAPI(
    title="Medical Billing OCR API",
    description="FastAPI wrapper around the complete OCR pipeline (Google Vision + deskew + line clustering + extraction).",
    version="1.0.0",
)

# Image extensions accepted by the upload endpoints (compared lower-cased).
ALLOWED_EXTS = {".jpg", ".jpeg", ".png", ".tif", ".tiff", ".bmp"}
|
||||||
|
|
||||||
|
@app.get("/health", response_class=PlainTextResponse)
def health():
    """Liveness probe that also reports whether the GCP credentials variable is set."""
    has_creds = bool(os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""))
    return f"OK | GOOGLE_APPLICATION_CREDENTIALS set: {has_creds}"
|
||||||
|
|
||||||
|
@app.post("/extract/json")
async def extract_json(files: List[UploadFile] = File(...)):
    """
    Run the OCR pipeline over the uploaded images and return extracted rows as JSON.

    Returns {"rows": [...]} where each row is a dict of the pipeline's output
    columns (Patient Name, Patient ID, ICN, CDT Code, amounts, ...).
    Raises HTTP 400 for an empty upload, 415 for unsupported extensions,
    and 500 when the pipeline itself fails.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files provided.")

    # Validate extensions early (not bulletproof, but helpful)
    bad = [f.filename for f in files if os.path.splitext(f.filename or "")[1].lower() not in ALLOWED_EXTS]
    if bad:
        raise HTTPException(
            status_code=415,
            detail=f"Unsupported file types: {', '.join(bad)}. Allowed: {', '.join(sorted(ALLOWED_EXTS))}"
        )

    # Read blobs in-memory
    blobs = []
    filenames = []
    for f in files:
        blobs.append(await f.read())
        filenames.append(f.filename or "upload.bin")

    try:
        rows = process_images_to_rows(blobs, filenames)
        # rows is a list[dict] where each dict contains the columns the pipeline emits.
        return JSONResponse(content={"rows": rows})
    except Exception as e:
        # Chain the original exception so server logs show the real failure cause.
        raise HTTPException(status_code=500, detail=f"Processing error: {e}") from e
|
||||||
|
|
||||||
|
@app.post("/extract/csv")
async def extract_csv(files: List[UploadFile] = File(...), filename: Optional[str] = None):
    """
    Run the OCR pipeline over the uploaded images and stream the result as CSV.

    filename: optional download name for the CSV attachment
              (defaults to medical_billing_extract.csv).
    Raises HTTP 400 for an empty upload, 415 for unsupported extensions,
    and 500 when the pipeline itself fails.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files provided.")

    # Validate extensions early (not bulletproof, but helpful)
    bad = [f.filename for f in files if os.path.splitext(f.filename or "")[1].lower() not in ALLOWED_EXTS]
    if bad:
        raise HTTPException(
            status_code=415,
            detail=f"Unsupported file types: {', '.join(bad)}. Allowed: {', '.join(sorted(ALLOWED_EXTS))}"
        )

    # Read blobs in-memory
    blobs = []
    filenames = []
    for f in files:
        blobs.append(await f.read())
        filenames.append(f.filename or "upload.bin")

    try:
        rows = process_images_to_rows(blobs, filenames)
        csv_bytes = rows_to_csv_bytes(rows)
        out_name = filename or "medical_billing_extract.csv"
        # Sanitize the caller-supplied name: it is interpolated into a raw HTTP
        # header, so path components, quotes and CR/LF must not pass through
        # (header-injection risk otherwise).
        out_name = os.path.basename(out_name).replace('"', "").replace("\r", "").replace("\n", "")
        return StreamingResponse(
            io.BytesIO(csv_bytes),
            media_type="text/csv",
            headers={"Content-Disposition": f'attachment; filename="{out_name}"'}
        )
    except Exception as e:
        # Chain the original exception so server logs show the real failure cause.
        raise HTTPException(status_code=500, detail=f"Processing error: {e}") from e
|
||||||
77
apps/PaymentOCRService/app/pipeline_adapter.py
Normal file
77
apps/PaymentOCRService/app/pipeline_adapter.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
from typing import List, Dict
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# Import your existing functions directly from complete_pipeline.py
|
||||||
|
from complete_pipeline import (
|
||||||
|
smart_deskew_with_lines,
|
||||||
|
extract_all_clients_from_lines,
|
||||||
|
)
|
||||||
|
|
||||||
|
def _process_single_image_bytes(blob: bytes, display_name: str) -> List[Dict]:
    """
    Saves bytes to a temp file (so OpenCV + Google Vision can read it),
    runs your existing pipeline functions, and returns extracted rows.

    blob: raw image bytes as uploaded.
    display_name: original filename; used for the temp-file suffix and the
        "Source File" column.
    Returns a list of row dicts; never empty — a placeholder row with
    'Extraction Success': False is appended when nothing parses.
    """
    # Preserve the extension so downstream readers pick the right decoder.
    suffix = os.path.splitext(display_name)[1] or ".jpg"
    tmp_path = None
    try:
        # delete=False so the file survives the `with` and can be re-opened
        # by the pipeline; cleanup happens in `finally`.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp.write(blob)
            tmp_path = tmp.name

        # Uses Google Vision + deskew + post-line grouping
        info = smart_deskew_with_lines(tmp_path, None, clamp_deg=30.0, use_vision=True)
        post_lines = info.get("post_lines", []) if info else []
        rows = extract_all_clients_from_lines(post_lines) if post_lines else []

        # Add source file information (same as your Streamlit app)
        for r in rows:
            r["Source File"] = display_name

        # If nothing parsed, still return a placeholder row to indicate failure (optional)
        if not rows:
            rows.append({
                'Patient Name': "", 'Patient ID': "", 'ICN': "", 'CDT Code': "",
                'Tooth': "", 'Date SVC': "",
                'Billed Amount': "", 'Allowed Amount': "", 'Paid Amount': "",
                'Extraction Success': False, 'Source File': display_name,
            })

        return rows

    finally:
        # Best-effort temp-file cleanup; a leaked temp file is preferable to
        # masking the pipeline's result with an unlink error.
        if tmp_path:
            try:
                os.unlink(tmp_path)
            except Exception:
                pass
|
||||||
|
|
||||||
|
def process_images_to_rows(blobs: List[bytes], filenames: List[str]) -> List[Dict]:
    """
    Public API used by FastAPI routes.

    blobs: list of image bytes
    filenames: matching names for display / Source File column
    """
    collected: List[Dict] = []
    for payload, display_name in zip(blobs, filenames):
        collected.extend(_process_single_image_bytes(payload, display_name))
    return collected
|
||||||
|
|
||||||
|
def rows_to_csv_bytes(rows: List[Dict]) -> bytes:
    """
    Convert pipeline rows to CSV bytes (for frontend to consume as a table).

    Known columns come first in the same order as the Excel export; any
    unexpected columns are appended after them in their original order.
    """
    frame = pd.DataFrame(rows)
    # Mirrors the Excel export's column ordering where those columns exist.
    preferred_order = [
        'Patient Name', 'Patient ID', 'ICN', 'CDT Code', 'Tooth', 'Date SVC',
        'Billed Amount', 'Allowed Amount', 'Paid Amount',
        'Extraction Success', 'Source File'
    ]
    known = [c for c in preferred_order if c in frame.columns]
    extras = [c for c in frame.columns if c not in preferred_order]
    frame = frame[known + extras]
    return frame.to_csv(index=False).encode("utf-8")
|
||||||
837
apps/PaymentOCRService/complete_pipeline.py
Normal file
837
apps/PaymentOCRService/complete_pipeline.py
Normal file
@@ -0,0 +1,837 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
"""
|
||||||
|
End-to-end local pipeline (single script)
|
||||||
|
|
||||||
|
- One Google Vision pass per image (DOCUMENT_TEXT_DETECTION)
|
||||||
|
- Smart deskew (Hough + OCR pairs) with fine grid search (in-memory)
|
||||||
|
- Build slope-aware (pre) and horizontal (post) line dumps (in-memory)
|
||||||
|
- Extract all clients & PD rows per page (robust to headers/EOBS)
|
||||||
|
- Export nicely formatted Excel via ExcelGenerator
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python ocr_pipeline.py --input "C:\\imgs" --out "results.xlsx"
|
||||||
|
python ocr_pipeline.py --files s1.jpg s2.jpg --out results.xlsx
|
||||||
|
python ocr_pipeline.py --input "C:\\imgs" --out results.xlsx --deskewed-only
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import io
|
||||||
|
import cv2
|
||||||
|
import math
|
||||||
|
import glob
|
||||||
|
import argparse
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from typing import List, Dict, Tuple, Any, Optional
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# ========= Debug switch =========
# Set to True to re-enable saving deskewed images, writing *_lines_*.txt,
# and printing progress messages. Kept False in production so the pipeline
# is purely in-memory and leaves no artifacts next to the input images.
DEBUG = False
|
||||||
|
|
||||||
|
# ---------- Google Vision ----------
|
||||||
|
from google.cloud import vision
|
||||||
|
|
||||||
|
# ---------- openpyxl helpers ----------
|
||||||
|
from openpyxl.utils import get_column_letter
|
||||||
|
from openpyxl.cell.cell import MergedCell
|
||||||
|
from openpyxl import Workbook
|
||||||
|
from openpyxl.styles import Font, PatternFill, Border, Side, Alignment
|
||||||
|
from openpyxl.utils.dataframe import dataframe_to_rows
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Config (tuning)
|
||||||
|
# ============================================================
|
||||||
|
# Perpendicular-distance tolerance for assigning a word to a fitted line,
# expressed as a fraction of the median word height.
PERP_TOL_FACTOR = 0.6
# Vertical seed-search band (in median word heights) for line candidates.
SEED_BAND_H = 3.0
# Emit one-word lines instead of discarding isolated words.
ALLOW_SINGLETON = True

# Post-rotation grouping: max |Δy| between word centers, in median heights.
POST_Y_TOL_FACTOR = 0.55
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Vision OCR (ONE pass per image)
|
||||||
|
# ============================================================
|
||||||
|
def _open_bytes(path: str) -> bytes:
    """Read the file at *path* fully into memory as raw bytes."""
    with open(path, "rb") as handle:
        return handle.read()
|
||||||
|
|
||||||
|
def extract_words_and_text(image_path: str) -> Tuple[List[Dict], str]:
    """
    Run one Google Vision DOCUMENT_TEXT_DETECTION pass on the image.

    Returns (words, full_text): full_text is Vision's whole-page transcript;
    each word dict carries its text plus axis-aligned box geometry in pixels —
    left/top, width/height (w/h) and center (cx/cy).
    Raises RuntimeError when the Vision response reports an error.
    """
    client = vision.ImageAnnotatorClient()
    resp = client.document_text_detection(image=vision.Image(content=_open_bytes(image_path)))
    if resp.error.message:
        raise RuntimeError(resp.error.message)

    full_text = resp.full_text_annotation.text or ""

    words: List[Dict] = []
    # Walk Vision's page > block > paragraph > word hierarchy and flatten it.
    for page in resp.full_text_annotation.pages:
        for block in page.blocks:
            for para in block.paragraphs:
                for word in para.words:
                    text = "".join(s.text for s in word.symbols)
                    vs = word.bounding_box.vertices
                    # Axis-aligned bounding box from the (possibly rotated) quad.
                    xs = [v.x for v in vs]; ys = [v.y for v in vs]
                    left, top = min(xs), min(ys)
                    w, h = max(xs) - left, max(ys) - top
                    cx, cy = left + w/2.0, top + h/2.0
                    words.append({"text": text, "left": left, "top": top,
                                  "w": w, "h": h, "cx": cx, "cy": cy})
    return words, full_text
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Skew estimation (Hough + OCR pairs)
|
||||||
|
# ============================================================
|
||||||
|
def weighted_median(pairs: List[Tuple[float, float]]) -> float:
    """Return the weighted median of (value, weight) pairs; 0.0 for empty input."""
    if not pairs:
        return 0.0
    ordered = sorted(pairs, key=lambda item: item[0])
    half_weight = sum(weight for _, weight in ordered) / 2.0
    running = 0.0
    for value, weight in ordered:
        running += weight
        if running >= half_weight:
            return value
    # Fallback (unreachable with positive weights): largest value.
    return ordered[-1][0]
|
||||||
|
|
||||||
|
def estimate_skew_pairs(words: List[Dict],
                        y_band_mult: float = 2.0,
                        min_dx_mult: float = 0.8,
                        max_abs_deg: float = 15.0) -> Tuple[float, int]:
    """
    Estimate page skew (degrees) from OCR word geometry.

    For each word, find its nearest rightward neighbor on (roughly) the same
    text line and measure the angle of the segment joining their centers;
    the weighted median of those angles (weighted by horizontal distance,
    after IQR outlier trimming) is the skew estimate.

    Returns (angle_deg, sample_count); (0.0, 0) when no usable pairs exist.
    """
    if not words:
        return 0.0, 0
    widths = [w["w"] for w in words if w["w"] > 0]
    heights = [w["h"] for w in words if w["h"] > 0]
    w_med = float(np.median(widths) if widths else 10.0)
    h_med = float(np.median(heights) if heights else 16.0)
    # Same-line tolerance and minimum horizontal separation, scaled to the page.
    y_band = y_band_mult * h_med
    min_dx = max(4.0, min_dx_mult * w_med)

    words_sorted = sorted(words, key=lambda w: (w["cy"], w["cx"]))
    pairs: List[Tuple[float, float]] = []
    for i, wi in enumerate(words_sorted):
        best_j = None; best_dx = None
        for j in range(i + 1, len(words_sorted)):
            wj = words_sorted[j]
            dy = wj["cy"] - wi["cy"]
            # Sorted by cy, so once dy exceeds the band no later word can match.
            if dy > y_band: break
            if abs(dy) <= y_band:
                dx = wj["cx"] - wi["cx"]
                # Only rightward neighbors far enough away to give a stable angle.
                if dx <= 0 or dx < min_dx: continue
                if best_dx is None or dx < best_dx:
                    best_dx, best_j = dx, j
        if best_j is None: continue
        wj = words_sorted[best_j]
        dx = wj["cx"] - wi["cx"]; dy = wj["cy"] - wi["cy"]
        ang = math.degrees(math.atan2(dy, dx))
        # Reject implausible angles; weight the sample by its horizontal span.
        if abs(ang) <= max_abs_deg:
            pairs.append((ang, max(1.0, dx)))

    if not pairs:
        return 0.0, 0
    # IQR trim to drop outlier angles (e.g. pairs spanning different lines).
    vals = np.array([v for v, _ in pairs], dtype=float)
    q1, q3 = np.percentile(vals, [25, 75]); iqr = q3 - q1
    lo, hi = q1 - 1.5 * iqr, q3 + 1.5 * iqr
    trimmed = [(v, w) for v, w in pairs if lo <= v <= hi] or pairs
    return float(weighted_median(trimmed)), len(trimmed)
|
||||||
|
|
||||||
|
def estimate_skew_hough(img: np.ndarray, thr: int = 180) -> Tuple[float, int]:
    """
    Estimate page skew (degrees) from straight edges via a Hough transform.

    Returns (angle_deg, line_count); (0.0, 0) when no lines are detected.
    """
    g = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    g = cv2.GaussianBlur(g, (3, 3), 0)
    edges = cv2.Canny(g, 60, 160, apertureSize=3)
    lines = cv2.HoughLines(edges, 1, np.pi/180, threshold=thr)
    if lines is None: return 0.0, 0
    angs = []
    for (rho, theta) in lines[:, 0, :]:
        # Convert the Hough normal angle into deviation from horizontal.
        ang = (theta - np.pi/2.0) * 180.0/np.pi
        # Fold into (-45, 45] so vertical rules vote like horizontal ones.
        while ang > 45: ang -= 90
        while ang < -45: ang += 90
        angs.append(ang)
    angs = np.array(angs, dtype=float)
    med = float(np.median(angs))
    # Re-median after discarding angles more than 10° from the first median.
    keep = angs[np.abs(angs - med) <= 10.0]
    return (float(np.median(keep)) if keep.size else med), int(angs.size)
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Rotation (image + coordinates) and scoring
|
||||||
|
# ============================================================
|
||||||
|
def rotation_matrix_keep_bounds(shape_hw: Tuple[int, int], angle_deg: float) -> Tuple[np.ndarray, Tuple[int, int]]:
    """
    Build a 2x3 affine rotation about the image center that also enlarges the
    canvas so no content is clipped. Returns (matrix, (new_h, new_w)).
    """
    h, w = shape_hw
    center = (w/2.0, h/2.0)
    M = cv2.getRotationMatrix2D(center, angle_deg, 1.0)
    cos, sin = abs(M[0, 0]), abs(M[0, 1])
    # Bounding box of the rotated image.
    new_w = int(h*sin + w*cos)
    new_h = int(h*cos + w*sin)
    # Shift so the rotated content is centered in the enlarged canvas.
    M[0, 2] += (new_w/2) - center[0]
    M[1, 2] += (new_h/2) - center[1]
    return M, (new_h, new_w)
|
||||||
|
|
||||||
|
def rotate_image_keep_bounds(img: np.ndarray, angle_deg: float, border_value=255) -> np.ndarray:
    """
    Rotate *img* by *angle_deg* without clipping; newly exposed border pixels
    are filled with *border_value* (255 = white, matching paper background).
    """
    M, (nh, nw) = rotation_matrix_keep_bounds(img.shape[:2], angle_deg)
    return cv2.warpAffine(img, M, (nw, nh),
                          flags=cv2.INTER_LINEAR,
                          borderMode=cv2.BORDER_CONSTANT,
                          borderValue=border_value)
|
||||||
|
|
||||||
|
def transform_words(words: List[Dict], shape_hw: Tuple[int, int], angle_deg: float) -> List[Dict]:
    """
    Map each word's center through the same bounds-keeping rotation applied to
    the image, adding "cx_rot"/"cy_rot" keys. Input dicts are not mutated.
    """
    M, _ = rotation_matrix_keep_bounds(shape_hw, angle_deg)
    transformed = []
    for word in words:
        rx, ry = (M @ np.array([word["cx"], word["cy"], 1.0])).tolist()
        rotated_word = dict(word)
        rotated_word["cx_rot"] = float(rx)
        rotated_word["cy_rot"] = float(ry)
        transformed.append(rotated_word)
    return transformed
|
||||||
|
|
||||||
|
def preview_score(img: np.ndarray, deskew_angle: float) -> float:
    """
    Cost of a candidate deskew angle: the residual Hough skew measured after
    rotating a small preview of the image. Lower is better; returns 90.0 when
    no lines are detected (so line-less candidates are never preferred).
    """
    h, w = img.shape[:2]
    scale = 1200.0 / max(h, w)
    # Score on a <=1200px preview to keep the grid search cheap.
    small = cv2.resize(img, (int(w*scale), int(h*scale)), interpolation=cv2.INTER_AREA) if scale < 1 else img
    rot = rotate_image_keep_bounds(small, deskew_angle, border_value=255)
    resid, n = estimate_skew_hough(rot, thr=140)
    return abs(resid) if n > 0 else 90.0
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Slope-based clustering (pre-rotation)
|
||||||
|
# ============================================================
|
||||||
|
def line_from_points(p0, p1):
    """
    Return (slope, intercept) of the line through p0 and p1.

    A vertical line is encoded as (inf, x0) where x0 is the shared x coordinate.
    """
    (x0, y0), (x1, y1) = p0, p1
    run = x1 - x0
    if abs(run) < 1e-9:
        return float("inf"), x0
    slope = (y1 - y0) / run
    return slope, y0 - slope * x0
|
||||||
|
|
||||||
|
def perp_distance(m, b, x, y):
    """
    Perpendicular distance from point (x, y) to the line y = m*x + b.

    For a vertical line (m infinite), b holds the line's x coordinate.
    """
    if math.isinf(m):
        return abs(x - b)
    numerator = abs(m * x - y + b)
    return numerator / math.sqrt(m * m + 1.0)
|
||||||
|
|
||||||
|
def refit_line(points: List[Tuple[float, float]]) -> Tuple[float, float]:
    """
    Least-squares line fit through *points*, returned as (slope, intercept).

    A single point yields a horizontal line through it; points sharing one
    x coordinate yield a vertical line encoded as (inf, x).
    """
    if len(points) == 1:
        _, y0 = points[0]
        return 0.0, y0
    n = len(points)
    mean_x = sum(p[0] for p in points) / n
    mean_y = sum(p[1] for p in points) / n
    cov = sum((px - mean_x) * (py - mean_y) for px, py in points)
    var_x = sum((px - mean_x) ** 2 for px, _ in points)
    if abs(var_x) < 1e-12:
        return float("inf"), mean_x
    slope = cov / var_x
    return slope, mean_y - slope * mean_x
|
||||||
|
|
||||||
|
def project_t(m, b, x0, y0, x, y):
    """
    Scalar position of (x, y) along the unit direction of a line with slope m,
    measured from origin point (x0, y0). For a vertical line (m infinite) the
    position is simply the y offset. b is unused for finite slopes; it is kept
    so the signature matches the other line helpers.
    """
    if math.isinf(m):
        return y - y0
    dx, dy = x - x0, y - y0
    return (dx + m * dy) / math.sqrt(1 + m * m)
|
||||||
|
|
||||||
|
def _build_line_result(words, idxs, m, b, rotated=False):
    """
    Assemble a line dict from the word indices *idxs* lying on line y = m*x + b.

    Words are ordered by their scalar projection along the line direction so
    the joined text reads left-to-right; when *rotated* is True the
    post-rotation coordinates (cx_rot/cy_rot) are used instead of cx/cy.
    Returns {"text", "words", "slope", "center_x", "center_y", "count"}.
    """
    # The leftmost word anchors the projection origin.
    origin_idx = min(idxs, key=lambda i: (words[i]["cx_rot"] if rotated else words[i]["cx"]))
    x0 = words[origin_idx]["cx_rot"] if rotated else words[origin_idx]["cx"]
    y0 = words[origin_idx]["cy_rot"] if rotated else words[origin_idx]["cy"]

    # Order words by distance along the line, not by raw x, so tilted lines
    # still read in the correct sequence.
    ordered = sorted(
        idxs,
        key=lambda i: project_t(
            m, b, x0, y0,
            words[i]["cx_rot"] if rotated else words[i]["cx"],
            words[i]["cy_rot"] if rotated else words[i]["cy"]
        )
    )
    line_words = [words[i] for i in ordered]
    text = " ".join(w["text"] for w in line_words)

    xs = [(w["cx_rot"] if rotated else w["cx"]) for w in line_words]
    ys = [(w["cy_rot"] if rotated else w["cy"]) for w in line_words]
    return {
        "text": text,
        "words": line_words,
        "slope": m,
        "center_x": float(sum(xs)/len(xs)),
        "center_y": float(sum(ys)/len(ys)),
        "count": len(line_words),
    }
|
||||||
|
|
||||||
|
def cluster_tilted_lines(words: List[Dict]) -> List[Dict]:
    """
    Group OCR words into (possibly tilted) text lines before deskewing.

    RANSAC-like per seed word: try lines through the seed and its nearest
    horizontal neighbors, keep the hypothesis with the most inliers (words
    within PERP_TOL_FACTOR * median-height perpendicular distance), refit by
    least squares, then absorb any further inliers. Returns line dicts
    (see _build_line_result) sorted top-to-bottom by center_y.
    """
    if not words: return []
    hs = sorted([w["h"] for w in words if w["h"] > 0])
    h_med = hs[len(hs)//2] if hs else 16.0
    # Tolerances scale with median word height so the clustering is DPI-agnostic.
    perp_tol = PERP_TOL_FACTOR * h_med
    band_dy = SEED_BAND_H * h_med

    remaining = set(range(len(words)))
    order = sorted(remaining, key=lambda i: (words[i]["cy"], words[i]["cx"]))
    lines = []

    while remaining:
        # Seed = topmost-leftmost unassigned word.
        seed_idx = next(i for i in order if i in remaining)
        remaining.remove(seed_idx)
        sx, sy = words[seed_idx]["cx"], words[seed_idx]["cy"]

        cand_idxs = [j for j in remaining if abs(words[j]["cy"] - sy) <= band_dy]
        if not cand_idxs:
            # Isolated word: keep it as a one-word line when allowed.
            if ALLOW_SINGLETON:
                m, b = refit_line([(sx, sy)])
                lines.append(_build_line_result(words, {seed_idx}, m, b))
            continue

        # Hypothesize lines through the seed and its 10 nearest-x candidates;
        # keep the hypothesis with the largest inlier set.
        cand_idxs.sort(key=lambda j: abs(words[j]["cx"] - sx))
        best_inliers = None; best_mb = None
        for j in cand_idxs[:min(10, len(cand_idxs))]:
            m, b = line_from_points((sx, sy), (words[j]["cx"], words[j]["cy"]))
            inliers = {seed_idx, j}
            for k in remaining:
                xk, yk = words[k]["cx"], words[k]["cy"]
                if perp_distance(m, b, xk, yk) <= perp_tol:
                    inliers.add(k)
            if best_inliers is None or len(inliers) > len(best_inliers):
                best_inliers, best_mb = inliers, (m, b)

        # Refit by least squares over the winning inliers for a stabler slope.
        m, b = best_mb
        pts = [(words[i]["cx"], words[i]["cy"]) for i in best_inliers]
        m, b = refit_line(pts)

        # Absorb any additional words close to the refitted line.
        expanded = set(best_inliers)
        for idx in list(remaining):
            xk, yk = words[idx]["cx"], words[idx]["cy"]
            if perp_distance(m, b, xk, yk) <= perp_tol:
                expanded.add(idx)

        for idx in expanded:
            if idx in remaining:
                remaining.remove(idx)
        lines.append(_build_line_result(words, expanded, m, b))

    lines.sort(key=lambda L: L["center_y"])
    return lines
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Post-rotation grouping (simple horizontal lines)
|
||||||
|
# ============================================================
|
||||||
|
def group_horizontal_lines(rotated_words: List[Dict]) -> List[Dict]:
    """
    Group words into horizontal lines after deskewing.

    Words (carrying cx_rot/cy_rot from transform_words) are sorted by y then x
    and swept top-to-bottom: a word joins the current line while its y center
    stays within POST_Y_TOL_FACTOR * median-height of the line's FIRST word.
    Returns line dicts (see _build_line_result) sorted by center_y.
    """
    if not rotated_words: return []
    hs = sorted([w["h"] for w in rotated_words if w["h"] > 0])
    h_med = hs[len(hs)//2] if hs else 16.0
    y_tol = POST_Y_TOL_FACTOR * h_med

    idxs = list(range(len(rotated_words)))
    idxs.sort(key=lambda i: (rotated_words[i]["cy_rot"], rotated_words[i]["cx_rot"]))
    lines = []
    cur = []

    def flush():
        # Finalize the current run of indices into a line dict.
        nonlocal cur
        if not cur: return
        xs = [rotated_words[i]["cx_rot"] for i in cur]
        ys = [rotated_words[i]["cy_rot"] for i in cur]
        m, b = refit_line(list(zip(xs, ys)))
        cur_sorted = sorted(cur, key=lambda i: rotated_words[i]["cx_rot"])
        lines.append(_build_line_result(rotated_words, set(cur_sorted), m, b, rotated=True))
        cur = []

    for i in idxs:
        if not cur:
            cur = [i]
        else:
            # Compare against the line's first word, not a running mean —
            # keeps the tolerance anchored to where the line started.
            y0 = rotated_words[cur[0]]["cy_rot"]
            yi = rotated_words[i]["cy_rot"]
            if abs(yi - y0) <= y_tol:
                cur.append(i)
            else:
                flush()
                cur = [i]
    flush()
    lines.sort(key=lambda L: L["center_y"])
    return lines
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Utilities: dump lines to txt (only if DEBUG)
|
||||||
|
# ============================================================
|
||||||
|
def slope_to_deg(m: float) -> float:
    """Convert a line slope to its angle in degrees; an infinite slope maps to 90."""
    return 90.0 if math.isinf(m) else math.degrees(math.atan(m))
|
||||||
|
|
||||||
|
def write_lines_txt(base_path: str, suffix: str, lines: List[Dict]) -> Optional[str]:
    """
    Dump clustered lines to '<base>_<suffix>.txt' next to the source image.

    Debug-only: returns None without writing anything unless the module-level
    DEBUG flag is True. Otherwise returns the path written.
    """
    if not DEBUG:
        return None
    txt_path = f"{os.path.splitext(base_path)[0]}_{suffix}.txt"
    with open(txt_path, "w", encoding="utf-8") as f:
        f.write(f"# {os.path.basename(base_path)} ({suffix})\n")
        for i, L in enumerate(lines, 1):
            ang = slope_to_deg(L["slope"])
            f.write(f"[{i:03d}] words={L['count']:>3} slope={ang:+.3f}°\n")
            f.write(L["text"] + "\n\n")
    return txt_path
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Smart deskew + full pipeline (in-memory; returns words + full_text)
|
||||||
|
# ============================================================
|
||||||
|
def smart_deskew_with_lines(image_path: str,
                            out_path: Optional[str] = None,
                            clamp_deg: float = 30.0,
                            use_vision: bool = True) -> Dict:
    """
    Full per-image pass: OCR (optional), skew estimation, deskew, line grouping.

    image_path: image readable by cv2.imread.
    out_path: where to save the deskewed image — only written when DEBUG is on.
    clamp_deg: candidate angles are clamped to [-clamp_deg, clamp_deg].
    use_vision: run Google Vision OCR; when False no words/lines are produced.

    Returns a dict with the chosen "angle_deg", estimator sample counts,
    "words"/"full_text" from OCR, and "pre_lines" (slope-aware, pre-rotation) /
    "post_lines" (horizontal, post-rotation) line groupings.
    Raises FileNotFoundError when the image cannot be read.
    """
    img = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if img is None: raise FileNotFoundError(image_path)

    words, full_text = ([], "")
    if use_vision:
        # Single Vision pass per image; reused for both line groupings below.
        words, full_text = extract_words_and_text(image_path)

    # Two independent skew estimators: image edges (Hough) and OCR word pairs.
    a_h, n_h = estimate_skew_hough(img)
    a_p, n_p = (0.0, 0)
    if words:
        a_p, n_p = estimate_skew_pairs(words, y_band_mult=2.0, min_dx_mult=0.8, max_abs_deg=15.0)

    # Only trust an estimator with >= 10 samples; try both signs of each angle.
    candidates = []
    if n_h >= 10: candidates += [a_h, -a_h]
    if n_p >= 10: candidates += [a_p, -a_p]
    if not candidates: candidates = [0.0]

    # Clamp and dedupe (0.05° resolution).
    cand = []
    for a in candidates:
        a = float(max(-clamp_deg, min(clamp_deg, a)))
        if all(abs(a - b) > 0.05 for b in cand):
            cand.append(a)

    # Fine grid: +/-0.6° around each candidate in 0.2° steps.
    grid = []
    for a in cand:
        for d in (-0.6, -0.4, -0.2, 0.0, 0.2, 0.4, 0.6):
            g = a + d
            if all(abs(g - x) > 0.05 for x in grid):
                grid.append(g)

    # Pick the angle whose rotation leaves the smallest residual Hough skew.
    scored = [(a, preview_score(img, -a)) for a in grid]
    best_angle, best_cost = min(scored, key=lambda t: t[1])

    # Debug print kept as a comment
    # print(f"[smart] hough={a_h:.3f}°(n={n_h}) pairs={a_p:.3f}°(n={n_p}) tried={', '.join(f'{a:+.2f}°' for a,_ in scored)} → chosen {best_angle:+.2f}° (cost={best_cost:.3f})")

    # Rotate in-memory. Save only if DEBUG.
    rotated = rotate_image_keep_bounds(img, -best_angle, border_value=255)
    if DEBUG and out_path:
        cv2.imwrite(out_path, rotated)

    result = {
        "angle_deg": float(best_angle),
        "hough_lines": int(n_h),
        "pair_samples": int(n_p),
        "out_path": out_path if DEBUG else None,
        "pre_txt": None,
        "post_txt": None,
        "pre_lines": [],
        "post_lines": [],
        "words": words,
        "full_text": full_text,
    }

    if words:
        # Slope-aware grouping on the original (pre-rotation) coordinates.
        pre_lines = cluster_tilted_lines(words)
        result["pre_lines"] = pre_lines
        result["pre_txt"] = write_lines_txt(image_path, "lines_pre", pre_lines)  # only if DEBUG

        # Horizontal grouping after mapping word centers through the rotation.
        rot_words = transform_words(words, img.shape[:2], -best_angle)
        post_lines = group_horizontal_lines(rot_words)
        result["post_lines"] = post_lines
        result["post_txt"] = write_lines_txt(image_path, "lines_post", post_lines)  # only if DEBUG

        # More debug prints kept as comments
        # def preview(lines, tag):
        #     print(f"  {tag} ({len(lines)} lines)")
        #     for L in lines[:5]:
        #         ang = slope_to_deg(L["slope"])
        #         print(f"    [{L['count']:>3} w] slope={ang:+.3f}° | {L['text'][:90]}")
        # preview(pre_lines, "pre (slope-aware)")
        # preview(post_lines, "post (horizontal)")
        # if DEBUG:
        #     print(f"  → wrote: {result['pre_txt']} and {result['post_txt']}")

    return result
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Multi-client extraction from post lines (robust)
|
||||||
|
# ============================================================
|
||||||
|
# "MEMBER NAME: <rest of line>" header; group(1) may also contain trailing
# fields that extraction trims off later.
MEMBER_RE = re.compile(r'\bMEMBER NAME\s*:\s*(.+)', re.IGNORECASE)
# "MEMBER ID: <alphanumeric>" anywhere in a line.
MEMBERID_RE = re.compile(r'\bMEMBER ID\s*:\s*([A-Za-z0-9]+)', re.IGNORECASE)
# A line beginning with a 12+ digit number — treated as an ICN.
ICN_LINE_RE = re.compile(r'^\s*\d{12,}\b')

AMOUNT_RE = re.compile(r'(\d{1,3}(?:,\d{3})*\.\d{2})')  # decimals only
DATE6_RE = re.compile(r'\b\d{6}\b')  # 6-digit service date (format MMDDYY presumed — TODO confirm)
PD_ROW_RE = re.compile(r'\bPD\s+(D?\d{4})\b', re.IGNORECASE)  # "PD D1234" / "PD 1234" procedure rows
TOOTH_RE = re.compile(r'^(?:[1-9]|[12][0-9]|3[0-2]|[A-Ta-t])$')  # permanent 1-32 or primary A-T
SURFACE_RE = re.compile(r'^[MDBOILFP]{1,4}$', re.IGNORECASE)  # tooth-surface letter codes
|
||||||
|
|
||||||
|
def _to_float(s: str) -> float:
    """
    Parse a currency-style amount string (e.g. "1,234.56") into a float.

    Returns 0.0 when the value is malformed or not a string — OCR output is
    noisy, so unparseable amounts degrade to zero rather than aborting a page.
    """
    try:
        return float(s.replace(',', ''))
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: non-string input; ValueError: bad number.
        # Narrowed from a bare `except Exception` so programming errors
        # (e.g. NameError) are no longer silently swallowed.
        return 0.0
|
||||||
|
|
||||||
|
def _parse_pd_line(t: str) -> Optional[Tuple[str, Optional[float], Optional[float], Optional[float], Optional[str], Optional[str], Optional[str]]]:
    """
    Parse a single PD line.
    Returns: (CDT, billed, allowed, paid, date6, tooth, surface)
    Returns None when the line contains no "PD <code>" marker.
    """
    m = PD_ROW_RE.search(t)
    if not m:
        return None

    # Normalize the CDT code to the "Dnnnn" form.
    code = m.group(1)
    code = code if code.upper().startswith('D') else f'D{code}'

    # The last three decimal amounts on the line are billed/allowed/paid
    # (earlier decimals, if any, are other columns).
    amts = [_to_float(x) for x in AMOUNT_RE.findall(t)]
    billed = allowed = paid = None
    if len(amts) >= 3:
        billed, allowed, paid = amts[-3:]

    # First 6-digit run is taken as the service date.
    d = None
    md = DATE6_RE.search(t)
    if md:
        d = md.group(0)

    tooth = None
    surface = None

    # Locate the code token so tooth/surface can be searched after it.
    tokens = t.split()
    try:
        code_idx = tokens.index(code)
    except ValueError:
        # OCR may have dropped the leading 'D'; fall back to any token that
        # would match the PD-row pattern.
        code_idx = None
        for i, tok in enumerate(tokens):
            if PD_ROW_RE.match(f'PD {tok}'):
                code_idx = i
                break

    if code_idx is not None:
        # Tooth/surface live between the code and the date (when present).
        date_idx = None
        for i in range(code_idx + 1, len(tokens)):
            if DATE6_RE.fullmatch(tokens[i]):
                date_idx = i
                break

        window = tokens[code_idx + 1: date_idx if date_idx is not None else len(tokens)]

        for tok in window:
            if TOOTH_RE.fullmatch(tok):
                tooth = tok.upper()
                break

        # Surface is only searched after the tooth token, so a leading "M"
        # etc. cannot be misread as a surface before the tooth is seen.
        start_j = 0
        if tooth is not None:
            for j, tok in enumerate(window):
                if tok.upper() == tooth:
                    start_j = j + 1
                    break
        for tok in window[start_j:]:
            if SURFACE_RE.fullmatch(tok):
                surface = tok.upper()
                break

    return code, billed, allowed, paid, d, tooth, surface
|
||||||
|
|
||||||
|
def extract_all_clients_from_lines(post_lines: List[dict]) -> List[dict]:
    """
    Split strictly by MEMBER NAME lines; ignore anything before the first name.
    For each member block, look up ICN from the nearest line above the member header.
    Parse each PD line for CDT, Date SVC, Billed, Allowed, Paid (decimals only).

    post_lines: list of line dicts; only the "text" key is read here.
    Returns one output row per PD line, or one placeholder row per member
    block that contained no parsable PD line.
    """
    texts = [L["text"] for L in post_lines]
    # Indexes of lines that start a member block.
    starts = [i for i,t in enumerate(texts) if MEMBER_RE.search(t)]
    if not starts:
        return []

    out_rows = []

    for si, start in enumerate(starts):
        # Block spans from this member header up to (not including) the next.
        end = starts[si+1] if si+1 < len(starts) else len(texts)

        # header line with MEMBER NAME
        name_line = texts[start]
        raw_name = MEMBER_RE.search(name_line).group(1).strip()
        # Stop at "MEMBER ID" (case-insensitive) and other headers that OCR
        # may have merged onto the name line.
        cut_points = ["MEMBER ID", "OTH INS CD", "PA:", "DIAG:"]
        mname = raw_name
        for cp in cut_points:
            idx = mname.upper().find(cp)
            if idx != -1:
                mname = mname[:idx].strip()
        # Debug
        # print(raw_name); print(mname)

        # member id: search within the block
        mid = ""
        for t in texts[start:end]:
            m = MEMBERID_RE.search(t)
            if m:
                mid = m.group(1).strip()
                break

        # ICN: search a few lines ABOVE the member header (up to 5 lines back).
        icn = ""
        for k in range(start-1, max(-1, start-6), -1):
            if k < 0: break
            mm = ICN_LINE_RE.match(texts[k])
            if mm:
                icn = mm.group(0)
                break

        # PD lines in the block
        had_pd = False
        for t in texts[start:end]:
            # Cheap pre-filter: only lines containing the standalone token "PD".
            if " PD " not in f" {t} ":
                continue
            parsed = _parse_pd_line(t)
            if not parsed:
                continue
            had_pd = True
            code, billed, allowed, paid, dsvc, tooth, surface = parsed
            out_rows.append({
                'Patient Name': mname.title() if mname else "",
                'Patient ID': mid,
                'ICN': icn,
                'CDT Code': code,
                'Tooth': tooth if tooth else "",
                #'Surface': surface if surface else "",
                'Date SVC': dsvc if dsvc else "",
                'Billed Amount': billed if billed is not None else "",
                'Allowed Amount': allowed if allowed is not None else "",
                'Paid Amount': paid if paid is not None else "",
                'Extraction Success': True,
            })

        # No PD rows: still emit one placeholder row so the member appears in
        # the output; success only if we at least recovered a name or id.
        if not had_pd:
            out_rows.append({
                'Patient Name': mname.title() if mname else "",
                'Patient ID': mid,
                'ICN': icn,
                'CDT Code': "",
                'Tooth': "",
                #'Surface': "",
                'Date SVC': "",
                'Billed Amount': "",
                'Allowed Amount': "",
                'Paid Amount': "",
                'Extraction Success': bool(mname or mid),
            })

    return out_rows
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# ExcelGenerator
|
||||||
|
# ============================================================
|
||||||
|
class ExcelGenerator:
    """Render extraction rows into a styled .xlsx workbook via openpyxl.

    Produces a main "Medical Billing Extract" sheet (title banner, styled
    header, bordered data rows, conditional success coloring) plus a
    "Summary" sheet with basic counts.
    """

    def __init__(self):
        # Shared style objects, built once and reused across all sheets.
        self.header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
        self.header_font = Font(color="FFFFFF", bold=True)
        self.border = Border(
            left=Side(style='thin'),
            right=Side(style='thin'),
            top=Side(style='thin'),
            bottom=Side(style='thin')
        )
        self.center_alignment = Alignment(horizontal='center', vertical='center')

    def create_excel_file(self, df: pd.DataFrame) -> bytes:
        """Build the workbook from *df* and return it as xlsx bytes."""
        wb = Workbook()
        ws = wb.active
        ws.title = "Medical Billing Extract"
        # Layout: row 1 = title banner, row 2 = spacer, row 3 = column header,
        # rows 4..len(df)+3 = data. format_worksheet relies on this layout.
        ws['A1'] = f"Medical Billing OCR Extract - Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        ws.merge_cells('A1:H1')
        ws['A1'].font = Font(size=14, bold=True)
        ws['A1'].alignment = self.center_alignment
        ws.append([])

        excel_df = self.prepare_dataframe_for_excel(df)
        for r in dataframe_to_rows(excel_df, index=False, header=True):
            ws.append(r)

        self.format_worksheet(ws, len(excel_df) + 3)
        self.add_summary_sheet(wb, excel_df)

        output = io.BytesIO()
        wb.save(output)
        output.seek(0)
        return output.getvalue()

    def prepare_dataframe_for_excel(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of *df* reordered and formatted for display.

        Only columns present in *df* are kept; amounts become "$x,xxx.xx"
        strings and the success flag becomes 'Yes'/'No'.
        """
        excel_df = df.copy()
        column_order = [
            'Patient Name', 'Patient ID', 'ICN', 'CDT Code', 'Tooth', 'Date SVC', #'Surface',
            'Billed Amount', 'Allowed Amount', 'Paid Amount',
            'Extraction Success', 'Source File'
        ]
        existing = [c for c in column_order if c in excel_df.columns]
        excel_df = excel_df[existing]
        for amount_col in ['Billed Amount', 'Allowed Amount', 'Paid Amount']:
            if amount_col in excel_df.columns:
                excel_df[amount_col] = excel_df[amount_col].apply(self.format_currency)
        if 'Extraction Success' in excel_df.columns:
            excel_df['Extraction Success'] = excel_df['Extraction Success'].apply(lambda x: 'Yes' if x else 'No')
        return excel_df

    def format_currency(self, value):
        """Format *value* as "$1,234.56"; blanks stay blank, junk passes through as str."""
        if pd.isna(value) or value == "":
            return ""
        try:
            if isinstance(value, str):
                # Tolerate already-formatted input like "$1,234.50".
                clean_value = value.replace('$', '').replace(',', '')
                value = float(clean_value)
            return f"${value:,.2f}"
        except (ValueError, TypeError):
            return str(value)

    def format_worksheet(self, ws, data_rows):
        """Style the header row and data rows; *data_rows* is the last data row index."""
        header_row = 3  # see create_excel_file layout
        for cell in ws[header_row]:
            if cell.value:
                cell.fill = self.header_fill
                cell.font = self.header_font
                cell.alignment = self.center_alignment
                cell.border = self.border
        for row in range(header_row + 1, data_rows + 1):
            for cell in ws[row]:
                cell.border = self.border
                cell.alignment = Alignment(horizontal='left', vertical='center')
        self.auto_adjust_columns(ws)
        self.add_conditional_formatting(ws, header_row, data_rows)

    def auto_adjust_columns(self, ws):
        """Size each column to its longest cell value (capped at 50 chars)."""
        max_col = ws.max_column
        max_row = ws.max_row
        for col_idx in range(1, max_col + 1):
            max_len = 0
            for row in range(1, max_row + 1):
                cell = ws.cell(row=row, column=col_idx)
                if isinstance(cell, MergedCell):
                    # Merged cells have no independent width contribution.
                    continue
                try:
                    val = cell.value
                    if val is None:
                        continue
                    max_len = max(max_len, len(str(val)))
                except Exception:
                    pass
            letter = get_column_letter(col_idx)
            ws.column_dimensions[letter].width = min(max_len + 2, 50)

    def add_conditional_formatting(self, ws, header_row, data_rows):
        """Color the 'Extraction Success' column: green for 'Yes', pink for 'No'."""
        success_col = None
        for col, cell in enumerate(ws[header_row], 1):
            if cell.value == 'Extraction Success':
                success_col = col
                break
        if success_col:
            for row in range(header_row + 1, data_rows + 1):
                cell = ws.cell(row=row, column=success_col)
                if cell.value == 'Yes':
                    cell.fill = PatternFill(start_color="90EE90", end_color="90EE90", fill_type="solid")
                elif cell.value == 'No':
                    cell.fill = PatternFill(start_color="FFB6C1", end_color="FFB6C1", fill_type="solid")

    def add_summary_sheet(self, wb, df):
        """Append a 'Summary' sheet with total/success/failure counts."""
        ws = wb.create_sheet(title="Summary")
        ws['A1'] = "Extraction Summary"
        ws['A1'].font = Font(size=16, bold=True)
        ws.merge_cells('A1:B1')
        row = 3
        stats = [
            ("Total Rows", len(df)),
            ("Successful", len(df[df['Extraction Success'] == 'Yes']) if 'Extraction Success' in df.columns else 0),
            ("Failed", len(df[df['Extraction Success'] == 'No']) if 'Extraction Success' in df.columns else 0),
        ]
        for name, val in stats:
            ws[f'A{row}'] = name
            ws[f'B{row}'] = val
            ws[f'A{row}'].font = Font(bold=True)
            row += 1
        # Bug fix: reuse this instance instead of constructing a throwaway
        # ExcelGenerator() (which needlessly rebuilt all style objects).
        self.auto_adjust_columns(ws)
        row += 2
        ws[f'A{row}'] = f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        ws[f'A{row}'].font = Font(italic=True)
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Runner: glue everything together
|
||||||
|
# ============================================================
|
||||||
|
def process_images_to_excel(files: List[str], out_excel: str, deskewed_only: bool=False) -> None:
    """Run the full pipeline over *files* and write a styled Excel to *out_excel*.

    deskewed_only=True skips the deskew step and OCRs the images as-is;
    otherwise each image is deskewed first via smart_deskew_with_lines.
    A failed image contributes one 'Extraction Success': False row instead
    of aborting the batch.
    """
    excel_gen = ExcelGenerator()
    records: List[Dict[str, Any]] = []

    for src in files:
        try:
            if deskewed_only:
                # Validate the image is readable before paying for OCR.
                img = cv2.imread(src, cv2.IMREAD_COLOR)
                if img is None:
                    raise FileNotFoundError(src)
                # extract_words_and_text: project OCR helper (Vision); returns
                # word boxes with center coords — semantics defined elsewhere.
                words, _ = extract_words_and_text(src)
                # No rotation applied: copy centers straight into the *_rot
                # fields that group_horizontal_lines expects.
                rot_words = []
                for w in words:
                    ww = dict(w)
                    ww["cx_rot"], ww["cy_rot"] = w["cx"], w["cy"]
                    rot_words.append(ww)
                post_lines = group_horizontal_lines(rot_words)

                post_txt = write_lines_txt(src, "lines_post", post_lines)  # only if DEBUG

                rows = extract_all_clients_from_lines(post_lines)
                for r in rows:
                    r["Source File"] = os.path.basename(src)
                    records.append(r)
                # if DEBUG: print(f"{src} → parsed {len(rows)} PD rows (wrote {post_txt})")

            else:
                # Deskew path; the deskewed copy is only written in DEBUG mode.
                base, ext = os.path.splitext(src)
                dst = f"{base}_deskewed{ext if ext else '.jpg'}" if DEBUG else None
                info = smart_deskew_with_lines(src, dst, clamp_deg=30.0, use_vision=True)
                post_lines = info.get("post_lines", []) if info else []
                rows = extract_all_clients_from_lines(post_lines) if post_lines else []
                for r in rows:
                    r["Source File"] = os.path.basename(src)
                    records.append(r)
                # if DEBUG: print(f"{src} → rotated by {-info['angle_deg']:.3f}° → {dst}")

        except Exception as e:
            # Best-effort batch: record the failure as a row and keep going.
            # if DEBUG: print(f"{src}: {e}")
            records.append({
                'Patient Name': "", 'Patient ID': "", 'ICN': "", 'CDT Code': "",
                'Date SVC': "", 'Billed Amount': "", 'Allowed Amount': "", 'Paid Amount': "",
                'Extraction Success': False, 'Source File': os.path.basename(src),
            })

    df = pd.DataFrame.from_records(records)
    data = excel_gen.create_excel_file(df)
    with open(out_excel, "wb") as f:
        f.write(data)
    # if DEBUG:
    #     print(f"\n✅ Wrote Excel → {out_excel}")
    #     print("   (and per-image: *_lines_pre.txt, *_lines_post.txt, *_deskewed.* when DEBUG=True)")
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# CLI
|
||||||
|
# ============================================================
|
||||||
|
def main():
    """CLI entry point: collect image paths from --files/--input and run the pipeline."""
    ap = argparse.ArgumentParser()
    ap.add_argument("--input", help="Folder of images (jpg/png/tif).", default=None)
    ap.add_argument("--files", nargs="*", help="Specific image files.", default=None)
    ap.add_argument("--out", help="Output Excel path.", required=True)
    ap.add_argument("--deskewed-only", action="store_true",
                    help="Only process files whose name contains '_deskewed'; skip deskew step.")
    args = ap.parse_args()

    # Explicit files first, then anything matching the image globs in --input.
    paths: List[str] = []
    if args.files:
        paths.extend(f for f in args.files if os.path.isfile(f))
    if args.input and os.path.isdir(args.input):
        for pattern in ("*.jpg", "*.jpeg", "*.png", "*.tif", "*.tiff", "*.bmp"):
            paths.extend(glob.glob(os.path.join(args.input, pattern)))

    if args.deskewed_only:
        paths = [p for p in paths if "_deskewed" in os.path.basename(p).lower()]

    if not paths:
        raise SystemExit("No input images found. Use --files or --input (and --deskewed-only if desired).")

    if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS"):
        # Missing Vision credentials: kept silent by design; the client will
        # surface its own error later.
        pass

    process_images_to_excel(paths, args.out, deskewed_only=args.deskewed_only)


if __name__ == "__main__":
    main()
|
||||||
8
apps/PaymentOCRService/package.json
Normal file
8
apps/PaymentOCRService/package.json
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"name": "pdfservice",
|
||||||
|
"private": true,
|
||||||
|
"scripts": {
|
||||||
|
"postinstall": "pip install -r requirements.txt",
|
||||||
|
"dev": "python main.py"
|
||||||
|
}
|
||||||
|
}
|
||||||
10
apps/PaymentOCRService/requirements.txt
Normal file
10
apps/PaymentOCRService/requirements.txt
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
fastapi
|
||||||
|
uvicorn[standard]
|
||||||
|
google-cloud-vision
|
||||||
|
opencv-python-headless
|
||||||
|
pytesseract
|
||||||
|
pillow
|
||||||
|
pandas
|
||||||
|
openpyxl
|
||||||
|
numpy
|
||||||
|
python-multipart
|
||||||
BIN
apps/ProcedureCodeFromMhPdf/MH.pdf
Normal file
BIN
apps/ProcedureCodeFromMhPdf/MH.pdf
Normal file
Binary file not shown.
5
apps/ProcedureCodeFromMhPdf/Readme.md
Normal file
5
apps/ProcedureCodeFromMhPdf/Readme.md
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
This code was written solely to extract procedure-code data from the MassHealth PDF, to make that process easier.
|
||||||
|
|
||||||
|
It was only a one-time process and is not used as core functionality anywhere in this app.
|
||||||
|
|
||||||
|
Keeping it around, since we might need to run the extraction again in the future.
|
||||||
96
apps/ProcedureCodeFromMhPdf/compareJson.py
Normal file
96
apps/ProcedureCodeFromMhPdf/compareJson.py
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Compare a main dental JSON file with one or more other JSON files and
|
||||||
|
return all records whose 'Procedure Code' is NOT present in the main file.
|
||||||
|
|
||||||
|
- Matching key: 'Procedure Code' (case-insensitive, trimmed).
|
||||||
|
- Keeps the full record from the other files (including extra fields like 'Full Price').
|
||||||
|
- Deduplicates by Procedure Code across the collected "missing" results.
|
||||||
|
|
||||||
|
CONFIG: set MAIN_PATH, OTHER_PATHS, OUT_PATH below.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Dict, Any
|
||||||
|
|
||||||
|
# =========================
|
||||||
|
# CONFIG — EDIT THESE ONLY
|
||||||
|
# =========================
|
||||||
|
MAIN_PATH = "procedureCodesMain.json" # your main JSON (with PriceLTEQ21/PriceGT21)
|
||||||
|
OTHER_PATHS = [
|
||||||
|
"procedureCodesOld.json", # one or more other JSON files to compare against the main
|
||||||
|
# "other2.json",
|
||||||
|
]
|
||||||
|
OUT_PATH = "not_in_main.json" # where to write the results
|
||||||
|
# =========================
|
||||||
|
|
||||||
|
|
||||||
|
def _load_json_any(path: str) -> List[Dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Load JSON. Accept:
|
||||||
|
- a list of objects
|
||||||
|
- a single object (wraps into a list)
|
||||||
|
"""
|
||||||
|
with open(path, "r", encoding="utf-8") as f:
|
||||||
|
data = json.load(f)
|
||||||
|
if isinstance(data, dict):
|
||||||
|
return [data]
|
||||||
|
if isinstance(data, list):
|
||||||
|
# filter out non-dict items defensively
|
||||||
|
return [x for x in data if isinstance(x, dict)]
|
||||||
|
raise ValueError(f"Unsupported JSON top-level type in {path}: {type(data)}")
|
||||||
|
|
||||||
|
|
||||||
|
def _norm_code(record: Dict[str, Any]) -> str:
|
||||||
|
# Normalize the 'Procedure Code' for matching
|
||||||
|
code = str(record.get("Procedure Code", "")).strip().upper()
|
||||||
|
# Some PDFs might have stray spaces, tabs, or zero-width chars
|
||||||
|
code = "".join(ch for ch in code if not ch.isspace())
|
||||||
|
return code
|
||||||
|
|
||||||
|
|
||||||
|
def collect_main_codes(main_path: str) -> set:
    """Return the set of non-empty normalized procedure codes in the main file.

    Fix: the original set comprehension evaluated _norm_code(rec) twice per
    record (once for the value, once for the filter); normalize once instead.
    """
    codes = set()
    for rec in _load_json_any(main_path):
        code = _norm_code(rec)
        if code:
            codes.add(code)
    return codes
|
||||||
|
|
||||||
|
|
||||||
|
def collect_missing_records(other_paths: List[str], main_codes: set) -> List[Dict[str, Any]]:
    """Collect records from *other_paths* whose code is absent from *main_codes*.

    The first full record seen per code wins (deduplication across files);
    the result is returned sorted by normalized code for stable output.
    """
    missing: Dict[str, Dict[str, Any]] = {}
    for path in other_paths:
        for record in _load_json_any(path):
            code_norm = _norm_code(record)
            if not code_norm:
                continue
            if code_norm in main_codes or code_norm in missing:
                continue
            # Keep the full original record (including extra fields).
            missing[code_norm] = record
    return [missing[code] for code in sorted(missing)]
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the comparison configured at the top of this file and report results."""
    # Fail fast if any configured input is absent.
    if not Path(MAIN_PATH).exists():
        raise FileNotFoundError(f"Main file not found: {MAIN_PATH}")
    for other in OTHER_PATHS:
        if not Path(other).exists():
            raise FileNotFoundError(f"Other file not found: {other}")

    main_codes = collect_main_codes(MAIN_PATH)
    missing_records = collect_missing_records(OTHER_PATHS, main_codes)

    payload = json.dumps(missing_records, ensure_ascii=False, indent=2)
    with open(OUT_PATH, "w", encoding="utf-8") as out_file:
        out_file.write(payload)

    print(f"Main codes: {len(main_codes)}")
    print(f"Missing from main: {len(missing_records)}")
    print(f"Wrote results to {OUT_PATH}")
    # Also echo to stdout
    print(payload)


if __name__ == "__main__":
    main()
|
||||||
183
apps/ProcedureCodeFromMhPdf/extract_bypage.py
Normal file
183
apps/ProcedureCodeFromMhPdf/extract_bypage.py
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
import re
|
||||||
|
import json
|
||||||
|
from typing import List, Dict
|
||||||
|
import fitz # PyMuPDF
|
||||||
|
|
||||||
|
|
||||||
|
# =========================
|
||||||
|
# CONFIG — EDIT THESE ONLY
|
||||||
|
# =========================
|
||||||
|
PDF_PATH = "MH.pdf" # path to your PDF
|
||||||
|
PAGES = [2] # 0-based page indexes to parse, e.g., [2] for the page you showed
|
||||||
|
OUT_PATH = "output.json" # where to write JSON
|
||||||
|
FIRST_PRICE_IS_LTE21 = True # True => first price line is <=21; False => first price is >21
|
||||||
|
PRINT_PAGE_TEXT = False # set True if you want to print the raw page text for sanity check
|
||||||
|
# =========================
|
||||||
|
|
||||||
|
|
||||||
|
# --- patterns ---
|
||||||
|
code_line_re = re.compile(r"^\s*(D\d{4})\s*$")
|
||||||
|
# a price token is either '$123', '$1,234.50', '123', '123.45', or 'NC'
|
||||||
|
price_line_re = re.compile(r"^\s*(?:\$\s*)?(\d{1,3}(?:,\d{3})*(?:\.\d{2})?|\d+(?:\.\d{2})?|NC)\s*$", re.IGNORECASE)
|
||||||
|
# lines that definitely start a notes block we should ignore once prices are done
|
||||||
|
note_starters_re = re.compile(r"^(Teeth\b|One of\b|--|—|–|Age limitation:|CR\b)", re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_ws(s: str) -> str:
    """Collapse all whitespace (incl. NBSP and newlines) to single spaces,
    then trim surrounding spaces and stray punctuation."""
    out = s.replace("\u00a0", " ")
    # Apply the collapses in order: runs of spaces/tabs, newline joins, leftovers.
    for pattern in (r"[ \t]+", r"\s*\n\s*", r"\s{2,}"):
        out = re.sub(pattern, " ", out)
    return out.strip(" ,.;:-•·\n\t")
|
||||||
|
|
||||||
|
|
||||||
|
def clean_money(token: str) -> str:
    """Normalize a price token: 'NC' (any case) stays 'NC'; otherwise strip
    '$', thousands commas, and surrounding whitespace."""
    if token.upper() == "NC":
        return "NC"
    without_commas = token.replace(",", "")
    return without_commas.lstrip("$").strip()
|
||||||
|
|
||||||
|
|
||||||
|
def get_page_lines(pdf_path: str, pages: List[int]) -> List[str]:
    """Return the raw text lines of the given 0-based *pages* of *pdf_path*.

    Raises ValueError if any page index is out of range.
    """
    doc = fitz.open(pdf_path)
    try:
        max_idx = len(doc) - 1
        for p in pages:
            if not 0 <= p <= max_idx:
                raise ValueError(f"Invalid page index {p}. Valid range is 0..{max_idx}.")
        collected: List[str] = []
        for p in pages:
            text = doc.load_page(p).get_text("text") or ""
            if PRINT_PAGE_TEXT:
                print(f"\n--- RAW PAGE {p} ---\n{text}")
            # Keep line boundaries; the parser works line-by-line later.
            collected.extend(text.splitlines())
        return collected
    finally:
        doc.close()
|
||||||
|
|
||||||
|
|
||||||
|
def extract_records(lines: List[str]) -> List[Dict[str, str]]:
    """Scan *lines* for code/description/price groups and return one record each.

    Expected layout per record (as emitted by PyMuPDF text extraction):
    a lone D-code line, 1-3 description lines, then two price lines.
    Records without two reliable prices are skipped entirely.
    """
    out: List[Dict[str, str]] = []
    i = 0
    n = len(lines)

    while i < n:
        line = lines[i].strip()

        # seek a code line
        mcode = code_line_re.match(line)
        if not mcode:
            i += 1
            continue

        code = mcode.group(1)
        i += 1

        # gather description lines until we encounter price lines
        desc_lines: List[str] = []
        # skip blank lines before description
        while i < n and not lines[i].strip():
            i += 1

        # collect description lines (usually 1–3) until first price token
        # stop also if we accidentally hit another code (defensive)
        j = i
        while j < n:
            s = lines[j].strip()
            if not s:
                # blank line inside description — consider description ended if the next is a price
                # but we don't advance here; break and let price parsing handle it
                break
            if code_line_re.match(s):
                # next code — no prices found; abandon this broken record
                break
            if price_line_re.match(s):
                # reached price section
                break
            if note_starters_re.match(s):
                # encountered a note before price — treat as end of description; prices may be missing
                break
            desc_lines.append(s)
            j += 1

        # advance i to where we left off
        i = j

        description = normalize_ws(" ".join(desc_lines))

        # collect up to two price tokens
        prices: List[str] = []
        while i < n and len(prices) < 2:
            s = lines[i].strip()
            if not s:
                i += 1
                continue
            if code_line_re.match(s):
                # new record — stop; this means we never got prices (malformed)
                break
            mprice = price_line_re.match(s)
            if mprice:
                prices.append(clean_money(mprice.group(1)))
                i += 1
                continue
            # if we encounter a note/flags block, skip forward until the next code/blank
            if note_starters_re.match(s) or s in {"Y", "NC"}:
                # skip this block quickly
                i += 1
                # keep skipping subsequent non-empty, non-code lines until a blank or next code
                while i < n:
                    t = lines[i].strip()
                    if not t or code_line_re.match(t):
                        break
                    i += 1
                # now let the outer loop proceed
                continue
            # unrecognized line: if prices already found, we can break; else skip
            if prices:
                break
            i += 1

        if len(prices) < 2:
            # couldn't find 2 prices reliably; skip this record
            continue

        # Map the two prices onto the <=21 / >21 columns per config flag.
        if FIRST_PRICE_IS_LTE21:
            price_lte21, price_gt21 = prices[0], prices[1]
        else:
            price_lte21, price_gt21 = prices[1], prices[0]

        out.append(
            {
                "Procedure Code": code,
                "Description": description,
                "PriceLTEQ21": price_lte21,
                "PriceGT21": price_gt21,
            }
        )

        # after prices, skip forward until next code or blank block end
        while i < n:
            s = lines[i].strip()
            if not s:
                i += 1
                break
            if code_line_re.match(s):
                # next record will pick this up
                break
            i += 1

    return out
|
||||||
|
|
||||||
|
|
||||||
|
def extract_pdf_to_json(pdf_path: str, pages: List[int], out_path: str) -> List[Dict[str, str]]:
    """Parse the given pages of *pdf_path*, write the records to *out_path* as
    JSON, and return them."""
    records = extract_records(get_page_lines(pdf_path, pages))
    with open(out_path, "w", encoding="utf-8") as out_file:
        json.dump(records, out_file, ensure_ascii=False, indent=2)
    return records


if __name__ == "__main__":
    data = extract_pdf_to_json(PDF_PATH, PAGES, OUT_PATH)
    print(f"Wrote {len(data)} rows to {OUT_PATH}")
    print(json.dumps(data, ensure_ascii=False, indent=2))
|
||||||
208
apps/ProcedureCodeFromMhPdf/extract_byrange.py
Normal file
208
apps/ProcedureCodeFromMhPdf/extract_byrange.py
Normal file
@@ -0,0 +1,208 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
MassHealth dental PDF parser (PyMuPDF / fitz) — PAGE RANGE VERSION
|
||||||
|
|
||||||
|
Parses rows like:
|
||||||
|
|
||||||
|
D2160
|
||||||
|
Amalgam-three surfaces,
|
||||||
|
primary or permanent
|
||||||
|
$110
|
||||||
|
$92
|
||||||
|
Y
|
||||||
|
Y
|
||||||
|
...
|
||||||
|
|
||||||
|
Outputs a single JSON with records from the chosen page range (inclusive).
|
||||||
|
|
||||||
|
Config:
|
||||||
|
- PDF_PATH: path to the PDF
|
||||||
|
- PAGE_START, PAGE_END: 1-based page numbers (inclusive)
|
||||||
|
- FIRST_PRICE_IS_LTE21: True => first price line is <=21; False => first price is >21
|
||||||
|
- OUT_PATH: output JSON path
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
from typing import List, Dict
|
||||||
|
import fitz # PyMuPDF
|
||||||
|
|
||||||
|
|
||||||
|
# =========================
|
||||||
|
# CONFIG — EDIT THESE ONLY
|
||||||
|
# =========================
|
||||||
|
PDF_PATH = "MH.pdf" # path to your PDF
|
||||||
|
PAGE_START = 1 # 1-based inclusive start page (e.g., 1)
|
||||||
|
PAGE_END = 12 # 1-based inclusive end page (e.g., 5)
|
||||||
|
OUT_PATH = "output.json" # single JSON file containing all parsed rows
|
||||||
|
FIRST_PRICE_IS_LTE21 = True # True => first price line is <=21; False => first price is >21
|
||||||
|
PRINT_PAGE_TEXT = False # set True to print raw text for each page
|
||||||
|
# =========================
|
||||||
|
|
||||||
|
|
||||||
|
# --- patterns ---
|
||||||
|
code_line_re = re.compile(r"^\s*(D\d{4})\s*$")
|
||||||
|
# a price token is either '$123', '$1,234.50', '123', '123.45', or 'NC'
|
||||||
|
price_line_re = re.compile(r"^\s*(?:\$\s*)?(\d{1,3}(?:,\d{3})*(?:\.\d{2})?|\d+(?:\.\d{2})?|NC)\s*$", re.IGNORECASE)
|
||||||
|
# lines that definitely start a notes block to ignore once prices are done
|
||||||
|
note_starters_re = re.compile(r"^(Teeth\b|One of\b|--|—|–|Age limitation:|CR\b)", re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_ws(s: str) -> str:
    """Flatten all whitespace in *s* to single spaces and trim edge punctuation."""
    flattened = s.replace("\u00a0", " ")
    flattened = re.sub(r"[ \t]+", " ", flattened)      # runs of spaces/tabs
    flattened = re.sub(r"\s*\n\s*", " ", flattened)    # join wrapped lines
    flattened = re.sub(r"\s{2,}", " ", flattened)      # any leftovers
    return flattened.strip(" ,.;:-•·\n\t")
|
||||||
|
|
||||||
|
|
||||||
|
def clean_money(token: str) -> str:
    """Return a bare numeric price string, or the literal 'NC' for no-charge."""
    return "NC" if token.upper() == "NC" else token.replace(",", "").lstrip("$").strip()
|
||||||
|
|
||||||
|
|
||||||
|
def get_page_lines(pdf_path: str, page_start_1b: int, page_end_1b: int) -> List[str]:
    """Return the text lines of pages *page_start_1b*..*page_end_1b* (1-based,
    inclusive) of *pdf_path*. Raises ValueError for an invalid range."""
    if page_start_1b <= 0 or page_end_1b <= 0:
        raise ValueError("PAGE_START and PAGE_END must be >= 1 (1-based).")
    if page_start_1b > page_end_1b:
        raise ValueError("PAGE_START cannot be greater than PAGE_END.")

    doc = fitz.open(pdf_path)
    try:
        last_idx_0b = len(doc) - 1
        # convert the 1-based inclusive bounds to 0-based
        start_0b, end_0b = page_start_1b - 1, page_end_1b - 1
        if start_0b < 0 or end_0b > last_idx_0b:
            raise ValueError(f"Page range out of bounds. Valid 1-based range is 1..{last_idx_0b + 1}.")
        collected: List[str] = []
        for page_no in range(start_0b, end_0b + 1):
            page_text = doc.load_page(page_no).get_text("text") or ""
            if PRINT_PAGE_TEXT:
                print(f"\n--- RAW PAGE {page_no} (0-based; shown as {page_no+1} 1-based) ---\n{page_text}")
            collected.extend(page_text.splitlines())
        return collected
    finally:
        doc.close()
|
||||||
|
|
||||||
|
|
||||||
|
def extract_records(lines: List[str]) -> List[Dict[str, str]]:
    """Scan *lines* for code/description/price groups and return one record each.

    Same scanner as extract_bypage.py: a lone D-code line, 1-3 description
    lines, then two price lines. Records without two reliable prices are
    skipped entirely.
    """
    out: List[Dict[str, str]] = []
    i = 0
    n = len(lines)

    while i < n:
        line = lines[i].strip()

        # seek a code line
        mcode = code_line_re.match(line)
        if not mcode:
            i += 1
            continue

        code = mcode.group(1)
        i += 1

        # gather description lines until we encounter price lines
        desc_lines: List[str] = []
        # skip blank lines before description
        while i < n and not lines[i].strip():
            i += 1

        # collect description lines (usually 1–3) until first price token
        # stop also if we accidentally hit another code (defensive)
        j = i
        while j < n:
            s = lines[j].strip()
            if not s:
                break
            if code_line_re.match(s):
                # next code — description ended abruptly (malformed)
                break
            if price_line_re.match(s):
                # reached price section
                break
            if note_starters_re.match(s):
                # encountered a note before price — treat as end of description; prices may be missing
                break
            desc_lines.append(s)
            j += 1

        # advance i to where we left off
        i = j

        description = normalize_ws(" ".join(desc_lines))

        # collect up to two price tokens
        prices: List[str] = []
        while i < n and len(prices) < 2:
            s = lines[i].strip()
            if not s:
                i += 1
                continue
            if code_line_re.match(s):
                # new record — stop; this means we never got prices (malformed)
                break
            mprice = price_line_re.match(s)
            if mprice:
                prices.append(clean_money(mprice.group(1)))
                i += 1
                continue
            # if we encounter a note/flags block, skip forward until a blank or next code
            if note_starters_re.match(s) or s in {"Y", "NC"}:
                i += 1
                while i < n:
                    t = lines[i].strip()
                    if not t or code_line_re.match(t):
                        break
                    i += 1
                continue
            # unrecognized line: if we already captured some prices, break; else skip
            if prices:
                break
            i += 1

        if len(prices) < 2:
            # couldn't find 2 prices reliably; skip this record
            continue

        # Map the two prices onto the <=21 / >21 columns per config flag.
        if FIRST_PRICE_IS_LTE21:
            price_lte21, price_gt21 = prices[0], prices[1]
        else:
            price_lte21, price_gt21 = prices[1], prices[0]

        out.append(
            {
                "Procedure Code": code,
                "Description": description,
                "PriceLTEQ21": price_lte21,
                "PriceGT21": price_gt21,
            }
        )

        # after prices, skip forward until next code or blank block end
        while i < n:
            s = lines[i].strip()
            if not s:
                i += 1
                break
            if code_line_re.match(s):
                break
            i += 1

    return out
|
||||||
|
|
||||||
|
|
||||||
|
def extract_pdf_range_to_json(pdf_path: str, page_start_1b: int, page_end_1b: int, out_path: str) -> List[Dict[str, str]]:
    """Extract procedure records from a 1-based page range of *pdf_path*.

    The lines of the requested pages are pulled via ``get_page_lines``,
    parsed into records by ``extract_records``, written to *out_path* as
    pretty-printed UTF-8 JSON, and also returned to the caller.

    Args:
        pdf_path: Path of the source PDF.
        page_start_1b: First page to read (1-based, inclusive).
        page_end_1b: Last page to read (1-based, inclusive).
        out_path: Destination file for the JSON dump.

    Returns:
        The list of extracted record dicts.
    """
    page_lines = get_page_lines(pdf_path, page_start_1b, page_end_1b)
    records = extract_records(page_lines)
    # Persist alongside returning, so callers can use the data immediately.
    with open(out_path, "w", encoding="utf-8") as out_file:
        json.dump(records, out_file, ensure_ascii=False, indent=2)
    return records
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: run the configured extraction and echo the result.
    rows = extract_pdf_range_to_json(PDF_PATH, PAGE_START, PAGE_END, OUT_PATH)
    print(f"Wrote {len(rows)} rows to {OUT_PATH}")
    print(json.dumps(rows, ensure_ascii=False, indent=2))
|
||||||
192
apps/ProcedureCodeFromMhPdf/not_in_main.json
Normal file
192
apps/ProcedureCodeFromMhPdf/not_in_main.json
Normal file
@@ -0,0 +1,192 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0120",
|
||||||
|
"Description": "perio exam",
|
||||||
|
"Price": "105"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0140",
|
||||||
|
"Description": "limited exam",
|
||||||
|
"Price": "90"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0150",
|
||||||
|
"Description": "comprehensive exam",
|
||||||
|
"Price": "120"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0210",
|
||||||
|
"Description": "Fmx.",
|
||||||
|
"Price": "120"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0220",
|
||||||
|
"Description": "first PA.",
|
||||||
|
"Price": "60"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0230",
|
||||||
|
"Description": "2nd PA.",
|
||||||
|
"Price": "50"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0272",
|
||||||
|
"Description": "2 BW",
|
||||||
|
"Price": "80"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0274",
|
||||||
|
"Description": "4BW",
|
||||||
|
"Price": "160"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0330",
|
||||||
|
"Description": "pano",
|
||||||
|
"Price": "150"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0364",
|
||||||
|
"Description": "Less than one jaw",
|
||||||
|
"Price": "350"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0365",
|
||||||
|
"Description": "Mand",
|
||||||
|
"Price": "350"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0366",
|
||||||
|
"Description": "Max",
|
||||||
|
"Price": "350"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0367",
|
||||||
|
"Description": "",
|
||||||
|
"Price": "400"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0368",
|
||||||
|
"Description": "include TMJ",
|
||||||
|
"Price": "375"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0380",
|
||||||
|
"Description": "Less than one jaw",
|
||||||
|
"Price": "300"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0381",
|
||||||
|
"Description": "Mand",
|
||||||
|
"Price": "300"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0382",
|
||||||
|
"Description": "Max",
|
||||||
|
"Price": "300"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0383",
|
||||||
|
"Description": "",
|
||||||
|
"Price": "350"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D1110",
|
||||||
|
"Description": "adult prophy",
|
||||||
|
"Price": "150"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D1120",
|
||||||
|
"Description": "child prophy",
|
||||||
|
"Price": "120"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D1208",
|
||||||
|
"Description": "FL",
|
||||||
|
"Price": "90"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D1351",
|
||||||
|
"Description": "sealant",
|
||||||
|
"Price": "80"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D1999",
|
||||||
|
"Description": "",
|
||||||
|
"Price": "50"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D2140",
|
||||||
|
"Description": "amalgam, one surface",
|
||||||
|
"Price": "150"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D2150",
|
||||||
|
"Description": "amalgam, two surface",
|
||||||
|
"Price": "200"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D2955",
|
||||||
|
"Description": "post renoval",
|
||||||
|
"Price": "350"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D4910",
|
||||||
|
"Description": "perio maintains",
|
||||||
|
"Price": "250"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D5510",
|
||||||
|
"Description": "Repair broken complete denture base (QUAD)",
|
||||||
|
"Price": "400"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D6056",
|
||||||
|
"Description": "pre fab abut",
|
||||||
|
"Price": "750"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D6057",
|
||||||
|
"Description": "custom abut",
|
||||||
|
"Price": "800"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D6058",
|
||||||
|
"Description": "porcelain, implant crown, ceramic crown",
|
||||||
|
"Price": "1400"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D6059",
|
||||||
|
"Description": "",
|
||||||
|
"Price": "1400"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D6100",
|
||||||
|
"Description": "",
|
||||||
|
"Price": "320"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D6110",
|
||||||
|
"Description": "implant",
|
||||||
|
"Price": "1600"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D6242",
|
||||||
|
"Description": "noble metal. For united",
|
||||||
|
"Price": "1400"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D6245",
|
||||||
|
"Description": "porcelain, not for united",
|
||||||
|
"Price": "1400"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D7910",
|
||||||
|
"Description": "suture, small wound up to 5 mm",
|
||||||
|
"Price": "400"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D7950",
|
||||||
|
"Description": "max",
|
||||||
|
"Price": "800"
|
||||||
|
}
|
||||||
|
]
|
||||||
1026
apps/ProcedureCodeFromMhPdf/procedureCodes.json
Normal file
1026
apps/ProcedureCodeFromMhPdf/procedureCodes.json
Normal file
File diff suppressed because it is too large
Load Diff
344
apps/ProcedureCodeFromMhPdf/procedureCodesOld.json
Normal file
344
apps/ProcedureCodeFromMhPdf/procedureCodesOld.json
Normal file
@@ -0,0 +1,344 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"Procedure Code": "D1999",
|
||||||
|
"Description": "",
|
||||||
|
"Price": "50"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0120",
|
||||||
|
"Description": "perio exam",
|
||||||
|
"Price": "105"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0140",
|
||||||
|
"Description": "limited exam",
|
||||||
|
"Price": "90"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0150",
|
||||||
|
"Description": "comprehensive exam",
|
||||||
|
"Price": "120"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0210",
|
||||||
|
"Description": "Fmx.",
|
||||||
|
"Price": "120"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0220",
|
||||||
|
"Description": "first PA.",
|
||||||
|
"Price": "60"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0230",
|
||||||
|
"Description": "2nd PA.",
|
||||||
|
"Price": "50"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0330",
|
||||||
|
"Description": "pano",
|
||||||
|
"Price": "150"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0272",
|
||||||
|
"Description": "2 BW",
|
||||||
|
"Price": "80"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0274",
|
||||||
|
"Description": "4BW",
|
||||||
|
"Price": "160"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D1110",
|
||||||
|
"Description": "adult prophy",
|
||||||
|
"Price": "150"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D1120",
|
||||||
|
"Description": "child prophy",
|
||||||
|
"Price": "120"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D1351",
|
||||||
|
"Description": "sealant",
|
||||||
|
"Price": "80"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D4341",
|
||||||
|
"Description": "srp",
|
||||||
|
"Price": "250"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D4910",
|
||||||
|
"Description": "perio maintains",
|
||||||
|
"Price": "250"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D1208",
|
||||||
|
"Description": "FL",
|
||||||
|
"Price": "90"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D2330",
|
||||||
|
"Description": "front composite. 1 s.",
|
||||||
|
"Price": "180"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D2331",
|
||||||
|
"Description": "2s",
|
||||||
|
"Price": "220"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D2332",
|
||||||
|
"Description": "3s",
|
||||||
|
"Price": "280"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D2335",
|
||||||
|
"Description": "4s or more",
|
||||||
|
"Price": "350"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D2391",
|
||||||
|
"Description": "back. 1s",
|
||||||
|
"Price": "200"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D2392",
|
||||||
|
"Description": "2s",
|
||||||
|
"Price": "250"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D2393",
|
||||||
|
"Description": "3s",
|
||||||
|
"Price": "280"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D2394",
|
||||||
|
"Description": "4s",
|
||||||
|
"Price": "320"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D2140",
|
||||||
|
"Description": "amalgam, one surface",
|
||||||
|
"Price": "150"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D2150",
|
||||||
|
"Description": "amalgam, two surface",
|
||||||
|
"Price": "200"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D2750",
|
||||||
|
"Description": "high noble",
|
||||||
|
"Price": "1300"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D2751",
|
||||||
|
"Description": "base metal",
|
||||||
|
"Price": "1200"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D2740",
|
||||||
|
"Description": "crown porcelain",
|
||||||
|
"Price": "1300"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D2954",
|
||||||
|
"Description": "p/c",
|
||||||
|
"Price": "450"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D7910",
|
||||||
|
"Description": "suture, small wound up to 5 mm",
|
||||||
|
"Price": "400"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D5110",
|
||||||
|
"Description": "FU",
|
||||||
|
"Price": "1200",
|
||||||
|
"Full Price": "1700"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D5120",
|
||||||
|
"Description": "FL",
|
||||||
|
"Price": "1700",
|
||||||
|
"Full Price": "1700"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D5211",
|
||||||
|
"Description": "pu",
|
||||||
|
"Price": "1300"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D5212",
|
||||||
|
"Description": "pl",
|
||||||
|
"Price": "1300"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D5213",
|
||||||
|
"Description": "cast pu.",
|
||||||
|
"Price": "1700"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D5214",
|
||||||
|
"Description": "cast pl",
|
||||||
|
"Price": "1700"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D5510",
|
||||||
|
"Description": "Repair broken complete denture base (QUAD)",
|
||||||
|
"Price": "400"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D5520",
|
||||||
|
"Description": "Replace missing or broken teeth - complete denture (each tooth) (TOOTH)",
|
||||||
|
"Price": "200"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D5750",
|
||||||
|
"Description": "lab reline",
|
||||||
|
"Price": "600"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D5730",
|
||||||
|
"Description": "chairside reline",
|
||||||
|
"Price": "500"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D2920",
|
||||||
|
"Description": "re cement crown",
|
||||||
|
"Price": "120"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D2950",
|
||||||
|
"Description": "core buildup",
|
||||||
|
"Price": "350"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D2955",
|
||||||
|
"Description": "post renoval",
|
||||||
|
"Price": "350"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D6100",
|
||||||
|
"Description": "",
|
||||||
|
"Price": "320"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D6110",
|
||||||
|
"Description": "implant",
|
||||||
|
"Price": "1600"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D6056",
|
||||||
|
"Description": "pre fab abut",
|
||||||
|
"Price": "750"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D6057",
|
||||||
|
"Description": "custom abut",
|
||||||
|
"Price": "800"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D6058",
|
||||||
|
"Description": "porcelain, implant crown, ceramic crown",
|
||||||
|
"Price": "1400"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D6059",
|
||||||
|
"Description": "",
|
||||||
|
"Price": "1400"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D6242",
|
||||||
|
"Description": "noble metal. For united",
|
||||||
|
"Price": "1400"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D6245",
|
||||||
|
"Description": "porcelain, not for united",
|
||||||
|
"Price": "1400"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0367",
|
||||||
|
"Description": "",
|
||||||
|
"Price": "400"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0364",
|
||||||
|
"Description": "Less than one jaw",
|
||||||
|
"Price": "350"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0365",
|
||||||
|
"Description": "Mand",
|
||||||
|
"Price": "350"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0366",
|
||||||
|
"Description": "Max",
|
||||||
|
"Price": "350"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0368",
|
||||||
|
"Description": "include TMJ",
|
||||||
|
"Price": "375"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0383",
|
||||||
|
"Description": "",
|
||||||
|
"Price": "350"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0380",
|
||||||
|
"Description": "Less than one jaw",
|
||||||
|
"Price": "300"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0381",
|
||||||
|
"Description": "Mand",
|
||||||
|
"Price": "300"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D0382",
|
||||||
|
"Description": "Max",
|
||||||
|
"Price": "300"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D7950",
|
||||||
|
"Description": "max",
|
||||||
|
"Price": "800"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D7140",
|
||||||
|
"Description": "simple ext",
|
||||||
|
"Price": "150"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D7210",
|
||||||
|
"Description": "surgical ext",
|
||||||
|
"Price": "280"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D7220",
|
||||||
|
"Description": "soft impacted",
|
||||||
|
"Price": "380"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D7230",
|
||||||
|
"Description": "partial bony",
|
||||||
|
"Price": "450"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D7240",
|
||||||
|
"Description": "fully bony",
|
||||||
|
"Price": "550"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Procedure Code": "D3320",
|
||||||
|
"Description": "pre M RCT",
|
||||||
|
"Price": "1050"
|
||||||
|
}
|
||||||
|
]
|
||||||
Reference in New Issue
Block a user