structured well
This commit is contained in:
0
apps/PaymentOCRService/.env.example
Normal file
0
apps/PaymentOCRService/.env.example
Normal file
13
apps/PaymentOCRService/README.md
Normal file
13
apps/PaymentOCRService/README.md
Normal file
@@ -0,0 +1,13 @@
|
||||
# Medical Billing OCR API (FastAPI)
|
||||
|
||||
## 1) Prereqs
|
||||
- Google Cloud Vision service-account JSON.
|
||||
- `GOOGLE_APPLICATION_CREDENTIALS` env var pointing to that JSON.
|
||||
- Tesseract installed (for fallback OCR), and on PATH.
|
||||
|
||||
## 2) Install & run (local)
|
||||
```bash
|
||||
python -m venv .venv && source .venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
export GOOGLE_APPLICATION_CREDENTIALS=/absolute/path/to/service-account.json
|
||||
uvicorn app.main:app --reload --port 8080
|
||||
0
apps/PaymentOCRService/app/__init__.py
Normal file
0
apps/PaymentOCRService/app/__init__.py
Normal file
81
apps/PaymentOCRService/app/main.py
Normal file
81
apps/PaymentOCRService/app/main.py
Normal file
@@ -0,0 +1,81 @@
|
||||
from fastapi import FastAPI, UploadFile, File, HTTPException
|
||||
from fastapi.responses import StreamingResponse, JSONResponse, PlainTextResponse
|
||||
from typing import List, Optional
|
||||
import io
|
||||
import os
|
||||
|
||||
from app.pipeline_adapter import (
|
||||
process_images_to_rows,
|
||||
rows_to_csv_bytes,
|
||||
)
|
||||
|
||||
# Application object; the title/description show up in the generated OpenAPI docs.
app = FastAPI(
    title="Medical Billing OCR API",
    description="FastAPI wrapper around the complete OCR pipeline (Google Vision + deskew + line clustering + extraction).",
    version="1.0.0",
)

# File extensions accepted by the upload endpoints (lower-cased, dot included).
ALLOWED_EXTS = {".jpg", ".jpeg", ".png", ".tif", ".tiff", ".bmp"}
|
||||
|
||||
@app.get("/health", response_class=PlainTextResponse)
def health():
    """Liveness probe; also reports whether GCP credentials are configured."""
    # Visibility of the env var tells us whether Vision OCR can authenticate.
    has_creds = bool(os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""))
    return f"OK | GOOGLE_APPLICATION_CREDENTIALS set: {has_creds}"
|
||||
|
||||
@app.post("/extract/json")
async def extract_json(files: List[UploadFile] = File(...)):
    """
    Run the OCR pipeline on the uploaded images and return extracted rows as JSON.

    Raises:
        HTTPException 400 when no files are sent, 415 on unsupported
        extensions, 500 when the pipeline itself fails.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files provided.")

    # Validate extensions early (not bulletproof, but helpful)
    bad = [f.filename for f in files
           if os.path.splitext(f.filename or "")[1].lower() not in ALLOWED_EXTS]
    if bad:
        raise HTTPException(
            status_code=415,
            detail=f"Unsupported file types: {', '.join(bad)}. Allowed: {', '.join(sorted(ALLOWED_EXTS))}"
        )

    # Read blobs in-memory
    blobs = []
    filenames = []
    for f in files:
        blobs.append(await f.read())
        filenames.append(f.filename or "upload.bin")

    try:
        rows = process_images_to_rows(blobs, filenames)
        # rows is a list[dict] where each dict contains the emitted columns
        # (Patient Name, Patient ID, ICN, CDT Code, ... , Source File).
        return JSONResponse(content={"rows": rows})
    except Exception as e:
        # Chain the original exception so the real traceback is preserved
        # in server logs instead of being swallowed by the re-raise.
        raise HTTPException(status_code=500, detail=f"Processing error: {e}") from e
|
||||
|
||||
@app.post("/extract/csv")
async def extract_csv(files: List[UploadFile] = File(...), filename: Optional[str] = None):
    """
    Run the OCR pipeline on the uploaded images and stream the rows back as CSV.

    Args:
        files: uploaded images (extension-checked against ALLOWED_EXTS).
        filename: optional download name for the CSV attachment.

    Raises:
        HTTPException 400 when no files are sent, 415 on unsupported
        extensions, 500 when the pipeline itself fails.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files provided.")

    # Validate extensions early (not bulletproof, but helpful)
    bad = [f.filename for f in files
           if os.path.splitext(f.filename or "")[1].lower() not in ALLOWED_EXTS]
    if bad:
        raise HTTPException(
            status_code=415,
            detail=f"Unsupported file types: {', '.join(bad)}. Allowed: {', '.join(sorted(ALLOWED_EXTS))}"
        )

    # Read blobs in-memory
    blobs = []
    filenames = []
    for f in files:
        blobs.append(await f.read())
        filenames.append(f.filename or "upload.bin")

    try:
        rows = process_images_to_rows(blobs, filenames)
        csv_bytes = rows_to_csv_bytes(rows)
        out_name = filename or "medical_billing_extract.csv"
        # Strip CR/LF and double quotes so a caller-supplied filename cannot
        # inject extra response headers or break the Content-Disposition quoting.
        out_name = "".join(c for c in out_name if c not in '\r\n"') or "medical_billing_extract.csv"
        return StreamingResponse(
            io.BytesIO(csv_bytes),
            media_type="text/csv",
            headers={"Content-Disposition": f'attachment; filename="{out_name}"'}
        )
    except Exception as e:
        # Chain the original exception so the real traceback is preserved.
        raise HTTPException(status_code=500, detail=f"Processing error: {e}") from e
|
||||
77
apps/PaymentOCRService/app/pipeline_adapter.py
Normal file
77
apps/PaymentOCRService/app/pipeline_adapter.py
Normal file
@@ -0,0 +1,77 @@
|
||||
import os
|
||||
import tempfile
|
||||
from typing import List, Dict
|
||||
import pandas as pd
|
||||
|
||||
# Import your existing functions directly from complete_pipeline.py
|
||||
from complete_pipeline import (
|
||||
smart_deskew_with_lines,
|
||||
extract_all_clients_from_lines,
|
||||
)
|
||||
|
||||
def _process_single_image_bytes(blob: bytes, display_name: str) -> List[Dict]:
    """
    Run the OCR pipeline on one in-memory image.

    Saves bytes to a temp file (so OpenCV + Google Vision can read it),
    runs the existing pipeline functions, and returns extracted rows.

    Args:
        blob: raw image bytes as uploaded.
        display_name: original filename; used to pick the temp-file suffix
            and recorded in each row's "Source File" column.

    Returns:
        List of row dicts; a single placeholder row with
        'Extraction Success': False when nothing could be parsed.
    """
    suffix = os.path.splitext(display_name)[1] or ".jpg"
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp.write(blob)
            tmp_path = tmp.name

        # Uses Google Vision + deskew + post-line grouping
        info = smart_deskew_with_lines(tmp_path, None, clamp_deg=30.0, use_vision=True)
        post_lines = info.get("post_lines", []) if info else []
        rows = extract_all_clients_from_lines(post_lines) if post_lines else []

        # Add source file information (mirrors the Streamlit app's output)
        for r in rows:
            r["Source File"] = display_name

        # If nothing parsed, still return a placeholder row to indicate failure
        if not rows:
            rows.append({
                'Patient Name': "", 'Patient ID': "", 'ICN': "", 'CDT Code': "",
                'Tooth': "", 'Date SVC': "",
                'Billed Amount': "", 'Allowed Amount': "", 'Paid Amount': "",
                'Extraction Success': False, 'Source File': display_name,
            })

        return rows

    finally:
        # Best-effort temp-file cleanup; failures (e.g. a still-open handle
        # on Windows) are deliberately ignored.
        if tmp_path:
            try:
                os.unlink(tmp_path)
            except Exception:
                pass
|
||||
|
||||
def process_images_to_rows(blobs: List[bytes], filenames: List[str]) -> List[Dict]:
    """
    Public API used by the FastAPI routes.

    Args:
        blobs: list of image bytes.
        filenames: matching display names (become the "Source File" column).

    Returns:
        Concatenated row dicts from every image, in input order.
    """
    collected: List[Dict] = []
    for payload, display_name in zip(blobs, filenames):
        collected += _process_single_image_bytes(payload, display_name)
    return collected
|
||||
|
||||
def rows_to_csv_bytes(rows: List[Dict]) -> bytes:
    """
    Convert pipeline rows to UTF-8 CSV bytes (for the frontend to consume).

    Columns are ordered to mirror the Excel export; any columns not in the
    known list are appended after it.
    """
    frame = pd.DataFrame(rows)
    preferred = [
        'Patient Name', 'Patient ID', 'ICN', 'CDT Code', 'Tooth', 'Date SVC',
        'Billed Amount', 'Allowed Amount', 'Paid Amount',
        'Extraction Success', 'Source File'
    ]
    known = [c for c in preferred if c in frame.columns]
    extras = [c for c in frame.columns if c not in preferred]
    frame = frame[known + extras]
    return frame.to_csv(index=False).encode("utf-8")
|
||||
837
apps/PaymentOCRService/complete_pipeline.py
Normal file
837
apps/PaymentOCRService/complete_pipeline.py
Normal file
@@ -0,0 +1,837 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
End-to-end local pipeline (single script)
|
||||
|
||||
- One Google Vision pass per image (DOCUMENT_TEXT_DETECTION)
|
||||
- Smart deskew (Hough + OCR pairs) with fine grid search (in-memory)
|
||||
- Build slope-aware (pre) and horizontal (post) line dumps (in-memory)
|
||||
- Extract all clients & PD rows per page (robust to headers/EOBS)
|
||||
- Export nicely formatted Excel via ExcelGenerator
|
||||
|
||||
Usage:
|
||||
python ocr_pipeline.py --input "C:\\imgs" --out "results.xlsx"
|
||||
python ocr_pipeline.py --files s1.jpg s2.jpg --out results.xlsx
|
||||
python ocr_pipeline.py --input "C:\\imgs" --out results.xlsx --deskewed-only
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import io
|
||||
import cv2
|
||||
import math
|
||||
import glob
|
||||
import argparse
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from typing import List, Dict, Tuple, Any, Optional
|
||||
from datetime import datetime
|
||||
|
||||
# ========= Debug switch =========
|
||||
# Set to True to re-enable saving deskewed images, writing *_lines_*.txt,
|
||||
# and printing progress messages.
|
||||
DEBUG = False
|
||||
|
||||
# ---------- Google Vision ----------
|
||||
from google.cloud import vision
|
||||
|
||||
# ---------- openpyxl helpers ----------
|
||||
from openpyxl.utils import get_column_letter
|
||||
from openpyxl.cell.cell import MergedCell
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.styles import Font, PatternFill, Border, Side, Alignment
|
||||
from openpyxl.utils.dataframe import dataframe_to_rows
|
||||
|
||||
# ============================================================
# Config (tuning)
# ============================================================
# Pre-rotation clustering: a word joins a candidate line when its
# perpendicular distance to the fitted line is within
# PERP_TOL_FACTOR * median word height.
PERP_TOL_FACTOR = 0.6
# Vertical band (in median word heights) used when picking seed partners.
SEED_BAND_H = 3.0
# Whether a lone word may form a one-word line.
ALLOW_SINGLETON = True

# Post-rotation grouping: words sharing a horizontal line must have centers
# within POST_Y_TOL_FACTOR * median word height of the line's first word.
POST_Y_TOL_FACTOR = 0.55
|
||||
|
||||
# ============================================================
|
||||
# Vision OCR (ONE pass per image)
|
||||
# ============================================================
|
||||
def _open_bytes(path: str) -> bytes:
    """Return the raw binary contents of *path*."""
    with open(path, "rb") as fh:
        data = fh.read()
    return data
|
||||
|
||||
def extract_words_and_text(image_path: str) -> Tuple[List[Dict], str]:
    """
    Run ONE Google Vision DOCUMENT_TEXT_DETECTION pass over the image.

    Returns:
        (words, full_text) where each word dict carries its "text" plus
        bounding-box geometry: "left"/"top", "w"/"h", and center "cx"/"cy"
        in pixel coordinates.

    Raises:
        RuntimeError: when the Vision API response carries an error message.
    """
    client = vision.ImageAnnotatorClient()
    resp = client.document_text_detection(image=vision.Image(content=_open_bytes(image_path)))
    if resp.error.message:
        raise RuntimeError(resp.error.message)

    full_text = resp.full_text_annotation.text or ""

    words: List[Dict] = []
    # Flatten the page -> block -> paragraph -> word hierarchy into one list.
    for page in resp.full_text_annotation.pages:
        for block in page.blocks:
            for para in block.paragraphs:
                for word in para.words:
                    text = "".join(s.text for s in word.symbols)
                    vs = word.bounding_box.vertices
                    xs = [v.x for v in vs]; ys = [v.y for v in vs]
                    # Axis-aligned box from the (possibly rotated) quad.
                    left, top = min(xs), min(ys)
                    w, h = max(xs) - left, max(ys) - top
                    cx, cy = left + w/2.0, top + h/2.0
                    words.append({"text": text, "left": left, "top": top,
                                  "w": w, "h": h, "cx": cx, "cy": cy})
    return words, full_text
|
||||
|
||||
# ============================================================
|
||||
# Skew estimation (Hough + OCR pairs)
|
||||
# ============================================================
|
||||
def weighted_median(pairs: List[Tuple[float, float]]) -> float:
    """Return the weighted median value of (value, weight) pairs; 0.0 if empty."""
    if not pairs:
        return 0.0
    ordered = sorted(pairs, key=lambda item: item[0])
    half_weight = sum(weight for _, weight in ordered) / 2.0
    running = 0.0
    for value, weight in ordered:
        running += weight
        if running >= half_weight:
            return value
    # Only reachable if floating-point drift keeps `running` below half.
    return ordered[-1][0]
|
||||
|
||||
def estimate_skew_pairs(words: List[Dict],
                        y_band_mult: float = 2.0,
                        min_dx_mult: float = 0.8,
                        max_abs_deg: float = 15.0) -> Tuple[float,int]:
    """
    Estimate page skew (degrees) from pairs of horizontally adjacent OCR words.

    For each word, find the nearest word to its right within a vertical band;
    the angle of that pair approximates the local text-line slope. The result
    is an IQR-trimmed, dx-weighted median over all such angles.

    Returns:
        (angle_deg, sample_count); (0.0, 0) when no usable pairs exist.
    """
    if not words: return 0.0, 0
    widths = [w["w"] for w in words if w["w"]>0]
    heights = [w["h"] for w in words if w["h"]>0]
    w_med = float(np.median(widths) if widths else 10.0)
    h_med = float(np.median(heights) if heights else 16.0)
    y_band = y_band_mult * h_med            # vertical window for "same line"
    min_dx = max(4.0, min_dx_mult * w_med)  # ignore near-overlapping neighbours

    words_sorted = sorted(words, key=lambda w: (w["cy"], w["cx"]))
    pairs: List[Tuple[float,float]] = []
    for i, wi in enumerate(words_sorted):
        best_j = None; best_dx = None
        for j in range(i+1, len(words_sorted)):
            wj = words_sorted[j]
            dy = wj["cy"] - wi["cy"]
            # List is sorted by cy, so once dy exceeds the band no later
            # word can qualify either.
            if dy > y_band: break
            if abs(dy) <= y_band:
                dx = wj["cx"] - wi["cx"]
                if dx <= 0 or dx < min_dx: continue
                # Keep the nearest rightward partner.
                if best_dx is None or dx < best_dx:
                    best_dx, best_j = dx, j
        if best_j is None: continue
        wj = words_sorted[best_j]
        dx = wj["cx"] - wi["cx"]; dy = wj["cy"] - wi["cy"]
        ang = math.degrees(math.atan2(dy, dx))
        if abs(ang) <= max_abs_deg:
            # Weight by dx: longer baselines give more reliable angles.
            pairs.append((ang, max(1.0, dx)))

    if not pairs: return 0.0, 0
    vals = np.array([v for v,_ in pairs], dtype=float)
    q1, q3 = np.percentile(vals, [25,75]); iqr = q3-q1
    lo, hi = q1 - 1.5*iqr, q3 + 1.5*iqr
    # IQR outlier trim; fall back to all pairs if everything got trimmed.
    trimmed = [(v,w) for v,w in pairs if lo <= v <= hi] or pairs
    return float(weighted_median(trimmed)), len(trimmed)
|
||||
|
||||
def estimate_skew_hough(img: np.ndarray, thr: int = 180) -> Tuple[float,int]:
    """
    Estimate skew (degrees) from straight image edges via a Hough transform.

    Angles are folded into (-45, 45] and a second median is taken over lines
    within 10 degrees of the first median to reject perpendicular rules/borders.

    Returns:
        (angle_deg, line_count); (0.0, 0) when no lines are detected.
    """
    g = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    g = cv2.GaussianBlur(g, (3,3), 0)
    edges = cv2.Canny(g, 60, 160, apertureSize=3)
    lines = cv2.HoughLines(edges, 1, np.pi/180, threshold=thr)
    if lines is None: return 0.0, 0
    angs = []
    for (rho, theta) in lines[:,0,:]:
        # theta is the line's normal direction; shift by 90 degrees to get the
        # line direction, then fold into (-45, 45].
        ang = (theta - np.pi/2.0) * 180.0/np.pi
        while ang > 45: ang -= 90
        while ang < -45: ang += 90
        angs.append(ang)
    angs = np.array(angs, dtype=float)
    med = float(np.median(angs))
    keep = angs[np.abs(angs - med) <= 10.0]
    return (float(np.median(keep)) if keep.size else med), int(angs.size)
|
||||
|
||||
# ============================================================
|
||||
# Rotation (image + coordinates) and scoring
|
||||
# ============================================================
|
||||
def rotation_matrix_keep_bounds(shape_hw: Tuple[int,int], angle_deg: float) -> Tuple[np.ndarray, Tuple[int,int]]:
    """
    Build an affine rotation matrix about the image center, with the canvas
    enlarged so the whole rotated image remains visible (no cropping).

    Returns:
        (2x3 matrix M, (new_h, new_w)) suitable for cv2.warpAffine.
    """
    h, w = shape_hw
    center = (w/2.0, h/2.0)
    M = cv2.getRotationMatrix2D(center, angle_deg, 1.0)
    cos, sin = abs(M[0,0]), abs(M[0,1])
    # Bounding box of the rotated image.
    new_w = int(h*sin + w*cos)
    new_h = int(h*cos + w*sin)
    # Translate so the rotated content is centered on the enlarged canvas.
    M[0,2] += (new_w/2) - center[0]
    M[1,2] += (new_h/2) - center[1]
    return M, (new_h, new_w)
|
||||
|
||||
def rotate_image_keep_bounds(img: np.ndarray, angle_deg: float, border_value=255) -> np.ndarray:
    """Rotate img by angle_deg without cropping; new border pixels get border_value."""
    M, (nh, nw) = rotation_matrix_keep_bounds(img.shape[:2], angle_deg)
    return cv2.warpAffine(img, M, (nw, nh),
                          flags=cv2.INTER_LINEAR,
                          borderMode=cv2.BORDER_CONSTANT,
                          borderValue=border_value)
|
||||
|
||||
def transform_words(words: List[Dict], shape_hw: Tuple[int,int], angle_deg: float) -> List[Dict]:
    """
    Map each word's center through the same rotation applied to the image.

    Returns copies of the word dicts with "cx_rot"/"cy_rot" keys added;
    the input dicts are not mutated.
    """
    M, _ = rotation_matrix_keep_bounds(shape_hw, angle_deg)
    out = []
    for w in words:
        # Homogeneous coordinates through the 2x3 affine matrix.
        x, y = (M @ np.array([w["cx"], w["cy"], 1.0])).tolist()
        ww = dict(w)
        ww["cx_rot"], ww["cy_rot"] = float(x), float(y)
        out.append(ww)
    return out
|
||||
|
||||
def preview_score(img: np.ndarray, deskew_angle: float) -> float:
    """
    Cost of a candidate deskew angle: the residual Hough skew measured after
    rotating a <=1200px preview by deskew_angle. Lower is better; returns
    90.0 (worst case) when no Hough lines are found.
    """
    h, w = img.shape[:2]
    scale = 1200.0 / max(h, w)
    # Downscale for speed; skip resizing when the image is already small.
    small = cv2.resize(img, (int(w*scale), int(h*scale)), interpolation=cv2.INTER_AREA) if scale < 1 else img
    rot = rotate_image_keep_bounds(small, deskew_angle, border_value=255)
    resid, n = estimate_skew_hough(rot, thr=140)
    return abs(resid) if n > 0 else 90.0
|
||||
|
||||
# ============================================================
|
||||
# Slope-based clustering (pre-rotation)
|
||||
# ============================================================
|
||||
def line_from_points(p0, p1):
    """Slope/intercept of the line through p0 and p1; (inf, x0) for vertical."""
    (ax, ay), (bx, by) = p0, p1
    run = bx - ax
    if abs(run) < 1e-9:
        return float("inf"), ax
    slope = (by - ay) / run
    return slope, ay - slope * ax

def perp_distance(m, b, x, y):
    """Perpendicular distance from (x, y) to y = m*x + b (or x = b when m is inf)."""
    if math.isinf(m):
        return abs(x - b)
    return abs(m * x - y + b) / math.sqrt(m * m + 1.0)

def refit_line(points: List[Tuple[float,float]]) -> Tuple[float,float]:
    """Least-squares (slope, intercept) fit; (0, y) for one point, (inf, x_mean) for a vertical set."""
    if len(points) == 1:
        return 0.0, points[0][1]
    xs = [px for px, _ in points]
    ys = [py for _, py in points]
    x_mean = sum(xs) / len(xs)
    y_mean = sum(ys) / len(ys)
    cov = sum((px - x_mean) * (py - y_mean) for px, py in points)
    var = sum((px - x_mean) ** 2 for px in xs)
    if abs(var) < 1e-12:
        return float("inf"), x_mean
    slope = cov / var
    return slope, y_mean - slope * x_mean

def project_t(m, b, x0, y0, x, y):
    """Signed position of (x, y) along the line's direction, measured from (x0, y0)."""
    if math.isinf(m):
        return y - y0
    return ((x - x0) + m * (y - y0)) / math.sqrt(1 + m * m)
|
||||
|
||||
def _build_line_result(words, idxs, m, b, rotated=False):
    """
    Assemble a line dict from word indices and a fitted line (slope m, intercept b).

    Words are ordered by their projection along the line so the joined text
    reads left-to-right. When rotated=True the post-rotation centers
    ("cx_rot"/"cy_rot") are used instead of the raw ones.
    """
    # The leftmost word serves as the projection origin.
    origin_idx = min(idxs, key=lambda i: (words[i]["cx_rot"] if rotated else words[i]["cx"]))
    x0 = words[origin_idx]["cx_rot"] if rotated else words[origin_idx]["cx"]
    y0 = words[origin_idx]["cy_rot"] if rotated else words[origin_idx]["cy"]

    ordered = sorted(
        idxs,
        key=lambda i: project_t(
            m, b, x0, y0,
            words[i]["cx_rot"] if rotated else words[i]["cx"],
            words[i]["cy_rot"] if rotated else words[i]["cy"]
        )
    )
    line_words = [words[i] for i in ordered]
    text = " ".join(w["text"] for w in line_words)

    xs = [(w["cx_rot"] if rotated else w["cx"]) for w in line_words]
    ys = [(w["cy_rot"] if rotated else w["cy"]) for w in line_words]
    return {
        "text": text,
        "words": line_words,
        "slope": m,
        "center_x": float(sum(xs)/len(xs)),
        "center_y": float(sum(ys)/len(ys)),
        "count": len(line_words),
    }
|
||||
|
||||
def cluster_tilted_lines(words: List[Dict]) -> List[Dict]:
    """
    Group words into text lines WITHOUT assuming the page is level.

    Greedy RANSAC-like scheme: the topmost unassigned word seeds a line;
    lines through it and up to 10 of its nearest horizontal neighbours are
    hypothesised; the hypothesis with the most inliers (perpendicular
    distance within PERP_TOL_FACTOR * median height) wins, is least-squares
    refit, and expanded once with the refined fit.

    Returns:
        Line dicts (see _build_line_result) sorted top-to-bottom.
    """
    if not words: return []
    hs = sorted([w["h"] for w in words if w["h"]>0])
    h_med = hs[len(hs)//2] if hs else 16.0
    perp_tol = PERP_TOL_FACTOR * h_med
    band_dy = SEED_BAND_H * h_med

    remaining = set(range(len(words)))
    order = sorted(remaining, key=lambda i: (words[i]["cy"], words[i]["cx"]))
    lines = []

    while remaining:
        # Topmost (then leftmost) unassigned word seeds the next line.
        seed_idx = next(i for i in order if i in remaining)
        remaining.remove(seed_idx)
        sx, sy = words[seed_idx]["cx"], words[seed_idx]["cy"]

        cand_idxs = [j for j in remaining if abs(words[j]["cy"] - sy) <= band_dy]
        if not cand_idxs:
            if ALLOW_SINGLETON:
                # No neighbours in the band: emit a one-word line.
                m,b = refit_line([(sx,sy)])
                lines.append(_build_line_result(words, {seed_idx}, m, b))
            continue

        # Try a line through the seed and each of its nearest neighbours.
        cand_idxs.sort(key=lambda j: abs(words[j]["cx"] - sx))
        best_inliers = None; best_mb = None
        for j in cand_idxs[:min(10, len(cand_idxs))]:
            m,b = line_from_points((sx,sy), (words[j]["cx"], words[j]["cy"]))
            inliers = {seed_idx, j}
            for k in remaining:
                xk, yk = words[k]["cx"], words[k]["cy"]
                if perp_distance(m,b,xk,yk) <= perp_tol:
                    inliers.add(k)
            if best_inliers is None or len(inliers) > len(best_inliers):
                best_inliers, best_mb = inliers, (m,b)

        # Refit on the winning inliers, then expand once with the better fit.
        m,b = best_mb
        pts = [(words[i]["cx"], words[i]["cy"]) for i in best_inliers]
        m,b = refit_line(pts)

        expanded = set(best_inliers)
        for idx in list(remaining):
            xk, yk = words[idx]["cx"], words[idx]["cy"]
            if perp_distance(m,b,xk,yk) <= perp_tol:
                expanded.add(idx)

        for idx in expanded:
            if idx in remaining:
                remaining.remove(idx)
        lines.append(_build_line_result(words, expanded, m, b))

    lines.sort(key=lambda L: L["center_y"])
    return lines
|
||||
|
||||
# ============================================================
|
||||
# Post-rotation grouping (simple horizontal lines)
|
||||
# ============================================================
|
||||
def group_horizontal_lines(rotated_words: List[Dict]) -> List[Dict]:
    """
    Group deskewed words into lines by simple y-banding.

    Words sorted by rotated center-y accumulate into the current line until
    one falls more than POST_Y_TOL_FACTOR * median height away from the
    line's FIRST word, which starts a new line.

    Returns:
        Line dicts (see _build_line_result) sorted top-to-bottom.
    """
    if not rotated_words: return []
    hs = sorted([w["h"] for w in rotated_words if w["h"]>0])
    h_med = hs[len(hs)//2] if hs else 16.0
    y_tol = POST_Y_TOL_FACTOR * h_med

    idxs = list(range(len(rotated_words)))
    idxs.sort(key=lambda i: (rotated_words[i]["cy_rot"], rotated_words[i]["cx_rot"]))
    lines = []
    cur = []

    def flush():
        # Emit the current run of indices as one line and reset the buffer.
        nonlocal cur
        if not cur: return
        xs = [rotated_words[i]["cx_rot"] for i in cur]
        ys = [rotated_words[i]["cy_rot"] for i in cur]
        m,b = refit_line(list(zip(xs,ys)))
        cur_sorted = sorted(cur, key=lambda i: rotated_words[i]["cx_rot"])
        lines.append(_build_line_result(rotated_words, set(cur_sorted), m, b, rotated=True))
        cur = []

    for i in idxs:
        if not cur:
            cur = [i]
        else:
            # Compare against the line's first word, not a running average.
            y0 = rotated_words[cur[0]]["cy_rot"]
            yi = rotated_words[i]["cy_rot"]
            if abs(yi - y0) <= y_tol:
                cur.append(i)
            else:
                flush()
                cur = [i]
    flush()
    lines.sort(key=lambda L: L["center_y"])
    return lines
|
||||
|
||||
# ============================================================
|
||||
# Utilities: dump lines to txt (only if DEBUG)
|
||||
# ============================================================
|
||||
def slope_to_deg(m: float) -> float:
    """Convert a line slope to its angle in degrees (a vertical slope maps to 90)."""
    return 90.0 if math.isinf(m) else math.degrees(math.atan(m))
|
||||
|
||||
def write_lines_txt(base_path: str, suffix: str, lines: List[Dict]) -> Optional[str]:
    """
    Dump clustered lines next to the source image as "<base>_<suffix>.txt".

    No-op (returns None) unless DEBUG is enabled; otherwise returns the path
    of the text file written.
    """
    if not DEBUG:
        return None
    txt_path = f"{os.path.splitext(base_path)[0]}_{suffix}.txt"
    with open(txt_path, "w", encoding="utf-8") as f:
        f.write(f"# {os.path.basename(base_path)} ({suffix})\n")
        for i, L in enumerate(lines, 1):
            ang = slope_to_deg(L["slope"])
            f.write(f"[{i:03d}] words={L['count']:>3} slope={ang:+.3f}°\n")
            f.write(L["text"] + "\n\n")
    return txt_path
|
||||
|
||||
# ============================================================
|
||||
# Smart deskew + full pipeline (in-memory; returns words + full_text)
|
||||
# ============================================================
|
||||
def smart_deskew_with_lines(image_path: str,
                            out_path: Optional[str] = None,
                            clamp_deg: float = 30.0,
                            use_vision: bool = True) -> Dict:
    """
    One-stop deskew + line extraction for a single image.

    Runs ONE Google Vision OCR pass (when use_vision), estimates skew from
    both Hough lines and OCR word pairs, fine-searches a +/-0.6 degree grid
    around the candidates with preview_score, rotates in memory, and builds
    both slope-aware ("pre") and horizontal ("post") line groupings.

    Args:
        image_path: image file readable by cv2.imread.
        out_path: where to save the deskewed image (used only when DEBUG).
        clamp_deg: candidate angles are clamped to +/- clamp_deg.
        use_vision: when False, skip OCR entirely (geometry-only deskew).

    Returns:
        Dict with angle_deg, hough_lines, pair_samples, out_path,
        pre_txt/post_txt, pre_lines/post_lines, words, and full_text.

    Raises:
        FileNotFoundError: when cv2 cannot read image_path.
    """
    img = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if img is None: raise FileNotFoundError(image_path)

    words, full_text = ([], "")
    if use_vision:
        words, full_text = extract_words_and_text(image_path)

    # Two independent skew estimates: image edges and OCR word pairs.
    a_h, n_h = estimate_skew_hough(img)
    a_p, n_p = (0.0, 0)
    if words:
        a_p, n_p = estimate_skew_pairs(words, y_band_mult=2.0, min_dx_mult=0.8, max_abs_deg=15.0)

    # Only trust estimates backed by >= 10 samples; try both signs.
    candidates = []
    if n_h >= 10: candidates += [a_h, -a_h]
    if n_p >= 10: candidates += [a_p, -a_p]
    if not candidates: candidates = [0.0]

    # Clamp and de-duplicate at 0.05 degree resolution.
    cand = []
    for a in candidates:
        a = float(max(-clamp_deg, min(clamp_deg, a)))
        if all(abs(a - b) > 0.05 for b in cand):
            cand.append(a)

    # Fine grid of +/-0.6 degrees around every surviving candidate.
    grid = []
    for a in cand:
        for d in (-0.6, -0.4, -0.2, 0.0, 0.2, 0.4, 0.6):
            g = a + d
            if all(abs(g - x) > 0.05 for x in grid):
                grid.append(g)

    # Choose the angle whose rotated preview shows the least residual skew.
    scored = [(a, preview_score(img, -a)) for a in grid]
    best_angle, best_cost = min(scored, key=lambda t: t[1])

    # Rotate in-memory. Save only if DEBUG.
    rotated = rotate_image_keep_bounds(img, -best_angle, border_value=255)
    if DEBUG and out_path:
        cv2.imwrite(out_path, rotated)

    result = {
        "angle_deg": float(best_angle),
        "hough_lines": int(n_h),
        "pair_samples": int(n_p),
        "out_path": out_path if DEBUG else None,
        "pre_txt": None,
        "post_txt": None,
        "pre_lines": [],
        "post_lines": [],
        "words": words,
        "full_text": full_text,
    }

    if words:
        # Slope-aware grouping on the ORIGINAL (unrotated) coordinates...
        pre_lines = cluster_tilted_lines(words)
        result["pre_lines"] = pre_lines
        result["pre_txt"] = write_lines_txt(image_path, "lines_pre", pre_lines)  # only if DEBUG

        # ...then simple horizontal grouping on the rotated coordinates.
        rot_words = transform_words(words, img.shape[:2], -best_angle)
        post_lines = group_horizontal_lines(rot_words)
        result["post_lines"] = post_lines
        result["post_txt"] = write_lines_txt(image_path, "lines_post", post_lines)  # only if DEBUG

    return result
|
||||
|
||||
# ============================================================
# Multi-client extraction from post lines (robust)
# ============================================================
# EOB header fields: "MEMBER NAME: <name>" / "MEMBER ID: <id>".
MEMBER_RE = re.compile(r'\bMEMBER NAME\s*:\s*(.+)', re.IGNORECASE)
MEMBERID_RE = re.compile(r'\bMEMBER ID\s*:\s*([A-Za-z0-9]+)', re.IGNORECASE)
# Claim number: a line starting with 12+ digits (searched above the header).
ICN_LINE_RE = re.compile(r'^\s*\d{12,}\b')

AMOUNT_RE = re.compile(r'(\d{1,3}(?:,\d{3})*\.\d{2})')  # decimals only
DATE6_RE = re.compile(r'\b\d{6}\b')  # 6-digit service date
# Procedure row marker: "PD D1234" or "PD 1234" (leading D may be missing).
PD_ROW_RE = re.compile(r'\bPD\s+(D?\d{4})\b', re.IGNORECASE)
# Tooth numbers 1-32 or primary-dentition letters A-T.
TOOTH_RE = re.compile(r'^(?:[1-9]|[12][0-9]|3[0-2]|[A-Ta-t])$')
# Tooth surface codes (combinations of M/D/B/O/I/L/F/P).
SURFACE_RE = re.compile(r'^[MDBOILFP]{1,4}$', re.IGNORECASE)
|
||||
|
||||
def _to_float(s: str) -> float:
    """Parse a comma-grouped amount string (e.g. '1,234.56'); 0.0 when unparseable."""
    try:
        return float(s.replace(',', ''))
    except Exception:
        return 0.0
|
||||
|
||||
def _parse_pd_line(t: str) -> Optional[Tuple[str, Optional[float], Optional[float], Optional[float], Optional[str], Optional[str], Optional[str]]]:
    """
    Parse a single PD line.

    Returns: (CDT, billed, allowed, paid, date6, tooth, surface), or None
    when the line has no "PD <code>" marker. Fields not found are None.
    """
    m = PD_ROW_RE.search(t)
    if not m:
        return None

    # Normalize the CDT code to its canonical "Dxxxx" form.
    code = m.group(1)
    code = code if code.upper().startswith('D') else f'D{code}'

    # The LAST three decimal amounts on the line are billed/allowed/paid.
    amts = [_to_float(x) for x in AMOUNT_RE.findall(t)]
    billed = allowed = paid = None
    if len(amts) >= 3:
        billed, allowed, paid = amts[-3:]

    d = None
    md = DATE6_RE.search(t)
    if md:
        d = md.group(0)

    tooth = None
    surface = None

    tokens = t.split()
    try:
        code_idx = tokens.index(code)
    except ValueError:
        # The token may lack the leading 'D' we just prepended; fall back to
        # scanning for any token that matches the PD-code pattern.
        # NOTE(review): fallback scan reconstructed inside the except block
        # from a whitespace-mangled source — confirm against the original.
        code_idx = None
        for i, tok in enumerate(tokens):
            if PD_ROW_RE.match(f'PD {tok}'):
                code_idx = i
                break

    if code_idx is not None:
        # Tooth/surface tokens live between the code and the 6-digit date.
        date_idx = None
        for i in range(code_idx + 1, len(tokens)):
            if DATE6_RE.fullmatch(tokens[i]):
                date_idx = i
                break

        window = tokens[code_idx + 1: date_idx if date_idx is not None else len(tokens)]

        for tok in window:
            if TOOTH_RE.fullmatch(tok):
                tooth = tok.upper()
                break

        # Surface is searched only AFTER the tooth token (when one was found).
        start_j = 0
        if tooth is not None:
            for j, tok in enumerate(window):
                if tok.upper() == tooth:
                    start_j = j + 1
                    break
        for tok in window[start_j:]:
            if SURFACE_RE.fullmatch(tok):
                surface = tok.upper()
                break

    return code, billed, allowed, paid, d, tooth, surface
|
||||
|
||||
def extract_all_clients_from_lines(post_lines: List[dict]) -> List[dict]:
    """
    Split strictly by MEMBER NAME lines; ignore anything before the first name.
    For each member block, look up ICN from the nearest line above the member
    header and parse each PD line for CDT, Date SVC, Billed, Allowed, Paid
    (decimals only).

    Args:
        post_lines: horizontal line dicts (see group_horizontal_lines).

    Returns:
        One row dict per PD line; members with no parseable PD lines yield
        a single mostly-empty placeholder row.
    """
    texts = [L["text"] for L in post_lines]
    starts = [i for i,t in enumerate(texts) if MEMBER_RE.search(t)]
    if not starts:
        return []

    out_rows = []

    for si, start in enumerate(starts):
        # Block spans from this member header to the next one (or page end).
        end = starts[si+1] if si+1 < len(starts) else len(texts)

        # header line with MEMBER NAME
        name_line = texts[start]
        raw_name = MEMBER_RE.search(name_line).group(1).strip()
        # Stop at "MEMBER ID" (case-insensitive) and other header labels that
        # OCR may have merged onto the same physical line.
        cut_points = ["MEMBER ID", "OTH INS CD", "PA:", "DIAG:"]
        mname = raw_name
        for cp in cut_points:
            idx = mname.upper().find(cp)
            if idx != -1:
                mname = mname[:idx].strip()

        # member id: search within the block
        mid = ""
        for t in texts[start:end]:
            m = MEMBERID_RE.search(t)
            if m:
                mid = m.group(1).strip()
                break

        # ICN: search a few lines ABOVE the member header
        icn = ""
        for k in range(start-1, max(-1, start-6), -1):
            if k < 0: break
            mm = ICN_LINE_RE.match(texts[k])
            if mm:
                icn = mm.group(0)
                break

        # PD lines in the block
        had_pd = False
        for t in texts[start:end]:
            # Cheap pre-filter before running the full parser.
            if " PD " not in f" {t} ":
                continue
            parsed = _parse_pd_line(t)
            if not parsed:
                continue
            had_pd = True
            code, billed, allowed, paid, dsvc, tooth, surface = parsed
            out_rows.append({
                'Patient Name': mname.title() if mname else "",
                'Patient ID': mid,
                'ICN': icn,
                'CDT Code': code,
                'Tooth': tooth if tooth else "",
                #'Surface': surface if surface else "",
                'Date SVC': dsvc if dsvc else "",
                'Billed Amount': billed if billed is not None else "",
                'Allowed Amount': allowed if allowed is not None else "",
                'Paid Amount': paid if paid is not None else "",
                'Extraction Success': True,
            })

        if not had_pd:
            # Keep a record of the member even when no procedure rows parsed.
            out_rows.append({
                'Patient Name': mname.title() if mname else "",
                'Patient ID': mid,
                'ICN': icn,
                'CDT Code': "",
                'Tooth': "",
                #'Surface': "",
                'Date SVC': "",
                'Billed Amount': "",
                'Allowed Amount': "",
                'Paid Amount': "",
                'Extraction Success': bool(mname or mid),
            })

    return out_rows
|
||||
|
||||
# ============================================================
|
||||
# ExcelGenerator
|
||||
# ============================================================
|
||||
class ExcelGenerator:
|
||||
    def __init__(self):
        """Pre-build the shared styles applied to every generated sheet."""
        # Dark-blue header fill with white bold text.
        self.header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
        self.header_font = Font(color="FFFFFF", bold=True)
        # Thin border on all four sides of each cell.
        self.border = Border(
            left=Side(style='thin'),
            right=Side(style='thin'),
            top=Side(style='thin'),
            bottom=Side(style='thin')
        )
        self.center_alignment = Alignment(horizontal='center', vertical='center')
|
||||
|
||||
    def create_excel_file(self, df: pd.DataFrame) -> bytes:
        """
        Render the extract DataFrame to a formatted .xlsx and return its bytes.

        Layout: title banner on row 1, blank row 2, column header on row 3,
        data below; a summary sheet is appended to the workbook as well.
        """
        wb = Workbook()
        ws = wb.active
        ws.title = "Medical Billing Extract"
        ws['A1'] = f"Medical Billing OCR Extract - Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        ws.merge_cells('A1:H1')
        ws['A1'].font = Font(size=14, bold=True)
        ws['A1'].alignment = self.center_alignment
        ws.append([])

        excel_df = self.prepare_dataframe_for_excel(df)
        for r in dataframe_to_rows(excel_df, index=False, header=True):
            ws.append(r)

        self.format_worksheet(ws, len(excel_df) + 3)
        self.add_summary_sheet(wb, excel_df)

        # Serialize to bytes so callers can stream or persist as they wish.
        output = io.BytesIO()
        wb.save(output)
        output.seek(0)
        return output.getvalue()
|
||||
|
||||
def prepare_dataframe_for_excel(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
excel_df = df.copy()
|
||||
column_order = [
|
||||
'Patient Name', 'Patient ID', 'ICN', 'CDT Code', 'Tooth', 'Date SVC', #'Surface',
|
||||
'Billed Amount', 'Allowed Amount', 'Paid Amount',
|
||||
'Extraction Success', 'Source File'
|
||||
]
|
||||
existing = [c for c in column_order if c in excel_df.columns]
|
||||
excel_df = excel_df[existing]
|
||||
for amount_col in ['Billed Amount', 'Allowed Amount', 'Paid Amount']:
|
||||
if amount_col in excel_df.columns:
|
||||
excel_df[amount_col] = excel_df[amount_col].apply(self.format_currency)
|
||||
if 'Extraction Success' in excel_df.columns:
|
||||
excel_df['Extraction Success'] = excel_df['Extraction Success'].apply(lambda x: 'Yes' if x else 'No')
|
||||
return excel_df
|
||||
|
||||
def format_currency(self, value):
|
||||
if pd.isna(value) or value == "":
|
||||
return ""
|
||||
try:
|
||||
if isinstance(value, str):
|
||||
clean_value = value.replace('$', '').replace(',', '')
|
||||
value = float(clean_value)
|
||||
return f"${value:,.2f}"
|
||||
except (ValueError, TypeError):
|
||||
return str(value)
|
||||
|
||||
def format_worksheet(self, ws, data_rows):
|
||||
header_row = 3
|
||||
for cell in ws[header_row]:
|
||||
if cell.value:
|
||||
cell.fill = self.header_fill
|
||||
cell.font = self.header_font
|
||||
cell.alignment = self.center_alignment
|
||||
cell.border = self.border
|
||||
for row in range(header_row + 1, data_rows + 1):
|
||||
for cell in ws[row]:
|
||||
cell.border = self.border
|
||||
cell.alignment = Alignment(horizontal='left', vertical='center')
|
||||
self.auto_adjust_columns(ws)
|
||||
self.add_conditional_formatting(ws, header_row, data_rows)
|
||||
|
||||
def auto_adjust_columns(self, ws):
|
||||
max_col = ws.max_column
|
||||
max_row = ws.max_row
|
||||
for col_idx in range(1, max_col + 1):
|
||||
max_len = 0
|
||||
for row in range(1, max_row + 1):
|
||||
cell = ws.cell(row=row, column=col_idx)
|
||||
if isinstance(cell, MergedCell):
|
||||
continue
|
||||
try:
|
||||
val = cell.value
|
||||
if val is None:
|
||||
continue
|
||||
max_len = max(max_len, len(str(val)))
|
||||
except Exception:
|
||||
pass
|
||||
letter = get_column_letter(col_idx)
|
||||
ws.column_dimensions[letter].width = min(max_len + 2, 50)
|
||||
|
||||
def add_conditional_formatting(self, ws, header_row, data_rows):
|
||||
success_col = None
|
||||
for col, cell in enumerate(ws[header_row], 1):
|
||||
if cell.value == 'Extraction Success':
|
||||
success_col = col
|
||||
break
|
||||
if success_col:
|
||||
for row in range(header_row + 1, data_rows + 1):
|
||||
cell = ws.cell(row=row, column=success_col)
|
||||
if cell.value == 'Yes':
|
||||
cell.fill = PatternFill(start_color="90EE90", end_color="90EE90", fill_type="solid")
|
||||
elif cell.value == 'No':
|
||||
cell.fill = PatternFill(start_color="FFB6C1", end_color="FFB6C1", fill_type="solid")
|
||||
|
||||
def add_summary_sheet(self, wb, df):
|
||||
ws = wb.create_sheet(title="Summary")
|
||||
ws['A1'] = "Extraction Summary"
|
||||
ws['A1'].font = Font(size=16, bold=True)
|
||||
ws.merge_cells('A1:B1')
|
||||
row = 3
|
||||
stats = [
|
||||
("Total Rows", len(df)),
|
||||
("Successful", len(df[df['Extraction Success'] == 'Yes']) if 'Extraction Success' in df.columns else 0),
|
||||
("Failed", len(df[df['Extraction Success'] == 'No']) if 'Extraction Success' in df.columns else 0),
|
||||
]
|
||||
for name, val in stats:
|
||||
ws[f'A{row}'] = name
|
||||
ws[f'B{row}'] = val
|
||||
ws[f'A{row}'].font = Font(bold=True)
|
||||
row += 1
|
||||
ExcelGenerator().auto_adjust_columns(ws)
|
||||
row += 2
|
||||
ws[f'A{row}'] = f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
|
||||
ws[f'A{row}'].font = Font(italic=True)
|
||||
|
||||
# ============================================================
|
||||
# Runner: glue everything together
|
||||
# ============================================================
|
||||
def process_images_to_excel(files: List[str], out_excel: str, deskewed_only: bool = False) -> None:
    """Run the OCR extraction pipeline over *files* and write an Excel report.

    Args:
        files: Paths of input images.
        out_excel: Destination .xlsx path.
        deskewed_only: When True, inputs are assumed to be already deskewed;
            the deskew step is skipped and OCR word centers are used as-is.

    A failure on any single file is recorded as a blank placeholder row with
    ``Extraction Success=False`` instead of aborting the whole batch.
    """
    excel_gen = ExcelGenerator()
    records: List[Dict[str, Any]] = []

    for src in files:
        try:
            if deskewed_only:
                # Sanity-check the image is readable before spending an OCR call.
                img = cv2.imread(src, cv2.IMREAD_COLOR)
                if img is None:
                    raise FileNotFoundError(src)
                words, _ = extract_words_and_text(src)
                # No rotation applied: "rotated" centers are just the raw centers.
                rot_words = []
                for w in words:
                    ww = dict(w)
                    ww["cx_rot"], ww["cy_rot"] = w["cx"], w["cy"]
                    rot_words.append(ww)
                post_lines = group_horizontal_lines(rot_words)

                write_lines_txt(src, "lines_post", post_lines)  # writes only when DEBUG

                rows = extract_all_clients_from_lines(post_lines)
            else:
                base, ext = os.path.splitext(src)
                dst = f"{base}_deskewed{ext if ext else '.jpg'}" if DEBUG else None
                info = smart_deskew_with_lines(src, dst, clamp_deg=30.0, use_vision=True)
                post_lines = info.get("post_lines", []) if info else []
                rows = extract_all_clients_from_lines(post_lines) if post_lines else []

            for r in rows:
                r["Source File"] = os.path.basename(src)
                records.append(r)

        except Exception:
            # Keep the batch going; emit a placeholder row for the failed file.
            # Keys match prepare_dataframe_for_excel's column order — including
            # 'Tooth', which was previously omitted here and left the error
            # rows ragged relative to successful rows.
            records.append({
                'Patient Name': "", 'Patient ID': "", 'ICN': "", 'CDT Code': "",
                'Tooth': "", 'Date SVC': "", 'Billed Amount': "",
                'Allowed Amount': "", 'Paid Amount': "",
                'Extraction Success': False, 'Source File': os.path.basename(src),
            })

    df = pd.DataFrame.from_records(records)
    data = excel_gen.create_excel_file(df)
    with open(out_excel, "wb") as f:
        f.write(data)
|
||||
|
||||
# ============================================================
|
||||
# CLI
|
||||
# ============================================================
|
||||
def main():
    """CLI entry point: collect image paths and produce the Excel report."""
    ap = argparse.ArgumentParser()
    ap.add_argument("--input", help="Folder of images (jpg/png/tif).", default=None)
    ap.add_argument("--files", nargs="*", help="Specific image files.", default=None)
    ap.add_argument("--out", help="Output Excel path.", required=True)
    ap.add_argument("--deskewed-only", action="store_true",
                    help="Only process files whose name contains '_deskewed'; skip deskew step.")
    args = ap.parse_args()

    paths: List[str] = []
    if args.files:
        paths.extend(f for f in args.files if os.path.isfile(f))
    if args.input and os.path.isdir(args.input):
        for ext in ("*.jpg", "*.jpeg", "*.png", "*.tif", "*.tiff", "*.bmp"):
            paths.extend(glob.glob(os.path.join(args.input, ext)))

    # De-duplicate while preserving discovery order: the same file could be
    # passed via --files and also found inside --input.
    paths = list(dict.fromkeys(paths))

    if args.deskewed_only:
        paths = [p for p in paths if "_deskewed" in os.path.basename(p).lower()]

    if not paths:
        raise SystemExit("No input images found. Use --files or --input (and --deskewed-only if desired).")

    if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS"):
        # Vision OCR will fail without credentials; warn but proceed
        # (the previous 'pass' made this check a silent no-op).
        print("WARNING: GOOGLE_APPLICATION_CREDENTIALS not set. Set it to your local service account JSON path.")

    process_images_to_excel(paths, args.out, deskewed_only=args.deskewed_only)
|
||||
|
||||
# Allow running the OCR pipeline directly as a script.
if __name__ == "__main__":
    main()
|
||||
8
apps/PaymentOCRService/package.json
Normal file
8
apps/PaymentOCRService/package.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"name": "pdfservice",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"postinstall": "pip install -r requirements.txt",
|
||||
"dev": "python main.py"
|
||||
}
|
||||
}
|
||||
10
apps/PaymentOCRService/requirements.txt
Normal file
10
apps/PaymentOCRService/requirements.txt
Normal file
@@ -0,0 +1,10 @@
|
||||
fastapi
|
||||
uvicorn[standard]
|
||||
google-cloud-vision
|
||||
opencv-python-headless
|
||||
pytesseract
|
||||
pillow
|
||||
pandas
|
||||
openpyxl
|
||||
numpy
|
||||
python-multipart
|
||||
BIN
apps/ProcedureCodeFromMhPdf/MH.pdf
Normal file
BIN
apps/ProcedureCodeFromMhPdf/MH.pdf
Normal file
Binary file not shown.
5
apps/ProcedureCodeFromMhPdf/Readme.md
Normal file
5
apps/ProcedureCodeFromMhPdf/Readme.md
Normal file
@@ -0,0 +1,5 @@
|
||||
This code was written as a one-off utility to extract procedure code data from the MassHealth PDF, to make that process easier.
|
||||
|
||||
It was only a one-time process and is not used as core functionality in this app.
|
||||
|
||||
Keeping it here since we might need to run the extraction again in the future.
|
||||
96
apps/ProcedureCodeFromMhPdf/compareJson.py
Normal file
96
apps/ProcedureCodeFromMhPdf/compareJson.py
Normal file
@@ -0,0 +1,96 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Compare a main dental JSON file with one or more other JSON files and
|
||||
return all records whose 'Procedure Code' is NOT present in the main file.
|
||||
|
||||
- Matching key: 'Procedure Code' (case-insensitive, trimmed).
|
||||
- Keeps the full record from the other files (including extra fields like 'Full Price').
|
||||
- Deduplicates by Procedure Code across the collected "missing" results.
|
||||
|
||||
CONFIG: set MAIN_PATH, OTHER_PATHS, OUT_PATH below.
|
||||
"""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any
|
||||
|
||||
# =========================
|
||||
# CONFIG — EDIT THESE ONLY
|
||||
# =========================
|
||||
MAIN_PATH = "procedureCodesMain.json" # your main JSON (with PriceLTEQ21/PriceGT21)
|
||||
OTHER_PATHS = [
|
||||
"procedureCodesOld.json", # one or more other JSON files to compare against the main
|
||||
# "other2.json",
|
||||
]
|
||||
OUT_PATH = "not_in_main.json" # where to write the results
|
||||
# =========================
|
||||
|
||||
|
||||
def _load_json_any(path: str) -> List[Dict[str, Any]]:
    """Load JSON from *path* and always return a list of dict records.

    A top-level object is wrapped in a one-element list; in a top-level
    array, non-dict entries are dropped defensively. Any other top-level
    type raises ValueError.
    """
    with open(path, "r", encoding="utf-8") as handle:
        payload = json.load(handle)
    if isinstance(payload, list):
        # Defensive: keep only object entries.
        return [item for item in payload if isinstance(item, dict)]
    if isinstance(payload, dict):
        return [payload]
    raise ValueError(f"Unsupported JSON top-level type in {path}: {type(payload)}")
|
||||
|
||||
|
||||
def _norm_code(record: Dict[str, Any]) -> str:
    """Return the record's 'Procedure Code' normalized for matching.

    Uppercases and removes every whitespace character (PDF extraction can
    leave stray spaces/tabs inside codes). A missing code yields "".
    """
    raw = str(record.get("Procedure Code", ""))
    # split() with no args breaks on all whitespace; joining removes it all.
    return "".join(raw.strip().upper().split())
|
||||
|
||||
|
||||
def collect_main_codes(main_path: str) -> set:
    """Return the set of normalized, non-empty procedure codes in the main file."""
    codes = set()
    for rec in _load_json_any(main_path):
        # Normalize once per record (previously computed twice per record).
        code = _norm_code(rec)
        if code:
            codes.add(code)
    return codes
|
||||
|
||||
|
||||
def collect_missing_records(other_paths: List[str], main_codes: set) -> List[Dict[str, Any]]:
    """Gather records from *other_paths* whose normalized code is absent from *main_codes*.

    The first occurrence of each code wins across all files; the full
    original record is kept. Results are returned sorted by normalized code.
    """
    found: Dict[str, Dict[str, Any]] = {}  # normalized code -> record
    for path in other_paths:
        for record in _load_json_any(path):
            key = _norm_code(record)
            if not key or key in main_codes or key in found:
                continue
            found[key] = record
    return [found[code] for code in sorted(found)]
|
||||
|
||||
|
||||
def main():
    """Compare OTHER_PATHS against MAIN_PATH and write/print records missing from main."""
    # Fail fast if any configured file is absent.
    if not Path(MAIN_PATH).exists():
        raise FileNotFoundError(f"Main file not found: {MAIN_PATH}")
    for other in OTHER_PATHS:
        if not Path(other).exists():
            raise FileNotFoundError(f"Other file not found: {other}")

    main_codes = collect_main_codes(MAIN_PATH)
    missing_records = collect_missing_records(OTHER_PATHS, main_codes)

    rendered = json.dumps(missing_records, ensure_ascii=False, indent=2)
    with open(OUT_PATH, "w", encoding="utf-8") as sink:
        sink.write(rendered)

    print(f"Main codes: {len(main_codes)}")
    print(f"Missing from main: {len(missing_records)}")
    print(f"Wrote results to {OUT_PATH}")
    # Also echo the full result to stdout.
    print(rendered)
|
||||
|
||||
|
||||
# Allow running the comparison as a standalone script.
if __name__ == "__main__":
    main()
|
||||
183
apps/ProcedureCodeFromMhPdf/extract_bypage.py
Normal file
183
apps/ProcedureCodeFromMhPdf/extract_bypage.py
Normal file
@@ -0,0 +1,183 @@
|
||||
import re
|
||||
import json
|
||||
from typing import List, Dict
|
||||
import fitz # PyMuPDF
|
||||
|
||||
|
||||
# =========================
|
||||
# CONFIG — EDIT THESE ONLY
|
||||
# =========================
|
||||
PDF_PATH = "MH.pdf" # path to your PDF
|
||||
PAGES = [2] # 0-based page indexes to parse, e.g., [2] for the page you showed
|
||||
OUT_PATH = "output.json" # where to write JSON
|
||||
FIRST_PRICE_IS_LTE21 = True # True => first price line is <=21; False => first price is >21
|
||||
PRINT_PAGE_TEXT = False # set True if you want to print the raw page text for sanity check
|
||||
# =========================
|
||||
|
||||
|
||||
# --- patterns ---
|
||||
code_line_re = re.compile(r"^\s*(D\d{4})\s*$")
|
||||
# a price token is either '$123', '$1,234.50', '123', '123.45', or 'NC'
|
||||
price_line_re = re.compile(r"^\s*(?:\$\s*)?(\d{1,3}(?:,\d{3})*(?:\.\d{2})?|\d+(?:\.\d{2})?|NC)\s*$", re.IGNORECASE)
|
||||
# lines that definitely start a notes block we should ignore once prices are done
|
||||
note_starters_re = re.compile(r"^(Teeth\b|One of\b|--|—|–|Age limitation:|CR\b)", re.IGNORECASE)
|
||||
|
||||
|
||||
def normalize_ws(s: str) -> str:
    """Collapse whitespace in *s* to single spaces and trim edge punctuation.

    Non-breaking spaces become regular spaces; runs of spaces/tabs, newlines
    (with surrounding whitespace), and any remaining multi-space runs each
    collapse to one space; finally common list punctuation is stripped from
    both ends.
    """
    cleaned = s.replace("\u00a0", " ")
    for pattern in (r"[ \t]+", r"\s*\n\s*", r"\s{2,}"):
        cleaned = re.sub(pattern, " ", cleaned)
    return cleaned.strip(" ,.;:-•·\n\t")
|
||||
|
||||
|
||||
def clean_money(token: str) -> str:
    """Normalize a price token: 'NC' (any case) stays 'NC'; otherwise drop
    thousands commas, a leading '$', and surrounding whitespace."""
    if token.upper() == "NC":
        return "NC"
    without_commas = token.replace(",", "")
    return without_commas.lstrip("$").strip()
|
||||
|
||||
|
||||
def get_page_lines(pdf_path: str, pages: List[int]) -> List[str]:
    """Return the text lines of the given 0-based *pages*, in order.

    Raises ValueError if any page index is out of range. Line boundaries are
    preserved because the downstream parser works line-by-line.
    """
    doc = fitz.open(pdf_path)
    try:
        last = len(doc) - 1
        for p in pages:
            if not 0 <= p <= last:
                raise ValueError(f"Invalid page index {p}. Valid range is 0..{last}.")
        collected: List[str] = []
        for p in pages:
            page_text = doc.load_page(p).get_text("text") or ""
            if PRINT_PAGE_TEXT:
                print(f"\n--- RAW PAGE {p} ---\n{page_text}")
            collected.extend(page_text.splitlines())
        return collected
    finally:
        doc.close()
|
||||
|
||||
|
||||
def extract_records(lines: List[str]) -> List[Dict[str, str]]:
    """Parse OCR'd/extracted text *lines* into procedure-code records.

    Single-pass state machine over the flat line list: find a ``Dxxxx`` code
    line, collect description lines until the first price/note line, then
    collect exactly two price tokens while skipping note/flag blocks
    (``Y``/``NC`` runs). Records without two reliable prices are dropped.
    Which of the two prices maps to ``PriceLTEQ21`` vs ``PriceGT21`` is
    controlled by the module-level FIRST_PRICE_IS_LTE21 flag.
    """
    out: List[Dict[str, str]] = []
    i = 0
    n = len(lines)

    while i < n:
        line = lines[i].strip()

        # seek a code line (e.g. "D2160")
        mcode = code_line_re.match(line)
        if not mcode:
            i += 1
            continue

        code = mcode.group(1)
        i += 1

        # gather description lines until we encounter price lines
        desc_lines: List[str] = []
        # skip blank lines before the description
        while i < n and not lines[i].strip():
            i += 1

        # collect description lines (usually 1–3) until the first price token;
        # stop also if we accidentally hit another code (defensive)
        j = i
        while j < n:
            s = lines[j].strip()
            if not s:
                # blank line inside description — consider description ended;
                # don't advance here: break and let price parsing handle it
                break
            if code_line_re.match(s):
                # next code — no prices found; abandon this broken record
                break
            if price_line_re.match(s):
                # reached price section
                break
            if note_starters_re.match(s):
                # a note before any price — treat as end of description; prices may be missing
                break
            desc_lines.append(s)
            j += 1

        # advance i to where the description scan left off
        i = j

        description = normalize_ws(" ".join(desc_lines))

        # collect up to two price tokens
        prices: List[str] = []
        while i < n and len(prices) < 2:
            s = lines[i].strip()
            if not s:
                i += 1
                continue
            if code_line_re.match(s):
                # new record — stop; this means we never got prices (malformed)
                break
            mprice = price_line_re.match(s)
            if mprice:
                prices.append(clean_money(mprice.group(1)))
                i += 1
                continue
            # if we encounter a note/flags block, skip forward until the next code/blank
            if note_starters_re.match(s) or s in {"Y", "NC"}:
                # skip this block quickly
                i += 1
                # keep skipping subsequent non-empty, non-code lines until a blank or next code
                while i < n:
                    t = lines[i].strip()
                    if not t or code_line_re.match(t):
                        break
                    i += 1
                # now let the outer loop proceed
                continue
            # unrecognized line: if prices already found, we can break; else skip
            if prices:
                break
            i += 1

        if len(prices) < 2:
            # couldn't find 2 prices reliably; skip this record
            continue

        if FIRST_PRICE_IS_LTE21:
            price_lte21, price_gt21 = prices[0], prices[1]
        else:
            price_lte21, price_gt21 = prices[1], prices[0]

        out.append(
            {
                "Procedure Code": code,
                "Description": description,
                "PriceLTEQ21": price_lte21,
                "PriceGT21": price_gt21,
            }
        )

        # after prices, skip forward until next code or the end of this blank block
        while i < n:
            s = lines[i].strip()
            if not s:
                i += 1
                break
            if code_line_re.match(s):
                # next record will pick this up
                break
            i += 1

    return out
|
||||
|
||||
|
||||
def extract_pdf_to_json(pdf_path: str, pages: List[int], out_path: str) -> List[Dict[str, str]]:
    """Parse the given *pages* of *pdf_path*, write the records to *out_path*
    as JSON, and return them."""
    records = extract_records(get_page_lines(pdf_path, pages))
    with open(out_path, "w", encoding="utf-8") as sink:
        json.dump(records, sink, ensure_ascii=False, indent=2)
    return records
|
||||
|
||||
|
||||
# Script entry point: parse the configured pages and echo the result.
if __name__ == "__main__":
    data = extract_pdf_to_json(PDF_PATH, PAGES, OUT_PATH)
    print(f"Wrote {len(data)} rows to {OUT_PATH}")
    print(json.dumps(data, ensure_ascii=False, indent=2))
|
||||
208
apps/ProcedureCodeFromMhPdf/extract_byrange.py
Normal file
208
apps/ProcedureCodeFromMhPdf/extract_byrange.py
Normal file
@@ -0,0 +1,208 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
MassHealth dental PDF parser (PyMuPDF / fitz) — PAGE RANGE VERSION
|
||||
|
||||
Parses rows like:
|
||||
|
||||
D2160
|
||||
Amalgam-three surfaces,
|
||||
primary or permanent
|
||||
$110
|
||||
$92
|
||||
Y
|
||||
Y
|
||||
...
|
||||
|
||||
Outputs a single JSON with records from the chosen page range (inclusive).
|
||||
|
||||
Config:
|
||||
- PDF_PATH: path to the PDF
|
||||
- PAGE_START, PAGE_END: 1-based page numbers (inclusive)
|
||||
- FIRST_PRICE_IS_LTE21: True => first price line is <=21; False => first price is >21
|
||||
- OUT_PATH: output JSON path
|
||||
"""
|
||||
|
||||
import re
|
||||
import json
|
||||
from typing import List, Dict
|
||||
import fitz # PyMuPDF
|
||||
|
||||
|
||||
# =========================
|
||||
# CONFIG — EDIT THESE ONLY
|
||||
# =========================
|
||||
PDF_PATH = "MH.pdf" # path to your PDF
|
||||
PAGE_START = 1 # 1-based inclusive start page (e.g., 1)
|
||||
PAGE_END = 12 # 1-based inclusive end page (e.g., 5)
|
||||
OUT_PATH = "output.json" # single JSON file containing all parsed rows
|
||||
FIRST_PRICE_IS_LTE21 = True # True => first price line is <=21; False => first price is >21
|
||||
PRINT_PAGE_TEXT = False # set True to print raw text for each page
|
||||
# =========================
|
||||
|
||||
|
||||
# --- patterns ---
|
||||
code_line_re = re.compile(r"^\s*(D\d{4})\s*$")
|
||||
# a price token is either '$123', '$1,234.50', '123', '123.45', or 'NC'
|
||||
price_line_re = re.compile(r"^\s*(?:\$\s*)?(\d{1,3}(?:,\d{3})*(?:\.\d{2})?|\d+(?:\.\d{2})?|NC)\s*$", re.IGNORECASE)
|
||||
# lines that definitely start a notes block to ignore once prices are done
|
||||
note_starters_re = re.compile(r"^(Teeth\b|One of\b|--|—|–|Age limitation:|CR\b)", re.IGNORECASE)
|
||||
|
||||
|
||||
def normalize_ws(s: str) -> str:
    """Collapse whitespace in *s* to single spaces and trim edge punctuation.

    Converts non-breaking spaces to spaces, then collapses space/tab runs,
    newlines (with surrounding whitespace), and leftover multi-space runs,
    and finally strips common list punctuation from both ends.
    """
    collapsed = s.replace("\u00a0", " ")
    for pat in (r"[ \t]+", r"\s*\n\s*", r"\s{2,}"):
        collapsed = re.sub(pat, " ", collapsed)
    return collapsed.strip(" ,.;:-•·\n\t")
|
||||
|
||||
|
||||
def clean_money(token: str) -> str:
    """Normalize a price token: 'NC' (any case) is returned as 'NC';
    otherwise commas, a leading '$', and edge whitespace are removed."""
    if token.upper() == "NC":
        return "NC"
    digits_only = token.replace(",", "")
    return digits_only.lstrip("$").strip()
|
||||
|
||||
|
||||
def get_page_lines(pdf_path: str, page_start_1b: int, page_end_1b: int) -> List[str]:
    """Return the text lines of the inclusive 1-based page range.

    Raises ValueError for non-positive, inverted, or out-of-bounds ranges.
    Line boundaries are preserved for the line-by-line parser.
    """
    if page_start_1b <= 0 or page_end_1b <= 0:
        raise ValueError("PAGE_START and PAGE_END must be >= 1 (1-based).")
    if page_start_1b > page_end_1b:
        raise ValueError("PAGE_START cannot be greater than PAGE_END.")

    doc = fitz.open(pdf_path)
    try:
        page_count = len(doc)
        first = page_start_1b - 1  # 0-based inclusive start
        last = page_end_1b - 1     # 0-based inclusive end
        if first < 0 or last > page_count - 1:
            raise ValueError(f"Page range out of bounds. Valid 1-based range is 1..{page_count}.")
        collected: List[str] = []
        for idx in range(first, last + 1):
            text = doc.load_page(idx).get_text("text") or ""
            if PRINT_PAGE_TEXT:
                print(f"\n--- RAW PAGE {idx} (0-based; shown as {idx+1} 1-based) ---\n{text}")
            collected.extend(text.splitlines())
        return collected
    finally:
        doc.close()
|
||||
|
||||
|
||||
def extract_records(lines: List[str]) -> List[Dict[str, str]]:
    """Parse extracted text *lines* into procedure-code records.

    Same state machine as the by-page variant: find a ``Dxxxx`` code line,
    collect description lines until the first price/note, then collect
    exactly two price tokens while skipping note/flag blocks (``Y``/``NC``).
    Records without two reliable prices are dropped; FIRST_PRICE_IS_LTE21
    decides which price maps to ``PriceLTEQ21`` vs ``PriceGT21``.
    """
    out: List[Dict[str, str]] = []
    i = 0
    n = len(lines)

    while i < n:
        line = lines[i].strip()

        # seek a code line (e.g. "D2160")
        mcode = code_line_re.match(line)
        if not mcode:
            i += 1
            continue

        code = mcode.group(1)
        i += 1

        # gather description lines until we encounter price lines
        desc_lines: List[str] = []
        # skip blank lines before the description
        while i < n and not lines[i].strip():
            i += 1

        # collect description lines (usually 1–3) until the first price token;
        # stop also if we accidentally hit another code (defensive)
        j = i
        while j < n:
            s = lines[j].strip()
            if not s:
                break
            if code_line_re.match(s):
                # next code — description ended abruptly (malformed)
                break
            if price_line_re.match(s):
                # reached price section
                break
            if note_starters_re.match(s):
                # a note before any price — treat as end of description; prices may be missing
                break
            desc_lines.append(s)
            j += 1

        # advance i to where the description scan left off
        i = j

        description = normalize_ws(" ".join(desc_lines))

        # collect up to two price tokens
        prices: List[str] = []
        while i < n and len(prices) < 2:
            s = lines[i].strip()
            if not s:
                i += 1
                continue
            if code_line_re.match(s):
                # new record — stop; this means we never got prices (malformed)
                break
            mprice = price_line_re.match(s)
            if mprice:
                prices.append(clean_money(mprice.group(1)))
                i += 1
                continue
            # if we encounter a note/flags block, skip forward until a blank or next code
            if note_starters_re.match(s) or s in {"Y", "NC"}:
                i += 1
                while i < n:
                    t = lines[i].strip()
                    if not t or code_line_re.match(t):
                        break
                    i += 1
                continue
            # unrecognized line: if we already captured some prices, break; else skip
            if prices:
                break
            i += 1

        if len(prices) < 2:
            # couldn't find 2 prices reliably; skip this record
            continue

        if FIRST_PRICE_IS_LTE21:
            price_lte21, price_gt21 = prices[0], prices[1]
        else:
            price_lte21, price_gt21 = prices[1], prices[0]

        out.append(
            {
                "Procedure Code": code,
                "Description": description,
                "PriceLTEQ21": price_lte21,
                "PriceGT21": price_gt21,
            }
        )

        # after prices, skip forward until next code or the end of this blank block
        while i < n:
            s = lines[i].strip()
            if not s:
                i += 1
                break
            if code_line_re.match(s):
                break
            i += 1

    return out
|
||||
|
||||
|
||||
def extract_pdf_range_to_json(pdf_path: str, page_start_1b: int, page_end_1b: int, out_path: str) -> List[Dict[str, str]]:
    """Parse the 1-based inclusive page range of *pdf_path*, write the
    records to *out_path* as JSON, and return them."""
    records = extract_records(get_page_lines(pdf_path, page_start_1b, page_end_1b))
    with open(out_path, "w", encoding="utf-8") as sink:
        json.dump(records, sink, ensure_ascii=False, indent=2)
    return records
|
||||
|
||||
|
||||
# Script entry point: parse the configured page range and echo the result.
if __name__ == "__main__":
    data = extract_pdf_range_to_json(PDF_PATH, PAGE_START, PAGE_END, OUT_PATH)
    print(f"Wrote {len(data)} rows to {OUT_PATH}")
    print(json.dumps(data, ensure_ascii=False, indent=2))
|
||||
192
apps/ProcedureCodeFromMhPdf/not_in_main.json
Normal file
192
apps/ProcedureCodeFromMhPdf/not_in_main.json
Normal file
@@ -0,0 +1,192 @@
|
||||
[
|
||||
{
|
||||
"Procedure Code": "D0120",
|
||||
"Description": "perio exam",
|
||||
"Price": "105"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0140",
|
||||
"Description": "limited exam",
|
||||
"Price": "90"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0150",
|
||||
"Description": "comprehensive exam",
|
||||
"Price": "120"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0210",
|
||||
"Description": "Fmx.",
|
||||
"Price": "120"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0220",
|
||||
"Description": "first PA.",
|
||||
"Price": "60"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0230",
|
||||
"Description": "2nd PA.",
|
||||
"Price": "50"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0272",
|
||||
"Description": "2 BW",
|
||||
"Price": "80"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0274",
|
||||
"Description": "4BW",
|
||||
"Price": "160"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0330",
|
||||
"Description": "pano",
|
||||
"Price": "150"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0364",
|
||||
"Description": "Less than one jaw",
|
||||
"Price": "350"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0365",
|
||||
"Description": "Mand",
|
||||
"Price": "350"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0366",
|
||||
"Description": "Max",
|
||||
"Price": "350"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0367",
|
||||
"Description": "",
|
||||
"Price": "400"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0368",
|
||||
"Description": "include TMJ",
|
||||
"Price": "375"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0380",
|
||||
"Description": "Less than one jaw",
|
||||
"Price": "300"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0381",
|
||||
"Description": "Mand",
|
||||
"Price": "300"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0382",
|
||||
"Description": "Max",
|
||||
"Price": "300"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0383",
|
||||
"Description": "",
|
||||
"Price": "350"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D1110",
|
||||
"Description": "adult prophy",
|
||||
"Price": "150"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D1120",
|
||||
"Description": "child prophy",
|
||||
"Price": "120"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D1208",
|
||||
"Description": "FL",
|
||||
"Price": "90"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D1351",
|
||||
"Description": "sealant",
|
||||
"Price": "80"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D1999",
|
||||
"Description": "",
|
||||
"Price": "50"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D2140",
|
||||
"Description": "amalgam, one surface",
|
||||
"Price": "150"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D2150",
|
||||
"Description": "amalgam, two surface",
|
||||
"Price": "200"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D2955",
|
||||
"Description": "post renoval",
|
||||
"Price": "350"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D4910",
|
||||
"Description": "perio maintains",
|
||||
"Price": "250"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D5510",
|
||||
"Description": "Repair broken complete denture base (QUAD)",
|
||||
"Price": "400"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D6056",
|
||||
"Description": "pre fab abut",
|
||||
"Price": "750"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D6057",
|
||||
"Description": "custom abut",
|
||||
"Price": "800"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D6058",
|
||||
"Description": "porcelain, implant crown, ceramic crown",
|
||||
"Price": "1400"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D6059",
|
||||
"Description": "",
|
||||
"Price": "1400"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D6100",
|
||||
"Description": "",
|
||||
"Price": "320"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D6110",
|
||||
"Description": "implant",
|
||||
"Price": "1600"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D6242",
|
||||
"Description": "noble metal. For united",
|
||||
"Price": "1400"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D6245",
|
||||
"Description": "porcelain, not for united",
|
||||
"Price": "1400"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D7910",
|
||||
"Description": "suture, small wound up to 5 mm",
|
||||
"Price": "400"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D7950",
|
||||
"Description": "max",
|
||||
"Price": "800"
|
||||
}
|
||||
]
|
||||
1026
apps/ProcedureCodeFromMhPdf/procedureCodes.json
Normal file
1026
apps/ProcedureCodeFromMhPdf/procedureCodes.json
Normal file
File diff suppressed because it is too large
Load Diff
344
apps/ProcedureCodeFromMhPdf/procedureCodesOld.json
Normal file
344
apps/ProcedureCodeFromMhPdf/procedureCodesOld.json
Normal file
@@ -0,0 +1,344 @@
|
||||
[
|
||||
{
|
||||
"Procedure Code": "D1999",
|
||||
"Description": "",
|
||||
"Price": "50"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0120",
|
||||
"Description": "perio exam",
|
||||
"Price": "105"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0140",
|
||||
"Description": "limited exam",
|
||||
"Price": "90"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0150",
|
||||
"Description": "comprehensive exam",
|
||||
"Price": "120"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0210",
|
||||
"Description": "Fmx.",
|
||||
"Price": "120"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0220",
|
||||
"Description": "first PA.",
|
||||
"Price": "60"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0230",
|
||||
"Description": "2nd PA.",
|
||||
"Price": "50"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0330",
|
||||
"Description": "pano",
|
||||
"Price": "150"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0272",
|
||||
"Description": "2 BW",
|
||||
"Price": "80"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0274",
|
||||
"Description": "4BW",
|
||||
"Price": "160"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D1110",
|
||||
"Description": "adult prophy",
|
||||
"Price": "150"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D1120",
|
||||
"Description": "child prophy",
|
||||
"Price": "120"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D1351",
|
||||
"Description": "sealant",
|
||||
"Price": "80"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D4341",
|
||||
"Description": "srp",
|
||||
"Price": "250"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D4910",
|
||||
"Description": "perio maintains",
|
||||
"Price": "250"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D1208",
|
||||
"Description": "FL",
|
||||
"Price": "90"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D2330",
|
||||
"Description": "front composite. 1 s.",
|
||||
"Price": "180"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D2331",
|
||||
"Description": "2s",
|
||||
"Price": "220"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D2332",
|
||||
"Description": "3s",
|
||||
"Price": "280"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D2335",
|
||||
"Description": "4s or more",
|
||||
"Price": "350"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D2391",
|
||||
"Description": "back. 1s",
|
||||
"Price": "200"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D2392",
|
||||
"Description": "2s",
|
||||
"Price": "250"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D2393",
|
||||
"Description": "3s",
|
||||
"Price": "280"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D2394",
|
||||
"Description": "4s",
|
||||
"Price": "320"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D2140",
|
||||
"Description": "amalgam, one surface",
|
||||
"Price": "150"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D2150",
|
||||
"Description": "amalgam, two surface",
|
||||
"Price": "200"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D2750",
|
||||
"Description": "high noble",
|
||||
"Price": "1300"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D2751",
|
||||
"Description": "base metal",
|
||||
"Price": "1200"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D2740",
|
||||
"Description": "crown porcelain",
|
||||
"Price": "1300"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D2954",
|
||||
"Description": "p/c",
|
||||
"Price": "450"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D7910",
|
||||
"Description": "suture, small wound up to 5 mm",
|
||||
"Price": "400"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D5110",
|
||||
"Description": "FU",
|
||||
"Price": "1200",
|
||||
"Full Price": "1700"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D5120",
|
||||
"Description": "FL",
|
||||
"Price": "1700",
|
||||
"Full Price": "1700"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D5211",
|
||||
"Description": "pu",
|
||||
"Price": "1300"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D5212",
|
||||
"Description": "pl",
|
||||
"Price": "1300"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D5213",
|
||||
"Description": "cast pu.",
|
||||
"Price": "1700"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D5214",
|
||||
"Description": "cast pl",
|
||||
"Price": "1700"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D5510",
|
||||
"Description": "Repair broken complete denture base (QUAD)",
|
||||
"Price": "400"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D5520",
|
||||
"Description": "Replace missing or broken teeth - complete denture (each tooth) (TOOTH)",
|
||||
"Price": "200"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D5750",
|
||||
"Description": "lab reline",
|
||||
"Price": "600"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D5730",
|
||||
"Description": "chairside reline",
|
||||
"Price": "500"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D2920",
|
||||
"Description": "re cement crown",
|
||||
"Price": "120"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D2950",
|
||||
"Description": "core buildup",
|
||||
"Price": "350"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D2955",
|
||||
"Description": "post renoval",
|
||||
"Price": "350"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D6100",
|
||||
"Description": "",
|
||||
"Price": "320"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D6110",
|
||||
"Description": "implant",
|
||||
"Price": "1600"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D6056",
|
||||
"Description": "pre fab abut",
|
||||
"Price": "750"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D6057",
|
||||
"Description": "custom abut",
|
||||
"Price": "800"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D6058",
|
||||
"Description": "porcelain, implant crown, ceramic crown",
|
||||
"Price": "1400"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D6059",
|
||||
"Description": "",
|
||||
"Price": "1400"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D6242",
|
||||
"Description": "noble metal. For united",
|
||||
"Price": "1400"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D6245",
|
||||
"Description": "porcelain, not for united",
|
||||
"Price": "1400"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0367",
|
||||
"Description": "",
|
||||
"Price": "400"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0364",
|
||||
"Description": "Less than one jaw",
|
||||
"Price": "350"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0365",
|
||||
"Description": "Mand",
|
||||
"Price": "350"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0366",
|
||||
"Description": "Max",
|
||||
"Price": "350"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0368",
|
||||
"Description": "include TMJ",
|
||||
"Price": "375"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0383",
|
||||
"Description": "",
|
||||
"Price": "350"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0380",
|
||||
"Description": "Less than one jaw",
|
||||
"Price": "300"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0381",
|
||||
"Description": "Mand",
|
||||
"Price": "300"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D0382",
|
||||
"Description": "Max",
|
||||
"Price": "300"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D7950",
|
||||
"Description": "max",
|
||||
"Price": "800"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D7140",
|
||||
"Description": "simple ext",
|
||||
"Price": "150"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D7210",
|
||||
"Description": "surgical ext",
|
||||
"Price": "280"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D7220",
|
||||
"Description": "soft impacted",
|
||||
"Price": "380"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D7230",
|
||||
"Description": "partial bony",
|
||||
"Price": "450"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D7240",
|
||||
"Description": "fully bony",
|
||||
"Price": "550"
|
||||
},
|
||||
{
|
||||
"Procedure Code": "D3320",
|
||||
"Description": "pre M RCT",
|
||||
"Price": "1050"
|
||||
}
|
||||
]
|
||||
Reference in New Issue
Block a user