from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import StreamingResponse, JSONResponse, PlainTextResponse
from typing import List, Optional, Tuple
import io
import os

from app.pipeline_adapter import (
    process_images_to_rows,
    rows_to_csv_bytes,
)

app = FastAPI(
    title="Medical Billing OCR API",
    description="FastAPI wrapper around the complete OCR pipeline (Google Vision + deskew + line clustering + extraction).",
    version="1.0.0",
)

# Extensions accepted by both extraction endpoints.
ALLOWED_EXTS = {".jpg", ".jpeg", ".png", ".tif", ".tiff", ".bmp"}


def _validate_files(files: List[UploadFile]) -> None:
    """Reject empty batches (400) and unsupported extensions (415).

    The extension check is advisory only — it looks at the filename, not the
    file content. Shared by /extract/json and /extract/csv so the two
    endpoints cannot drift apart.
    """
    if not files:
        raise HTTPException(status_code=400, detail="No files provided.")
    bad = [
        f.filename for f in files
        if os.path.splitext(f.filename or "")[1].lower() not in ALLOWED_EXTS
    ]
    if bad:
        raise HTTPException(
            status_code=415,
            detail=f"Unsupported file types: {', '.join(bad)}. Allowed: {', '.join(sorted(ALLOWED_EXTS))}"
        )


async def _read_uploads(files: List[UploadFile]) -> Tuple[List[bytes], List[str]]:
    """Read every upload fully into memory.

    Returns parallel lists of (blob bytes, display name); a missing filename
    falls back to 'upload.bin'.
    """
    blobs: List[bytes] = []
    filenames: List[str] = []
    for f in files:
        blobs.append(await f.read())
        filenames.append(f.filename or "upload.bin")
    return blobs, filenames


@app.get("/health", response_class=PlainTextResponse)
def health():
    """Liveness probe; also reports whether GCP credentials appear configured."""
    creds = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", "")
    return f"OK | GOOGLE_APPLICATION_CREDENTIALS set: {bool(creds)}"


@app.post("/extract/json")
async def extract_json(files: List[UploadFile] = File(...)):
    """Run the OCR pipeline on the uploaded images and return extracted rows as JSON."""
    _validate_files(files)
    blobs, filenames = await _read_uploads(files)
    try:
        rows = process_images_to_rows(blobs, filenames)
        # Each row dict carries the extraction columns (Patient Name, etc.).
        return JSONResponse(content={"rows": rows})
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Processing error: {e}")


@app.post("/extract/csv")
async def extract_csv(files: List[UploadFile] = File(...), filename: Optional[str] = None):
    """Run the OCR pipeline and stream the result back as a CSV attachment.

    `filename` (optional query parameter) overrides the attachment name.
    """
    _validate_files(files)
    blobs, filenames = await _read_uploads(files)
    try:
        rows = process_images_to_rows(blobs, filenames)
        csv_bytes = rows_to_csv_bytes(rows)
        out_name = filename or "medical_billing_extract.csv"
        return StreamingResponse(
            io.BytesIO(csv_bytes),
            media_type="text/csv",
            headers={"Content-Disposition": f'attachment; filename="{out_name}"'}
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Processing error: {e}")
+ """ + suffix = os.path.splitext(display_name)[1] or ".jpg" + tmp_path = None + try: + with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp: + tmp.write(blob) + tmp_path = tmp.name + + # Uses Google Vision + deskew + post-line grouping + info = smart_deskew_with_lines(tmp_path, None, clamp_deg=30.0, use_vision=True) + post_lines = info.get("post_lines", []) if info else [] + rows = extract_all_clients_from_lines(post_lines) if post_lines else [] + + # Add source file information (same as your Streamlit app) + for r in rows: + r["Source File"] = display_name + + # If nothing parsed, still return a placeholder row to indicate failure (optional) + if not rows: + rows.append({ + 'Patient Name': "", 'Patient ID': "", 'ICN': "", 'CDT Code': "", + 'Tooth': "", 'Date SVC': "", + 'Billed Amount': "", 'Allowed Amount': "", 'Paid Amount': "", + 'Extraction Success': False, 'Source File': display_name, + }) + + return rows + + finally: + if tmp_path: + try: + os.unlink(tmp_path) + except Exception: + pass + +def process_images_to_rows(blobs: List[bytes], filenames: List[str]) -> List[Dict]: + """ + Public API used by FastAPI routes. + blobs: list of image bytes + filenames: matching names for display / Source File column + """ + all_rows: List[Dict] = [] + for blob, name in zip(blobs, filenames): + rows = _process_single_image_bytes(blob, name) + all_rows.extend(rows) + + return all_rows + +def rows_to_csv_bytes(rows: List[Dict]) -> bytes: + """ + Convert pipeline rows to CSV bytes (for frontend to consume as a table). 
+ """ + df = pd.DataFrame(rows) + # Keep a stable column order if present (mirrors your Excel order) + desired = [ + 'Patient Name', 'Patient ID', 'ICN', 'CDT Code', 'Tooth', 'Date SVC', + 'Billed Amount', 'Allowed Amount', 'Paid Amount', + 'Extraction Success', 'Source File' + ] + cols = [c for c in desired if c in df.columns] + [c for c in df.columns if c not in desired] + df = df[cols] + return df.to_csv(index=False).encode("utf-8") diff --git a/apps/PaymentOCRService/complete-pipeline.py b/apps/PaymentOCRService/complete-pipeline.py new file mode 100644 index 0000000..d713127 --- /dev/null +++ b/apps/PaymentOCRService/complete-pipeline.py @@ -0,0 +1,837 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +End-to-end local pipeline (single script) + +- One Google Vision pass per image (DOCUMENT_TEXT_DETECTION) +- Smart deskew (Hough + OCR pairs) with fine grid search (in-memory) +- Build slope-aware (pre) and horizontal (post) line dumps (in-memory) +- Extract all clients & PD rows per page (robust to headers/EOBS) +- Export nicely formatted Excel via ExcelGenerator + +Usage: + python ocr_pipeline.py --input "C:\\imgs" --out "results.xlsx" + python ocr_pipeline.py --files s1.jpg s2.jpg --out results.xlsx + python ocr_pipeline.py --input "C:\\imgs" --out results.xlsx --deskewed-only +""" + +import os +import re +import io +import cv2 +import math +import glob +import argparse +import numpy as np +import pandas as pd +from typing import List, Dict, Tuple, Any, Optional +from datetime import datetime + +# ========= Debug switch ========= +# Set to True to re-enable saving deskewed images, writing *_lines_*.txt, +# and printing progress messages. 
def weighted_median(pairs: List[Tuple[float, float]]) -> float:
    """Return the weighted median of (value, weight) pairs; 0.0 for empty input.

    Walks the pairs in ascending value order and returns the first value at
    which the cumulative weight reaches half of the total weight.
    """
    if not pairs:
        return 0.0
    ordered = sorted(pairs, key=lambda item: item[0])
    half = sum(weight for _, weight in ordered) / 2.0
    running = 0.0
    for value, weight in ordered:
        running += weight
        if running >= half:
            return value
    # Unreachable for positive weights, but mirrors the defensive fallback.
    return ordered[-1][0]
90 + while ang < -45: ang += 90 + angs.append(ang) + angs = np.array(angs, dtype=float) + med = float(np.median(angs)) + keep = angs[np.abs(angs - med) <= 10.0] + return (float(np.median(keep)) if keep.size else med), int(angs.size) + +# ============================================================ +# Rotation (image + coordinates) and scoring +# ============================================================ +def rotation_matrix_keep_bounds(shape_hw: Tuple[int,int], angle_deg: float) -> Tuple[np.ndarray, Tuple[int,int]]: + h, w = shape_hw + center = (w/2.0, h/2.0) + M = cv2.getRotationMatrix2D(center, angle_deg, 1.0) + cos, sin = abs(M[0,0]), abs(M[0,1]) + new_w = int(h*sin + w*cos) + new_h = int(h*cos + w*sin) + M[0,2] += (new_w/2) - center[0] + M[1,2] += (new_h/2) - center[1] + return M, (new_h, new_w) + +def rotate_image_keep_bounds(img: np.ndarray, angle_deg: float, border_value=255) -> np.ndarray: + M, (nh, nw) = rotation_matrix_keep_bounds(img.shape[:2], angle_deg) + return cv2.warpAffine(img, M, (nw, nh), + flags=cv2.INTER_LINEAR, + borderMode=cv2.BORDER_CONSTANT, + borderValue=border_value) + +def transform_words(words: List[Dict], shape_hw: Tuple[int,int], angle_deg: float) -> List[Dict]: + M, _ = rotation_matrix_keep_bounds(shape_hw, angle_deg) + out = [] + for w in words: + x, y = (M @ np.array([w["cx"], w["cy"], 1.0])).tolist() + ww = dict(w) + ww["cx_rot"], ww["cy_rot"] = float(x), float(y) + out.append(ww) + return out + +def preview_score(img: np.ndarray, deskew_angle: float) -> float: + h, w = img.shape[:2] + scale = 1200.0 / max(h, w) + small = cv2.resize(img, (int(w*scale), int(h*scale)), interpolation=cv2.INTER_AREA) if scale < 1 else img + rot = rotate_image_keep_bounds(small, deskew_angle, border_value=255) + resid, n = estimate_skew_hough(rot, thr=140) + return abs(resid) if n > 0 else 90.0 + +# ============================================================ +# Slope-based clustering (pre-rotation) +# 
def line_from_points(p0, p1):
    """Line through two points as (slope, intercept); vertical lines → (inf, x)."""
    (x0, y0), (x1, y1) = p0, p1
    run = x1 - x0
    if abs(run) < 1e-9:
        return float("inf"), x0
    slope = (y1 - y0) / run
    return slope, y0 - slope * x0

def perp_distance(m, b, x, y):
    """Perpendicular distance from (x, y) to y = m*x + b (or x = b if m is inf)."""
    if math.isinf(m):
        return abs(x - b)
    return abs(m * x - y + b) / math.hypot(m, 1.0)

def refit_line(points: List[Tuple[float, float]]) -> Tuple[float, float]:
    """Least-squares (slope, intercept) fit through points.

    A single point yields a horizontal line through it; points sharing one
    x-coordinate yield a vertical line encoded as (inf, x_mean).
    """
    if len(points) == 1:
        return 0.0, points[0][1]
    xs, ys = zip(*points)
    x_mean = sum(xs) / len(xs)
    y_mean = sum(ys) / len(ys)
    sxx = sum((x - x_mean) ** 2 for x in xs)
    if abs(sxx) < 1e-12:
        return float("inf"), x_mean
    sxy = sum((x - x_mean) * (y - y_mean) for x, y in zip(xs, ys))
    slope = sxy / sxx
    return slope, y_mean - slope * x_mean

def project_t(m, b, x0, y0, x, y):
    """Scalar position of (x, y) along the line direction, measured from (x0, y0)."""
    if math.isinf(m):
        return y - y0
    return ((x - x0) + m * (y - y0)) / math.sqrt(1 + m * m)

def _build_line_result(words, idxs, m, b, rotated=False):
    """Assemble a line dict (joined text, member words, slope, centroid, count).

    Members are ordered by their projection along the fitted line, starting
    from the leftmost word in the group. `rotated` selects the post-rotation
    coordinate keys.
    """
    kx = "cx_rot" if rotated else "cx"
    ky = "cy_rot" if rotated else "cy"
    origin = min(idxs, key=lambda i: words[i][kx])
    x0, y0 = words[origin][kx], words[origin][ky]
    ordered = sorted(
        idxs,
        key=lambda i: project_t(m, b, x0, y0, words[i][kx], words[i][ky])
    )
    members = [words[i] for i in ordered]
    xs = [w[kx] for w in members]
    ys = [w[ky] for w in members]
    return {
        "text": " ".join(w["text"] for w in members),
        "words": members,
        "slope": m,
        "center_x": float(sum(xs) / len(xs)),
        "center_y": float(sum(ys) / len(ys)),
        "count": len(members),
    }
h_med + + remaining = set(range(len(words))) + order = sorted(remaining, key=lambda i: (words[i]["cy"], words[i]["cx"])) + lines = [] + + while remaining: + seed_idx = next(i for i in order if i in remaining) + remaining.remove(seed_idx) + sx, sy = words[seed_idx]["cx"], words[seed_idx]["cy"] + + cand_idxs = [j for j in remaining if abs(words[j]["cy"] - sy) <= band_dy] + if not cand_idxs: + if ALLOW_SINGLETON: + m,b = refit_line([(sx,sy)]) + lines.append(_build_line_result(words, {seed_idx}, m, b)) + continue + + cand_idxs.sort(key=lambda j: abs(words[j]["cx"] - sx)) + best_inliers = None; best_mb = None + for j in cand_idxs[:min(10, len(cand_idxs))]: + m,b = line_from_points((sx,sy), (words[j]["cx"], words[j]["cy"])) + inliers = {seed_idx, j} + for k in remaining: + xk, yk = words[k]["cx"], words[k]["cy"] + if perp_distance(m,b,xk,yk) <= perp_tol: + inliers.add(k) + if best_inliers is None or len(inliers) > len(best_inliers): + best_inliers, best_mb = inliers, (m,b) + + m,b = best_mb + pts = [(words[i]["cx"], words[i]["cy"]) for i in best_inliers] + m,b = refit_line(pts) + + expanded = set(best_inliers) + for idx in list(remaining): + xk, yk = words[idx]["cx"], words[idx]["cy"] + if perp_distance(m,b,xk,yk) <= perp_tol: + expanded.add(idx) + + for idx in expanded: + if idx in remaining: + remaining.remove(idx) + lines.append(_build_line_result(words, expanded, m, b)) + + lines.sort(key=lambda L: L["center_y"]) + return lines + +# ============================================================ +# Post-rotation grouping (simple horizontal lines) +# ============================================================ +def group_horizontal_lines(rotated_words: List[Dict]) -> List[Dict]: + if not rotated_words: return [] + hs = sorted([w["h"] for w in rotated_words if w["h"]>0]) + h_med = hs[len(hs)//2] if hs else 16.0 + y_tol = POST_Y_TOL_FACTOR * h_med + + idxs = list(range(len(rotated_words))) + idxs.sort(key=lambda i: (rotated_words[i]["cy_rot"], rotated_words[i]["cx_rot"])) 
def slope_to_deg(m: float) -> float:
    """Convert a line slope to its angle in degrees; vertical (inf) maps to 90."""
    return 90.0 if math.isinf(m) else math.degrees(math.atan(m))

def write_lines_txt(base_path: str, suffix: str, lines: List[Dict]) -> Optional[str]:
    """Dump clustered lines to '<base>_<suffix>.txt' next to the image.

    No-op (returns None) unless the module-level DEBUG flag is enabled.
    Returns the written path otherwise.
    """
    if not DEBUG:
        return None
    out_path = f"{os.path.splitext(base_path)[0]}_{suffix}.txt"
    with open(out_path, "w", encoding="utf-8") as handle:
        handle.write(f"# {os.path.basename(base_path)} ({suffix})\n")
        for idx, entry in enumerate(lines, 1):
            angle = slope_to_deg(entry["slope"])
            handle.write(f"[{idx:03d}] words={entry['count']:>3} slope={angle:+.3f}°\n")
            handle.write(entry["text"] + "\n\n")
    return out_path
a_p, n_p = (0.0, 0) + if words: + a_p, n_p = estimate_skew_pairs(words, y_band_mult=2.0, min_dx_mult=0.8, max_abs_deg=15.0) + + candidates = [] + if n_h >= 10: candidates += [a_h, -a_h] + if n_p >= 10: candidates += [a_p, -a_p] + if not candidates: candidates = [0.0] + + cand = [] + for a in candidates: + a = float(max(-clamp_deg, min(clamp_deg, a))) + if all(abs(a - b) > 0.05 for b in cand): + cand.append(a) + + grid = [] + for a in cand: + for d in (-0.6, -0.4, -0.2, 0.0, 0.2, 0.4, 0.6): + g = a + d + if all(abs(g - x) > 0.05 for x in grid): + grid.append(g) + + scored = [(a, preview_score(img, -a)) for a in grid] + best_angle, best_cost = min(scored, key=lambda t: t[1]) + + # Debug print kept as a comment + # print(f"[smart] hough={a_h:.3f}°(n={n_h}) pairs={a_p:.3f}°(n={n_p}) tried={', '.join(f'{a:+.2f}°' for a,_ in scored)} → chosen {best_angle:+.2f}° (cost={best_cost:.3f})") + + # Rotate in-memory. Save only if DEBUG. + rotated = rotate_image_keep_bounds(img, -best_angle, border_value=255) + if DEBUG and out_path: + cv2.imwrite(out_path, rotated) + + result = { + "angle_deg": float(best_angle), + "hough_lines": int(n_h), + "pair_samples": int(n_p), + "out_path": out_path if DEBUG else None, + "pre_txt": None, + "post_txt": None, + "pre_lines": [], + "post_lines": [], + "words": words, + "full_text": full_text, + } + + if words: + pre_lines = cluster_tilted_lines(words) + result["pre_lines"] = pre_lines + result["pre_txt"] = write_lines_txt(image_path, "lines_pre", pre_lines) # only if DEBUG + + rot_words = transform_words(words, img.shape[:2], -best_angle) + post_lines = group_horizontal_lines(rot_words) + result["post_lines"] = post_lines + result["post_txt"] = write_lines_txt(image_path, "lines_post", post_lines) # only if DEBUG + + # More debug prints kept as comments + # def preview(lines, tag): + # print(f" {tag} ({len(lines)} lines)") + # for L in lines[:5]: + # ang = slope_to_deg(L["slope"]) + # print(f" [{L['count']:>3} w] slope={ang:+.3f}° | 
# ============================================================
# Multi-client extraction from post lines (robust)
# ============================================================
MEMBER_RE = re.compile(r'\bMEMBER NAME\s*:\s*(.+)', re.IGNORECASE)
MEMBERID_RE = re.compile(r'\bMEMBER ID\s*:\s*([A-Za-z0-9]+)', re.IGNORECASE)
ICN_LINE_RE = re.compile(r'^\s*\d{12,}\b')

AMOUNT_RE = re.compile(r'(\d{1,3}(?:,\d{3})*\.\d{2})')  # decimals only
DATE6_RE = re.compile(r'\b\d{6}\b')
PD_ROW_RE = re.compile(r'\bPD\s+(D?\d{4})\b', re.IGNORECASE)
TOOTH_RE = re.compile(r'^(?:[1-9]|[12][0-9]|3[0-2]|[A-Ta-t])$')
SURFACE_RE = re.compile(r'^[MDBOILFP]{1,4}$', re.IGNORECASE)

def _to_float(s: str) -> float:
    """Parse a comma-grouped decimal string into a float; 0.0 on failure."""
    try:
        return float(s.replace(',', ''))
    except Exception:
        return 0.0

def _parse_pd_line(t: str) -> Optional[Tuple[str, Optional[float], Optional[float], Optional[float], Optional[str], Optional[str], Optional[str]]]:
    """
    Parse a single PD line.
    Returns: (CDT, billed, allowed, paid, date6, tooth, surface), or None when
    the line carries no PD code.

    Bug fix: the fallback token scan previously ran unconditionally and
    overwrote the exact tokens.index() hit, so any earlier 4-digit token
    (e.g. an account/ref number) became the anchor and mislocated the
    tooth/surface window. The scan now runs only when the normalized code
    is not present verbatim among the tokens.
    """
    m = PD_ROW_RE.search(t)
    if not m:
        return None

    # Normalize the CDT code to always carry the leading 'D'.
    code = m.group(1)
    code = code if code.upper().startswith('D') else f'D{code}'

    # Amounts: the last three decimal amounts are billed/allowed/paid.
    amts = [_to_float(x) for x in AMOUNT_RE.findall(t)]
    billed = allowed = paid = None
    if len(amts) >= 3:
        billed, allowed, paid = amts[-3:]

    # First 6-digit run is treated as the service date.
    d = None
    md = DATE6_RE.search(t)
    if md:
        d = md.group(0)

    tooth = None
    surface = None

    tokens = t.split()
    try:
        code_idx = tokens.index(code)
    except ValueError:
        code_idx = None
    if code_idx is None:
        # The code may appear without the 'D' prefix; anchor on the first
        # code-shaped token instead.
        for i, tok in enumerate(tokens):
            if PD_ROW_RE.match(f'PD {tok}'):
                code_idx = i
                break

    if code_idx is not None:
        # Tooth (and optional surface) live between the code and the date.
        date_idx = None
        for i in range(code_idx + 1, len(tokens)):
            if DATE6_RE.fullmatch(tokens[i]):
                date_idx = i
                break

        window = tokens[code_idx + 1: date_idx if date_idx is not None else len(tokens)]

        for tok in window:
            if TOOTH_RE.fullmatch(tok):
                tooth = tok.upper()
                break

        # Surface may only follow the tooth token within the window.
        start_j = 0
        if tooth is not None:
            for j, tok in enumerate(window):
                if tok.upper() == tooth:
                    start_j = j + 1
                    break
        for tok in window[start_j:]:
            if SURFACE_RE.fullmatch(tok):
                surface = tok.upper()
                break

    return code, billed, allowed, paid, d, tooth, surface

def extract_all_clients_from_lines(post_lines: List[dict]) -> List[dict]:
    """
    Split OCR lines into per-member blocks and emit one row per PD line.

    Blocks are delimited strictly by MEMBER NAME lines; anything before the
    first header is ignored. The ICN comes from the nearest long-digit line
    within five lines above the header, and the member ID from anywhere inside
    the block. A member with no parsable PD line still yields one placeholder
    row so the page is represented in the output.
    """
    texts = [L["text"] for L in post_lines]
    starts = [i for i, t in enumerate(texts) if MEMBER_RE.search(t)]
    if not starts:
        return []

    out_rows = []

    for si, start in enumerate(starts):
        end = starts[si + 1] if si + 1 < len(starts) else len(texts)

        # Member name: text after "MEMBER NAME:" cut at the next header keyword.
        raw_name = MEMBER_RE.search(texts[start]).group(1).strip()
        mname = raw_name
        for cp in ["MEMBER ID", "OTH INS CD", "PA:", "DIAG:"]:
            idx = mname.upper().find(cp)
            if idx != -1:
                mname = mname[:idx].strip()

        # Member ID: first match anywhere inside the block.
        mid = ""
        for t in texts[start:end]:
            m = MEMBERID_RE.search(t)
            if m:
                mid = m.group(1).strip()
                break

        # ICN: nearest long-digit line within five lines above the header.
        # (range stops at -1 exclusive, so k is always a valid index.)
        icn = ""
        for k in range(start - 1, max(-1, start - 6), -1):
            mm = ICN_LINE_RE.match(texts[k])
            if mm:
                icn = mm.group(0)
                break

        # PD rows in the block.
        had_pd = False
        for t in texts[start:end]:
            if " PD " not in f" {t} ":
                continue
            parsed = _parse_pd_line(t)
            if not parsed:
                continue
            had_pd = True
            code, billed, allowed, paid, dsvc, tooth, surface = parsed
            # NOTE: 'Surface' column intentionally not emitted (disabled upstream).
            out_rows.append({
                'Patient Name': mname.title() if mname else "",
                'Patient ID': mid,
                'ICN': icn,
                'CDT Code': code,
                'Tooth': tooth if tooth else "",
                'Date SVC': dsvc if dsvc else "",
                'Billed Amount': billed if billed is not None else "",
                'Allowed Amount': allowed if allowed is not None else "",
                'Paid Amount': paid if paid is not None else "",
                'Extraction Success': True,
            })

        if not had_pd:
            # Placeholder row so the member still shows up in the export.
            out_rows.append({
                'Patient Name': mname.title() if mname else "",
                'Patient ID': mid,
                'ICN': icn,
                'CDT Code': "",
                'Tooth': "",
                'Date SVC': "",
                'Billed Amount': "",
                'Allowed Amount': "",
                'Paid Amount': "",
                'Extraction Success': bool(mname or mid),
            })

    return out_rows
'No') + return excel_df + + def format_currency(self, value): + if pd.isna(value) or value == "": + return "" + try: + if isinstance(value, str): + clean_value = value.replace('$', '').replace(',', '') + value = float(clean_value) + return f"${value:,.2f}" + except (ValueError, TypeError): + return str(value) + + def format_worksheet(self, ws, data_rows): + header_row = 3 + for cell in ws[header_row]: + if cell.value: + cell.fill = self.header_fill + cell.font = self.header_font + cell.alignment = self.center_alignment + cell.border = self.border + for row in range(header_row + 1, data_rows + 1): + for cell in ws[row]: + cell.border = self.border + cell.alignment = Alignment(horizontal='left', vertical='center') + self.auto_adjust_columns(ws) + self.add_conditional_formatting(ws, header_row, data_rows) + + def auto_adjust_columns(self, ws): + max_col = ws.max_column + max_row = ws.max_row + for col_idx in range(1, max_col + 1): + max_len = 0 + for row in range(1, max_row + 1): + cell = ws.cell(row=row, column=col_idx) + if isinstance(cell, MergedCell): + continue + try: + val = cell.value + if val is None: + continue + max_len = max(max_len, len(str(val))) + except Exception: + pass + letter = get_column_letter(col_idx) + ws.column_dimensions[letter].width = min(max_len + 2, 50) + + def add_conditional_formatting(self, ws, header_row, data_rows): + success_col = None + for col, cell in enumerate(ws[header_row], 1): + if cell.value == 'Extraction Success': + success_col = col + break + if success_col: + for row in range(header_row + 1, data_rows + 1): + cell = ws.cell(row=row, column=success_col) + if cell.value == 'Yes': + cell.fill = PatternFill(start_color="90EE90", end_color="90EE90", fill_type="solid") + elif cell.value == 'No': + cell.fill = PatternFill(start_color="FFB6C1", end_color="FFB6C1", fill_type="solid") + + def add_summary_sheet(self, wb, df): + ws = wb.create_sheet(title="Summary") + ws['A1'] = "Extraction Summary" + ws['A1'].font = Font(size=16, 
bold=True) + ws.merge_cells('A1:B1') + row = 3 + stats = [ + ("Total Rows", len(df)), + ("Successful", len(df[df['Extraction Success'] == 'Yes']) if 'Extraction Success' in df.columns else 0), + ("Failed", len(df[df['Extraction Success'] == 'No']) if 'Extraction Success' in df.columns else 0), + ] + for name, val in stats: + ws[f'A{row}'] = name + ws[f'B{row}'] = val + ws[f'A{row}'].font = Font(bold=True) + row += 1 + ExcelGenerator().auto_adjust_columns(ws) + row += 2 + ws[f'A{row}'] = f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" + ws[f'A{row}'].font = Font(italic=True) + +# ============================================================ +# Runner: glue everything together +# ============================================================ +def process_images_to_excel(files: List[str], out_excel: str, deskewed_only: bool=False) -> None: + excel_gen = ExcelGenerator() + records: List[Dict[str, Any]] = [] + + for src in files: + try: + if deskewed_only: + img = cv2.imread(src, cv2.IMREAD_COLOR) + if img is None: + raise FileNotFoundError(src) + words, _ = extract_words_and_text(src) + rot_words = [] + for w in words: + ww = dict(w) + ww["cx_rot"], ww["cy_rot"] = w["cx"], w["cy"] + rot_words.append(ww) + post_lines = group_horizontal_lines(rot_words) + + post_txt = write_lines_txt(src, "lines_post", post_lines) # only if DEBUG + + rows = extract_all_clients_from_lines(post_lines) + for r in rows: + r["Source File"] = os.path.basename(src) + records.append(r) + # if DEBUG: print(f"{src} → parsed {len(rows)} PD rows (wrote {post_txt})") + + else: + base, ext = os.path.splitext(src) + dst = f"{base}_deskewed{ext if ext else '.jpg'}" if DEBUG else None + info = smart_deskew_with_lines(src, dst, clamp_deg=30.0, use_vision=True) + post_lines = info.get("post_lines", []) if info else [] + rows = extract_all_clients_from_lines(post_lines) if post_lines else [] + for r in rows: + r["Source File"] = os.path.basename(src) + records.append(r) + # if DEBUG: 
# ============================================================
# Runner: glue everything together
# ============================================================
def process_images_to_excel(files: List[str], out_excel: str, deskewed_only: bool = False) -> None:
    """Run the extraction pipeline over *files* and write an Excel workbook.

    files:         image paths to process.
    out_excel:     destination .xlsx path.
    deskewed_only: when True the images are assumed deskewed already and the
                   deskew step is skipped (OCR + line grouping only).

    A file that fails to process contributes one empty row with
    'Extraction Success' = False so failures stay visible in the output
    (presumably mapped to 'No' by create_excel_file — TODO confirm).
    """
    excel_gen = ExcelGenerator()
    records: List[Dict[str, Any]] = []

    for src in files:
        try:
            if deskewed_only:
                # imread doubles as an "is this a readable image?" check.
                img = cv2.imread(src, cv2.IMREAD_COLOR)
                if img is None:
                    raise FileNotFoundError(src)
                words, _ = extract_words_and_text(src)
                # No rotation was applied, so the raw centers stand in for the
                # rotated coordinates expected by the line-grouping step.
                rot_words = []
                for w in words:
                    ww = dict(w)
                    ww["cx_rot"], ww["cy_rot"] = w["cx"], w["cy"]
                    rot_words.append(ww)
                post_lines = group_horizontal_lines(rot_words)

                post_txt = write_lines_txt(src, "lines_post", post_lines)  # only if DEBUG

                rows = extract_all_clients_from_lines(post_lines)
                for r in rows:
                    r["Source File"] = os.path.basename(src)
                    records.append(r)
                # if DEBUG: print(f"{src} → parsed {len(rows)} PD rows (wrote {post_txt})")

            else:
                base, ext = os.path.splitext(src)
                # Only keep a *_deskewed copy on disk when debugging.
                dst = f"{base}_deskewed{ext if ext else '.jpg'}" if DEBUG else None
                info = smart_deskew_with_lines(src, dst, clamp_deg=30.0, use_vision=True)
                post_lines = info.get("post_lines", []) if info else []
                rows = extract_all_clients_from_lines(post_lines) if post_lines else []
                for r in rows:
                    r["Source File"] = os.path.basename(src)
                    records.append(r)
                # if DEBUG: print(f"{src} → rotated by {-info['angle_deg']:.3f}° → {dst}")

        except Exception as e:
            # if DEBUG: print(f"{src}: {e}")
            # Keep the failure visible in the spreadsheet instead of dropping the file.
            records.append({
                'Patient Name': "", 'Patient ID': "", 'ICN': "", 'CDT Code': "",
                'Date SVC': "", 'Billed Amount': "", 'Allowed Amount': "", 'Paid Amount': "",
                'Extraction Success': False, 'Source File': os.path.basename(src),
            })

    df = pd.DataFrame.from_records(records)
    data = excel_gen.create_excel_file(df)
    with open(out_excel, "wb") as f:
        f.write(data)

# ============================================================
# CLI
# ============================================================
def main():
    """CLI entry point: collect image paths, validate, run the pipeline."""
    ap = argparse.ArgumentParser()
    ap.add_argument("--input", help="Folder of images (jpg/png/tif).", default=None)
    ap.add_argument("--files", nargs="*", help="Specific image files.", default=None)
    ap.add_argument("--out", help="Output Excel path.", required=True)
    ap.add_argument("--deskewed-only", action="store_true",
                    help="Only process files whose name contains '_deskewed'; skip deskew step.")
    args = ap.parse_args()

    paths: List[str] = []
    if args.files:
        for f in args.files:
            if os.path.isfile(f):
                paths.append(f)
    if args.input and os.path.isdir(args.input):
        for ext in ("*.jpg", "*.jpeg", "*.png", "*.tif", "*.tiff", "*.bmp"):
            paths.extend(glob.glob(os.path.join(args.input, ext)))

    # FIX: de-duplicate while preserving order — a file given via --files AND
    # found under --input was previously processed (and reported) twice.
    paths = list(dict.fromkeys(paths))

    if args.deskewed_only:
        paths = [p for p in paths if "_deskewed" in os.path.basename(p).lower()]

    if not paths:
        raise SystemExit("No input images found. Use --files or --input (and --deskewed-only if desired).")

    if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS"):
        # FIX: was a commented-out print over a bare `pass`; surface the warning
        # for real on stderr so stdout stays clean for tooling.
        import sys
        print("WARNING: GOOGLE_APPLICATION_CREDENTIALS not set. "
              "Set it to your local service account JSON path.", file=sys.stderr)

    process_images_to_excel(paths, args.out, deskewed_only=args.deskewed_only)

if __name__ == "__main__":
    main()
+ +- Matching key: 'Procedure Code' (case-insensitive, trimmed). +- Keeps the full record from the other files (including extra fields like 'Full Price'). +- Deduplicates by Procedure Code across the collected "missing" results. + +CONFIG: set MAIN_PATH, OTHER_PATHS, OUT_PATH below. +""" + +import json +from pathlib import Path +from typing import List, Dict, Any + +# ========================= +# CONFIG — EDIT THESE ONLY +# ========================= +MAIN_PATH = "procedureCodesMain.json" # your main JSON (with PriceLTEQ21/PriceGT21) +OTHER_PATHS = [ + "procedureCodesOld.json", # one or more other JSON files to compare against the main + # "other2.json", +] +OUT_PATH = "not_in_main.json" # where to write the results +# ========================= + + +def _load_json_any(path: str) -> List[Dict[str, Any]]: + """ + Load JSON. Accept: + - a list of objects + - a single object (wraps into a list) + """ + with open(path, "r", encoding="utf-8") as f: + data = json.load(f) + if isinstance(data, dict): + return [data] + if isinstance(data, list): + # filter out non-dict items defensively + return [x for x in data if isinstance(x, dict)] + raise ValueError(f"Unsupported JSON top-level type in {path}: {type(data)}") + + +def _norm_code(record: Dict[str, Any]) -> str: + # Normalize the 'Procedure Code' for matching + code = str(record.get("Procedure Code", "")).strip().upper() + # Some PDFs might have stray spaces, tabs, or zero-width chars + code = "".join(ch for ch in code if not ch.isspace()) + return code + + +def collect_main_codes(main_path: str) -> set: + main_items = _load_json_any(main_path) + codes = {_norm_code(rec) for rec in main_items if _norm_code(rec)} + return codes + + +def collect_missing_records(other_paths: List[str], main_codes: set) -> List[Dict[str, Any]]: + missing: Dict[str, Dict[str, Any]] = {} # map normalized code -> record + for p in other_paths: + items = _load_json_any(p) + for rec in items: + code_norm = _norm_code(rec) + if not code_norm: + 
continue + if code_norm not in main_codes and code_norm not in missing: + # Keep the full original record + missing[code_norm] = rec + # return in a stable, sorted order by code + return [missing[k] for k in sorted(missing.keys())] + + +def main(): + # Validate files exist + if not Path(MAIN_PATH).exists(): + raise FileNotFoundError(f"Main file not found: {MAIN_PATH}") + for p in OTHER_PATHS: + if not Path(p).exists(): + raise FileNotFoundError(f"Other file not found: {p}") + + main_codes = collect_main_codes(MAIN_PATH) + missing_records = collect_missing_records(OTHER_PATHS, main_codes) + + with open(OUT_PATH, "w", encoding="utf-8") as f: + json.dump(missing_records, f, ensure_ascii=False, indent=2) + + print(f"Main codes: {len(main_codes)}") + print(f"Missing from main: {len(missing_records)}") + print(f"Wrote results to {OUT_PATH}") + # Also echo to stdout + print(json.dumps(missing_records, ensure_ascii=False, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/apps/ProcedureCodeFromMhPdf/extract_bypage.py b/apps/ProcedureCodeFromMhPdf/extract_bypage.py new file mode 100644 index 0000000..ab13136 --- /dev/null +++ b/apps/ProcedureCodeFromMhPdf/extract_bypage.py @@ -0,0 +1,183 @@ +import re +import json +from typing import List, Dict +import fitz # PyMuPDF + + +# ========================= +# CONFIG — EDIT THESE ONLY +# ========================= +PDF_PATH = "MH.pdf" # path to your PDF +PAGES = [2] # 0-based page indexes to parse, e.g., [2] for the page you showed +OUT_PATH = "output.json" # where to write JSON +FIRST_PRICE_IS_LTE21 = True # True => first price line is <=21; False => first price is >21 +PRINT_PAGE_TEXT = False # set True if you want to print the raw page text for sanity check +# ========================= + + +# --- patterns --- +code_line_re = re.compile(r"^\s*(D\d{4})\s*$") +# a price token is either '$123', '$1,234.50', '123', '123.45', or 'NC' +price_line_re = 
# --- line-classification patterns ---
# A procedure code stands alone on its line, e.g. "D2160".
code_line_re = re.compile(r"^\s*(D\d{4})\s*$")
# A price token is '$123', '$1,234.50', '123', '123.45', or 'NC' (no charge).
price_line_re = re.compile(r"^\s*(?:\$\s*)?(\d{1,3}(?:,\d{3})*(?:\.\d{2})?|\d+(?:\.\d{2})?|NC)\s*$", re.IGNORECASE)
# lines that definitely start a notes block we should ignore once prices are done
note_starters_re = re.compile(r"^(Teeth\b|One of\b|--|—|–|Age limitation:|CR\b)", re.IGNORECASE)


def normalize_ws(s: str) -> str:
    """Collapse all whitespace runs to single spaces and strip edge punctuation."""
    s = s.replace("\u00a0", " ")
    s = re.sub(r"[ \t]+", " ", s)
    s = re.sub(r"\s*\n\s*", " ", s)
    s = re.sub(r"\s{2,}", " ", s)
    return s.strip(" ,.;:-•·\n\t")


def clean_money(token: str) -> str:
    """Return a bare numeric string ('1234.50') or the literal 'NC'."""
    if token.upper() == "NC":
        return "NC"
    return token.replace(",", "").lstrip("$").strip()


def get_page_lines(pdf_path: str, pages: List[int]) -> List[str]:
    """Extract raw text lines from the given 0-based *pages* of *pdf_path*.

    Raises ValueError if any page index is out of range (validated for the
    whole request before extracting anything). Line boundaries are kept so the
    record parser can work line-by-line.
    """
    doc = fitz.open(pdf_path)
    try:
        max_idx = len(doc) - 1
        for p in pages:
            if p < 0 or p > max_idx:
                raise ValueError(f"Invalid page index {p}. Valid range is 0..{max_idx}.")
        lines: List[str] = []
        for p in pages:
            text = doc.load_page(p).get_text("text") or ""
            if PRINT_PAGE_TEXT:
                print(f"\n--- RAW PAGE {p} ---\n{text}")
            lines.extend(text.splitlines())
        return lines
    finally:
        doc.close()


def extract_records(lines: List[str], first_price_is_lte21=None) -> List[Dict[str, str]]:
    """Parse code/description/price records out of raw PDF text lines.

    A record is a 'Dxxxx' code line, 1-3 description lines, then two price
    lines. Records that do not yield two reliable prices are skipped.

    first_price_is_lte21: optional override for the module-level
    FIRST_PRICE_IS_LTE21 flag (True => first price is the <=21 fee); the
    default None preserves the original flag-driven behavior.
    """
    lte21_first = FIRST_PRICE_IS_LTE21 if first_price_is_lte21 is None else first_price_is_lte21
    out: List[Dict[str, str]] = []
    i = 0
    n = len(lines)

    while i < n:
        line = lines[i].strip()

        # seek a code line
        mcode = code_line_re.match(line)
        if not mcode:
            i += 1
            continue

        code = mcode.group(1)
        i += 1

        # skip blank lines before description
        while i < n and not lines[i].strip():
            i += 1

        # Collect description lines until the first price token; also stop on
        # another code (defensive), a notes block, or a blank line.
        desc_lines: List[str] = []
        j = i
        while j < n:
            s = lines[j].strip()
            if not s:
                break
            if code_line_re.match(s):
                break  # next code — no prices found; abandon this broken record
            if price_line_re.match(s):
                break  # reached price section
            if note_starters_re.match(s):
                break  # note before price — prices may be missing
            desc_lines.append(s)
            j += 1
        i = j  # advance to where description scanning stopped

        description = normalize_ws(" ".join(desc_lines))

        # collect up to two price tokens, skipping note/flag blocks in between
        prices: List[str] = []
        while i < n and len(prices) < 2:
            s = lines[i].strip()
            if not s:
                i += 1
                continue
            if code_line_re.match(s):
                break  # new record — we never got both prices (malformed)
            mprice = price_line_re.match(s)
            if mprice:
                prices.append(clean_money(mprice.group(1)))
                i += 1
                continue
            if note_starters_re.match(s) or s in {"Y", "NC"}:
                # Skip the whole note/flags block: every non-empty, non-code
                # line until a blank line or the next code.
                i += 1
                while i < n:
                    t = lines[i].strip()
                    if not t or code_line_re.match(t):
                        break
                    i += 1
                continue
            # unrecognized line: if prices already found, stop; else skip it
            if prices:
                break
            i += 1

        if len(prices) < 2:
            continue  # couldn't find 2 prices reliably; skip this record

        if lte21_first:
            price_lte21, price_gt21 = prices[0], prices[1]
        else:
            price_lte21, price_gt21 = prices[1], prices[0]

        out.append(
            {
                "Procedure Code": code,
                "Description": description,
                "PriceLTEQ21": price_lte21,
                "PriceGT21": price_gt21,
            }
        )

        # after prices, skip forward until the next code or blank block end
        while i < n:
            s = lines[i].strip()
            if not s:
                i += 1
                break
            if code_line_re.match(s):
                break
            i += 1

    return out


def extract_pdf_to_json(pdf_path: str, pages: List[int], out_path: str) -> List[Dict[str, str]]:
    """Parse *pages* of *pdf_path* and dump the records to *out_path* as JSON."""
    lines = get_page_lines(pdf_path, pages)
    data = extract_records(lines)
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    return data


if __name__ == "__main__":
    data = extract_pdf_to_json(PDF_PATH, PAGES, OUT_PATH)
    print(f"Wrote {len(data)} rows to {OUT_PATH}")
    print(json.dumps(data, ensure_ascii=False, indent=2))
# --- line-classification patterns ---
# A procedure code stands alone on its line, e.g. "D2160".
code_line_re = re.compile(r"^\s*(D\d{4})\s*$")
# A price token is '$123', '$1,234.50', '123', '123.45', or 'NC' (no charge).
price_line_re = re.compile(r"^\s*(?:\$\s*)?(\d{1,3}(?:,\d{3})*(?:\.\d{2})?|\d+(?:\.\d{2})?|NC)\s*$", re.IGNORECASE)
# lines that definitely start a notes block to ignore once prices are done
note_starters_re = re.compile(r"^(Teeth\b|One of\b|--|—|–|Age limitation:|CR\b)", re.IGNORECASE)


def normalize_ws(s: str) -> str:
    """Collapse all whitespace runs to single spaces and strip edge punctuation."""
    s = s.replace("\u00a0", " ")
    s = re.sub(r"[ \t]+", " ", s)
    s = re.sub(r"\s*\n\s*", " ", s)
    s = re.sub(r"\s{2,}", " ", s)
    return s.strip(" ,.;:-•·\n\t")


def clean_money(token: str) -> str:
    """Return a bare numeric string ('1234.50') or the literal 'NC'."""
    if token.upper() == "NC":
        return "NC"
    return token.replace(",", "").lstrip("$").strip()


def get_page_lines(pdf_path: str, page_start_1b: int, page_end_1b: int) -> List[str]:
    """Extract raw text lines from an inclusive 1-based page range.

    Raises ValueError for non-positive, inverted, or out-of-bounds ranges.
    Line boundaries are kept so the record parser can work line-by-line.
    """
    if page_start_1b <= 0 or page_end_1b <= 0:
        raise ValueError("PAGE_START and PAGE_END must be >= 1 (1-based).")
    if page_start_1b > page_end_1b:
        raise ValueError("PAGE_START cannot be greater than PAGE_END.")

    doc = fitz.open(pdf_path)
    try:
        last_idx_0b = len(doc) - 1
        # convert to 0-based inclusive range
        start_0b = page_start_1b - 1
        end_0b = page_end_1b - 1
        if start_0b < 0 or end_0b > last_idx_0b:
            raise ValueError(f"Page range out of bounds. Valid 1-based range is 1..{last_idx_0b + 1}.")
        lines: List[str] = []
        for p in range(start_0b, end_0b + 1):
            text = doc.load_page(p).get_text("text") or ""
            if PRINT_PAGE_TEXT:
                print(f"\n--- RAW PAGE {p} (0-based; shown as {p+1} 1-based) ---\n{text}")
            lines.extend(text.splitlines())
        return lines
    finally:
        doc.close()


def extract_records(lines: List[str], first_price_is_lte21=None) -> List[Dict[str, str]]:
    """Parse code/description/price records out of raw PDF text lines.

    A record is a 'Dxxxx' code line, 1-3 description lines, then two price
    lines. Records that do not yield two reliable prices are skipped.

    first_price_is_lte21: optional override for the module-level
    FIRST_PRICE_IS_LTE21 flag (True => first price is the <=21 fee); the
    default None preserves the original flag-driven behavior. Mirrors the
    same parser in extract_bypage.py.
    """
    lte21_first = FIRST_PRICE_IS_LTE21 if first_price_is_lte21 is None else first_price_is_lte21
    out: List[Dict[str, str]] = []
    i = 0
    n = len(lines)

    while i < n:
        line = lines[i].strip()

        # seek a code line
        mcode = code_line_re.match(line)
        if not mcode:
            i += 1
            continue

        code = mcode.group(1)
        i += 1

        # skip blank lines before description
        while i < n and not lines[i].strip():
            i += 1

        # Collect description lines until the first price token; also stop on
        # another code (defensive), a notes block, or a blank line.
        desc_lines: List[str] = []
        j = i
        while j < n:
            s = lines[j].strip()
            if not s:
                break
            if code_line_re.match(s):
                break  # next code — description ended abruptly (malformed)
            if price_line_re.match(s):
                break  # reached price section
            if note_starters_re.match(s):
                break  # note before price — prices may be missing
            desc_lines.append(s)
            j += 1
        i = j  # advance to where description scanning stopped

        description = normalize_ws(" ".join(desc_lines))

        # collect up to two price tokens, skipping note/flag blocks in between
        prices: List[str] = []
        while i < n and len(prices) < 2:
            s = lines[i].strip()
            if not s:
                i += 1
                continue
            if code_line_re.match(s):
                break  # new record — we never got both prices (malformed)
            mprice = price_line_re.match(s)
            if mprice:
                prices.append(clean_money(mprice.group(1)))
                i += 1
                continue
            if note_starters_re.match(s) or s in {"Y", "NC"}:
                # Skip the whole note/flags block: every non-empty, non-code
                # line until a blank line or the next code.
                i += 1
                while i < n:
                    t = lines[i].strip()
                    if not t or code_line_re.match(t):
                        break
                    i += 1
                continue
            # unrecognized line: if we already captured some prices, break; else skip
            if prices:
                break
            i += 1

        if len(prices) < 2:
            continue  # couldn't find 2 prices reliably; skip this record

        if lte21_first:
            price_lte21, price_gt21 = prices[0], prices[1]
        else:
            price_lte21, price_gt21 = prices[1], prices[0]

        out.append(
            {
                "Procedure Code": code,
                "Description": description,
                "PriceLTEQ21": price_lte21,
                "PriceGT21": price_gt21,
            }
        )

        # after prices, skip forward until the next code or blank block end
        while i < n:
            s = lines[i].strip()
            if not s:
                i += 1
                break
            if code_line_re.match(s):
                break
            i += 1

    return out


def extract_pdf_range_to_json(pdf_path: str, page_start_1b: int, page_end_1b: int, out_path: str) -> List[Dict[str, str]]:
    """Parse the 1-based page range and dump the records to *out_path* as JSON."""
    lines = get_page_lines(pdf_path, page_start_1b, page_end_1b)
    data = extract_records(lines)
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    return data


if __name__ == "__main__":
    data = extract_pdf_range_to_json(PDF_PATH, PAGE_START, PAGE_END, OUT_PATH)
    print(f"Wrote {len(data)} rows to {OUT_PATH}")
    print(json.dumps(data, ensure_ascii=False, indent=2))
"Price": "50" + }, + { + "Procedure Code": "D0272", + "Description": "2 BW", + "Price": "80" + }, + { + "Procedure Code": "D0274", + "Description": "4BW", + "Price": "160" + }, + { + "Procedure Code": "D0330", + "Description": "pano", + "Price": "150" + }, + { + "Procedure Code": "D0364", + "Description": "Less than one jaw", + "Price": "350" + }, + { + "Procedure Code": "D0365", + "Description": "Mand", + "Price": "350" + }, + { + "Procedure Code": "D0366", + "Description": "Max", + "Price": "350" + }, + { + "Procedure Code": "D0367", + "Description": "", + "Price": "400" + }, + { + "Procedure Code": "D0368", + "Description": "include TMJ", + "Price": "375" + }, + { + "Procedure Code": "D0380", + "Description": "Less than one jaw", + "Price": "300" + }, + { + "Procedure Code": "D0381", + "Description": "Mand", + "Price": "300" + }, + { + "Procedure Code": "D0382", + "Description": "Max", + "Price": "300" + }, + { + "Procedure Code": "D0383", + "Description": "", + "Price": "350" + }, + { + "Procedure Code": "D1110", + "Description": "adult prophy", + "Price": "150" + }, + { + "Procedure Code": "D1120", + "Description": "child prophy", + "Price": "120" + }, + { + "Procedure Code": "D1208", + "Description": "FL", + "Price": "90" + }, + { + "Procedure Code": "D1351", + "Description": "sealant", + "Price": "80" + }, + { + "Procedure Code": "D1999", + "Description": "", + "Price": "50" + }, + { + "Procedure Code": "D2140", + "Description": "amalgam, one surface", + "Price": "150" + }, + { + "Procedure Code": "D2150", + "Description": "amalgam, two surface", + "Price": "200" + }, + { + "Procedure Code": "D2955", + "Description": "post renoval", + "Price": "350" + }, + { + "Procedure Code": "D4910", + "Description": "perio maintains", + "Price": "250" + }, + { + "Procedure Code": "D5510", + "Description": "Repair broken complete denture base (QUAD)", + "Price": "400" + }, + { + "Procedure Code": "D6056", + "Description": "pre fab abut", + "Price": "750" + }, + { + 
"Procedure Code": "D6057", + "Description": "custom abut", + "Price": "800" + }, + { + "Procedure Code": "D6058", + "Description": "porcelain, implant crown, ceramic crown", + "Price": "1400" + }, + { + "Procedure Code": "D6059", + "Description": "", + "Price": "1400" + }, + { + "Procedure Code": "D6100", + "Description": "", + "Price": "320" + }, + { + "Procedure Code": "D6110", + "Description": "implant", + "Price": "1600" + }, + { + "Procedure Code": "D6242", + "Description": "noble metal. For united", + "Price": "1400" + }, + { + "Procedure Code": "D6245", + "Description": "porcelain, not for united", + "Price": "1400" + }, + { + "Procedure Code": "D7910", + "Description": "suture, small wound up to 5 mm", + "Price": "400" + }, + { + "Procedure Code": "D7950", + "Description": "max", + "Price": "800" + } +] \ No newline at end of file diff --git a/apps/ProcedureCodeFromMhPdf/procedureCodes.json b/apps/ProcedureCodeFromMhPdf/procedureCodes.json new file mode 100644 index 0000000..912b351 --- /dev/null +++ b/apps/ProcedureCodeFromMhPdf/procedureCodes.json @@ -0,0 +1,1026 @@ +[ + { + "Procedure Code": "D0120", + "Description": "perio exam", + "Price": "105" + }, + { + "Procedure Code": "D0140", + "Description": "limited exam", + "Price": "90" + }, + { + "Procedure Code": "D0150", + "Description": "comprehensive exam", + "Price": "120" + }, + { + "Procedure Code": "D0210", + "Description": "Fmx.", + "Price": "120" + }, + { + "Procedure Code": "D0220", + "Description": "first PA.", + "Price": "60" + }, + { + "Procedure Code": "D0230", + "Description": "2nd PA.", + "Price": "50" + }, + { + "Procedure Code": "D0272", + "Description": "2 BW", + "Price": "80" + }, + { + "Procedure Code": "D0274", + "Description": "4BW", + "Price": "160" + }, + { + "Procedure Code": "D0330", + "Description": "pano", + "Price": "150" + }, + { + "Procedure Code": "D0364", + "Description": "Less than one jaw", + "Price": "350" + }, + { + "Procedure Code": "D0365", + "Description": "Mand", + 
"Price": "350" + }, + { + "Procedure Code": "D0366", + "Description": "Max", + "Price": "350" + }, + { + "Procedure Code": "D0367", + "Description": "", + "Price": "400" + }, + { + "Procedure Code": "D0368", + "Description": "include TMJ", + "Price": "375" + }, + { + "Procedure Code": "D0380", + "Description": "Less than one jaw", + "Price": "300" + }, + { + "Procedure Code": "D0381", + "Description": "Mand", + "Price": "300" + }, + { + "Procedure Code": "D0382", + "Description": "Max", + "Price": "300" + }, + { + "Procedure Code": "D0383", + "Description": "", + "Price": "350" + }, + { + "Procedure Code": "D1110", + "Description": "adult prophy", + "Price": "150" + }, + { + "Procedure Code": "D1120", + "Description": "child prophy", + "Price": "120" + }, + { + "Procedure Code": "D1208", + "Description": "FL", + "Price": "90" + }, + { + "Procedure Code": "D1351", + "Description": "sealant", + "Price": "80" + }, + { + "Procedure Code": "D1999", + "Description": "", + "Price": "50" + }, + { + "Procedure Code": "D2140", + "Description": "amalgam, one surface", + "Price": "150" + }, + { + "Procedure Code": "D2150", + "Description": "amalgam, two surface", + "Price": "200" + }, + { + "Procedure Code": "D2955", + "Description": "post renoval", + "Price": "350" + }, + { + "Procedure Code": "D4910", + "Description": "perio maintains", + "Price": "250" + }, + { + "Procedure Code": "D5510", + "Description": "Repair broken complete denture base (QUAD)", + "Price": "400" + }, + { + "Procedure Code": "D6056", + "Description": "pre fab abut", + "Price": "750" + }, + { + "Procedure Code": "D6057", + "Description": "custom abut", + "Price": "800" + }, + { + "Procedure Code": "D6058", + "Description": "porcelain, implant crown, ceramic crown", + "Price": "1400" + }, + { + "Procedure Code": "D6059", + "Description": "", + "Price": "1400" + }, + { + "Procedure Code": "D6100", + "Description": "", + "Price": "320" + }, + { + "Procedure Code": "D6110", + "Description": "implant", + 
"Price": "1600" + }, + { + "Procedure Code": "D6242", + "Description": "noble metal. For united", + "Price": "1400" + }, + { + "Procedure Code": "D6245", + "Description": "porcelain, not for united", + "Price": "1400" + }, + { + "Procedure Code": "D7910", + "Description": "suture, small wound up to 5 mm", + "Price": "400" + }, + { + "Procedure Code": "D7950", + "Description": "max", + "Price": "800" + }, + { + "Procedure Code": "D2160", + "Description": "Amalgam-three surfaces, primary or permanent", + "PriceLTEQ21": "110", + "PriceGT21": "92" + }, + { + "Procedure Code": "D2161", + "Description": "Amalgam-four or more surfaces, primary or permanent", + "PriceLTEQ21": "137", + "PriceGT21": "116" + }, + { + "Procedure Code": "D2330", + "Description": "Resin-based composite – one surface, anterior", + "PriceLTEQ21": "98", + "PriceGT21": "72" + }, + { + "Procedure Code": "D2331", + "Description": "Resin-based composite – two surfaces, anterior", + "PriceLTEQ21": "118", + "PriceGT21": "92" + }, + { + "Procedure Code": "D2332", + "Description": "Resin-based composite – three surfaces, anterior", + "PriceLTEQ21": "147", + "PriceGT21": "116" + }, + { + "Procedure Code": "D2335", + "Description": "Resin-based composite – four or more surfaces or involving incisal angle (anterior)", + "PriceLTEQ21": "188", + "PriceGT21": "146" + }, + { + "Procedure Code": "D2390", + "Description": "Resin-based composite crown, anterior", + "PriceLTEQ21": "133", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D2391", + "Description": "Resin-based composite – one surface, posterior", + "PriceLTEQ21": "99", + "PriceGT21": "62" + }, + { + "Procedure Code": "D2392", + "Description": "Resin-based composite – two surfaces, posterior", + "PriceLTEQ21": "123", + "PriceGT21": "77" + }, + { + "Procedure Code": "D2393", + "Description": "Resin-based composite – three surfaces, posterior", + "PriceLTEQ21": "133", + "PriceGT21": "92" + }, + { + "Procedure Code": "D2394", + "Description": "Resin-based 
composite – four or more surfaces, posterior", + "PriceLTEQ21": "182", + "PriceGT21": "116" + }, + { + "Procedure Code": "D2710", + "Description": "Crown – resin-based composite (indirect)", + "PriceLTEQ21": "244", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D2740", + "Description": "Crown – porcelain/ceramic", + "PriceLTEQ21": "853", + "PriceGT21": "729" + }, + { + "Procedure Code": "D2750", + "Description": "Crown – porcelain fused to high noble metal", + "PriceLTEQ21": "800", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D2751", + "Description": "Crown – porcelain fused to predominantly base metal", + "PriceLTEQ21": "727", + "PriceGT21": "613" + }, + { + "Procedure Code": "D2752", + "Description": "Crown – porcelain fused to noble metal", + "PriceLTEQ21": "735", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D2790", + "Description": "Crown – full cast high noble metal", + "PriceLTEQ21": "808", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D2910", + "Description": "Re-cement or re-bond inlay, onlay or partial coverage restoration", + "PriceLTEQ21": "69", + "PriceGT21": "57" + }, + { + "Procedure Code": "D2920", + "Description": "Re-cement or re-bond crown", + "PriceLTEQ21": "68", + "PriceGT21": "57" + }, + { + "Procedure Code": "D2929", + "Description": "Prefabricated porcelain/ceramic crown – primary tooth", + "PriceLTEQ21": "224", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D2930", + "Description": "Prefabricated stainless steel crown – primary tooth", + "PriceLTEQ21": "205", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D2931", + "Description": "Prefabricated stainless steel crown – permanent tooth", + "PriceLTEQ21": "199", + "PriceGT21": "171" + }, + { + "Procedure Code": "D2932", + "Description": "Prefabricated resin crown", + "PriceLTEQ21": "224", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D2934", + "Description": "Prefabricated esthetic coated stainless steel crown – primary tooth", + "PriceLTEQ21": "184", + "PriceGT21": 
"NC" + }, + { + "Procedure Code": "D2950", + "Description": "Core buildup, including any pins when required", + "PriceLTEQ21": "197", + "PriceGT21": "164" + }, + { + "Procedure Code": "D2951", + "Description": "Pin retention – per tooth, in addition to restoration", + "PriceLTEQ21": "31", + "PriceGT21": "27" + }, + { + "Procedure Code": "D2954", + "Description": "Prefabricated post and core in addition to crown", + "PriceLTEQ21": "229", + "PriceGT21": "191" + }, + { + "Procedure Code": "D2980", + "Description": "Crown repair necessitated by restorative material failure", + "PriceLTEQ21": "137", + "PriceGT21": "115" + }, + { + "Procedure Code": "D2999", + "Description": "Unspecified restorative procedure, by report", + "PriceLTEQ21": "IC", + "PriceGT21": "IC" + }, + { + "Procedure Code": "D3120", + "Description": "Pulp cap – indirect (excluding final restoration)", + "PriceLTEQ21": "40", + "PriceGT21": "34" + }, + { + "Procedure Code": "D3220", + "Description": "Therapeutic pulpotomy (excluding final restoration) – removal of pulp coronal to the dentinocemental junction and application of medicament", + "PriceLTEQ21": "106", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D3310", + "Description": "Endodontic therapy, anterior (excluding final restoration)", + "PriceLTEQ21": "544", + "PriceGT21": "544" + }, + { + "Procedure Code": "D3320", + "Description": "Endodontic therapy, premolar tooth (excluding final restoration)", + "PriceLTEQ21": "639", + "PriceGT21": "639" + }, + { + "Procedure Code": "D3330", + "Description": "Endodontic therapy, molar tooth (excluding final restoration)", + "PriceLTEQ21": "829", + "PriceGT21": "829" + }, + { + "Procedure Code": "D3346", + "Description": "Retreatment of previous root canal therapy – anterior", + "PriceLTEQ21": "545", + "PriceGT21": "456" + }, + { + "Procedure Code": "D3347", + "Description": "Retreatment of previous root canal therapy – premolar", + "PriceLTEQ21": "641", + "PriceGT21": "538" + }, + { + "Procedure Code": 
"D3348", + "Description": "Retreatment of previous root canal therapy – molar", + "PriceLTEQ21": "789", + "PriceGT21": "613" + }, + { + "Procedure Code": "D3410", + "Description": "Apicoectomy – anterior", + "PriceLTEQ21": "471", + "PriceGT21": "407" + }, + { + "Procedure Code": "D3421", + "Description": "Apicoectomy – premolar (first root)", + "PriceLTEQ21": "550", + "PriceGT21": "460" + }, + { + "Procedure Code": "D3425", + "Description": "Apicoectomy – molar (first root)", + "PriceLTEQ21": "639", + "PriceGT21": "598" + }, + { + "Procedure Code": "D3426", + "Description": "Apicoectomy (each additional root)", + "PriceLTEQ21": "264", + "PriceGT21": "230" + }, + { + "Procedure Code": "D4210", + "Description": "Gingivectomy or gingivoplasty - Four or more contiguous teeth or bounded teeth spaces per quadrant", + "PriceLTEQ21": "343", + "PriceGT21": "307" + }, + { + "Procedure Code": "D4211", + "Description": "Gingivectomy or gingivoplasty - one to three contiguous teeth or bounded teeth spaces per quadrant", + "PriceLTEQ21": "133", + "PriceGT21": "111" + }, + { + "Procedure Code": "D4341", + "Description": "Periodontal scaling and root planing - four or more teeth per quadrant", + "PriceLTEQ21": "160", + "PriceGT21": "134" + }, + { + "Procedure Code": "D4342", + "Description": "Periodontal scaling and root planing - one to three teeth, per quadrant", + "PriceLTEQ21": "107", + "PriceGT21": "90" + }, + { + "Procedure Code": "D4346", + "Description": "Scaling in presence of generalized moderate or severe gingival inflammation – full mouth, after oral evaluation", + "PriceLTEQ21": "75", + "PriceGT21": "60" + }, + { + "Procedure Code": "D5110", + "Description": "Complete denture – maxillary", + "PriceLTEQ21": "858", + "PriceGT21": "730" + }, + { + "Procedure Code": "D5120", + "Description": "Complete denture – mandibular", + "PriceLTEQ21": "852", + "PriceGT21": "730" + }, + { + "Procedure Code": "D5130", + "Description": "Immediate denture – maxillary", + "PriceLTEQ21": 
"935", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D5140", + "Description": "Immediate denture - mandibular", + "PriceLTEQ21": "934", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D5211", + "Description": "Maxillary partial denture - resin base (including retentive/clasping materials, rests and teeth)", + "PriceLTEQ21": "650", + "PriceGT21": "556" + }, + { + "Procedure Code": "D5212", + "Description": "Mandibular partial denture - resin base (including retentive/clasping materials, rests and teeth)", + "PriceLTEQ21": "691", + "PriceGT21": "595" + }, + { + "Procedure Code": "D5213", + "Description": "Maxillary partial denture- cast metal framework with resin denture bases (including retentive/clasping materials, rests and teeth)", + "PriceLTEQ21": "974", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D5214", + "Description": "Mandibular partial denture - cast metal framework with resin denture bases (including retentive/clasping materials, rests and teeth)", + "PriceLTEQ21": "986", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D5225", + "Description": "Maxillary partial denture- flexible base", + "PriceLTEQ21": "974", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D5226", + "Description": "Mandibular partial denture- flexible base", + "PriceLTEQ21": "986", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D5511", + "Description": "Repair broken complete denture base, mandibular", + "PriceLTEQ21": "109", + "PriceGT21": "85" + }, + { + "Procedure Code": "D5512", + "Description": "Repair broken complete denture base, maxillary", + "PriceLTEQ21": "109", + "PriceGT21": "85" + }, + { + "Procedure Code": "D5520", + "Description": "Replace missing or broken teeth - complete denture (each tooth)", + "PriceLTEQ21": "89", + "PriceGT21": "77" + }, + { + "Procedure Code": "D5611", + "Description": "Repair broken resin partial denture base, mandibular", + "PriceLTEQ21": "93", + "PriceGT21": "77" + }, + { + "Procedure Code": "D5612", + "Description": "Repair 
broken resin partial denture base, maxillary", + "PriceLTEQ21": "93", + "PriceGT21": "77" + }, + { + "Procedure Code": "D5621", + "Description": "Repair broken cast partial denture base, mandibular", + "PriceLTEQ21": "121", + "PriceGT21": "104" + }, + { + "Procedure Code": "D5622", + "Description": "Repair broken cast partial denture base, maxillary", + "PriceLTEQ21": "121", + "PriceGT21": "104" + }, + { + "Procedure Code": "D5630", + "Description": "Repair or replace broken retentive/clasping materials – per tooth", + "PriceLTEQ21": "107", + "PriceGT21": "99" + }, + { + "Procedure Code": "D5640", + "Description": "Replace broken teeth - per tooth", + "PriceLTEQ21": "91", + "PriceGT21": "77" + }, + { + "Procedure Code": "D5650", + "Description": "Add tooth to existing partial denture", + "PriceLTEQ21": "110", + "PriceGT21": "92" + }, + { + "Procedure Code": "D5660", + "Description": "Add clasp to existing partial denture per tooth", + "PriceLTEQ21": "125", + "PriceGT21": "98" + }, + { + "Procedure Code": "D5730", + "Description": "Reline complete maxillary denture (direct)", + "PriceLTEQ21": "188", + "PriceGT21": "158" + }, + { + "Procedure Code": "D5731", + "Description": "Reline lower complete mandibular denture (direct)", + "PriceLTEQ21": "184", + "PriceGT21": "173" + }, + { + "Procedure Code": "D5740", + "Description": "Reline maxillary partial denture(chairside)", + "PriceLTEQ21": "169", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D5741", + "Description": "Reline mandibular partial denture(chairside)", + "PriceLTEQ21": "160", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D5750", + "Description": "Reline complete maxillary denture (indirect)", + "PriceLTEQ21": "255", + "PriceGT21": "214" + }, + { + "Procedure Code": "D5751", + "Description": "Reline complete mandibular denture (indirect)", + "PriceLTEQ21": "256", + "PriceGT21": "215" + }, + { + "Procedure Code": "D5760", + "Description": "Reline maxillary partial denture (laboratory)", + 
"PriceLTEQ21": "252", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D5761", + "Description": "Reline mandibular partial denture (laboratory)", + "PriceLTEQ21": "252", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D6241", + "Description": "Pontic-porcelain fused metal", + "PriceLTEQ21": "691", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D6751", + "Description": "Retainer crown-porcelain fused to metal", + "PriceLTEQ21": "691", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D6930", + "Description": "Re-cement or re-bond fixed partial denture", + "PriceLTEQ21": "87", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D6980", + "Description": "Fixed partial denture repair", + "PriceLTEQ21": "155", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D6999", + "Description": "Fixed prosthodontic procedure", + "PriceLTEQ21": "IC", + "PriceGT21": "IC" + }, + { + "Procedure Code": "D7111", + "Description": "Extraction, coronal remnants - primary tooth", + "PriceLTEQ21": "80", + "PriceGT21": "75" + }, + { + "Procedure Code": "D7140", + "Description": "Extraction, erupted tooth or exposed root (elevation and/or forceps removal)", + "PriceLTEQ21": "107", + "PriceGT21": "77" + }, + { + "Procedure Code": "D7210", + "Description": "Extraction, erupted tooth requiring removal of bone and/or sectioning of tooth, and including elevation of mucoperiosteal flap if indicated", + "PriceLTEQ21": "179", + "PriceGT21": "149" + }, + { + "Procedure Code": "D7220", + "Description": "Removal of impacted tooth - soft tissue", + "PriceLTEQ21": "223", + "PriceGT21": "191" + }, + { + "Procedure Code": "D7230", + "Description": "Removal of impacted tooth - partially bony", + "PriceLTEQ21": "286", + "PriceGT21": "249" + }, + { + "Procedure Code": "D7240", + "Description": "Removal of impacted tooth - completely bony", + "PriceLTEQ21": "378", + "PriceGT21": "295" + }, + { + "Procedure Code": "D7250", + "Description": "Surgical removal of residual tooth roots (cutting procedure)", + 
"PriceLTEQ21": "173", + "PriceGT21": "144" + }, + { + "Procedure Code": "D7251", + "Description": "Coronectomy- intentional partial tooth removal, impacted teeth only", + "PriceLTEQ21": "173", + "PriceGT21": "134" + }, + { + "Procedure Code": "D7270", + "Description": "Tooth reimplantation and/or stabilization of accidentally evulsed or displaced tooth", + "PriceLTEQ21": "145", + "PriceGT21": "106" + }, + { + "Procedure Code": "D7280", + "Description": "Surgical access of an unerupted tooth", + "PriceLTEQ21": "452", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D7283", + "Description": "Placement of device to facilitate eruption of impacted tooth", + "PriceLTEQ21": "84", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D7310", + "Description": "Alveoloplasty in conjunction with extractions-four or more teeth or tooth spaces, per quadrant", + "PriceLTEQ21": "163", + "PriceGT21": "142" + }, + { + "Procedure Code": "D7311", + "Description": "Alveoloplasty in conjunction with extractions - one to three teeth or tooth spaces, per quadrant", + "PriceLTEQ21": "146", + "PriceGT21": "128" + }, + { + "Procedure Code": "D7320", + "Description": "Alveoloplasty not in conjunction with extractions- four or more teeth or tooth spaces, per quadrant", + "PriceLTEQ21": "202", + "PriceGT21": "187" + }, + { + "Procedure Code": "D7321", + "Description": "Alveoloplasty not in conjunction with extractions - one to three teeth or tooth spaces, per quadrant", + "PriceLTEQ21": "162", + "PriceGT21": "149" + }, + { + "Procedure Code": "D7340", + "Description": "Vestibuloplasty - ridge extension (second epithelialization)", + "PriceLTEQ21": "796", + "PriceGT21": "747" + }, + { + "Procedure Code": "D7350", + "Description": "Vestibuloplasty - ridge extension (Oral surgeon only)", + "PriceLTEQ21": "1236", + "PriceGT21": "943" + }, + { + "Procedure Code": "D7410", + "Description": "Radical excision - lesion diameter up to 1.25cm", + "PriceLTEQ21": "124", + "PriceGT21": "115" + }, + { + 
"Procedure Code": "D7411", + "Description": "Excision of benign lesion greater than 1.25 cm", + "PriceLTEQ21": "254", + "PriceGT21": "208" + }, + { + "Procedure Code": "D7450", + "Description": "Removal of benign odontogenic cyst or tumor - lesion diameter up to 1.25 cm", + "PriceLTEQ21": "252", + "PriceGT21": "248" + }, + { + "Procedure Code": "D7451", + "Description": "Removal of benign odontogenic cyst or tumor - lesion diameter greater than 1.25 cm", + "PriceLTEQ21": "343", + "PriceGT21": "288" + }, + { + "Procedure Code": "D7460", + "Description": "Removal of benign nonodontogenic cyst or tumor - lesion diameter up to 1.25 cm", + "PriceLTEQ21": "142", + "PriceGT21": "121" + }, + { + "Procedure Code": "D7461", + "Description": "Removal of benign nonodontogenic cyst or tumor - lesion diameter greater than 1.25 cm", + "PriceLTEQ21": "194", + "PriceGT21": "143" + }, + { + "Procedure Code": "D7471", + "Description": "Removal of lateral exostosis (maxilla or mandible) (Oral surgeon only)", + "PriceLTEQ21": "194", + "PriceGT21": "143" + }, + { + "Procedure Code": "D7472", + "Description": "Removal of torus palatinus (Oral surgeon only)", + "PriceLTEQ21": "194", + "PriceGT21": "143" + }, + { + "Procedure Code": "D7473", + "Description": "Removal of torus mandibularis (Oral surgeon only)", + "PriceLTEQ21": "194", + "PriceGT21": "143" + }, + { + "Procedure Code": "D7961", + "Description": "Buccal/labial frenectomy (frenulectomy)", + "PriceLTEQ21": "353", + "PriceGT21": "107" + }, + { + "Procedure Code": "D7962", + "Description": "Lingual frenectomy (frenulectomy)", + "PriceLTEQ21": "353", + "PriceGT21": "107" + }, + { + "Procedure Code": "D7963", + "Description": "Frenuloplasty", + "PriceLTEQ21": "480", + "PriceGT21": "416" + }, + { + "Procedure Code": "D7970", + "Description": "Excision of hyperplastic tissue - per arch", + "PriceLTEQ21": "334", + "PriceGT21": "246" + }, + { + "Procedure Code": "D7999", + "Description": "Unspecified oral surgery procedure, by report", 
+ "PriceLTEQ21": "IC", + "PriceGT21": "IC" + }, + { + "Procedure Code": "D8010", + "Description": "Limited orthodontic treamtnent of the primary transition (Orthodontist only)", + "PriceLTEQ21": "250", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D8020", + "Description": "Limited orthodontic treatment of the transitional dentition (Orthodontist only)", + "PriceLTEQ21": "250", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D8030", + "Description": "Limited orthodontic treatment of the adolescent dentition (Orthodontist only)", + "PriceLTEQ21": "250", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D8040", + "Description": "Limited orthodontic treatment of the adult dentition (Orthodontist only)", + "PriceLTEQ21": "250", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D8070", + "Description": "Comprehensive orthodontic treatment of the transitional dentition (Orthodontist only)", + "PriceLTEQ21": "1302", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D8080", + "Description": "Comprehensive orthodontic treatment of the adolescent dentition (Orthodontist only)", + "PriceLTEQ21": "1302", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D8090", + "Description": "Comprehensive orthodontic treatment of the adult dentition (Orthodontist only)", + "PriceLTEQ21": "1302", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D8660", + "Description": "Pre-orthodontic treatment examination to monitor growth and development (records fee) (Orthodontist only)", + "PriceLTEQ21": "136", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D8670", + "Description": "Periodic orthodontic treatment visit (Orthodontist only)", + "PriceLTEQ21": "288", + "PriceGT21": "215" + }, + { + "Procedure Code": "D8680", + "Description": "Orthodontic retention (removal of appliances, construction and placement of retainer(s)) (Orthodontist only)", + "PriceLTEQ21": "102", + "PriceGT21": "85" + }, + { + "Procedure Code": "D8703", + "Description": "Replacement of lost or broken retainer- 
maxillary (Orthodontist only)", + "PriceLTEQ21": "95", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D8704", + "Description": "Replacement of lost or broken retainer- mandibular (Orthodontist only)", + "PriceLTEQ21": "95", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D8999", + "Description": "Unspecified orthodontic procedure, by report (Orthodontist only) I.C I.C** Y Y**", + "PriceLTEQ21": "IC", + "PriceGT21": "IC" + }, + { + "Procedure Code": "D9110", + "Description": "Palliative treatment of dental pain – per visit", + "PriceLTEQ21": "75", + "PriceGT21": "36" + }, + { + "Procedure Code": "D9222", + "Description": "Deep sedation/general anesthesia – first 15 minutes", + "PriceLTEQ21": "109", + "PriceGT21": "90" + }, + { + "Procedure Code": "D9223", + "Description": "Deep sedation/general anesthesia – each additional 15- minute increment", + "PriceLTEQ21": "109", + "PriceGT21": "90" + }, + { + "Procedure Code": "D9230", + "Description": "Analgesia, anxiolysis, inhalation of nitrous oxide", + "PriceLTEQ21": "22", + "PriceGT21": "15" + }, + { + "Procedure Code": "D9248", + "Description": "Nonintravenous conscious sedation", + "PriceLTEQ21": "45", + "PriceGT21": "45" + }, + { + "Procedure Code": "D9310", + "Description": "Consultation- Diagnostic service provided by dentist or physician other than requesting dentist or physician (Specialist only)", + "PriceLTEQ21": "54", + "PriceGT21": "63" + }, + { + "Procedure Code": "D9410", + "Description": "House/extended care facility call, once per facility per day", + "PriceLTEQ21": "36", + "PriceGT21": "39" + }, + { + "Procedure Code": "D9450", + "Description": "Rural add-on encounter payment", + "PriceLTEQ21": "31", + "PriceGT21": "31" + }, + { + "Procedure Code": "D9920", + "Description": "Behavior management, by report", + "PriceLTEQ21": "86", + "PriceGT21": "86" + }, + { + "Procedure Code": "D9930", + "Description": "Treatment of complications (postsurgical) - unusual circumstances, by report", + "PriceLTEQ21": 
"66", + "PriceGT21": "30" + }, + { + "Procedure Code": "D9941", + "Description": "Fabrication of athletic mouthguard", + "PriceLTEQ21": "85", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D9944", + "Description": "Occlusal guard - hard appliance, full arch", + "PriceLTEQ21": "308", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D9945", + "Description": "Occlusal guard - soft appliance, full arch", + "PriceLTEQ21": "308", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D9946", + "Description": "Occlusal guard - hard appliance, partial arch", + "PriceLTEQ21": "308", + "PriceGT21": "NC" + }, + { + "Procedure Code": "D9999", + "Description": "Unspecified adjunctive procedure, by report", + "PriceLTEQ21": "IC", + "PriceGT21": "IC" + } +] diff --git a/apps/ProcedureCodeFromMhPdf/procedureCodesOld.json b/apps/ProcedureCodeFromMhPdf/procedureCodesOld.json new file mode 100644 index 0000000..1df028e --- /dev/null +++ b/apps/ProcedureCodeFromMhPdf/procedureCodesOld.json @@ -0,0 +1,344 @@ +[ + { + "Procedure Code": "D1999", + "Description": "", + "Price": "50" + }, + { + "Procedure Code": "D0120", + "Description": "perio exam", + "Price": "105" + }, + { + "Procedure Code": "D0140", + "Description": "limited exam", + "Price": "90" + }, + { + "Procedure Code": "D0150", + "Description": "comprehensive exam", + "Price": "120" + }, + { + "Procedure Code": "D0210", + "Description": "Fmx.", + "Price": "120" + }, + { + "Procedure Code": "D0220", + "Description": "first PA.", + "Price": "60" + }, + { + "Procedure Code": "D0230", + "Description": "2nd PA.", + "Price": "50" + }, + { + "Procedure Code": "D0330", + "Description": "pano", + "Price": "150" + }, + { + "Procedure Code": "D0272", + "Description": "2 BW", + "Price": "80" + }, + { + "Procedure Code": "D0274", + "Description": "4BW", + "Price": "160" + }, + { + "Procedure Code": "D1110", + "Description": "adult prophy", + "Price": "150" + }, + { + "Procedure Code": "D1120", + "Description": "child prophy", + "Price": 
"120" + }, + { + "Procedure Code": "D1351", + "Description": "sealant", + "Price": "80" + }, + { + "Procedure Code": "D4341", + "Description": "srp", + "Price": "250" + }, + { + "Procedure Code": "D4910", + "Description": "perio maintains", + "Price": "250" + }, + { + "Procedure Code": "D1208", + "Description": "FL", + "Price": "90" + }, + { + "Procedure Code": "D2330", + "Description": "front composite. 1 s.", + "Price": "180" + }, + { + "Procedure Code": "D2331", + "Description": "2s", + "Price": "220" + }, + { + "Procedure Code": "D2332", + "Description": "3s", + "Price": "280" + }, + { + "Procedure Code": "D2335", + "Description": "4s or more", + "Price": "350" + }, + { + "Procedure Code": "D2391", + "Description": "back. 1s", + "Price": "200" + }, + { + "Procedure Code": "D2392", + "Description": "2s", + "Price": "250" + }, + { + "Procedure Code": "D2393", + "Description": "3s", + "Price": "280" + }, + { + "Procedure Code": "D2394", + "Description": "4s", + "Price": "320" + }, + { + "Procedure Code": "D2140", + "Description": "amalgam, one surface", + "Price": "150" + }, + { + "Procedure Code": "D2150", + "Description": "amalgam, two surface", + "Price": "200" + }, + { + "Procedure Code": "D2750", + "Description": "high noble", + "Price": "1300" + }, + { + "Procedure Code": "D2751", + "Description": "base metal", + "Price": "1200" + }, + { + "Procedure Code": "D2740", + "Description": "crown porcelain", + "Price": "1300" + }, + { + "Procedure Code": "D2954", + "Description": "p/c", + "Price": "450" + }, + { + "Procedure Code": "D7910", + "Description": "suture, small wound up to 5 mm", + "Price": "400" + }, + { + "Procedure Code": "D5110", + "Description": "FU", + "Price": "1200", + "Full Price": "1700" + }, + { + "Procedure Code": "D5120", + "Description": "FL", + "Price": "1700", + "Full Price": "1700" + }, + { + "Procedure Code": "D5211", + "Description": "pu", + "Price": "1300" + }, + { + "Procedure Code": "D5212", + "Description": "pl", + "Price": "1300" 
+ }, + { + "Procedure Code": "D5213", + "Description": "cast pu.", + "Price": "1700" + }, + { + "Procedure Code": "D5214", + "Description": "cast pl", + "Price": "1700" + }, + { + "Procedure Code": "D5510", + "Description": "Repair broken complete denture base (QUAD)", + "Price": "400" + }, + { + "Procedure Code": "D5520", + "Description": "Replace missing or broken teeth - complete denture (each tooth) (TOOTH)", + "Price": "200" + }, + { + "Procedure Code": "D5750", + "Description": "lab reline", + "Price": "600" + }, + { + "Procedure Code": "D5730", + "Description": "chairside reline", + "Price": "500" + }, + { + "Procedure Code": "D2920", + "Description": "re cement crown", + "Price": "120" + }, + { + "Procedure Code": "D2950", + "Description": "core buildup", + "Price": "350" + }, + { + "Procedure Code": "D2955", + "Description": "post renoval", + "Price": "350" + }, + { + "Procedure Code": "D6100", + "Description": "", + "Price": "320" + }, + { + "Procedure Code": "D6110", + "Description": "implant", + "Price": "1600" + }, + { + "Procedure Code": "D6056", + "Description": "pre fab abut", + "Price": "750" + }, + { + "Procedure Code": "D6057", + "Description": "custom abut", + "Price": "800" + }, + { + "Procedure Code": "D6058", + "Description": "porcelain, implant crown, ceramic crown", + "Price": "1400" + }, + { + "Procedure Code": "D6059", + "Description": "", + "Price": "1400" + }, + { + "Procedure Code": "D6242", + "Description": "noble metal. 
For united", + "Price": "1400" + }, + { + "Procedure Code": "D6245", + "Description": "porcelain, not for united", + "Price": "1400" + }, + { + "Procedure Code": "D0367", + "Description": "", + "Price": "400" + }, + { + "Procedure Code": "D0364", + "Description": "Less than one jaw", + "Price": "350" + }, + { + "Procedure Code": "D0365", + "Description": "Mand", + "Price": "350" + }, + { + "Procedure Code": "D0366", + "Description": "Max", + "Price": "350" + }, + { + "Procedure Code": "D0368", + "Description": "include TMJ", + "Price": "375" + }, + { + "Procedure Code": "D0383", + "Description": "", + "Price": "350" + }, + { + "Procedure Code": "D0380", + "Description": "Less than one jaw", + "Price": "300" + }, + { + "Procedure Code": "D0381", + "Description": "Mand", + "Price": "300" + }, + { + "Procedure Code": "D0382", + "Description": "Max", + "Price": "300" + }, + { + "Procedure Code": "D7950", + "Description": "max", + "Price": "800" + }, + { + "Procedure Code": "D7140", + "Description": "simple ext", + "Price": "150" + }, + { + "Procedure Code": "D7210", + "Description": "surgical ext", + "Price": "280" + }, + { + "Procedure Code": "D7220", + "Description": "soft impacted", + "Price": "380" + }, + { + "Procedure Code": "D7230", + "Description": "partial bony", + "Price": "450" + }, + { + "Procedure Code": "D7240", + "Description": "fully bony", + "Price": "550" + }, + { + "Procedure Code": "D3320", + "Description": "pre M RCT", + "Price": "1050" + } +] \ No newline at end of file