Claim form - combos buttons feature 1

This commit is contained in:
2025-08-28 00:19:54 +05:30
parent 09ae4df819
commit 269cdf29b3
11 changed files with 3540 additions and 475 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -10,7 +10,7 @@ import {
} from "@/components/ui/select"; } from "@/components/ui/select";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import { Label } from "@/components/ui/label"; import { Label } from "@/components/ui/label";
import { X, Calendar as CalendarIcon, HelpCircle } from "lucide-react"; import { X, Calendar as CalendarIcon, HelpCircle, Trash2 } from "lucide-react";
import { useToast } from "@/hooks/use-toast"; import { useToast } from "@/hooks/use-toast";
import { Calendar } from "@/components/ui/calendar"; import { Calendar } from "@/components/ui/calendar";
import { import {
@@ -27,7 +27,6 @@ import {
TooltipContent, TooltipContent,
TooltipTrigger, TooltipTrigger,
} from "@/components/ui/tooltip"; } from "@/components/ui/tooltip";
import procedureCodes from "../../assets/data/procedureCodes.json";
import { formatLocalDate, parseLocalDate } from "@/utils/dateUtils"; import { formatLocalDate, parseLocalDate } from "@/utils/dateUtils";
import { import {
Claim, Claim,
@@ -39,6 +38,12 @@ import {
UpdatePatient, UpdatePatient,
} from "@repo/db/types"; } from "@repo/db/types";
import { Decimal } from "decimal.js"; import { Decimal } from "decimal.js";
import {
COMBO_BUTTONS,
mapPricesForForm,
applyComboToForm,
getDescriptionForCode,
} from "@/utils/procedureCombosMapping";
interface ClaimFormData { interface ClaimFormData {
patientId: number; patientId: number;
@@ -252,25 +257,13 @@ export function ClaimForm({
}; };
// Map Price function // Map Price function
const mapPrices = () => { const onMapPrice = () => {
const updatedLines = form.serviceLines.map((line) => { setForm((prev) =>
if (line.procedureCode && line.procedureCode.trim() !== "") { mapPricesForForm({
const normalizedCode = line.procedureCode.toUpperCase().trim(); form: prev,
const procedureInfo = procedureCodes.find( patientDOB: patient?.dateOfBirth ?? "",
(p) => p["Procedure Code"].toUpperCase().trim() === normalizedCode })
); );
if (procedureInfo && procedureInfo.Price) {
return {
...line,
totalBilled: new Decimal(parseFloat(procedureInfo.Price)),
};
}
}
return line;
});
setForm({ ...form, serviceLines: updatedLines });
}; };
// FILE UPLOAD ZONE // FILE UPLOAD ZONE
@@ -554,9 +547,13 @@ export function ClaimForm({
</div> </div>
</div> </div>
<div className="grid grid-cols-7 gap-4 mb-2 font-medium text-sm text-gray-700"> {/* Header */}
<span>Procedure Code</span> <div className="grid grid-cols-[1.5fr,0.5fr,1fr,1fr,1fr,1fr,1fr] gap-1 mb-2 font-medium text-sm text-gray-700 items-center">
<span>Abbreviation</span> <div className="grid grid-cols-[auto,1fr] items-center gap-2">
<span />
<span className="pl-8">Procedure Code</span>
</div>
<span className="justify-self-center">Info</span>
<span>Procedure Date</span> <span>Procedure Date</span>
<span>Oral Cavity Area</span> <span>Oral Cavity Area</span>
<span>Tooth Number</span> <span>Tooth Number</span>
@@ -565,116 +562,128 @@ export function ClaimForm({
</div> </div>
{/* Dynamic Rows */} {/* Dynamic Rows */}
{form.serviceLines.map((line, i) => ( {form.serviceLines.map((line, i) => {
<div key={i} className="grid grid-cols-7 gap-1 mb-2"> const raw = line.procedureCode || "";
<Input const code = raw.trim();
placeholder="eg. D0120" const desc = code
value={line.procedureCode} ? getDescriptionForCode(code) || "No description available"
onChange={(e) => : "Enter a procedure code";
updateServiceLine(
i,
"procedureCode",
e.target.value.toUpperCase()
)
}
/>
<div className="flex items-center justify-center"> return (
<Tooltip> <div
<TooltipTrigger asChild> key={i}
<HelpCircle className="h-4 w-4 text-gray-400 hover:text-gray-600 cursor-help" /> className="grid grid-cols-[1.5fr,0.5fr,1fr,1fr,1fr,1fr,1fr] gap-1 mb-2 items-center"
</TooltipTrigger> >
<TooltipContent> <div className="grid grid-cols-[auto,1fr] items-center gap-2">
<div className="text-sm"> <button
{line.procedureCode && type="button"
line.procedureCode.trim() !== "" onClick={() =>
? (() => { setForm((prev) => {
const normalizedCode = line.procedureCode const next = {
.toUpperCase() ...prev,
.trim(); serviceLines: [...prev.serviceLines],
const procedureInfo = procedureCodes.find( };
(p) => next.serviceLines.splice(i, 1);
p["Procedure Code"].toUpperCase().trim() === return next;
normalizedCode })
); }
return procedureInfo className="p-1 rounded hover:bg-red-50"
? procedureInfo.Description ||
"No description available"
: "Enter a valid procedure code";
})()
: "Enter a procedure code"}
</div>
</TooltipContent>
</Tooltip>
</div>
{/* Date Picker */}
<Popover>
<PopoverTrigger asChild>
<Button
variant="outline"
className="w-full text-left font-normal"
> >
<CalendarIcon className="mr-2 h-4 w-4" /> <Trash2 className="h-4 w-4 text-red-500 hover:text-red-700" />
{line.procedureDate || "Pick Date"} </button>
</Button> <Input
</PopoverTrigger> placeholder="eg. D0120"
<PopoverContent className="w-auto p-0"> value={line.procedureCode}
<Calendar onChange={(e) =>
mode="single" updateServiceLine(
selected={new Date(line.procedureDate)} i,
onSelect={(date) => updateProcedureDate(i, date)} "procedureCode",
e.target.value.toUpperCase()
)
}
/> />
</PopoverContent> </div>
</Popover>
<Input <div className="flex justify-center">
placeholder="Oral Cavity Area" <Tooltip>
value={line.oralCavityArea} <TooltipTrigger asChild>
onChange={(e) => <HelpCircle className="h-4 w-4 text-gray-400 hover:text-gray-600 cursor-help" />
updateServiceLine(i, "oralCavityArea", e.target.value) </TooltipTrigger>
} <TooltipContent>
/> <div className="text-sm">{desc}</div>
<Input </TooltipContent>
placeholder="eg. 14" </Tooltip>
value={line.toothNumber} </div>
onChange={(e) =>
updateServiceLine(i, "toothNumber", e.target.value) {/* Date Picker */}
} <Popover>
/> <PopoverTrigger asChild>
<Input <Button
placeholder="eg. 'B', 'D', 'F', 'I', 'L', 'M', 'O'" variant="outline"
value={line.toothSurface} className="w-full text-left font-normal"
onChange={(e) => >
updateServiceLine(i, "toothSurface", e.target.value) <CalendarIcon className="mr-2 h-4 w-4" />
} {line.procedureDate || "Pick Date"}
/> </Button>
<Input </PopoverTrigger>
type="number" <PopoverContent className="w-auto p-0">
step="0.01" <Calendar
placeholder="$0.00" mode="single"
value={ selected={new Date(line.procedureDate)}
line.totalBilled?.toNumber() === 0 onSelect={(date) => updateProcedureDate(i, date)}
? "" />
: line.totalBilled?.toNumber() </PopoverContent>
} </Popover>
onChange={(e) => {
updateServiceLine(i, "totalBilled", e.target.value); <Input
}} placeholder="Oral Cavity Area"
onBlur={(e) => { value={line.oralCavityArea}
const val = parseFloat(e.target.value); onChange={(e) =>
const rounded = Math.round(val * 100) / 100; updateServiceLine(i, "oralCavityArea", e.target.value)
updateServiceLine( }
i, />
"totalBilled", <Input
isNaN(rounded) ? 0 : rounded placeholder="eg. 14"
); value={line.toothNumber}
}} onChange={(e) =>
/> updateServiceLine(i, "toothNumber", e.target.value)
</div> }
))} />
<Input
placeholder="eg. 'B', 'D', 'F', 'I', 'L', 'M', 'O'"
value={line.toothSurface}
onChange={(e) =>
updateServiceLine(i, "toothSurface", e.target.value)
}
/>
<Input
type="number"
step="0.01"
placeholder="$0.00"
value={
line.totalBilled?.toNumber() === 0
? ""
: line.totalBilled?.toNumber()
}
onChange={(e) => {
updateServiceLine(i, "totalBilled", e.target.value);
}}
onBlur={(e) => {
const val = parseFloat(e.target.value);
const rounded = Math.round(val * 100) / 100;
updateServiceLine(
i,
"totalBilled",
isNaN(rounded) ? 0 : rounded
);
}}
/>
</div>
);
})}
<Button <Button
className="mt-2"
variant="outline" variant="outline"
onClick={() => onClick={() =>
setForm((prev) => ({ setForm((prev) => ({
@@ -699,10 +708,28 @@ export function ClaimForm({
</Button> </Button>
<div className="flex gap-2 mt-10 mb-10"> <div className="flex gap-2 mt-10 mb-10">
<Button variant="outline">Child Prophy Codes</Button> {COMBO_BUTTONS.map((b) => (
<Button variant="outline">Adult Prophy Codes</Button> <Button
<Button variant="outline">Customized Group Codes</Button> key={b.id}
<Button variant="success" onClick={mapPrices}> variant="secondary"
onClick={() =>
setForm((prev) =>
applyComboToForm(
prev,
b.id as any,
patient?.dateOfBirth ?? "",
{
replaceAll: true,
lineDate: prev.serviceDate,
}
)
)
}
>
{b.label}
</Button>
))}
<Button variant="success" onClick={onMapPrice}>
Map Price Map Price
</Button> </Button>
</div> </div>

View File

@@ -0,0 +1,37 @@
/**
 * Preset procedure-code combos, keyed by combo id.
 *
 * Each entry repeats its key as `id`, carries a human-readable `label`, and
 * lists its procedure codes in `codes` in the order written here.
 */
export const PROCEDURE_COMBOS: Record<
string,
{ id: string; label: string; codes: string[] }
> = {
childRecall: {
id: "childRecall",
label: "Child Recall",
// NOTE(review): the same five codes are listed three times in a row
// (15 entries). This looks like placeholder/test data; confirm whether a
// single set of five codes is what should ship.
codes: [
"D0120",
"D1120",
"D0272",
"D1208",
"D2331",
"D0120",
"D1120",
"D0272",
"D1208",
"D2331",
"D0120",
"D1120",
"D0272",
"D1208",
"D2331",
],
},
adultProphy: {
id: "adultProphy",
label: "Adult Prophy",
codes: ["D0150", "D1110", "D0274", "D1208"],
},
bitewingsOnly: {
id: "bitewingsOnly",
label: "Bitewings Only",
codes: ["D0272"],
},
// add more…
};

View File

@@ -0,0 +1,270 @@
import { InputServiceLine } from "@repo/db/types";
import Decimal from "decimal.js";
import rawCodeTable from "@/assets/data/procedureCodes.json";
import { PROCEDURE_COMBOS } from "./procedureCombos";
/* ----------------------------- Types ----------------------------- */
/**
 * One row of the procedure-code table imported from procedureCodes.json.
 *
 * The three price fields may hold a number, a numeric string, or a marker
 * such as "IC"/"NC"; the pricing helpers in this module treat null, blank,
 * "IC" and "NC" as "no price".
 */
export type CodeRow = {
"Procedure Code": string;
Description?: string;
// Un-tiered price, used as the fallback when the age-tiered field is blank.
Price?: string | number | null;
// Price applied when the patient's age is <= 21.
PriceLTEQ21?: string | number | null;
// Price applied when the patient's age is > 21.
PriceGT21?: string | number | null;
// Rows may carry additional, unmodelled columns.
[k: string]: unknown;
};
// The JSON import is asserted to CodeRow[]; its shape is not validated at runtime.
const CODE_TABLE = rawCodeTable as CodeRow[];
/**
 * Minimal shape of a claim form that the helpers in this module operate on.
 * The generic entry points accept any form type that extends this shape.
 */
export type ClaimFormLike = {
serviceDate: string; // form-level service date
serviceLines: InputServiceLine[]; // one entry per procedure row
};
/**
 * Options accepted by `applyComboToForm`.
 */
export type ApplyOptions = {
// Default true. When `replaceAll` is off, insert right after the last line
// that has a non-blank procedure code.
append?: boolean;
// Used only when `append` is false and `replaceAll` is off; clamped to the
// range 0..(current line count - 1).
startIndex?: number;
// Date written to every inserted line and used as the reference date for
// the age calculation. Defaults to the form's `serviceDate`.
lineDate?: string;
// Default false. When true, lines after the inserted combo are reset to blank lines.
clearTrailing?: boolean;
// Default false. When true, every existing line is blanked, the combo is
// written from index 0, and trailing lines are left blank.
replaceAll?: boolean;
};
/* ----------------------------- Helpers ----------------------------- */
/** Button descriptors (id + label only) derived from every preset combo. */
export const COMBO_BUTTONS = Object.values(PROCEDURE_COMBOS).map(
  ({ id, label }) => ({ id, label })
);
// Canonical form of a procedure code: all whitespace removed, upper-cased.
// Used as the key of the lookup map built below.
const normalizeCode = (code: string) => {
  const withoutWhitespace = code.split(/\s+/).join("");
  return withoutWhitespace.toUpperCase();
};
// Lookup from normalized procedure code to its table row. When a code occurs
// more than once in the table, the first occurrence wins; rows whose code
// normalizes to an empty string are skipped.
const CODE_MAP: Map<string, CodeRow> = CODE_TABLE.reduce((lookup, row) => {
  const key = normalizeCode(String(row["Procedure Code"] || ""));
  if (key && !lookup.has(key)) {
    lookup.set(key, row);
  }
  return lookup;
}, new Map<string, CodeRow>());
/**
 * Looks up the description of a procedure code (used by the claim form's
 * abbreviation tooltip).
 *
 * @param code Procedure code as typed; it is normalized before lookup.
 * @returns The row's `Description`, or `undefined` when `code` is empty/missing
 *          or the code is not in the table.
 */
export function getDescriptionForCode(
  code: string | undefined
): string | undefined {
  return code ? CODE_MAP.get(normalizeCode(code))?.Description : undefined;
}
// True when a price cell carries no usable amount: null/undefined, an empty
// or whitespace-only string, or one of the markers "IC" / "NC" (any case).
const isBlankPrice = (v: unknown) => {
  if (v === null || v === undefined) {
    return true;
  }
  const token = String(v).trim().toUpperCase();
  return ["", "IC", "NC"].includes(token);
};
/**
 * Converts a raw price cell to a Decimal, mapping anything unusable to zero.
 *
 * Blank/IC/NC cells and values that do not parse to a finite number yield 0.
 * String values may carry currency formatting ("$1,400.00"); "$", "," and
 * whitespace are removed before parsing.
 */
const toDecimalOrZero = (v: unknown): Decimal => {
  if (isBlankPrice(v)) return new Decimal(0);
  // parseFloat stops at the first character it cannot parse, so without the
  // cleanup "1,400" would silently become 1 and "$110" would become NaN.
  const n =
    typeof v === "string"
      ? parseFloat(v.replace(/[$,\s]/g, ""))
      : (v as number);
  return new Decimal(Number.isFinite(n) ? n : 0);
};
// Accepts a Date or a date string (M/D/YYYY, MM/DD/YYYY, YYYY-MM-DD, or
// YYYY-MM-DD followed by a time part) and returns a Date at local midnight.
type DateInput = string | Date;
/**
 * Parses a calendar date without letting time zones shift the day.
 *
 * - Date instances are truncated to their local year/month/day.
 * - Slash dates accept one- or two-digit month and day.
 * - ISO strings use only their leading YYYY-MM-DD portion, so a value such as
 *   "2020-08-05T00:00:00.000Z" yields Aug 5 in every time zone instead of
 *   being converted from UTC to the previous local day.
 * - Anything else falls back to `new Date(s)` and may produce an Invalid Date.
 */
const parseDate = (d: DateInput): Date => {
  if (d instanceof Date)
    return new Date(d.getFullYear(), d.getMonth(), d.getDate());
  const s = String(d).trim();

  // M/D/YYYY or MM/DD/YYYY
  const mdy = /^(\d{1,2})\/(\d{1,2})\/(\d{4})$/.exec(s);
  if (mdy) {
    return new Date(Number(mdy[3]), Number(mdy[1]) - 1, Number(mdy[2]));
  }

  // YYYY-MM-DD, optionally followed by a time component that is ignored
  const ymd = /^(\d{4})-(\d{2})-(\d{2})(?:[T ].*)?$/.exec(s);
  if (ymd) {
    return new Date(Number(ymd[1]), Number(ymd[2]) - 1, Number(ymd[3]));
  }

  // Fallback: let the engine try; the result may be an Invalid Date.
  return new Date(s);
};
// Whole years elapsed between `dob` and `on`, counting a year only once the
// birthday has been reached. Yields NaN when either date cannot be parsed.
const ageOnDate = (dob: DateInput, on: DateInput): number => {
  const born = parseDate(dob);
  const asOf = parseDate(on);
  const years = asOf.getFullYear() - born.getFullYear();
  const monthGap = asOf.getMonth() - born.getMonth();
  const birthdayPending =
    monthGap < 0 || (monthGap === 0 && asOf.getDate() < born.getDate());
  return birthdayPending ? years - 1 : years;
};
/**
 * Chooses the price for a table row according to the patient's age.
 *
 * - age <= 21 → `PriceLTEQ21` when it holds a usable amount.
 * - age  > 21 → `PriceGT21` when it holds a usable amount.
 * - Otherwise (tiered field blank/IC/NC, or age unknown) → `Price`.
 * - If `Price` is blank/IC/NC as well → 0, which the form shows as empty.
 *
 * An age that is not a finite number (e.g. NaN from a missing or unparseable
 * date of birth) is never assigned to a tier; only the un-tiered `Price`
 * is considered for it.
 *
 * @param row Row from the procedure-code table.
 * @param age Patient age in whole years on the date of service.
 * @returns The selected price, or Decimal(0) when none applies.
 */
export function pickPriceForRowByAge(row: CodeRow, age: number): Decimal {
  // `NaN <= 21` is false, so without this guard an unknown age would fall
  // into the ">21" branch and be billed at the adult rate.
  if (Number.isFinite(age)) {
    const tiered = age <= 21 ? row.PriceLTEQ21 : row.PriceGT21;
    if (!isBlankPrice(tiered)) return toDecimalOrZero(tiered);
  }
  // Fallback to Price if the tiered value is not available; blank → 0.
  return toDecimalOrZero(row.Price);
}
/**
 * Resolves the price of `code` for a patient of the given age.
 *
 * @param map  Lookup from normalized procedure code to table row.
 * @param code Procedure code; normalized here before the lookup.
 * @param age  Patient age in years.
 * @returns The age-appropriate price, or Decimal(0) when the code is unknown.
 */
function getPriceForCodeWithAgeFromMap(
  map: Map<string, CodeRow>,
  code: string,
  age: number
): Decimal {
  const hit = map.get(normalizeCode(code));
  if (!hit) {
    return new Decimal(0);
  }
  return pickPriceForRowByAge(hit, age);
}
/**
 * Creates a blank service line: empty text fields, zero amounts, and
 * `procedureDate` set to `lineDate`.
 */
export const makeEmptyLine = (lineDate: string): InputServiceLine => {
  const zero = (): Decimal => new Decimal(0);
  const blank: InputServiceLine = {
    procedureCode: "",
    procedureDate: lineDate,
    oralCavityArea: "",
    toothNumber: "",
    toothSurface: "",
    totalBilled: zero(),
    totalAdjusted: zero(),
    totalPaid: zero(),
  };
  return blank;
};
// Grows `lines` in place with blank lines (dated `lineDate`) until it holds
// at least `min` entries; does nothing when it is already long enough.
const ensureCapacity = (
  lines: (InputServiceLine | undefined)[],
  min: number,
  lineDate: string
) => {
  for (let missing = min - lines.length; missing > 0; missing -= 1) {
    lines.push(makeEmptyLine(lineDate));
  }
};
/* ------------------------- Main entry points ------------------------- */
/**
 * Map prices for ALL existing lines in a form (the "Map Price" button).
 *
 * The patient's age is computed once from `patientDOB` and the form-level
 * `serviceDate`; per-line `procedureDate` values are not consulted.
 * Lines with a procedure code get the code normalized and `totalBilled`
 * replaced by the looked-up price (0 when the code is unknown or has no
 * usable price). Lines without a code are shallow-copied unchanged.
 *
 * @returns A NEW form object; the input form and its lines are not mutated.
 */
export function mapPricesForForm<T extends ClaimFormLike>(params: {
  form: T;
  patientDOB: DateInput;
}): T {
  const { form, patientDOB } = params;
  // The age does not depend on the individual line, so compute it once
  // instead of re-parsing both dates for every service line.
  const age = ageOnDate(patientDOB, form.serviceDate);
  return {
    ...form,
    serviceLines: form.serviceLines.map((ln) => {
      const code = normalizeCode(ln.procedureCode || "");
      if (!code) return { ...ln };
      const price = getPriceForCodeWithAgeFromMap(CODE_MAP, code, age);
      return { ...ln, procedureCode: code, totalBilled: price };
    }),
  };
}
/**
 * Writes a preset combo's codes (with age-based prices) into the form's
 * service lines and returns a NEW form object; the input is not mutated.
 *
 * Placement:
 * - `replaceAll`: every existing line is blanked and the combo starts at 0.
 * - otherwise, `append` (default): the combo starts after the last line that
 *   has a non-blank procedure code.
 * - otherwise, `startIndex` (clamped to 0..length-1), or 0 when not given.
 *
 * Blank lines are appended as needed so the whole combo fits. Lines after
 * the combo are reset to blank when `replaceAll` or `clearTrailing` is set.
 * An unknown `comboId` returns the original form unchanged.
 */
export function applyComboToForm<T extends ClaimFormLike>(
  form: T,
  comboId: keyof typeof PROCEDURE_COMBOS,
  patientDOB: DateInput,
  options: ApplyOptions = {}
): T {
  const combo = PROCEDURE_COMBOS[String(comboId)];
  if (!combo) return form;

  const {
    replaceAll = false,
    append = true,
    startIndex,
    clearTrailing = false,
    lineDate = form.serviceDate,
  } = options;

  // Work on a fresh array: all-blank when replacing, a shallow copy otherwise.
  const lines: InputServiceLine[] = replaceAll
    ? Array.from(form.serviceLines, () => makeEmptyLine(lineDate))
    : [...form.serviceLines];
  const next: T = { ...form, serviceLines: lines };

  // Where the first combo code goes (stays 0 for replaceAll).
  let insertAt = 0;
  if (!replaceAll) {
    if (append) {
      const lastFilled = lines.reduce(
        (last, ln, idx) => (ln.procedureCode?.trim() ? idx : last),
        -1
      );
      insertAt = lastFilled + 1;
    } else if (typeof startIndex === "number") {
      insertAt = Math.max(0, Math.min(startIndex, lines.length - 1));
    }
  }

  // Make room for the whole combo.
  ensureCapacity(lines, insertAt + combo.codes.length, lineDate);

  // Age is evaluated on the date that will be stamped on the lines.
  const age = ageOnDate(patientDOB, lineDate);

  for (const [offset, rawCode] of combo.codes.entries()) {
    const target = insertAt + offset;
    if (target >= lines.length) break;
    if (!rawCode) continue;

    const code = normalizeCode(rawCode);
    const existing = lines[target];
    lines[target] = {
      ...existing,
      procedureCode: code,
      procedureDate: lineDate,
      oralCavityArea: existing?.oralCavityArea ?? "",
      toothNumber: existing?.toothNumber ?? "",
      toothSurface: existing?.toothSurface ?? "",
      totalBilled: getPriceForCodeWithAgeFromMap(CODE_MAP, code, age),
      totalAdjusted: new Decimal(0),
      totalPaid: new Decimal(0),
    } as InputServiceLine;
  }

  if (replaceAll || clearTrailing) {
    const firstTrailing = insertAt + combo.codes.length;
    for (let idx = firstTrailing; idx < lines.length; idx++) {
      lines[idx] = makeEmptyLine(lineDate);
    }
  }

  return next;
}

Binary file not shown.

View File

@@ -0,0 +1,96 @@
#!/usr/bin/env python3
"""
Compare a main dental JSON file with one or more other JSON files and
return all records whose 'Procedure Code' is NOT present in the main file.
- Matching key: 'Procedure Code' (case-insensitive, trimmed).
- Keeps the full record from the other files (including extra fields like 'Full Price').
- Deduplicates by Procedure Code across the collected "missing" results.
CONFIG: set MAIN_PATH, OTHER_PATHS, OUT_PATH below.
"""
import json
from pathlib import Path
from typing import List, Dict, Any
# =========================
# CONFIG — EDIT THESE ONLY
# =========================
MAIN_PATH = "procedureCodesMain.json" # your main JSON (with PriceLTEQ21/PriceGT21)
OTHER_PATHS = [
"procedureCodesOld.json", # one or more other JSON files to compare against the main
# "other2.json",
]
OUT_PATH = "not_in_main.json" # where to write the results
# =========================
def _load_json_any(path: str) -> List[Dict[str, Any]]:
"""
Load JSON. Accept:
- a list of objects
- a single object (wraps into a list)
"""
with open(path, "r", encoding="utf-8") as f:
data = json.load(f)
if isinstance(data, dict):
return [data]
if isinstance(data, list):
# filter out non-dict items defensively
return [x for x in data if isinstance(x, dict)]
raise ValueError(f"Unsupported JSON top-level type in {path}: {type(data)}")
def _norm_code(record: Dict[str, Any]) -> str:
# Normalize the 'Procedure Code' for matching
code = str(record.get("Procedure Code", "")).strip().upper()
# Some PDFs might have stray spaces, tabs, or zero-width chars
code = "".join(ch for ch in code if not ch.isspace())
return code
def collect_main_codes(main_path: str) -> set:
    """
    Return the set of normalized procedure codes found in the main JSON file.

    Records whose code normalizes to an empty string are ignored.
    """
    main_items = _load_json_any(main_path)
    # Normalize each record once, then drop empty codes.
    return {code for code in map(_norm_code, main_items) if code}
def collect_missing_records(other_paths: List[str], main_codes: set) -> List[Dict[str, Any]]:
    """
    Collect records from the other files whose code is not in `main_codes`.

    Records without a code are skipped. When the same missing code appears
    more than once (within or across files), the first record seen is kept.
    The result is sorted by normalized code.
    """
    missing: Dict[str, Dict[str, Any]] = {}  # map normalized code -> record
    for p in other_paths:
        for rec in _load_json_any(p):
            code_norm = _norm_code(rec)
            if not code_norm:
                continue
            if code_norm not in main_codes and code_norm not in missing:
                # Keep the full original record
                missing[code_norm] = rec
    # return in a stable, sorted order by code
    return [missing[k] for k in sorted(missing)]
def main():
    """
    Compare OTHER_PATHS against MAIN_PATH and write the records missing from
    the main file to OUT_PATH, also printing a summary and the records.

    Raises:
        FileNotFoundError: if the main file or any other file does not exist.
    """
    # Validate files exist
    if not Path(MAIN_PATH).exists():
        raise FileNotFoundError(f"Main file not found: {MAIN_PATH}")
    for p in OTHER_PATHS:
        if not Path(p).exists():
            raise FileNotFoundError(f"Other file not found: {p}")

    main_codes = collect_main_codes(MAIN_PATH)
    missing_records = collect_missing_records(OTHER_PATHS, main_codes)

    with open(OUT_PATH, "w", encoding="utf-8") as f:
        json.dump(missing_records, f, ensure_ascii=False, indent=2)

    print(f"Main codes: {len(main_codes)}")
    print(f"Missing from main: {len(missing_records)}")
    print(f"Wrote results to {OUT_PATH}")
    # Also echo to stdout
    print(json.dumps(missing_records, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,183 @@
import re
import json
from typing import List, Dict
import fitz # PyMuPDF
# =========================
# CONFIG — EDIT THESE ONLY
# =========================
PDF_PATH = "MH.pdf" # path to your PDF
PAGES = [2] # 0-based page indexes to parse, e.g., [2] for the page you showed
OUT_PATH = "output.json" # where to write JSON
FIRST_PRICE_IS_LTE21 = True # True => first price line is <=21; False => first price is >21
PRINT_PAGE_TEXT = False # set True if you want to print the raw page text for sanity check
# =========================
# --- patterns ---
code_line_re = re.compile(r"^\s*(D\d{4})\s*$")
# a price token is either '$123', '$1,234.50', '123', '123.45', or 'NC'
price_line_re = re.compile(r"^\s*(?:\$\s*)?(\d{1,3}(?:,\d{3})*(?:\.\d{2})?|\d+(?:\.\d{2})?|NC)\s*$", re.IGNORECASE)
# lines that definitely start a notes block we should ignore once prices are done
note_starters_re = re.compile(r"^(Teeth\b|One of\b|--|—||Age limitation:|CR\b)", re.IGNORECASE)
def normalize_ws(s: str) -> str:
    """
    Collapse whitespace in `s` and trim surrounding punctuation.

    Non-breaking spaces become regular spaces, runs of whitespace (including
    line breaks) collapse to a single space, and leading/trailing spaces,
    commas, periods, semicolons, colons, hyphens and bullet characters are
    stripped.
    """
    s = s.replace("\u00a0", " ")
    s = re.sub(r"[ \t]+", " ", s)
    s = re.sub(r"\s*\n\s*", " ", s)
    s = re.sub(r"\s{2,}", " ", s)
    return s.strip(" ,.;:-•·\n\t")
def clean_money(token: str) -> str:
    """
    Normalize a price token to a plain numeric string, or "NC".

    Surrounding whitespace, a leading "$" and thousands separators are
    removed; "NC" is recognized case-insensitively and returned upper-cased.
    """
    # Trim first: otherwise a padded token such as " $110" keeps its "$"
    # (lstrip("$") sees the space) and " nc " is not recognized as NC.
    token = token.strip()
    if token.upper() == "NC":
        return "NC"
    return token.lstrip("$").replace(",", "").strip()
def get_page_lines(pdf_path: str, pages: List[int]) -> List[str]:
    """
    Return the text lines of the given 0-based pages, in the order listed.

    All page indexes are validated before any page is read. The document is
    always closed, including when validation or extraction fails.

    Raises:
        ValueError: if any page index is outside the document.
    """
    doc = fitz.open(pdf_path)
    try:
        max_idx = len(doc) - 1
        for p in pages:
            if p < 0 or p > max_idx:
                raise ValueError(f"Invalid page index {p}. Valid range is 0..{max_idx}.")

        lines: List[str] = []
        for p in pages:
            text = doc.load_page(p).get_text("text") or ""
            if PRINT_PAGE_TEXT:
                print(f"\n--- RAW PAGE {p} ---\n{text}")
            # keep line boundaries; later we parse line-by-line
            lines.extend(text.splitlines())
        return lines
    finally:
        doc.close()
def extract_records(lines: List[str]) -> List[Dict[str, str]]:
    """
    Parse procedure records out of raw page lines.

    A record starts at a line that is exactly a code (matched by
    code_line_re), followed by description lines and then two price lines.
    Records for which two prices cannot be found are dropped.
    FIRST_PRICE_IS_LTE21 decides which of the two prices is the <=21 price.

    Returns:
        One dict per record with the keys "Procedure Code", "Description",
        "PriceLTEQ21" and "PriceGT21".
    """
    out: List[Dict[str, str]] = []
    i = 0
    n = len(lines)

    while i < n:
        line = lines[i].strip()

        # seek a code line
        mcode = code_line_re.match(line)
        if not mcode:
            i += 1
            continue

        code = mcode.group(1)
        i += 1

        # gather description lines until we encounter price lines
        desc_lines: List[str] = []

        # skip blank lines before description
        while i < n and not lines[i].strip():
            i += 1

        # collect description lines (typically a few) until the first price
        # token; stop also if we accidentally hit another code (defensive)
        j = i
        while j < n:
            s = lines[j].strip()
            if not s:
                # blank line ends the description; the price loop below
                # skips blanks on its own
                break
            if code_line_re.match(s):
                # next code: no prices found; this record will be abandoned
                break
            if price_line_re.match(s):
                # reached price section
                break
            if note_starters_re.match(s):
                # a note before any price: description ends, prices may be missing
                break
            desc_lines.append(s)
            j += 1

        # advance i to where we left off
        i = j

        description = normalize_ws(" ".join(desc_lines))

        # collect up to two price tokens
        prices: List[str] = []
        while i < n and len(prices) < 2:
            s = lines[i].strip()
            if not s:
                i += 1
                continue
            if code_line_re.match(s):
                # new record: stop; this one never got its prices (malformed)
                break
            mprice = price_line_re.match(s)
            if mprice:
                prices.append(clean_money(mprice.group(1)))
                i += 1
                continue
            # a note/flags block: skip forward until a blank line or the next code
            if note_starters_re.match(s) or s in {"Y", "NC"}:
                i += 1
                while i < n:
                    t = lines[i].strip()
                    if not t or code_line_re.match(t):
                        break
                    i += 1
                # let the price loop re-examine the line we stopped on
                continue
            # unrecognized line: if prices already found, stop; else skip it
            if prices:
                break
            i += 1

        if len(prices) < 2:
            # couldn't find 2 prices reliably; skip this record
            continue

        if FIRST_PRICE_IS_LTE21:
            price_lte21, price_gt21 = prices[0], prices[1]
        else:
            price_lte21, price_gt21 = prices[1], prices[0]

        out.append(
            {
                "Procedure Code": code,
                "Description": description,
                "PriceLTEQ21": price_lte21,
                "PriceGT21": price_gt21,
            }
        )

        # after prices, skip forward until next code or blank block end
        while i < n:
            s = lines[i].strip()
            if not s:
                i += 1
                break
            if code_line_re.match(s):
                # next record will pick this up
                break
            i += 1

    return out
def extract_pdf_to_json(pdf_path: str, pages: List[int], out_path: str) -> List[Dict[str, str]]:
    """
    Parse the given 0-based pages of the PDF and write the records to
    `out_path` as UTF-8 JSON (indent 2, non-ASCII kept as-is).

    Returns:
        The parsed records that were written.
    """
    lines = get_page_lines(pdf_path, pages)
    data = extract_records(lines)
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    return data
if __name__ == "__main__":
    # Script entry point: parse the configured pages, then report and echo the result.
    data = extract_pdf_to_json(PDF_PATH, PAGES, OUT_PATH)
    print(f"Wrote {len(data)} rows to {OUT_PATH}")
    print(json.dumps(data, ensure_ascii=False, indent=2))

View File

@@ -0,0 +1,208 @@
#!/usr/bin/env python3
"""
MassHealth dental PDF parser (PyMuPDF / fitz) — PAGE RANGE VERSION
Parses rows like:
D2160
Amalgam-three surfaces,
primary or permanent
$110
$92
Y
Y
...
Outputs a single JSON with records from the chosen page range (inclusive).
Config:
- PDF_PATH: path to the PDF
- PAGE_START, PAGE_END: 1-based page numbers (inclusive)
- FIRST_PRICE_IS_LTE21: True => first price line is <=21; False => first price is >21
- OUT_PATH: output JSON path
"""
import re
import json
from typing import List, Dict
import fitz # PyMuPDF
# =========================
# CONFIG — EDIT THESE ONLY
# =========================
PDF_PATH = "MH.pdf" # path to your PDF
PAGE_START = 1 # 1-based inclusive start page (e.g., 1)
PAGE_END = 12 # 1-based inclusive end page (e.g., 5)
OUT_PATH = "output.json" # single JSON file containing all parsed rows
FIRST_PRICE_IS_LTE21 = True # True => first price line is <=21; False => first price is >21
PRINT_PAGE_TEXT = False # set True to print raw text for each page
# =========================
# --- patterns ---
code_line_re = re.compile(r"^\s*(D\d{4})\s*$")
# a price token is either '$123', '$1,234.50', '123', '123.45', or 'NC'
price_line_re = re.compile(r"^\s*(?:\$\s*)?(\d{1,3}(?:,\d{3})*(?:\.\d{2})?|\d+(?:\.\d{2})?|NC)\s*$", re.IGNORECASE)
# lines that definitely start a notes block to ignore once prices are done
note_starters_re = re.compile(r"^(Teeth\b|One of\b|--|—||Age limitation:|CR\b)", re.IGNORECASE)
def normalize_ws(s: str) -> str:
    """
    Collapse whitespace in `s` and trim surrounding punctuation.

    Non-breaking spaces become regular spaces, runs of whitespace (including
    line breaks) collapse to a single space, and leading/trailing spaces,
    commas, periods, semicolons, colons, hyphens and bullet characters are
    stripped.
    """
    s = s.replace("\u00a0", " ")
    s = re.sub(r"[ \t]+", " ", s)
    s = re.sub(r"\s*\n\s*", " ", s)
    s = re.sub(r"\s{2,}", " ", s)
    return s.strip(" ,.;:-•·\n\t")
def clean_money(token: str) -> str:
    """
    Normalize a price token to a plain numeric string, or "NC".

    Surrounding whitespace, a leading "$" and thousands separators are
    removed; "NC" is recognized case-insensitively and returned upper-cased.
    """
    # Trim first: otherwise a padded token such as " $110" keeps its "$"
    # (lstrip("$") sees the space) and " nc " is not recognized as NC.
    token = token.strip()
    if token.upper() == "NC":
        return "NC"
    return token.lstrip("$").replace(",", "").strip()
def get_page_lines(pdf_path: str, page_start_1b: int, page_end_1b: int) -> List[str]:
    """
    Return the text lines of pages page_start_1b..page_end_1b (1-based,
    inclusive), in page order.

    The document is always closed, including when validation fails.

    Raises:
        ValueError: if either bound is < 1, the start is after the end, or
            the range extends past the last page of the document.
    """
    if page_start_1b <= 0 or page_end_1b <= 0:
        raise ValueError("PAGE_START and PAGE_END must be >= 1 (1-based).")
    if page_start_1b > page_end_1b:
        raise ValueError("PAGE_START cannot be greater than PAGE_END.")

    doc = fitz.open(pdf_path)
    try:
        last_idx_0b = len(doc) - 1
        # convert to 0-based inclusive range
        start_0b = page_start_1b - 1
        end_0b = page_end_1b - 1
        if start_0b < 0 or end_0b > last_idx_0b:
            raise ValueError(f"Page range out of bounds. Valid 1-based range is 1..{last_idx_0b + 1}.")

        lines: List[str] = []
        for p in range(start_0b, end_0b + 1):
            text = doc.load_page(p).get_text("text") or ""
            if PRINT_PAGE_TEXT:
                print(f"\n--- RAW PAGE {p} (0-based; shown as {p+1} 1-based) ---\n{text}")
            lines.extend(text.splitlines())
        return lines
    finally:
        doc.close()
def extract_records(lines: List[str]) -> List[Dict[str, str]]:
    """
    Parse procedure records out of raw page lines.

    A record starts at a line that is exactly a code (matched by
    code_line_re), followed by description lines and then two price lines.
    Records for which two prices cannot be found are dropped.
    FIRST_PRICE_IS_LTE21 decides which of the two prices is the <=21 price.

    Returns:
        One dict per record with the keys "Procedure Code", "Description",
        "PriceLTEQ21" and "PriceGT21".
    """
    out: List[Dict[str, str]] = []
    i = 0
    n = len(lines)

    while i < n:
        line = lines[i].strip()

        # seek a code line
        mcode = code_line_re.match(line)
        if not mcode:
            i += 1
            continue

        code = mcode.group(1)
        i += 1

        # gather description lines until we encounter price lines
        desc_lines: List[str] = []

        # skip blank lines before description
        while i < n and not lines[i].strip():
            i += 1

        # collect description lines (typically a few) until the first price
        # token; stop also if we accidentally hit another code (defensive)
        j = i
        while j < n:
            s = lines[j].strip()
            if not s:
                # blank line ends the description
                break
            if code_line_re.match(s):
                # next code: description ended abruptly (malformed)
                break
            if price_line_re.match(s):
                # reached price section
                break
            if note_starters_re.match(s):
                # a note before any price: description ends, prices may be missing
                break
            desc_lines.append(s)
            j += 1

        # advance i to where we left off
        i = j

        description = normalize_ws(" ".join(desc_lines))

        # collect up to two price tokens
        prices: List[str] = []
        while i < n and len(prices) < 2:
            s = lines[i].strip()
            if not s:
                i += 1
                continue
            if code_line_re.match(s):
                # new record: stop; this one never got its prices (malformed)
                break
            mprice = price_line_re.match(s)
            if mprice:
                prices.append(clean_money(mprice.group(1)))
                i += 1
                continue
            # a note/flags block: skip forward until a blank line or the next code
            if note_starters_re.match(s) or s in {"Y", "NC"}:
                i += 1
                while i < n:
                    t = lines[i].strip()
                    if not t or code_line_re.match(t):
                        break
                    i += 1
                continue
            # unrecognized line: if we already captured some prices, stop; else skip it
            if prices:
                break
            i += 1

        if len(prices) < 2:
            # couldn't find 2 prices reliably; skip this record
            continue

        if FIRST_PRICE_IS_LTE21:
            price_lte21, price_gt21 = prices[0], prices[1]
        else:
            price_lte21, price_gt21 = prices[1], prices[0]

        out.append(
            {
                "Procedure Code": code,
                "Description": description,
                "PriceLTEQ21": price_lte21,
                "PriceGT21": price_gt21,
            }
        )

        # after prices, skip forward until next code or blank block end
        while i < n:
            s = lines[i].strip()
            if not s:
                i += 1
                break
            if code_line_re.match(s):
                break
            i += 1

    return out
def extract_pdf_range_to_json(pdf_path: str, page_start_1b: int, page_end_1b: int, out_path: str) -> List[Dict[str, str]]:
    """
    Parse pages page_start_1b..page_end_1b (1-based, inclusive) of the PDF and
    write the records to `out_path` as UTF-8 JSON (indent 2, non-ASCII kept).

    Returns:
        The parsed records that were written.
    """
    lines = get_page_lines(pdf_path, page_start_1b, page_end_1b)
    data = extract_records(lines)
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    return data
if __name__ == "__main__":
    # Script entry point: parse the configured page range, then report and echo the result.
    data = extract_pdf_range_to_json(PDF_PATH, PAGE_START, PAGE_END, OUT_PATH)
    print(f"Wrote {len(data)} rows to {OUT_PATH}")
    print(json.dumps(data, ensure_ascii=False, indent=2))

View File

@@ -0,0 +1,192 @@
[
{
"Procedure Code": "D0120",
"Description": "perio exam",
"Price": "105"
},
{
"Procedure Code": "D0140",
"Description": "limited exam",
"Price": "90"
},
{
"Procedure Code": "D0150",
"Description": "comprehensive exam",
"Price": "120"
},
{
"Procedure Code": "D0210",
"Description": "Fmx.",
"Price": "120"
},
{
"Procedure Code": "D0220",
"Description": "first PA.",
"Price": "60"
},
{
"Procedure Code": "D0230",
"Description": "2nd PA.",
"Price": "50"
},
{
"Procedure Code": "D0272",
"Description": "2 BW",
"Price": "80"
},
{
"Procedure Code": "D0274",
"Description": "4BW",
"Price": "160"
},
{
"Procedure Code": "D0330",
"Description": "pano",
"Price": "150"
},
{
"Procedure Code": "D0364",
"Description": "Less than one jaw",
"Price": "350"
},
{
"Procedure Code": "D0365",
"Description": "Mand",
"Price": "350"
},
{
"Procedure Code": "D0366",
"Description": "Max",
"Price": "350"
},
{
"Procedure Code": "D0367",
"Description": "",
"Price": "400"
},
{
"Procedure Code": "D0368",
"Description": "include TMJ",
"Price": "375"
},
{
"Procedure Code": "D0380",
"Description": "Less than one jaw",
"Price": "300"
},
{
"Procedure Code": "D0381",
"Description": "Mand",
"Price": "300"
},
{
"Procedure Code": "D0382",
"Description": "Max",
"Price": "300"
},
{
"Procedure Code": "D0383",
"Description": "",
"Price": "350"
},
{
"Procedure Code": "D1110",
"Description": "adult prophy",
"Price": "150"
},
{
"Procedure Code": "D1120",
"Description": "child prophy",
"Price": "120"
},
{
"Procedure Code": "D1208",
"Description": "FL",
"Price": "90"
},
{
"Procedure Code": "D1351",
"Description": "sealant",
"Price": "80"
},
{
"Procedure Code": "D1999",
"Description": "",
"Price": "50"
},
{
"Procedure Code": "D2140",
"Description": "amalgam, one surface",
"Price": "150"
},
{
"Procedure Code": "D2150",
"Description": "amalgam, two surface",
"Price": "200"
},
{
"Procedure Code": "D2955",
"Description": "post renoval",
"Price": "350"
},
{
"Procedure Code": "D4910",
"Description": "perio maintains",
"Price": "250"
},
{
"Procedure Code": "D5510",
"Description": "Repair broken complete denture base (QUAD)",
"Price": "400"
},
{
"Procedure Code": "D6056",
"Description": "pre fab abut",
"Price": "750"
},
{
"Procedure Code": "D6057",
"Description": "custom abut",
"Price": "800"
},
{
"Procedure Code": "D6058",
"Description": "porcelain, implant crown, ceramic crown",
"Price": "1400"
},
{
"Procedure Code": "D6059",
"Description": "",
"Price": "1400"
},
{
"Procedure Code": "D6100",
"Description": "",
"Price": "320"
},
{
"Procedure Code": "D6110",
"Description": "implant",
"Price": "1600"
},
{
"Procedure Code": "D6242",
"Description": "noble metal. For united",
"Price": "1400"
},
{
"Procedure Code": "D6245",
"Description": "porcelain, not for united",
"Price": "1400"
},
{
"Procedure Code": "D7910",
"Description": "suture, small wound up to 5 mm",
"Price": "400"
},
{
"Procedure Code": "D7950",
"Description": "max",
"Price": "800"
}
]

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,344 @@
[
{
"Procedure Code": "D1999",
"Description": "",
"Price": "50"
},
{
"Procedure Code": "D0120",
"Description": "perio exam",
"Price": "105"
},
{
"Procedure Code": "D0140",
"Description": "limited exam",
"Price": "90"
},
{
"Procedure Code": "D0150",
"Description": "comprehensive exam",
"Price": "120"
},
{
"Procedure Code": "D0210",
"Description": "Fmx.",
"Price": "120"
},
{
"Procedure Code": "D0220",
"Description": "first PA.",
"Price": "60"
},
{
"Procedure Code": "D0230",
"Description": "2nd PA.",
"Price": "50"
},
{
"Procedure Code": "D0330",
"Description": "pano",
"Price": "150"
},
{
"Procedure Code": "D0272",
"Description": "2 BW",
"Price": "80"
},
{
"Procedure Code": "D0274",
"Description": "4BW",
"Price": "160"
},
{
"Procedure Code": "D1110",
"Description": "adult prophy",
"Price": "150"
},
{
"Procedure Code": "D1120",
"Description": "child prophy",
"Price": "120"
},
{
"Procedure Code": "D1351",
"Description": "sealant",
"Price": "80"
},
{
"Procedure Code": "D4341",
"Description": "srp",
"Price": "250"
},
{
"Procedure Code": "D4910",
"Description": "perio maintenance",
"Price": "250"
},
{
"Procedure Code": "D1208",
"Description": "FL",
"Price": "90"
},
{
"Procedure Code": "D2330",
"Description": "front composite. 1 s.",
"Price": "180"
},
{
"Procedure Code": "D2331",
"Description": "2s",
"Price": "220"
},
{
"Procedure Code": "D2332",
"Description": "3s",
"Price": "280"
},
{
"Procedure Code": "D2335",
"Description": "4s or more",
"Price": "350"
},
{
"Procedure Code": "D2391",
"Description": "back. 1s",
"Price": "200"
},
{
"Procedure Code": "D2392",
"Description": "2s",
"Price": "250"
},
{
"Procedure Code": "D2393",
"Description": "3s",
"Price": "280"
},
{
"Procedure Code": "D2394",
"Description": "4s",
"Price": "320"
},
{
"Procedure Code": "D2140",
"Description": "amalgam, one surface",
"Price": "150"
},
{
"Procedure Code": "D2150",
"Description": "amalgam, two surface",
"Price": "200"
},
{
"Procedure Code": "D2750",
"Description": "high noble",
"Price": "1300"
},
{
"Procedure Code": "D2751",
"Description": "base metal",
"Price": "1200"
},
{
"Procedure Code": "D2740",
"Description": "crown porcelain",
"Price": "1300"
},
{
"Procedure Code": "D2954",
"Description": "p/c",
"Price": "450"
},
{
"Procedure Code": "D7910",
"Description": "suture, small wound up to 5 mm",
"Price": "400"
},
{
"Procedure Code": "D5110",
"Description": "FU",
"Price": "1200",
"Full Price": "1700"
},
{
"Procedure Code": "D5120",
"Description": "FL",
"Price": "1700",
"Full Price": "1700"
},
{
"Procedure Code": "D5211",
"Description": "pu",
"Price": "1300"
},
{
"Procedure Code": "D5212",
"Description": "pl",
"Price": "1300"
},
{
"Procedure Code": "D5213",
"Description": "cast pu.",
"Price": "1700"
},
{
"Procedure Code": "D5214",
"Description": "cast pl",
"Price": "1700"
},
{
"Procedure Code": "D5510",
"Description": "Repair broken complete denture base (QUAD)",
"Price": "400"
},
{
"Procedure Code": "D5520",
"Description": "Replace missing or broken teeth - complete denture (each tooth) (TOOTH)",
"Price": "200"
},
{
"Procedure Code": "D5750",
"Description": "lab reline",
"Price": "600"
},
{
"Procedure Code": "D5730",
"Description": "chairside reline",
"Price": "500"
},
{
"Procedure Code": "D2920",
"Description": "re cement crown",
"Price": "120"
},
{
"Procedure Code": "D2950",
"Description": "core buildup",
"Price": "350"
},
{
"Procedure Code": "D2955",
"Description": "post removal",
"Price": "350"
},
{
"Procedure Code": "D6100",
"Description": "",
"Price": "320"
},
{
"Procedure Code": "D6110",
"Description": "implant",
"Price": "1600"
},
{
"Procedure Code": "D6056",
"Description": "pre fab abut",
"Price": "750"
},
{
"Procedure Code": "D6057",
"Description": "custom abut",
"Price": "800"
},
{
"Procedure Code": "D6058",
"Description": "porcelain, implant crown, ceramic crown",
"Price": "1400"
},
{
"Procedure Code": "D6059",
"Description": "",
"Price": "1400"
},
{
"Procedure Code": "D6242",
"Description": "noble metal. For united",
"Price": "1400"
},
{
"Procedure Code": "D6245",
"Description": "porcelain, not for united",
"Price": "1400"
},
{
"Procedure Code": "D0367",
"Description": "",
"Price": "400"
},
{
"Procedure Code": "D0364",
"Description": "Less than one jaw",
"Price": "350"
},
{
"Procedure Code": "D0365",
"Description": "Mand",
"Price": "350"
},
{
"Procedure Code": "D0366",
"Description": "Max",
"Price": "350"
},
{
"Procedure Code": "D0368",
"Description": "include TMJ",
"Price": "375"
},
{
"Procedure Code": "D0383",
"Description": "",
"Price": "350"
},
{
"Procedure Code": "D0380",
"Description": "Less than one jaw",
"Price": "300"
},
{
"Procedure Code": "D0381",
"Description": "Mand",
"Price": "300"
},
{
"Procedure Code": "D0382",
"Description": "Max",
"Price": "300"
},
{
"Procedure Code": "D7950",
"Description": "max",
"Price": "800"
},
{
"Procedure Code": "D7140",
"Description": "simple ext",
"Price": "150"
},
{
"Procedure Code": "D7210",
"Description": "surgical ext",
"Price": "280"
},
{
"Procedure Code": "D7220",
"Description": "soft impacted",
"Price": "380"
},
{
"Procedure Code": "D7230",
"Description": "partial bony",
"Price": "450"
},
{
"Procedure Code": "D7240",
"Description": "fully bony",
"Price": "550"
},
{
"Procedure Code": "D3320",
"Description": "pre M RCT",
"Price": "1050"
}
]