feat(ProcedureCodes updated)
This commit is contained in:
241
apps/ProcedureCodeFromMhPdf/compareJson_matchingPrice.py
Normal file
241
apps/ProcedureCodeFromMhPdf/compareJson_matchingPrice.py
Normal file
@@ -0,0 +1,241 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Compare prices between two JSON files (file1 vs file2) — CONFIG-DRIVEN version.
|
||||
|
||||
Behavior:
|
||||
- Loads two JSON arrays of records (file1 and file2).
|
||||
- Indexes by procedure code (tries common keys like "Procedure Code", "Code", etc).
|
||||
- Normalizes money tokens: removes $ and commas, treats "NC" as literal.
|
||||
- Compares all three price fields:
|
||||
- Price
|
||||
- PriceLTEQ21
|
||||
- PriceGT21
|
||||
Matching rules:
|
||||
- If both records have the same named field, compare them.
|
||||
- If file1 has only a single "Price" and file2 has PriceLTEQ21 / PriceGT21,
|
||||
the script will compare file1.Price to BOTH PriceLTEQ21 and PriceGT21 (and
|
||||
report mismatch if file1.Price differs from either).
|
||||
- "NC" only equals "NC".
|
||||
- Numeric tokens compared numerically within tolerance (default 0.005).
|
||||
- Produces output JSON (configured below) listing:
|
||||
- mismatches: detailed entries for codes that differ
|
||||
- only_in_file1: codes found only in file1
|
||||
- only_in_file2: codes found only in file2
|
||||
- summary
|
||||
|
||||
Edit the CONFIG block below, then run the script.
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
# =========================
|
||||
# CONFIG — EDIT THESE ONLY
|
||||
# =========================
|
||||
FILE1_PATH = "procedureCodes_v2.json" # path to file 1 (your base/reference file)
|
||||
FILE2_PATH = "output.json" # path to file 2 (the file to compare)
|
||||
OUT_PATH = "price_diffs.json" # output JSON writing mismatches
|
||||
TOLERANCE = 0.005 # numeric tolerance for floats
|
||||
CODE_KEY_CANDIDATES = ("Procedure Code", "Code", "procedure_code", "procedure code")
|
||||
# If True: when file1 has single "Price" and file2 has both LTEQ/GT values,
|
||||
# compare file1.Price against both fields and flag mismatch if either differs.
|
||||
COMPARE_SINGLE_PRICE_AGAINST_BOTH = True
|
||||
# =========================
|
||||
|
||||
_money_re = re.compile(r"^\s*(NC|\$?\s*[\d,]+(?:\.\d{1,2})?)\s*$", re.IGNORECASE)
|
||||
|
||||
|
||||
def normalize_money_token(token: Optional[str]) -> Optional[str]:
|
||||
"""Normalize money token to canonical string or 'NC'. Return None if missing/empty."""
|
||||
if token is None:
|
||||
return None
|
||||
t = str(token).strip()
|
||||
if not t:
|
||||
return None
|
||||
m = _money_re.match(t)
|
||||
if not m:
|
||||
# unknown format — return trimmed token so mismatch is visible
|
||||
return t
|
||||
val = m.group(1)
|
||||
if val.upper() == "NC":
|
||||
return "NC"
|
||||
val = val.replace("$", "").replace(",", "").strip()
|
||||
# Remove trailing zeros from decimals, but preserve integer form
|
||||
if "." in val:
|
||||
val = val.rstrip("0").rstrip(".")
|
||||
return val
|
||||
|
||||
|
||||
def numeric_compare(a: Optional[str], b: Optional[str], tol: float = TOLERANCE) -> bool:
|
||||
"""Compare normalized tokens. NC compares only equal to NC. Otherwise numeric compare."""
|
||||
if a is None or b is None:
|
||||
return False
|
||||
if a == b:
|
||||
return True
|
||||
if a.upper() == "NC" or b.upper() == "NC":
|
||||
return a.upper() == b.upper()
|
||||
try:
|
||||
return abs(float(a) - float(b)) <= tol
|
||||
except Exception:
|
||||
# fallback to exact match if non-numeric
|
||||
return a == b
|
||||
|
||||
|
||||
def load_json(path: str) -> List[Dict[str, Any]]:
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
if not isinstance(data, list):
|
||||
raise ValueError(f"Expected JSON array in {path}")
|
||||
return data
|
||||
|
||||
|
||||
def build_index(records: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
|
||||
"""Index records by procedure code. First match wins for duplicates."""
|
||||
idx: Dict[str, Dict[str, Any]] = {}
|
||||
for rec in records:
|
||||
code = None
|
||||
for k in CODE_KEY_CANDIDATES:
|
||||
if k in rec and rec[k]:
|
||||
code = str(rec[k]).strip()
|
||||
break
|
||||
if not code:
|
||||
# try to find any field with a Dxxxx-like value
|
||||
for v in rec.values():
|
||||
if isinstance(v, str) and re.match(r"^\s*D\d{4}\s*$", v):
|
||||
code = v.strip()
|
||||
break
|
||||
if not code:
|
||||
continue
|
||||
if code in idx:
|
||||
# duplicate: keep first occurrence
|
||||
continue
|
||||
idx[code] = rec
|
||||
return idx
|
||||
|
||||
|
||||
def extract_price_fields(rec: Dict[str, Any]) -> Dict[str, Optional[str]]:
|
||||
"""
|
||||
Return dict with normalized values for 'Price', 'PriceLTEQ21', and 'PriceGT21'.
|
||||
Keys always present with None when missing.
|
||||
"""
|
||||
return {
|
||||
"Price": normalize_money_token(rec.get("Price")),
|
||||
"PriceLTEQ21": normalize_money_token(rec.get("PriceLTEQ21")),
|
||||
"PriceGT21": normalize_money_token(rec.get("PriceGT21")),
|
||||
}
|
||||
|
||||
|
||||
def compare_code_records(code: str, rec1: Dict[str, Any], rec2: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Compare price fields for a single code. Return mismatch dict if any mismatch present, else None.
|
||||
Mismatch dict includes file1/file2 price fields and per-field mismatch details.
|
||||
"""
|
||||
p1 = extract_price_fields(rec1)
|
||||
p2 = extract_price_fields(rec2)
|
||||
|
||||
mismatches = []
|
||||
|
||||
# 1) Compare same-named fields if both present
|
||||
for key in ("Price", "PriceLTEQ21", "PriceGT21"):
|
||||
a = p1.get(key)
|
||||
b = p2.get(key)
|
||||
if a is None and b is None:
|
||||
continue
|
||||
if a is None or b is None:
|
||||
# present in one but not the other: count as mismatch
|
||||
mismatches.append({"field": key, "file1": a, "file2": b, "reason": "missing_in_one"})
|
||||
continue
|
||||
if not numeric_compare(a, b):
|
||||
mismatches.append({"field": key, "file1": a, "file2": b, "reason": "value_mismatch"})
|
||||
|
||||
# 2) Special-case: if file1 has only single Price, and file2 has LTEQ/GT present,
|
||||
# optionally compare file1.Price against each of them.
|
||||
if COMPARE_SINGLE_PRICE_AGAINST_BOTH:
|
||||
# Only apply if file1.Price exists and file1 does NOT have LTEQ/GT (both None),
|
||||
# but file2 has at least one of LTEQ/GT.
|
||||
file1_has_price = p1.get("Price") is not None
|
||||
file1_has_any_special = (p1.get("PriceLTEQ21") is not None) or (p1.get("PriceGT21") is not None)
|
||||
file2_has_any_special = (p2.get("PriceLTEQ21") is not None) or (p2.get("PriceGT21") is not None)
|
||||
if file1_has_price and (not file1_has_any_special) and file2_has_any_special:
|
||||
# compare file1.Price to each present file2 special price
|
||||
left = p1.get("Price")
|
||||
for special_key in ("PriceLTEQ21", "PriceGT21"):
|
||||
right = p2.get(special_key)
|
||||
if right is None:
|
||||
continue
|
||||
# If already recorded a same-named mismatch for this special_key above,
|
||||
# that mismatch covered the case where file1 was missing that named field.
|
||||
# But since file1 lacked that special field, we still want to compare single Price vs special.
|
||||
if not numeric_compare(left, right):
|
||||
mismatches.append({
|
||||
"field": f"Price_vs_{special_key}",
|
||||
"file1": left,
|
||||
"file2": right,
|
||||
"reason": "single_price_vs_special_mismatch"
|
||||
})
|
||||
|
||||
if mismatches:
|
||||
return {
|
||||
"Procedure Code": code,
|
||||
"Description_file1": rec1.get("Description"),
|
||||
"Description_file2": rec2.get("Description"),
|
||||
"file1_prices": p1,
|
||||
"file2_prices": p2,
|
||||
"mismatches": mismatches
|
||||
}
|
||||
return None
|
||||
|
||||
|
||||
def main():
|
||||
# load inputs
|
||||
data1 = load_json(FILE1_PATH)
|
||||
data2 = load_json(FILE2_PATH)
|
||||
|
||||
idx1 = build_index(data1)
|
||||
idx2 = build_index(data2)
|
||||
|
||||
codes_all = sorted(set(list(idx1.keys()) + list(idx2.keys())))
|
||||
|
||||
mismatched: List[Dict[str, Any]] = []
|
||||
only_in_file1: List[str] = []
|
||||
only_in_file2: List[str] = []
|
||||
|
||||
for code in codes_all:
|
||||
rec1 = idx1.get(code)
|
||||
rec2 = idx2.get(code)
|
||||
if rec1 is None:
|
||||
only_in_file2.append(code)
|
||||
continue
|
||||
if rec2 is None:
|
||||
only_in_file1.append(code)
|
||||
continue
|
||||
diff = compare_code_records(code, rec1, rec2)
|
||||
if diff:
|
||||
mismatched.append(diff)
|
||||
|
||||
out = {
|
||||
"summary": {
|
||||
"total_codes_found": len(codes_all),
|
||||
"only_in_file1_count": len(only_in_file1),
|
||||
"only_in_file2_count": len(only_in_file2),
|
||||
"mismatched_count": len(mismatched),
|
||||
},
|
||||
"only_in_file1": only_in_file1,
|
||||
"only_in_file2": only_in_file2,
|
||||
"mismatches": mismatched
|
||||
}
|
||||
|
||||
with open(OUT_PATH, "w", encoding="utf-8") as f:
|
||||
json.dump(out, f, ensure_ascii=False, indent=2)
|
||||
|
||||
# brief console summary
|
||||
print(f"Compared {len(codes_all)} procedure codes.")
|
||||
print(f"Only in {FILE1_PATH}: {len(only_in_file1)} codes.")
|
||||
print(f"Only in {FILE2_PATH}: {len(only_in_file2)} codes.")
|
||||
print(f"Mismatched prices: {len(mismatched)} codes.")
|
||||
print(f"Wrote detailed diffs to {OUT_PATH}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user