feat(ProcedureCodes updated)

2025-10-01 23:01:27 +05:30
parent 82e3651ba4
commit 0d4458b163
10 changed files with 2639 additions and 229 deletions
--- a/apps/ProcedureCodeFromMhPdf/compareJson_matchingPrice.py
+++ b/apps/ProcedureCodeFromMhPdf/compareJson_matchingPrice.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python3
+"""
+Compare prices between two JSON files (file1 vs file2) — CONFIG-DRIVEN version.
+
+Behavior:
+- Loads two JSON arrays of records (file1 and file2).
+- Indexes by procedure code (tries common keys like "Procedure Code", "Code", etc).
+- Normalizes money tokens: removes $ and commas, treats "NC" as literal.
+- Compares all three price fields:
+    - Price
+    - PriceLTEQ21
+    - PriceGT21
+  Matching rules:
+    - If both records have the same named field, compare them.
+    - If file1 has only a single "Price" and file2 has PriceLTEQ21 / PriceGT21,
+      the script will compare file1.Price to BOTH PriceLTEQ21 and PriceGT21 (and
+      report mismatch if file1.Price differs from either).
+    - "NC" only equals "NC".
+    - Numeric tokens compared numerically within tolerance (default 0.005).
+- Produces output JSON (configured below) listing:
+    - mismatches: detailed entries for codes that differ
+    - only_in_file1: codes found only in file1
+    - only_in_file2: codes found only in file2
+    - summary
+
+Edit the CONFIG block below, then run the script.
+"""
+
+import json
+import re
+from typing import List, Dict, Any, Optional
+
+# =========================
+# CONFIG — EDIT THESE ONLY
+# =========================
+FILE1_PATH = "procedureCodes_v2.json"         # path to file 1 (your base/reference file)
+FILE2_PATH = "output.json"         # path to file 2 (the file to compare)
+OUT_PATH = "price_diffs.json"     # output JSON writing mismatches
+TOLERANCE = 0.005                 # numeric tolerance for floats
+CODE_KEY_CANDIDATES = ("Procedure Code", "Code", "procedure_code", "procedure code")
+# If True: when file1 has single "Price" and file2 has both LTEQ/GT values,
+# compare file1.Price against both fields and flag mismatch if either differs.
+COMPARE_SINGLE_PRICE_AGAINST_BOTH = True
+# =========================
+
+_money_re = re.compile(r"^\s*(NC|\$?\s*[\d,]+(?:\.\d{1,2})?)\s*$", re.IGNORECASE)
+
+
+def normalize_money_token(token: Optional[str]) -> Optional[str]:
+    """Normalize money token to canonical string or 'NC'. Return None if missing/empty."""
+    if token is None:
+        return None
+    t = str(token).strip()
+    if not t:
+        return None
+    m = _money_re.match(t)
+    if not m:
+        # unknown format — return trimmed token so mismatch is visible
+        return t
+    val = m.group(1)
+    if val.upper() == "NC":
+        return "NC"
+    val = val.replace("$", "").replace(",", "").strip()
+    # Remove trailing zeros from decimals, but preserve integer form
+    if "." in val:
+        val = val.rstrip("0").rstrip(".")
+    return val
+
+
+def numeric_compare(a: Optional[str], b: Optional[str], tol: float = TOLERANCE) -> bool:
+    """Compare normalized tokens. NC compares only equal to NC. Otherwise numeric compare."""
+    if a is None or b is None:
+        return False
+    if a == b:
+        return True
+    if a.upper() == "NC" or b.upper() == "NC":
+        return a.upper() == b.upper()
+    try:
+        return abs(float(a) - float(b)) <= tol
+    except Exception:
+        # fallback to exact match if non-numeric
+        return a == b
+
+
+def load_json(path: str) -> List[Dict[str, Any]]:
+    with open(path, "r", encoding="utf-8") as f:
+        data = json.load(f)
+    if not isinstance(data, list):
+        raise ValueError(f"Expected JSON array in {path}")
+    return data
+
+
+def build_index(records: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
+    """Index records by procedure code. First match wins for duplicates."""
+    idx: Dict[str, Dict[str, Any]] = {}
+    for rec in records:
+        code = None
+        for k in CODE_KEY_CANDIDATES:
+            if k in rec and rec[k]:
+                code = str(rec[k]).strip()
+                break
+        if not code:
+            # try to find any field with a Dxxxx-like value
+            for v in rec.values():
+                if isinstance(v, str) and re.match(r"^\s*D\d{4}\s*$", v):
+                    code = v.strip()
+                    break
+        if not code:
+            continue
+        if code in idx:
+            # duplicate: keep first occurrence
+            continue
+        idx[code] = rec
+    return idx
+
+
+def extract_price_fields(rec: Dict[str, Any]) -> Dict[str, Optional[str]]:
+    """
+    Return dict with normalized values for 'Price', 'PriceLTEQ21', and 'PriceGT21'.
+    Keys always present with None when missing.
+    """
+    return {
+        "Price": normalize_money_token(rec.get("Price")),
+        "PriceLTEQ21": normalize_money_token(rec.get("PriceLTEQ21")),
+        "PriceGT21": normalize_money_token(rec.get("PriceGT21")),
+    }
+
+
+def compare_code_records(code: str, rec1: Dict[str, Any], rec2: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+    """
+    Compare price fields for a single code. Return mismatch dict if any mismatch present, else None.
+    Mismatch dict includes file1/file2 price fields and per-field mismatch details.
+    """
+    p1 = extract_price_fields(rec1)
+    p2 = extract_price_fields(rec2)
+
+    mismatches = []
+
+    # 1) Compare same-named fields if both present
+    for key in ("Price", "PriceLTEQ21", "PriceGT21"):
+        a = p1.get(key)
+        b = p2.get(key)
+        if a is None and b is None:
+            continue
+        if a is None or b is None:
+            # present in one but not the other: count as mismatch
+            mismatches.append({"field": key, "file1": a, "file2": b, "reason": "missing_in_one"})
+            continue
+        if not numeric_compare(a, b):
+            mismatches.append({"field": key, "file1": a, "file2": b, "reason": "value_mismatch"})
+
+    # 2) Special-case: if file1 has only single Price, and file2 has LTEQ/GT present,
+    #    optionally compare file1.Price against each of them.
+    if COMPARE_SINGLE_PRICE_AGAINST_BOTH:
+        # Only apply if file1.Price exists and file1 does NOT have LTEQ/GT (both None),
+        # but file2 has at least one of LTEQ/GT.
+        file1_has_price = p1.get("Price") is not None
+        file1_has_any_special = (p1.get("PriceLTEQ21") is not None) or (p1.get("PriceGT21") is not None)
+        file2_has_any_special = (p2.get("PriceLTEQ21") is not None) or (p2.get("PriceGT21") is not None)
+        if file1_has_price and (not file1_has_any_special) and file2_has_any_special:
+            # compare file1.Price to each present file2 special price
+            left = p1.get("Price")
+            for special_key in ("PriceLTEQ21", "PriceGT21"):
+                right = p2.get(special_key)
+                if right is None:
+                    continue
+                # If already recorded a same-named mismatch for this special_key above,
+                # that mismatch covered the case where file1 was missing that named field.
+                # But since file1 lacked that special field, we still want to compare single Price vs special.
+                if not numeric_compare(left, right):
+                    mismatches.append({
+                        "field": f"Price_vs_{special_key}",
+                        "file1": left,
+                        "file2": right,
+                        "reason": "single_price_vs_special_mismatch"
+                    })
+
+    if mismatches:
+        return {
+            "Procedure Code": code,
+            "Description_file1": rec1.get("Description"),
+            "Description_file2": rec2.get("Description"),
+            "file1_prices": p1,
+            "file2_prices": p2,
+            "mismatches": mismatches
+        }
+    return None
+
+
+def main():
+    # load inputs
+    data1 = load_json(FILE1_PATH)
+    data2 = load_json(FILE2_PATH)
+
+    idx1 = build_index(data1)
+    idx2 = build_index(data2)
+
+    codes_all = sorted(set(list(idx1.keys()) + list(idx2.keys())))
+
+    mismatched: List[Dict[str, Any]] = []
+    only_in_file1: List[str] = []
+    only_in_file2: List[str] = []
+
+    for code in codes_all:
+        rec1 = idx1.get(code)
+        rec2 = idx2.get(code)
+        if rec1 is None:
+            only_in_file2.append(code)
+            continue
+        if rec2 is None:
+            only_in_file1.append(code)
+            continue
+        diff = compare_code_records(code, rec1, rec2)
+        if diff:
+            mismatched.append(diff)
+
+    out = {
+        "summary": {
+            "total_codes_found": len(codes_all),
+            "only_in_file1_count": len(only_in_file1),
+            "only_in_file2_count": len(only_in_file2),
+            "mismatched_count": len(mismatched),
+        },
+        "only_in_file1": only_in_file1,
+        "only_in_file2": only_in_file2,
+        "mismatches": mismatched
+    }
+
+    with open(OUT_PATH, "w", encoding="utf-8") as f:
+        json.dump(out, f, ensure_ascii=False, indent=2)
+
+    # brief console summary
+    print(f"Compared {len(codes_all)} procedure codes.")
+    print(f"Only in {FILE1_PATH}: {len(only_in_file1)} codes.")
+    print(f"Only in {FILE2_PATH}: {len(only_in_file2)} codes.")
+    print(f"Mismatched prices: {len(mismatched)} codes.")
+    print(f"Wrote detailed diffs to {OUT_PATH}")
+
+
+if __name__ == "__main__":
+    main()