Claim form - combos buttons feature 1
This commit is contained in:
96
apps/pdfProcedureCode/compareJson.py
Normal file
96
apps/pdfProcedureCode/compareJson.py
Normal file
@@ -0,0 +1,96 @@
|
||||
#!/usr/bin/env python3
"""
Compare a main dental JSON file with one or more other JSON files and
write out all records whose 'Procedure Code' is NOT present in the main file.

- Matching key: 'Procedure Code', compared case-insensitively with all
  whitespace removed.
- Keeps the full record from the other files (including extra fields like 'Full Price').
- Deduplicates by Procedure Code across the collected "missing" results
  (the first record seen for a code wins).
- Results are written to OUT_PATH as JSON and also echoed to stdout.

CONFIG: set MAIN_PATH, OTHER_PATHS, OUT_PATH below.
"""
|
||||
|
||||
import json
import unicodedata
from pathlib import Path
from typing import List, Dict, Any
|
||||
|
||||
# =========================
# CONFIG — EDIT THESE ONLY
# =========================
# Main JSON file (the one with PriceLTEQ21/PriceGT21); its 'Procedure Code'
# values form the reference set.
MAIN_PATH = "procedureCodesMain.json"

# One or more other JSON files to compare against the main file.
OTHER_PATHS = [
    "procedureCodesOld.json",
    # "other2.json",
]

# Where to write the records whose code is missing from the main file.
OUT_PATH = "not_in_main.json"
# =========================
|
||||
|
||||
|
||||
def _load_json_any(path: str) -> List[Dict[str, Any]]:
    """Load a JSON file and return its records as a list of dicts.

    Accepted top-level shapes:
      - a list of objects: non-dict items are silently dropped
      - a single object: wrapped into a one-element list

    Args:
        path: Path of the UTF-8 encoded JSON file to read.

    Returns:
        The dict records found in the file, in file order.

    Raises:
        ValueError: If the top-level JSON value is neither a list nor an
            object (``json.JSONDecodeError`` for malformed JSON is also a
            ``ValueError`` subclass).
        OSError: If the file cannot be opened.
    """
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    if isinstance(data, dict):
        return [data]
    if isinstance(data, list):
        # Filter out non-dict items defensively so callers can rely on .get().
        return [item for item in data if isinstance(item, dict)]
    raise ValueError(f"Unsupported JSON top-level type in {path}: {type(data)}")
|
||||
|
||||
|
||||
def _norm_code(record: Dict[str, Any]) -> str:
    """Return the normalized 'Procedure Code' of *record* for matching.

    The value is stringified, upper-cased, and stripped of every whitespace
    character and every Unicode format character (category ``Cf``: zero-width
    space/joiners, BOM, soft hyphen, ...), which PDF extraction tends to leave
    inside codes.

    Args:
        record: A record dict that may contain a 'Procedure Code' entry.

    Returns:
        The normalized code, or ``""`` when the key is absent, ``None``
        (JSON ``null``), or contains nothing but ignorable characters.
    """
    raw = record.get("Procedure Code")
    if raw is None:
        # str(None) would yield "None" and be matched as a bogus code "NONE".
        return ""
    # str.isspace() is False for zero-width characters, so they are filtered
    # separately by Unicode category.
    return "".join(
        ch
        for ch in str(raw).upper()
        if not ch.isspace() and unicodedata.category(ch) != "Cf"
    )
|
||||
|
||||
|
||||
def collect_main_codes(main_path: str) -> set:
    """Return the set of normalized procedure codes found in the main file.

    Args:
        main_path: Path of the main JSON file.

    Returns:
        A set of non-empty normalized codes (see ``_norm_code``); records
        without a usable code are ignored.
    """
    # Normalize each record exactly once, then drop the "no code" marker.
    codes = {_norm_code(rec) for rec in _load_json_any(main_path)}
    codes.discard("")
    return codes
|
||||
|
||||
|
||||
def collect_missing_records(other_paths: List[str], main_codes: set) -> List[Dict[str, Any]]:
    """Collect records from the other files whose code is not in the main file.

    Args:
        other_paths: Paths of the JSON files to scan, in priority order.
        main_codes: Normalized codes present in the main file.

    Returns:
        The full original records (extra fields included), one per missing
        code, sorted by normalized code. When several records share a code,
        the first one encountered is kept.
    """
    missing: Dict[str, Dict[str, Any]] = {}  # normalized code -> first record seen
    for path in other_paths:
        for rec in _load_json_any(path):
            code = _norm_code(rec)
            if not code or code in main_codes:
                continue
            # setdefault keeps the earliest record for a duplicated code.
            missing.setdefault(code, rec)

    # Return in a stable order, sorted by normalized code.
    return [missing[code] for code in sorted(missing)]
|
||||
|
||||
|
||||
def main() -> None:
    """Run the comparison configured by MAIN_PATH, OTHER_PATHS and OUT_PATH.

    Writes the records missing from the main file to OUT_PATH as JSON, prints
    a short summary, and echoes the same JSON to stdout.

    Raises:
        FileNotFoundError: If the main file or any of the other files does
            not exist.
    """
    # Validate up front so a missing input is reported before any work is done.
    if not Path(MAIN_PATH).exists():
        raise FileNotFoundError(f"Main file not found: {MAIN_PATH}")
    for p in OTHER_PATHS:
        if not Path(p).exists():
            raise FileNotFoundError(f"Other file not found: {p}")

    main_codes = collect_main_codes(MAIN_PATH)
    missing_records = collect_missing_records(OTHER_PATHS, main_codes)

    # Serialize once; the same text goes to the output file and to stdout.
    payload = json.dumps(missing_records, ensure_ascii=False, indent=2)
    with open(OUT_PATH, "w", encoding="utf-8") as f:
        f.write(payload)

    print(f"Main codes: {len(main_codes)}")
    print(f"Missing from main: {len(missing_records)}")
    print(f"Wrote results to {OUT_PATH}")
    # Also echo to stdout
    print(payload)
|
||||
|
||||
|
||||
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user