feat: payment PDF extraction, import, and remittance tracking
- Add Upload Payment Documents section with Extract & Download (Excel) and Extract & Import (database) buttons
- PDF extractor (pdfplumber) parses MassHealth RA PDFs: two-pass strategy joins summary-page ICN/patient map with detail-page procedure data (CDT code, paid code, tooth, date, allowed amount)
- RA cover-page summary (Payee ID, RA #, Payment Amount, etc.) included as separate Excel sheet; numeric values written as numbers
- Backend PDF import route groups rows by Member #, finds/creates patient, creates Payment + ServiceLines with ICN per procedure
- Add icn, paidCode, allowedAmount fields to ServiceLine schema
- Payments table: status simplified to Paid in Full / Balance; adjustment auto-computed on mhPaidAmount/copayment change; Paid in Full and Revert buttons with confirmation dialogs
- Edit Payment modal: shows ICN, Paid Code, Allowed Amount per line
- PDF Import badge distinguishes from OCR imports in payments table

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
224
apps/PaymentOCRService/pdf_extractor.py
Normal file
224
apps/PaymentOCRService/pdf_extractor.py
Normal file
@@ -0,0 +1,224 @@
|
||||
import io
|
||||
import re
|
||||
import pdfplumber
|
||||
|
||||
# A procedure-code cell: the letter "D" followed by exactly four digits.
# Used by the detail pass to keep only real procedure rows of a table.
DCODE_RE = re.compile(r'^D\d{4}$')
|
||||
# A standalone run of exactly 12 digits, captured as group 1.
# Per the original author's note, MassHealth member IDs are always 12 digits.
MEMBER_RE = re.compile(r'\b(\d{12})\b')
|
||||
|
||||
# ── Page-1 header patterns ────────────────────────────────────────────────────
|
||||
# Field label -> compiled pattern. Every pattern captures the value that
# follows its "Label:" text as group 1. The adjustment amounts also accept
# parentheses in the captured value.
_H = {
    field: re.compile(pattern)
    for field, pattern in {
        "Payee ID": r'Payee ID:\s*(\S+)',
        "Business NPI": r'Business NPI:\s*(\S+)',
        "Run #": r'Run #:\s*(\S+)',
        "RA #": r'RA #:\s*(\S+)',
        "RA Date": r'RA Date:\s*(\S+)',
        "Claim Detail Amount": r'Claim Detail Amount:\s*([\$\d,\.]+)',
        "Claim Adjustment Amount": r'Claim Adjustment Amount:\s*([\$\(\)\d,\.]+)',
        "Misc. Adjustment Amount": r'Misc\. Adjustment Amount:\s*([\$\(\)\d,\.]+)',
        "Payment Amount": r'Payment Amount:\s*([\$\d,\.]+)',
    }.items()
}
|
||||
|
||||
|
||||
def extract_ra_header(pdf_bytes: bytes, filename: str) -> dict:
    """Collect the remittance-advice summary fields from the start of a PDF.

    Only the first two pages are searched. Every pattern in ``_H`` is tried
    against each page's text; the first non-empty match for a field wins and
    is never overwritten by a later page. Fields that never match are simply
    absent from the result.

    Args:
        pdf_bytes: Raw contents of the PDF file.
        filename: Stored unchanged under the ``"Source File"`` key.

    Returns:
        Dict holding ``"Source File"`` plus one string entry per matched field.
    """
    summary: dict[str, str] = {"Source File": filename}

    with pdfplumber.open(io.BytesIO(pdf_bytes)) as document:
        for sheet in document.pages[:2]:
            page_text = sheet.extract_text() or ""
            for label, regex in _H.items():
                if summary.get(label):
                    # Already captured on an earlier page.
                    continue
                found = regex.search(page_text)
                if found:
                    summary[label] = found.group(1).strip()

    return summary
|
||||
|
||||
|
||||
def _c(val) -> str:
    """Return a table cell as single-line, whitespace-trimmed text.

    Line breaks inside the cell become spaces. Any falsy cell (``None``,
    ``""``, ``0``) yields the empty string.
    """
    if not val:
        return ""
    single_line = str(val).replace("\n", " ")
    return single_line.strip()
|
||||
|
||||
|
||||
def _amt(val: str) -> str:
    """Strip currency formatting from an amount string.

    Removes every ``$`` and ``,`` and trims surrounding whitespace. Other
    characters (for example parentheses) are left in place. An empty or
    ``None`` value yields the empty string.
    """
    if not val:
        return ""
    cleaned = val
    for symbol in ("$", ","):
        cleaned = cleaned.replace(symbol, "")
    return cleaned.strip()
|
||||
|
||||
|
||||
def _col(headers: list[str], *keywords) -> int | None:
|
||||
for i, h in enumerate(headers):
|
||||
hl = h.lower()
|
||||
if all(k.lower() in hl for k in keywords):
|
||||
return i
|
||||
return None
|
||||
|
||||
|
||||
def _find_header_row(table: list[list]) -> tuple[int | None, list[str]]:
    """Locate the first row that looks like a summary or detail header.

    Rows with at most one non-empty cell are treated as merged context rows
    and skipped. Each remaining row is cleaned with ``_c`` and accepted when
    it satisfies either the summary or the detail header test.

    Args:
        table: Extracted table as a list of rows, each a list of cells.

    Returns:
        ``(row_index, cleaned_cells)`` for the first qualifying row, or
        ``(None, [])`` when no row qualifies.
    """
    for row_no, cells in enumerate(table):
        populated = [cell for cell in cells if cell]
        if len(populated) < 2:
            continue
        cleaned = [_c(cell) for cell in cells]
        if _is_summary(cleaned) or _is_detail(cleaned):
            return row_no, cleaned
    return None, []
|
||||
|
||||
|
||||
def _is_summary(h: list[str]) -> bool:
    """Tell whether header cells describe a summary (patient / ICN) table.

    True when the space-joined, lower-cased header text mentions both
    "patient name" and "icn" and does not mention "code" anywhere.
    """
    text = " ".join(h).lower()
    if "code" in text:
        return False
    return "patient name" in text and "icn" in text
|
||||
|
||||
|
||||
def _is_detail(h: list[str]) -> bool:
    """Tell whether header cells describe a detail (procedure) table.

    True when the space-joined, lower-cased header text contains
    "paid code", or contains both "submitted" and "code".
    """
    text = " ".join(h).lower()
    if "paid code" in text:
        return True
    return "submitted" in text and "code" in text
|
||||
|
||||
|
||||
def _merge_headers(table: list[list], hdr_idx: int) -> list[str]:
    """Combine rows ``0..hdr_idx`` into one header text per column.

    For each column, the non-empty cells of the header rows are cleaned with
    ``_c`` and joined top-to-bottom with single spaces. Rows shorter than the
    widest header row simply contribute nothing to the missing columns.

    Args:
        table: Extracted table as a list of rows.
        hdr_idx: Index of the last header row (inclusive).

    Returns:
        One merged header string per column, as wide as the widest header row.
    """
    header_rows = table[: hdr_idx + 1]
    width = max(len(row) for row in header_rows)
    return [
        " ".join(
            _c(row[column])
            for row in header_rows
            if column < len(row) and row[column]
        )
        for column in range(width)
    ]
|
||||
|
||||
|
||||
# ── Pass 1: summary pages → {icn: patient_name} ──────────────────────────────
|
||||
|
||||
def _build_patient_map(pdf) -> dict[str, str]:
    """Pass 1: map each ICN listed on the summary tables to its patient name.

    Every table on every page is inspected. Tables whose header row passes
    ``_is_summary`` and has both a "Patient Name" and an "ICN" column
    contribute one entry per data row.

    Rows are skipped when the patient or ICN cell is empty, when the patient
    cell contains "Total", or when the ICN is not purely numeric.

    ICNs are stored with internal spaces removed. ``_c`` turns a line break
    inside a wrapped cell into a space, while the detail pass captures the ICN
    as an unbroken digit run, so a key that kept the space could never be
    joined with its detail record.

    Args:
        pdf: An open pdfplumber document (anything exposing ``pages`` whose
            items provide ``find_tables()``).

    Returns:
        ``{icn: patient_name}``; a later row with the same ICN overwrites an
        earlier one.
    """
    patient_map: dict[str, str] = {}

    for page in pdf.pages:
        for tobj in page.find_tables():
            table = tobj.extract()
            if not table or len(table) < 2:
                continue
            hdr_idx, headers = _find_header_row(table)
            if hdr_idx is None or not _is_summary(headers):
                continue

            pi = _col(headers, "Patient Name")
            ii = _col(headers, "ICN")
            if pi is None or ii is None:
                continue

            for row in table[hdr_idx + 1:]:
                if not row:
                    continue
                patient = _c(row[pi]) if pi < len(row) else ""
                icn = _c(row[ii]) if ii < len(row) else ""
                # Normalise before validating AND storing, so the key matches
                # the digit-only ICN produced by the detail pass.
                icn = icn.replace(" ", "")
                if not patient or not icn:
                    continue
                if "Total" in patient or not icn.isdigit():
                    continue
                patient_map[icn] = patient

    return patient_map
|
||||
|
||||
|
||||
# ── Pass 2: detail pages → {icn: procedure_dict} ─────────────────────────────
|
||||
|
||||
def _build_detail_map(pdf) -> dict[str, dict]:
    """Pass 2: map each ICN found on a detail table to its procedure fields.

    A table is used when its header row passes ``_is_detail`` and the first
    cell of its first row contains ``ICN: <digits>``. The member number is the
    first standalone 12-digit run in that same cell (empty if none).

    Column positions are resolved from the merged header texts. Only rows
    whose submitted-code cell matches ``DCODE_RE`` are recorded. Because the
    result is keyed by ICN, a later qualifying row for the same ICN replaces
    the earlier one.

    Args:
        pdf: An open pdfplumber document (anything exposing ``pages`` whose
            items provide ``find_tables()``).

    Returns:
        ``{icn: {"Member #", "Submitted Code", "Paid Code", "Tooth",
        "Date of Service", "Submitted Amount", "Allowed Amount",
        "Paid Amount"}}`` with every value a string.
    """

    def cell(row: list, idx: int | None) -> str:
        """Cleaned text of ``row[idx]``, or ``""`` when the column is absent."""
        if idx is None or idx >= len(row):
            return ""
        return _c(row[idx])

    procedures: dict[str, dict] = {}

    for page in pdf.pages:
        for found in page.find_tables():
            grid = found.extract()
            if not grid or len(grid) < 2:
                continue
            header_at, header_cells = _find_header_row(grid)
            if header_at is None or not _is_detail(header_cells):
                continue

            # Claim context (ICN, member number) sits in the first cell of row 0.
            banner = str(grid[0][0]) if grid[0] and grid[0][0] else ""
            icn_match = re.search(r'ICN:\s*(\d+)', banner)
            if not icn_match:
                continue
            icn = icn_match.group(1)
            member_match = MEMBER_RE.search(banner)
            member = member_match.group(1) if member_match else ""

            titles = _merge_headers(grid, header_at)
            code_col = _col(titles, "Submitted", "Code")
            paid_code_col = _col(titles, "Paid", "Code")
            tooth_col = _col(titles, "Tooth")
            date_col = _col(titles, "Date")
            allowed_col = _col(titles, "Allowed")

            # Amount columns are the "submitted" / "paid" headers that are not
            # code columns. For "paid", a header that also says "amount"
            # overrides an earlier, less specific hit.
            submitted_amt_col = paid_amt_col = None
            for position, title in enumerate(titles):
                lowered = title.lower()
                if "code" in lowered:
                    continue
                if "submitted" in lowered and submitted_amt_col is None:
                    submitted_amt_col = position
                if "paid" in lowered and ("amount" in lowered or paid_amt_col is None):
                    paid_amt_col = position

            for row in grid[header_at + 1:]:
                if not row:
                    continue
                submitted_code = cell(row, code_col)
                if not DCODE_RE.match(submitted_code):
                    continue

                procedures[icn] = {
                    "Member #": member,
                    "Submitted Code": submitted_code,
                    "Paid Code": cell(row, paid_code_col),
                    "Tooth": cell(row, tooth_col),
                    "Date of Service": cell(row, date_col),
                    "Submitted Amount": _amt(cell(row, submitted_amt_col)),
                    "Allowed Amount": _amt(cell(row, allowed_col)),
                    "Paid Amount": _amt(cell(row, paid_amt_col)),
                }

    return procedures
|
||||
|
||||
|
||||
# ── Main: join on ICN ─────────────────────────────────────────────────────────
|
||||
|
||||
def extract_ra_pdf(pdf_bytes: bytes, filename: str) -> dict:
    """Extract a MassHealth Remittance Advice PDF in two passes and join on ICN.

    The cover summary comes from ``extract_ra_header``. The document is then
    opened once more to build the summary-page patient map and the detail-page
    procedure map. One output row is produced per ICN in the patient map;
    detail fields are empty strings when no detail record exists for that ICN.

    Args:
        pdf_bytes: Raw contents of the PDF file.
        filename: Recorded as ``"Source File"`` on every row.

    Returns:
        ``{"header": <dict from extract_ra_header>, "rows": [row, ...]}``
        where each row has the keys "Patient Name", "Member #", "ICN",
        "Submitted Code", "Paid Code", "Tooth", "Date of Service",
        "Submitted Amount", "Allowed Amount", "Paid Amount" and
        "Source File", in that order.
    """
    header = extract_ra_header(pdf_bytes, filename)

    with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
        patients = _build_patient_map(pdf)
        details = _build_detail_map(pdf)

    # Detail-derived columns, in output order, placed after "ICN".
    detail_fields = (
        "Submitted Code",
        "Paid Code",
        "Tooth",
        "Date of Service",
        "Submitted Amount",
        "Allowed Amount",
        "Paid Amount",
    )

    rows = []
    for icn, patient_name in patients.items():
        detail = details.get(icn, {})
        row = {
            "Patient Name": patient_name,
            "Member #": detail.get("Member #", ""),
            "ICN": icn,
        }
        row.update((name, detail.get(name, "")) for name in detail_fields)
        row["Source File"] = filename
        rows.append(row)

    return {"header": header, "rows": rows}
|
||||
Reference in New Issue
Block a user