From 3ae1ffd65a31cf09dbe2294748c534869a326fde Mon Sep 17 00:00:00 2001
From: Potenz
Date: Fri, 3 Oct 2025 03:41:35 +0530
Subject: [PATCH] fix(pdf mismatch fix)

---
Review notes (this section is ignored by `git am`):
- Backend: emptying the download folder already deletes result.pdf_path, so
  the follow-up fs.unlink() on the same path is removed (it failed with
  ENOENT and turned a successful check into a 500).
- Selenium worker: _unique_target_path is a local helper inside step2, so it
  is called directly instead of through `self`.
- Whitespace of context lines was reconstructed; verify against the target
  files before applying. Blob hashes on the `index` lines are unchanged from
  the original submission.

 apps/Backend/src/routes/insuranceStatus.ts    | 79 ++++++++++++++++++-
 .../selenium_eligibilityCheckWorker.py        | 49 ++++++++++++-
 2 files changed, 121 insertions(+), 7 deletions(-)

diff --git a/apps/Backend/src/routes/insuranceStatus.ts b/apps/Backend/src/routes/insuranceStatus.ts
index e736dfc..810e770 100644
--- a/apps/Backend/src/routes/insuranceStatus.ts
+++ b/apps/Backend/src/routes/insuranceStatus.ts
@@ -10,6 +10,61 @@ import fsSync from "fs";
 
 const router = Router();
 
+/**
+ * Empty the folder containing `filePath`.
+ * - Uses only the filePath to locate the folder.
+ * - Deletes files and symlinks only (does not remove subfolders).
+ * - Has minimal safety checks to avoid catastrophic deletes.
+ *
+ * @param filePath Path of any file inside the folder to empty; falsy is a no-op.
+ * @throws If the folder is a filesystem root, has a one-character name, or
+ *         cannot be listed. Failures to remove individual entries are only logged.
+ */
+async function emptyFolderContainingFile(filePath: string) {
+  if (!filePath) return;
+
+  // Resolve to absolute path and get parent folder
+  const absFile = path.resolve(String(filePath));
+  const folder = path.dirname(absFile);
+
+  // Safety: refuse to operate on root (or extremely short paths)
+  const parsed = path.parse(folder);
+  if (!folder || folder === parsed.root) {
+    throw new Error(`Refusing to clean root or empty folder: ${folder}`);
+  }
+
+  // Light heuristic: reject folder names shorter than two characters (a name like "tmp" still passes)
+  const base = path.basename(folder).toLowerCase();
+  if (base.length < 2) {
+    throw new Error(`Refusing to clean suspicious folder: ${folder}`);
+  }
+
+  // Read and remove files/symlinks only
+  try {
+    const names = await fs.readdir(folder);
+    for (const name of names) {
+      const full = path.join(folder, name);
+      try {
+        const st = await fs.lstat(full);
+        if (st.isFile() || st.isSymbolicLink()) {
+          await fs.unlink(full);
+          console.log(`[cleanup] removed file: ${full}`);
+        } else {
+          // If you truly know there will be no subfolders you can skip or log
+          console.log(`[cleanup] skipping non-file item: ${full}`);
+        }
+      } catch (innerErr) {
+        console.error(`[cleanup] failed to remove ${full}:`, innerErr);
+        // continue with other files
+      }
+    }
+    console.log(`[cleanup] emptied folder: ${folder}`);
+  } catch (err) {
+    console.error(`[cleanup] failed reading folder ${folder}:`, err);
+    throw err;
+  }
+}
+
 router.post(
   "/eligibility-check",
   async (req: Request, res: Response): Promise<any> => {
@@ -23,6 +78,8 @@ router.post(
       return res.status(401).json({ error: "Unauthorized: user info missing" });
     }
 
+    let result: any = undefined;
+
     try {
       const insuranceEligibilityData = JSON.parse(req.body.data);
 
@@ -43,8 +100,7 @@ router.post(
         massdhpPassword: credentials.password,
       };
 
-      const result =
-        await forwardToSeleniumInsuranceEligibilityAgent(enrichedData);
+      result = await forwardToSeleniumInsuranceEligibilityAgent(enrichedData);
 
       let createdPdfFileId: number | null = null;
 
@@ -94,6 +150,10 @@ router.post(
           createdPdfFileId = Number(created.id);
         }
 
-        await fs.unlink(result.pdf_path);
+        // safe-success path (after createdPdfFileId is set and DB committed):
+        // emptying the folder also removes result.pdf_path, so no separate unlink is needed
+        if (result.pdf_path) {
+          await emptyFolderContainingFile(result.pdf_path);
+        }
 
         result.pdfUploadStatus = `PDF saved to group: ${group.title}`;
@@ -113,6 +173,19 @@ router.post(
       });
     } catch (err: any) {
       console.error(err);
+
+      try {
+        if (result && result.pdf_path) {
+          await emptyFolderContainingFile(result.pdf_path);
+        } else {
+          console.log(`[eligibility-check] no pdf_path available to cleanup`);
+        }
+      } catch (cleanupErr) {
+        console.error(
+          `[eligibility-check] cleanup failed for ${result?.pdf_path}`,
+          cleanupErr
+        );
+      }
       return res.status(500).json({
         error: err.message || "Failed to forward to selenium agent",
       });
diff --git a/apps/SeleniumService/selenium_eligibilityCheckWorker.py b/apps/SeleniumService/selenium_eligibilityCheckWorker.py
index 20b8e09..8fc09f3 100644
--- a/apps/SeleniumService/selenium_eligibilityCheckWorker.py
+++ b/apps/SeleniumService/selenium_eligibilityCheckWorker.py
@@ -7,6 +7,8 @@ from selenium.webdriver.support import expected_conditions as EC
 from webdriver_manager.chrome import ChromeDriverManager
 import time
 import os
+import shutil
+import stat
 
 class AutomationMassHealthEligibilityCheck:
     def __init__(self, data):
@@ -120,7 +122,6 @@ class AutomationMassHealthEligibilityCheck:
 
 
     def step2(self):
-        wait = WebDriverWait(self.driver, 90)
         def wait_for_pdf_download(timeout=60):
             for _ in range(timeout):
                 files = [f for f in os.listdir(self.download_dir) if f.endswith(".pdf")]
@@ -128,6 +129,17 @@ class AutomationMassHealthEligibilityCheck:
                     return os.path.join(self.download_dir, files[0])
                 time.sleep(1)
             raise TimeoutError("PDF did not download in time")
+
+        def _unique_target_path():
+            """
+            Return the per-member PDF path (built from memberId) inside the download dir.
+            """
+            safe_member = "".join(c for c in str(self.memberId) if c.isalnum() or c in "-_.")
+            filename = f"eligibility_{safe_member}.pdf"
+            return os.path.join(self.download_dir, filename)
+
+        wait = WebDriverWait(self.driver, 90)
+
 
         try:
             eligibilityElement = wait.until(EC.presence_of_element_located((By.XPATH,
@@ -140,16 +152,45 @@ class AutomationMassHealthEligibilityCheck:
 
             txReportElement.click()
 
-            pdf_path = wait_for_pdf_download()
-            print("PDF downloaded at:", pdf_path)
+            # wait for the PDF to fully appear
+            downloaded_path = wait_for_pdf_download()
+            # per-member target path; _unique_target_path is a local helper of step2, not a method
+            target_path = _unique_target_path()
+            # It's possible Chrome writes a file with a fixed name: copy it to our target name.
+            shutil.copyfile(downloaded_path, target_path)
+            # ensure the copied file is writable / stable
+            os.chmod(target_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)
+
+
+            print("PDF downloaded at:", target_path)
 
             return {
                 "status": "success",
                 "eligibility": eligibilityText,
-                "pdf_path": pdf_path
+                "pdf_path": target_path
             }
         except Exception as e:
             print(f"ERROR: {str(e)}")
+
+            # Empty the download folder (remove files / symlinks only)
+            try:
+                dl = os.path.abspath(self.download_dir)
+                if os.path.isdir(dl):
+                    for name in os.listdir(dl):
+                        item = os.path.join(dl, name)
+                        try:
+                            if os.path.isfile(item) or os.path.islink(item):
+                                os.remove(item)
+                                print(f"[cleanup] removed: {item}")
+                        except Exception as rm_err:
+                            print(f"[cleanup] failed to remove {item}: {rm_err}")
+                    print(f"[cleanup] emptied download dir: {dl}")
+                else:
+                    print(f"[cleanup] download dir does not exist: {dl}")
+            except Exception as cleanup_exc:
+                print(f"[cleanup] unexpected error while cleaning downloads dir: {cleanup_exc}")
+
+
             return {
                 "status": "error",
                 "message": str(e),