selenium worker approach2

2025-06-10 09:52:40 +05:30
parent c9a203846c
commit aed82d6f14
3 changed files with 21 additions and 60 deletions
--- a/apps/Backend/src/routes/claims.ts
+++ b/apps/Backend/src/routes/claims.ts
@@ -7,6 +7,7 @@ import multer from "multer";
 import { forwardToSeleniumAgent, forwardToSeleniumAgent2 } from "../services/seleniumClient";
 import path from "path";
 import fs from "fs";
 import axios from "axios";
 const router = Router();
@@ -101,9 +102,6 @@ router.post(
        return res.status(400).json({ error: result.message || "Failed to fetch PDF" });
      }
      const base64Data = result.pdf_base64;
      const buffer = Buffer.from(base64Data, "base64");
      const pdfUrl = result.pdf_url;
      const filename = path.basename(new URL(pdfUrl).pathname);
@@ -113,12 +111,15 @@ router.post(
      }
      const filePath = path.join(tempDir, filename);
-      fs.writeFileSync(filePath, buffer);
+
      // Download the PDF directly using axios
      const pdfResponse = await axios.get(pdfUrl, { responseType: "arraybuffer" });
      fs.writeFileSync(filePath, pdfResponse.data);
      return res.json({
        success: true,
        pdfPath: `/temp/${filename}`,
-        pdfUrl: pdfUrl,
+        pdfUrl,
        fileName: filename,
      });
    } catch (err) {
--- a/apps/SeleniumService/agent.py
+++ b/apps/SeleniumService/agent.py
@@ -32,14 +32,14 @@ async def fetch_pdf():
        if not bot:
            return {"status": "error", "message": "No running automation session"}
-        pdf_data = bot.reach_to_pdf()
+        result = bot.reach_to_pdf()
-        if pdf_data.get("status") != "success":
+
-            return {"status": "error", "message": pdf_data.get("message")}
+        if result.get("status") != "success":
            return {"status": "error", "message": result.get("message")}
        return {
            "status": "success",
-            "pdf_url": pdf_data["pdf_url"],
+            "pdf_url": result["pdf_url"]
            "pdf_base64": pdf_data["pdf_bytes"]
        }
    except Exception as e:
        return {"status": "error", "message": str(e)}
--- a/apps/SeleniumService/selenium_worker.py
+++ b/apps/SeleniumService/selenium_worker.py
@@ -297,61 +297,21 @@ class AutomationMassHealth:
            pdf_link_element = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//a[contains(@href, '.pdf')]"))
            )
-            print("PDF link found. Clicking it...")
+            print("PDF link found.")
            # Click the PDF link
            pdf_link_element.click()
            time.sleep(5)
-            existing_windows = self.driver.window_handles
+            pdf_relative_url = pdf_link_element.get_attribute("href")
-            # Wait for the new tab
+            if not pdf_relative_url.startswith("http"):
-            WebDriverWait(self.driver, 90).until(
+                full_pdf_url = f"https://providers.massdhp.com{pdf_relative_url}"
-                lambda d: len(d.window_handles) > len(existing_windows)
+            else:
-            )
+                full_pdf_url = pdf_relative_url
-            print("Switching to PDF tab...")
+            print("FULL PDF LINK: ",full_pdf_url)
            self.driver.switch_to.window(self.driver.window_handles[1])
            time.sleep(2)
            current_url = self.driver.current_url
            print(f"Switched to PDF tab. Current URL: {current_url}")
             # Get full PDF URL in case it's a relative path
            pdf_url = pdf_link_element.get_attribute("href")
            if not pdf_url.startswith("http"):
                base_url = self.driver.current_url.split("/providers")[0]
                pdf_url = f"{base_url}/{pdf_url}"
            # Get cookies from Selenium session; saving them just for my reference while testing. In prod, just use the one line below.
            # cookies = {c['name']: c['value'] for c in self.driver.get_cookies()} 
            # 1. Get raw Selenium cookies (list of dicts)
            raw_cookies = self.driver.get_cookies()
            with open("raw_cookies.txt", "w") as f:
                json.dump(raw_cookies, f, indent=2)
            formatted_cookies = {c['name']: c['value'] for c in raw_cookies}
            with open("formatted_cookies.txt", "w") as f:
                for k, v in formatted_cookies.items():
                    f.write(f"{k}={v}\n")
            # Use requests to download the file using session cookies
            print("Downloading PDF content via requests...")
            pdf_response = requests.get(pdf_url, cookies=formatted_cookies)
            if pdf_response.status_code == 200:
                print("PDF successfully fetched (bytes length):")
            return {
                "status": "success",
-                "pdf_bytes": base64.b64encode(pdf_response.content).decode(),
+                "pdf_url": full_pdf_url
            }
            else:
                print("Failed to fetch PDF. Status:", pdf_response.status_code, pdf_response)
                return {
                "status": "error",
                "message": pdf_response,
            }
        except Exception as e: