selenium worker approach2

This commit is contained in:
2025-06-10 09:52:40 +05:30
parent c9a203846c
commit aed82d6f14
3 changed files with 21 additions and 60 deletions

View File

@@ -7,6 +7,7 @@ import multer from "multer";
import { forwardToSeleniumAgent, forwardToSeleniumAgent2 } from "../services/seleniumClient"; import { forwardToSeleniumAgent, forwardToSeleniumAgent2 } from "../services/seleniumClient";
import path from "path"; import path from "path";
import fs from "fs"; import fs from "fs";
import axios from "axios";
const router = Router(); const router = Router();
@@ -101,9 +102,6 @@ router.post(
return res.status(400).json({ error: result.message || "Failed to fetch PDF" }); return res.status(400).json({ error: result.message || "Failed to fetch PDF" });
} }
const base64Data = result.pdf_base64;
const buffer = Buffer.from(base64Data, "base64");
const pdfUrl = result.pdf_url; const pdfUrl = result.pdf_url;
const filename = path.basename(new URL(pdfUrl).pathname); const filename = path.basename(new URL(pdfUrl).pathname);
@@ -113,12 +111,15 @@ router.post(
} }
const filePath = path.join(tempDir, filename); const filePath = path.join(tempDir, filename);
fs.writeFileSync(filePath, buffer);
// Download the PDF directly using axios
const pdfResponse = await axios.get(pdfUrl, { responseType: "arraybuffer" });
fs.writeFileSync(filePath, pdfResponse.data);
return res.json({ return res.json({
success: true, success: true,
pdfPath: `/temp/${filename}`, pdfPath: `/temp/${filename}`,
pdfUrl: pdfUrl, pdfUrl,
fileName: filename, fileName: filename,
}); });
} catch (err) { } catch (err) {

View File

@@ -32,14 +32,14 @@ async def fetch_pdf():
if not bot: if not bot:
return {"status": "error", "message": "No running automation session"} return {"status": "error", "message": "No running automation session"}
pdf_data = bot.reach_to_pdf() result = bot.reach_to_pdf()
if pdf_data.get("status") != "success":
return {"status": "error", "message": pdf_data.get("message")} if result.get("status") != "success":
return {"status": "error", "message": result.get("message")}
return { return {
"status": "success", "status": "success",
"pdf_url": pdf_data["pdf_url"], "pdf_url": result["pdf_url"]
"pdf_base64": pdf_data["pdf_bytes"]
} }
except Exception as e: except Exception as e:
return {"status": "error", "message": str(e)} return {"status": "error", "message": str(e)}

View File

@@ -297,61 +297,21 @@ class AutomationMassHealth:
pdf_link_element = wait.until( pdf_link_element = wait.until(
EC.element_to_be_clickable((By.XPATH, "//a[contains(@href, '.pdf')]")) EC.element_to_be_clickable((By.XPATH, "//a[contains(@href, '.pdf')]"))
) )
print("PDF link found. Clicking it...") print("PDF link found.")
# Click the PDF link
pdf_link_element.click()
time.sleep(5) time.sleep(5)
existing_windows = self.driver.window_handles pdf_relative_url = pdf_link_element.get_attribute("href")
# Wait for the new tab if not pdf_relative_url.startswith("http"):
WebDriverWait(self.driver, 90).until( full_pdf_url = f"https://providers.massdhp.com{pdf_relative_url}"
lambda d: len(d.window_handles) > len(existing_windows)
)
print("Switching to PDF tab...")
self.driver.switch_to.window(self.driver.window_handles[1])
time.sleep(2)
current_url = self.driver.current_url
print(f"Switched to PDF tab. Current URL: {current_url}")
# Get full PDF URL in case it's a relative path
pdf_url = pdf_link_element.get_attribute("href")
if not pdf_url.startswith("http"):
base_url = self.driver.current_url.split("/providers")[0]
pdf_url = f"{base_url}/{pdf_url}"
# Get cookies from Selenium session, saving just for my referece while testing. in prod just use below one line
# cookies = {c['name']: c['value'] for c in self.driver.get_cookies()}
# 1. Get raw Selenium cookies (list of dicts)
raw_cookies = self.driver.get_cookies()
with open("raw_cookies.txt", "w") as f:
json.dump(raw_cookies, f, indent=2)
formatted_cookies = {c['name']: c['value'] for c in raw_cookies}
with open("formatted_cookies.txt", "w") as f:
for k, v in formatted_cookies.items():
f.write(f"{k}={v}\n")
# Use requests to download the file using session cookies
print("Downloading PDF content via requests...")
pdf_response = requests.get(pdf_url, cookies=formatted_cookies)
if pdf_response.status_code == 200:
print("PDF successfully fetched (bytes length):")
return {
"status": "success",
"pdf_bytes": base64.b64encode(pdf_response.content).decode(),
}
else: else:
print("Failed to fetch PDF. Status:", pdf_response.status_code, pdf_response) full_pdf_url = pdf_relative_url
return {
"status": "error", print("FULL PDF LINK: ",full_pdf_url)
"message": pdf_response, return {
"status": "success",
"pdf_url": full_pdf_url
} }
except Exception as e: except Exception as e: