selenium worker approach2
This commit is contained in:
@@ -7,6 +7,7 @@ import multer from "multer";
|
|||||||
import { forwardToSeleniumAgent, forwardToSeleniumAgent2 } from "../services/seleniumClient";
|
import { forwardToSeleniumAgent, forwardToSeleniumAgent2 } from "../services/seleniumClient";
|
||||||
import path from "path";
|
import path from "path";
|
||||||
import fs from "fs";
|
import fs from "fs";
|
||||||
|
import axios from "axios";
|
||||||
|
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
@@ -101,9 +102,6 @@ router.post(
|
|||||||
return res.status(400).json({ error: result.message || "Failed to fetch PDF" });
|
return res.status(400).json({ error: result.message || "Failed to fetch PDF" });
|
||||||
}
|
}
|
||||||
|
|
||||||
const base64Data = result.pdf_base64;
|
|
||||||
const buffer = Buffer.from(base64Data, "base64");
|
|
||||||
|
|
||||||
const pdfUrl = result.pdf_url;
|
const pdfUrl = result.pdf_url;
|
||||||
const filename = path.basename(new URL(pdfUrl).pathname);
|
const filename = path.basename(new URL(pdfUrl).pathname);
|
||||||
|
|
||||||
@@ -113,12 +111,15 @@ router.post(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const filePath = path.join(tempDir, filename);
|
const filePath = path.join(tempDir, filename);
|
||||||
fs.writeFileSync(filePath, buffer);
|
|
||||||
|
// Download the PDF directly using axios
|
||||||
|
const pdfResponse = await axios.get(pdfUrl, { responseType: "arraybuffer" });
|
||||||
|
fs.writeFileSync(filePath, pdfResponse.data);
|
||||||
|
|
||||||
return res.json({
|
return res.json({
|
||||||
success: true,
|
success: true,
|
||||||
pdfPath: `/temp/${filename}`,
|
pdfPath: `/temp/${filename}`,
|
||||||
pdfUrl: pdfUrl,
|
pdfUrl,
|
||||||
fileName: filename,
|
fileName: filename,
|
||||||
});
|
});
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
|
|||||||
@@ -32,14 +32,14 @@ async def fetch_pdf():
|
|||||||
if not bot:
|
if not bot:
|
||||||
return {"status": "error", "message": "No running automation session"}
|
return {"status": "error", "message": "No running automation session"}
|
||||||
|
|
||||||
pdf_data = bot.reach_to_pdf()
|
result = bot.reach_to_pdf()
|
||||||
if pdf_data.get("status") != "success":
|
|
||||||
return {"status": "error", "message": pdf_data.get("message")}
|
if result.get("status") != "success":
|
||||||
|
return {"status": "error", "message": result.get("message")}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"status": "success",
|
"status": "success",
|
||||||
"pdf_url": pdf_data["pdf_url"],
|
"pdf_url": result["pdf_url"]
|
||||||
"pdf_base64": pdf_data["pdf_bytes"]
|
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {"status": "error", "message": str(e)}
|
return {"status": "error", "message": str(e)}
|
||||||
|
|||||||
@@ -297,61 +297,21 @@ class AutomationMassHealth:
|
|||||||
pdf_link_element = wait.until(
|
pdf_link_element = wait.until(
|
||||||
EC.element_to_be_clickable((By.XPATH, "//a[contains(@href, '.pdf')]"))
|
EC.element_to_be_clickable((By.XPATH, "//a[contains(@href, '.pdf')]"))
|
||||||
)
|
)
|
||||||
print("PDF link found. Clicking it...")
|
print("PDF link found.")
|
||||||
|
|
||||||
# Click the PDF link
|
|
||||||
pdf_link_element.click()
|
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
|
|
||||||
existing_windows = self.driver.window_handles
|
pdf_relative_url = pdf_link_element.get_attribute("href")
|
||||||
|
|
||||||
# Wait for the new tab
|
if not pdf_relative_url.startswith("http"):
|
||||||
WebDriverWait(self.driver, 90).until(
|
full_pdf_url = f"https://providers.massdhp.com{pdf_relative_url}"
|
||||||
lambda d: len(d.window_handles) > len(existing_windows)
|
else:
|
||||||
)
|
full_pdf_url = pdf_relative_url
|
||||||
|
|
||||||
print("Switching to PDF tab...")
|
print("FULL PDF LINK: ",full_pdf_url)
|
||||||
self.driver.switch_to.window(self.driver.window_handles[1])
|
|
||||||
|
|
||||||
|
|
||||||
time.sleep(2)
|
|
||||||
current_url = self.driver.current_url
|
|
||||||
print(f"Switched to PDF tab. Current URL: {current_url}")
|
|
||||||
|
|
||||||
|
|
||||||
# Get full PDF URL in case it's a relative path
|
|
||||||
pdf_url = pdf_link_element.get_attribute("href")
|
|
||||||
if not pdf_url.startswith("http"):
|
|
||||||
base_url = self.driver.current_url.split("/providers")[0]
|
|
||||||
pdf_url = f"{base_url}/{pdf_url}"
|
|
||||||
|
|
||||||
# Get cookies from Selenium session, saving just for my referece while testing. in prod just use below one line
|
|
||||||
# cookies = {c['name']: c['value'] for c in self.driver.get_cookies()}
|
|
||||||
# 1. Get raw Selenium cookies (list of dicts)
|
|
||||||
raw_cookies = self.driver.get_cookies()
|
|
||||||
with open("raw_cookies.txt", "w") as f:
|
|
||||||
json.dump(raw_cookies, f, indent=2)
|
|
||||||
|
|
||||||
formatted_cookies = {c['name']: c['value'] for c in raw_cookies}
|
|
||||||
with open("formatted_cookies.txt", "w") as f:
|
|
||||||
for k, v in formatted_cookies.items():
|
|
||||||
f.write(f"{k}={v}\n")
|
|
||||||
|
|
||||||
# Use requests to download the file using session cookies
|
|
||||||
print("Downloading PDF content via requests...")
|
|
||||||
pdf_response = requests.get(pdf_url, cookies=formatted_cookies)
|
|
||||||
|
|
||||||
if pdf_response.status_code == 200:
|
|
||||||
print("PDF successfully fetched (bytes length):")
|
|
||||||
return {
|
return {
|
||||||
"status": "success",
|
"status": "success",
|
||||||
"pdf_bytes": base64.b64encode(pdf_response.content).decode(),
|
"pdf_url": full_pdf_url
|
||||||
}
|
|
||||||
else:
|
|
||||||
print("Failed to fetch PDF. Status:", pdf_response.status_code, pdf_response)
|
|
||||||
return {
|
|
||||||
"status": "error",
|
|
||||||
"message": pdf_response,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Reference in New Issue
Block a user