removed test'

This commit is contained in:
2025-05-23 17:06:56 +05:30
parent 7a8c5472c8
commit d575af52c7

View File

@@ -1,37 +0,0 @@
import fitz # PyMuPDF
import re
# A date-of-birth line starts with D/M/YYYY (one- or two-digit day and month).
_DOB_PATTERN = re.compile(r"\d{1,2}/\d{1,2}/\d{4}")

# Inclusive bounds on the digit count of a line that is accepted as a member ID.
_MIN_MEMBER_ID_LEN = 8
_MAX_MEMBER_ID_LEN = 14


def _parse_member_fields(lines):
    """Pick the member ID, name and DOB out of stripped, non-empty text lines."""
    for idx, line in enumerate(lines):
        is_member_id = (
            line.isdigit()
            and _MIN_MEMBER_ID_LEN <= len(line) <= _MAX_MEMBER_ID_LEN
        )
        if not is_member_id:
            continue

        # Everything between the member ID and the first date-like line is
        # treated as the (possibly multi-line) name.
        name_parts = []
        dob = ""
        for candidate in lines[idx + 1:]:
            if _DOB_PATTERN.match(candidate):
                dob = candidate.strip()
                break
            name_parts.append(candidate)

        # Only the first member ID in the document is considered.
        return {
            "memberId": line,
            "name": " ".join(name_parts).strip(),
            "dob": dob,
        }

    return {"memberId": "", "name": "", "dob": ""}


def extract_from_pdf(file_path):
    """Extract the member ID, name and date of birth from a PDF's text.

    The text of every page is joined, split into lines, and blank lines are
    dropped. The first line made only of digits whose length is between 8 and
    14 (inclusive) is taken as the member ID. The lines that follow it, up to
    the first line starting with a ``D/M/YYYY``-style date, are joined with
    spaces to form the name; that date line becomes the DOB.

    Args:
        file_path: Path of the PDF, passed straight to ``fitz.open``.

    Returns:
        A dict with the keys ``"memberId"``, ``"name"`` and ``"dob"``. All
        three are empty strings when no member ID line is found. ``"dob"`` is
        empty when no date line follows the member ID, in which case the name
        is built from all remaining lines.
    """
    # The context manager closes the document even if text extraction raises.
    with fitz.open(file_path) as doc:
        text = "\n".join(page.get_text() for page in doc)

    lines = [line.strip() for line in text.splitlines() if line.strip()]
    return _parse_member_fields(lines)
if __name__ == "__main__":
    # Manual smoke run: print the fields extracted from the sample PDF.
    print(extract_from_pdf("PDF_To_Test/sample1.pdf"))