extraction func is added

This commit is contained in:
2025-05-22 20:46:55 +05:30
parent f53919a3cd
commit d6d040c9e4
21 changed files with 2575 additions and 1752 deletions

View File

@@ -8,16 +8,31 @@ app = Flask(__name__)
def extract():
    """Extract the member ID, name and date of birth from an uploaded PDF.

    Reads the file uploaded under the ``pdf`` form field, collects the text
    of every page, and scans the non-empty lines for the first all-digit
    line of 8 to 14 characters, which is taken as the member ID.  The lines
    that follow it, up to the first line beginning with a date of the form
    ``N/N/NNNN``, are joined into the name; that date line, when present, is
    the date of birth.

    Returns:
        dict: ``{"memberId": str, "name": str, "dob": str}``.  Any field
        that could not be found is an empty string.
    """
    file = request.files['pdf']
    # Close the document as soon as its text has been read so the parsed PDF
    # is not kept open for the remainder of the request.
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        # Pages are joined with a newline so the last line of one page does
        # not fuse with the first line of the next.
        text = "\n".join(page.get_text() for page in doc)

    lines = [line.strip() for line in text.splitlines() if line.strip()]
    member_id, name, dob = _parse_member_fields(lines)
    return {
        "memberId": member_id,
        "name": name,
        "dob": dob
    }


def _parse_member_fields(lines):
    """Return ``(member_id, name, dob)`` from stripped, non-empty text lines.

    Only the first line that qualifies as a member ID is considered; all
    three values are empty strings when no such line exists.
    """
    for index, line in enumerate(lines):
        # Both length bounds must hold together.  Combining them with "or"
        # is always true and lets any all-digit line (e.g. a page number)
        # be mistaken for the member ID.
        if not (line.isdigit() and 8 <= len(line) <= 14):
            continue

        name_parts = []
        dob = ""
        for candidate in lines[index + 1:]:
            # re.match anchors at the start of the line only, so trailing
            # text after the date is still accepted.
            if re.match(r"\d{1,2}/\d{1,2}/\d{4}", candidate):
                dob = candidate
                break
            name_parts.append(candidate)
        return line, " ".join(name_parts), dob

    return "", "", ""
# Start the Flask server on port 5001 only when this file is executed
# directly; importing the module does not start it.
if __name__ == "__main__":
    app.run(port=5001)