extraction func is added
This commit is contained in:
@@ -8,16 +8,31 @@ app = Flask(__name__)
|
||||
def extract():
    """Extract the member ID, name and date of birth from an uploaded PDF.

    Reads the file uploaded under the ``pdf`` form field, concatenates the
    text of every page, and scans the non-empty, stripped lines for the
    first line made only of digits whose length is between 8 and 14
    characters. That line is taken as the member ID. The lines following
    it, up to (but not including) the first line that starts with a date of
    the form ``N/N/NNNN``, are joined with spaces to form the name; the
    date line itself becomes the date of birth.

    Returns:
        A dict with the keys ``memberId``, ``name`` and ``dob``. Each value
        is an empty string when the corresponding field was not found.
    """
    file = request.files['pdf']

    # Close the document as soon as the text has been pulled out of it.
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        text = "\n".join(page.get_text() for page in doc)

    lines = [line.strip() for line in text.splitlines() if line.strip()]

    member_id = ""
    name = ""
    dob = ""

    for i, line in enumerate(lines):
        # The bounds must both hold; the previous `<= 14 or >= 8` test was
        # always true and accepted any all-digit line as the member ID.
        if line.isdigit() and 8 <= len(line) <= 14:
            member_id = line

            # Everything between the ID and the first date line is the name,
            # which may be wrapped over several lines in the PDF.
            name_lines = []
            j = i + 1
            while j < len(lines) and not re.match(r"\d{1,2}/\d{1,2}/\d{4}", lines[j]):
                name_lines.append(lines[j])
                j += 1
            name = " ".join(name_lines).strip()

            # j stops on the date line unless the text ran out first.
            if j < len(lines):
                dob = lines[j].strip()

            # Only the first matching ID line is used.
            break

    return {
        "memberId": member_id,
        "name": name,
        "dob": dob,
    }
|
||||
|
||||
if __name__ == "__main__":
    # Run the Flask app on port 5001 when this file is executed directly.
    app.run(port=5001)
|
||||
|
||||
Reference in New Issue
Block a user