#! /usr/bin/env python3
"""Extract the text of one PDF page and pull header fields from its first line."""

import io

import pandas as pd
import PyPDF2


class Senioritylist:
    """Text of a single PDF page plus two fields sliced from its first line.

    Attributes:
        pdf_filename: Path of the PDF passed to the constructor.
        raw: Full text extracted from the most recently parsed page.
        lines: ``raw`` split on ``"\\n"``.
        num_lines: ``len(lines)``.
        effective_date: Characters 16-22 of the first line, set only when that
            line contains ``"Effective Date"``; otherwise left as it was.
        base: Characters 84-87 of the first line, set under the same condition.
    """

    # Marker that must appear in the first line for the header to be parsed.
    _HEADER_MARKER = "Effective Date"
    # Fixed column positions of the fields within the first line.
    # NOTE(review): presumably tied to the layout of one specific report —
    # confirm against the PDFs this is run on.
    _EFFECTIVE_DATE_SLICE = slice(16, 23)
    _BASE_SLICE = slice(84, 88)

    def __init__(self, pdf_filename):
        """Store *pdf_filename* and immediately parse page 0 of that PDF."""
        self.lines = None
        self.num_lines = None
        self.raw = None
        self.effective_date = None
        self.base = None
        self.pdf_filename = pdf_filename
        self.parse_page(0)

    def import_pdf(self):
        """Return a PyPDF2 reader over the contents of ``self.pdf_filename``.

        The file is read fully into memory and closed before returning, so no
        OS file handle is left open; the reader works on the in-memory copy.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        with open(self.pdf_filename, 'rb') as pdf_file:
            buffer = io.BytesIO(pdf_file.read())
        # Newer PyPDF2 releases expose ``PdfReader``; the legacy
        # ``PdfFileReader`` name is used only when that is unavailable.
        reader_cls = getattr(PyPDF2, "PdfReader", None) or PyPDF2.PdfFileReader
        return reader_cls(buffer)

    def parse_page(self, pagenum=0):
        """Extract text from page *pagenum* and refresh the parsed attributes.

        Sets ``raw``, ``lines`` and ``num_lines`` unconditionally. Sets
        ``effective_date`` and ``base`` only when the first line contains
        ``"Effective Date"``; otherwise their previous values are kept.

        Args:
            pagenum: Zero-based index into the reader's ``pages``.
        """
        page = self.import_pdf().pages[pagenum]
        # Prefer the current ``extract_text`` name; fall back to the legacy
        # ``extractText`` on old PyPDF2 versions.
        extract = getattr(page, "extract_text", None) or page.extractText
        self.raw = extract()
        self.lines = self.raw.split("\n")
        self.num_lines = len(self.lines)

        # ``str.split`` always yields at least one element, so [0] is safe.
        header = self.lines[0]
        if self._HEADER_MARKER in header:
            self.effective_date = header[self._EFFECTIVE_DATE_SLICE]
            self.base = header[self._BASE_SLICE]


if __name__ == '__main__':
    mylist = Senioritylist('Dec_22/dec22_sen.pdf')
    print(mylist.lines[0:10])
    print(mylist.effective_date)
    print(mylist.base)