field-diversity / pdf.py
jpwahle's picture
Initial commit
505fd08
raw
history blame
407 Bytes
import scipdf
def parse_pdf_to_artcile_dict(pdf_path):
    """Parse the PDF at ``pdf_path`` and return scipdf's parsed result.

    Thin wrapper that forwards ``pdf_path`` unchanged to
    ``scipdf.parse_pdf_to_dict`` and returns whatever it returns. The demo
    code in this module treats the result as a dictionary with at least
    ``"title"`` and ``"references"`` entries.

    NOTE(review): the function name misspells "article"; it is kept as-is
    because callers elsewhere may import it under this name.
    """
    parsed_article = scipdf.parse_pdf_to_dict(pdf_path)
    return parsed_article
if __name__ == "__main__":
    # Demo entry point: parse a single PDF and print a short summary of the
    # parsed result (top-level keys, title, and the keys of one reference).
    import sys

    # Fall back to the original demo paper when no path is passed on the
    # command line, so running the script with no arguments behaves as before.
    default_pdf_path = "/Users/jp/Documents/papers/demo-test/EMNLP23_Influence_NLP_Citation_Analysis.pdf"
    pdf_path = sys.argv[1] if len(sys.argv) > 1 else default_pdf_path

    # Go through this module's own wrapper rather than calling scipdf
    # directly, so the script exercises the same code path as importers.
    article_dict = parse_pdf_to_artcile_dict(pdf_path)  # returns a dictionary
    print(article_dict.keys())
    print(article_dict["title"])

    # A PDF without a parsed bibliography would make `[0]` raise; only
    # inspect the first reference when there is one.
    references = article_dict.get("references") or []
    if references:
        print(references[0].keys())
    else:
        print("No references found.")