zliang commited on
Commit
2a47024
1 Parent(s): 3155ca8

Delete bibtest.ipynb

Browse files
Files changed (1) hide show
  1. bibtest.ipynb +0 -195
bibtest.ipynb DELETED
@@ -1,195 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "import pdf2bib"
10
- ]
11
- },
12
- {
13
- "cell_type": "code",
14
- "execution_count": 2,
15
- "metadata": {},
16
- "outputs": [],
17
- "source": [
18
- "def extract_metadata(file_path):\n",
19
- " pdfextractdata = pdf2bib.pdf2bib(file_path)\n",
20
- " #st.write(pdfextractdata)\n",
21
- " pdfextractdata_metadata = {} if pdfextractdata.get('metadata', {}) is None else pdfextractdata.get('metadata', {})\n",
22
- "\n",
23
- " return pdfextractdata_metadata"
24
- ]
25
- },
26
- {
27
- "cell_type": "code",
28
- "execution_count": 43,
29
- "metadata": {},
30
- "outputs": [
31
- {
32
- "name": "stderr",
33
- "output_type": "stream",
34
- "text": [
35
- "[pdf2bib]: Trying to extract data to generate the BibTeX entry for the file: boiteau-et-al-2024-relating-molecular-properties-to-the-persistence-of-marine-dissolved-organic-matter-with-liquid.pdf\n",
36
- "[pdf2bib]: Calling pdf2doi...\n",
37
- "[pdf2doi]: Trying to retrieve a DOI/identifier for the file: boiteau-et-al-2024-relating-molecular-properties-to-the-persistence-of-marine-dissolved-organic-matter-with-liquid.pdf\n",
38
- "[pdf2doi]: Method #1: Looking for a valid identifier in the document infos...\n",
39
- "[pdf2doi]: Validating the possible DOI 10.1021/acs.est.3c08245 via a query to dx.doi.org...\n",
40
- "[pdf2doi]: The DOI 10.1021/acs.est.3c08245 is validated by dx.doi.org.\n",
41
- "[pdf2doi]: A valid DOI was found in the document info labelled '/prism:doi'.\n",
42
- "[pdf2bib]: pdf2doi found a valid identifier for this paper.\n",
43
- "[pdf2bib]: Parsing the info returned by dx.doi.org...\n",
44
- "[pdf2bib]: A valid BibTeX entry was generated.\n"
45
- ]
46
- }
47
- ],
48
- "source": [
49
- "a = pdf2bib.pdf2bib(\"boiteau-et-al-2024-relating-molecular-properties-to-the-persistence-of-marine-dissolved-organic-matter-with-liquid.pdf\")"
50
- ]
51
- },
52
- {
53
- "cell_type": "code",
54
- "execution_count": 47,
55
- "metadata": {},
56
- "outputs": [
57
- {
58
- "data": {
59
- "text/plain": [
60
- "'@article{boiteau2024relating,\\n\\ttitle = {Relating Molecular Properties to the Persistence of Marine Dissolved Organic Matter with Liquid Chromatography–Ultrahigh-Resolution Mass Spectrometry},\\n\\tpublisher = {American Chemical Society (ACS)},\\n\\turl = {http://dx.doi.org/10.1021/acs.est.3c08245},\\n\\tdoi = {10.1021/acs.est.3c08245},\\n\\tjournal = {Environmental Science & Technology},\\n\\tyear = {2024},\\n\\tmonth = {2},\\n\\tauthor = {Rene M. Boiteau and Yuri E. Corilo and William R. Kew and Christian Dewey and Maria Cristina Alvarez Rodriguez and Craig A. Carlson and Tim M. Conway}\\n}'"
61
- ]
62
- },
63
- "execution_count": 47,
64
- "metadata": {},
65
- "output_type": "execute_result"
66
- }
67
- ],
68
- "source": [
69
- "a.get(\"bibtex\")"
70
- ]
71
- },
72
- {
73
- "cell_type": "code",
74
- "execution_count": null,
75
- "metadata": {},
76
- "outputs": [],
77
- "source": []
78
- },
79
- {
80
- "cell_type": "code",
81
- "execution_count": 12,
82
- "metadata": {},
83
- "outputs": [],
84
- "source": [
85
- "import bibtexparser"
86
- ]
87
- },
88
- {
89
- "cell_type": "code",
90
- "execution_count": 9,
91
- "metadata": {},
92
- "outputs": [],
93
- "source": [
94
- "parser = bibtex.Parser()"
95
- ]
96
- },
97
- {
98
- "cell_type": "code",
99
- "execution_count": 45,
100
- "metadata": {},
101
- "outputs": [],
102
- "source": [
103
- "parser = bibtexparser.bparser.BibTexParser(common_strings=True)\n",
104
- "bib_database = bibtexparser.loads(a.get(\"bibtex\"), parser=parser)"
105
- ]
106
- },
107
- {
108
- "cell_type": "code",
109
- "execution_count": 41,
110
- "metadata": {},
111
- "outputs": [],
112
- "source": [
113
- "def format_author_names(authors_str):\n",
114
- " authors = authors_str.split(' and ')\n",
115
- " formatted_authors = []\n",
116
- " for author in authors:\n",
117
- " parts = author.split()\n",
118
- " if len(parts) == 2: # Simple case: First Last\n",
119
- " last, first = parts[1], parts[0]\n",
120
- " formatted_authors.append(f\"{last}, {first[0]}.\")\n",
121
- " elif len(parts) > 2: # Handling middle names or initials\n",
122
- " last = parts[-1]\n",
123
- " initials = ''.join(f\"{part[0]}.\" for part in parts[:-1])\n",
124
- " formatted_authors.append(f\"{last}, {initials}\")\n",
125
- " if len(formatted_authors) > 1:\n",
126
- " formatted_authors_str = ', '.join(formatted_authors[:-1]) + ', & ' + formatted_authors[-1]\n",
127
- " else:\n",
128
- " formatted_authors_str = formatted_authors[0]\n",
129
- " return formatted_authors_str"
130
- ]
131
- },
132
- {
133
- "cell_type": "code",
134
- "execution_count": 36,
135
- "metadata": {},
136
- "outputs": [],
137
- "source": [
138
- "def format_apa(entry):\n",
139
- " author = format_author_names(entry.get('author', ''))\n",
140
- " year = entry.get('year', '')\n",
141
- " title = entry.get('title', '')\n",
142
- " journal = entry.get('journal', '')\n",
143
- " volume = entry.get('volume', '')\n",
144
- " issue = entry.get('issue', '')\n",
145
- " pages = entry.get('page', '').replace('-', '–') # En dash for page range\n",
146
- " doi = entry.get('doi', '')\n",
147
- " \n",
148
- " # Constructing the citation\n",
149
- " apa_citation = f\"{author} {title}. {journal} {volume}, {pages} ({year}). https://doi.org/{doi}\"\n",
150
- " return apa_citation"
151
- ]
152
- },
153
- {
154
- "cell_type": "code",
155
- "execution_count": 46,
156
- "metadata": {},
157
- "outputs": [
158
- {
159
- "data": {
160
- "text/plain": [
161
- "'Boiteau, R.M., Corilo, Y.E., Kew, W.R., Dewey, C., Rodriguez, M.C.A., Carlson, C.A., & Conway, T.M. Relating Molecular Properties to the Persistence of Marine Dissolved Organic Matter with Liquid Chromatography–Ultrahigh-Resolution Mass Spectrometry. Environmental Science & Technology , (2024). https://doi.org/10.1021/acs.est.3c08245'"
162
- ]
163
- },
164
- "execution_count": 46,
165
- "metadata": {},
166
- "output_type": "execute_result"
167
- }
168
- ],
169
- "source": [
170
- "format_apa(bib_database.entries[0])"
171
- ]
172
- }
173
- ],
174
- "metadata": {
175
- "kernelspec": {
176
- "display_name": "Python 3",
177
- "language": "python",
178
- "name": "python3"
179
- },
180
- "language_info": {
181
- "codemirror_mode": {
182
- "name": "ipython",
183
- "version": 3
184
- },
185
- "file_extension": ".py",
186
- "mimetype": "text/x-python",
187
- "name": "python",
188
- "nbconvert_exporter": "python",
189
- "pygments_lexer": "ipython3",
190
- "version": "3.10.2"
191
- }
192
- },
193
- "nbformat": 4,
194
- "nbformat_minor": 2
195
- }