Spaces:
Sleeping
Sleeping
File size: 11,290 Bytes
505fd08 19b7e49 505fd08 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 |
# Copyright 2023 by Jan Philip Wahle, https://jpwahle.com/
# All rights reserved.
# Thanks to Mukund Rungta for inspiration on early versions of this demo https://huggingface.co/spaces/mrungta8/CitationalAmnesia
import asyncio
import gradio as gr
from aclanthology import determine_page_type
from plots import generate_cfdi_plot, generate_maoc_plot
from s2 import (check_s2_id_type, compute_stats_for_acl_author,
compute_stats_for_acl_paper, compute_stats_for_acl_venue,
compute_stats_for_pdf, compute_stats_for_s2_author,
compute_stats_for_s2_paper)
def return_clear():
    """Reset every widget wired to the "Clear" button.

    Returns:
        tuple: Eight ``None`` values, one for each output component that
        the clear handler updates.
    """
    cleared_widget_count = 8
    return (None,) * cleared_widget_count
def create_compute_stats(submit_type=None):
    """Create the Gradio callback for one submission route.

    Args:
        submit_type (str): Which input the callback acts on: "s2_id",
            "acl_link" or "pdf_file". With any other value the callback
            always returns the empty result.

    Returns:
        function: ``compute_stats(s2_id, pdf_file, acl_link)``, which returns
        the 5-tuple produced by ``plot_and_return_stats`` or, when nothing
        could be computed, five ``None`` values.
    """
    # plot_and_return_stats returns five values and the handlers built here
    # are wired to five output components, so the "nothing to show" result
    # must have exactly five entries as well.
    empty_result = (None,) * 5

    def compute_stats(s2_id=None, pdf_file=None, acl_link=None):
        if submit_type == "s2_id" and s2_id:
            # Check if s2_id is a paper id or an author id
            id_type, author_name = check_s2_id_type(s2_id)
            if id_type == "paper":
                results = compute_stats_for_s2_paper(s2_id)
                return plot_and_return_stats(*results, "paper")
            if id_type == "author":
                results = compute_stats_for_s2_author(s2_id, author_name)
                return plot_and_return_stats(*results, "author")

        if submit_type == "acl_link" and acl_link:
            # Crawl all papers for the author or venue, or just the paper
            # if it is a paper link
            url_type = determine_page_type(acl_link)
            if url_type == "paper":
                results = compute_stats_for_acl_paper(acl_link)
                return plot_and_return_stats(*results, "paper")
            if url_type == "author":
                results = compute_stats_for_acl_author(acl_link)
                return plot_and_return_stats(*results, "author")
            if url_type == "venue":
                results = compute_stats_for_acl_venue(acl_link)
                return plot_and_return_stats(*results, "proceedings")

        if submit_type == "pdf_file" and pdf_file:
            # compute_stats_for_pdf is a coroutine function; run it to
            # completion from this synchronous callback.
            results = asyncio.run(compute_stats_for_pdf(pdf_file))
            return plot_and_return_stats(*results, "paper")

        # Missing input, unknown submit_type, or an unrecognised id/link type.
        return empty_result

    return compute_stats
def plot_and_return_stats(
    title_authors,
    num_references,
    field_counts,
    year_title_dict,
    cfdi,
    cadi,
    maoc,
    compute_type,
):
    """
    Build the CFDI plot and collect the values displayed by the demo.

    Args:
        title_authors: Returned unchanged as the first element.
        num_references: Returned unchanged as the second element.
        field_counts (dict): Maps each field to its count; the three entries
            with the largest counts are reported.
        year_title_dict: Not used at the moment (the "oldest papers" output
            is disabled).
        cfdi: Citation field diversity index. It is handed to
            ``generate_cfdi_plot`` as-is and rounded to three decimals for
            display, so it is expected to be a number.
        cadi: Not used at the moment.
        maoc: Not used at the moment (the ``generate_maoc_plot`` output is
            disabled).
        compute_type: Forwarded to ``generate_cfdi_plot``; callers in this
            file pass "paper", "author" or "proceedings".

    Returns:
        tuple: ``(title_authors, num_references, top_fields_text, cfdi,
        plot_cfdi)`` where ``top_fields_text`` has one "field: count" line
        per top field and ``cfdi`` is the rounded index.
    """
    # The figure is created first, from the unrounded index.
    cfdi_figure = generate_cfdi_plot(cfdi, compute_type)

    # Rank fields by descending count and keep the first three.
    ranked_fields = sorted(
        field_counts.items(), key=lambda item: item[1], reverse=True
    )
    top_lines = []
    for field, count in ranked_fields[:3]:
        top_lines.append(f"{field}: {count}")
    top_fields_text = "\n".join(top_lines)

    rounded_cfdi = round(cfdi, 3)

    # The oldest-papers text, cadi and the MAOC plot are intentionally left
    # out of the result while those outputs are disabled in the UI.
    return (
        title_authors,
        num_references,
        top_fields_text,
        rounded_cfdi,
        cfdi_figure,
    )
# Layout and event wiring of the demo; everything inside this context is
# registered on the `demo` Blocks app.
with gr.Blocks(
    theme=gr.themes.Soft()
) as demo:
    # Introductory text: two Markdown panels (what CFDI is / how to use the demo).
    with gr.Row():
        gr.Markdown(
            """
# Citation Field Diversity Calculator
Welcome to this interactive demo to analyze the field diversity aspect of your citational practice. This tool will enable you to reflect on a critical aspect:
- By whom am I influenced? Which fields heavily inform and shape the research trajectory of my works?
In addition, you will be able to analyze how the above compares to the average paper or author. The results you will receive cannot be categorized into “good” or “bad”. Instead, they are meant to raise self-awareness about one’s citational diversity and reflect on it. The results might bring you to further questions, such as:
- Am I reading widely across fields?
- Should I expand my literature search to include works from other fields?
Using citations as a tangible marker of influence, our demo provides empirical insights into the influence of papers across fields.
## What is Citation Field Diversity?
Field diversity is a measure of the variety of research fields that a paper or an author draws upon. A high field diversity indicates that the work draws from various distinct research fields, demonstrating a multidisciplinary influence on that work or author.
## What is the Citation Field Diversity Index (CFDI) and how is it calculated?
The calculation of Field Diversity involves extracting all the references of a paper, categorizing them into distinct study fields, and determining the proportion of each study field over all the references. The Citation Field Diversity Index (CFDI) is then computed by applying the Gini Index on these proportions.
For more details, please refer to Eq. 3 in [this paper](https://aclanthology.org/2023.acl-long.341/).
"""
        )
        gr.Markdown(
            """
## How do I Interpret CFDI?
Higher values of CFDI indicate a greater diversity of a paper in terms of the fields it cites, signifying a multidisciplinary influence. On the other hand, lower values signify a lower diversity, indicating that citations are more concentrated in specific fields.
## How can I use this demo?
There are three ways for you to compute the field diversity for papers:
1. **Semantic Scholar ID**: Enter the Semantic Scholar ID of a **paper** or **author** and click the *"Compute"* button.
2. **ACL Anthology Link**: Paste the ACL Anthology link of a **paper**, **venue**, or **author** and click the *"Compute"* button.
3. **PDF File**: Upload your **paper** PDF and click the *"Compute"* button.
To retrieve the **Semantic Scholar ID** for a paper such as "The Elephant in the Room: Analyzing the Presence of Big Tech in Natural Language Processing Research," search the paper on Semantic Scholar [here](https://www.semanticscholar.org/paper/The-Elephant-in-the-Room%3A-Analyzing-the-Presence-of-Abdalla-Wahle/587ffdfd7229e8e0dbc5250b44df5fad6251f6ad) and use the last part of the URL. The Semantic Scholar ID (SSID) for this paper is: **587ffdfd7229e8e0dbc5250b44df5fad6251f6ad**.
To get an ACL Anthology link, you can go to any ACL Anthology paper, author or proceedings page and just copy and paste the url. For example:
- https://aclanthology.org/2023.acl-long.1/
- https://aclanthology.org/people/a/anna-rogers/
- https://aclanthology.org/events/acl-2002/
"""
        )
    # One tab per input method; each tab has its own "Compute" button.
    with gr.Row():
        with gr.Tabs():
            with gr.TabItem("Semantic Scholar ID"):
                s2_id = gr.Textbox(
                    label="Semantic Scholar ID",
                    placeholder=(
                        "Enter the Semantic Scholar ID here and press enter..."
                    ),
                    # value="587ffdfd7229e8e0dbc5250b44df5fad6251f6ad",
                )
                with gr.Row():
                    s2_submit_btn = gr.Button("Compute")
            with gr.TabItem("ACL Anthology Link"):
                acl_link = gr.Textbox(
                    label="ACL Anthology Link",
                    placeholder="Paste the ACL Anthology link here...",
                )
                with gr.Row():
                    acl_submit_btn = gr.Button("Compute")
            with gr.TabItem("PDF File"):
                pdf_file = gr.File(
                    file_types=[".pdf"], label="Upload your paper PDF"
                )
                with gr.Row():
                    file_submit_btn = gr.Button("Compute")
    # Result widgets filled in by the compute handlers.
    with gr.Row():
        title = gr.Textbox(
            label="Title / Author Name / Venue Name:", lines=2
        )  # Can be either paper title, author name, or proceedings title
    with gr.Row():
        num_ref = gr.Textbox(label="Number of references", lines=3)
        top_field_list = gr.Textbox(label="Top 3 fields cited:", lines=3)
        # top_age_list = gr.Textbox(label="Top 3 oldest papers cited:", lines=3)
    with gr.Row():
        cfdi = gr.Textbox(label="CFDI")
        # cadi = gr.Textbox(label="CADI")
    with gr.Row():
        cfdi_plot = gr.Plot(label="Citation Field Diversity")
        # cadi_plot = gr.Plot(label="Citation Age Diversity")
    with gr.Row():
        clear_btn = gr.Button("Clear")
    # Inputs/outputs shared by all three compute handlers. The five output
    # components line up with the five values returned by
    # plot_and_return_stats.
    submit_args = dict(
        inputs=[s2_id, pdf_file, acl_link],
        outputs=[
            title,
            num_ref,
            top_field_list,
            # top_age_list,
            cfdi,
            # cadi,
            cfdi_plot,
            # cadi_plot,
        ],
    )
    # Per-route variants: a shallow copy of the shared arguments plus the
    # handler specialised for that route.
    s2_submit_args = submit_args.copy()
    s2_submit_args["fn"] = create_compute_stats(submit_type="s2_id")
    acl_submit_args = submit_args.copy()
    acl_submit_args["fn"] = create_compute_stats(submit_type="acl_link")
    file_submit_args = submit_args.copy()
    file_submit_args["fn"] = create_compute_stats(submit_type="pdf_file")
    # Both textboxes register a submit event in addition to their tab's
    # button; the PDF tab is triggered by its button only.
    s2_id.submit(**s2_submit_args)
    acl_link.submit(**acl_submit_args)
    acl_submit_btn.click(**acl_submit_args)
    s2_submit_btn.click(**s2_submit_args)
    file_submit_btn.click(**file_submit_args)
    # return_clear yields eight None values, one per component listed below
    # (the five result widgets followed by the three inputs).
    clear_btn.click(
        fn=return_clear,
        inputs=[],
        outputs=[
            title,
            num_ref,
            top_field_list,
            # top_age_list,
            cfdi,
            # cadi,
            cfdi_plot,
            # cadi_plot,
            s2_id,
            acl_link,
            pdf_file,
        ],
    )
# Enable Gradio's request queue, then start the server on port 7860,
# bound to 0.0.0.0.
demo.queue(concurrency_count=3)
demo.launch(server_port=7860, server_name="0.0.0.0")
|