openreviewer committed
Commit 25f01d1 · Parent(s): 0bf9463
Upload folder using huggingface_hub
Files changed:
- .gitattributes +16 -16
- .github/workflows/deploy.yml +53 -53
- .gitignore +2 -2
- app.py +254 -168
- file_utils.py +2 -2
- iclr2024/question11.txt +6 -6
- iclr2024/systemrole.txt +10 -10
- logging_config.py +8 -8
- models.py +158 -158
- requirements.txt +108 -108
- utils.py +45 -49
.gitattributes
CHANGED
@@ -1,16 +1,16 @@
# HIDE ALL OF THE FILES IN THE DIRECTORY
*.py
*.log
*.md
*.txt
iclr2024/**
*.github/**
*.gitignore
*.gitattributes
*.git/**
*.__pycache__/**
.github/workflows/deploy.yml
CHANGED
@@ -1,54 +1,54 @@
name: Deploy Gradio App

on:
  push:
    branches:
      - main

jobs:
  deploy:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12.3' # Specify the Python version you are using

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt # Ensure you have a requirements.txt file

      - name: Login to Hugging Face
        env:
          HUGGINGFACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
        run: |
          huggingface-cli login --token $HUGGINGFACE_TOKEN

      - name: Deploy Gradio App
        env:
          HUGGINGFACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
        run: gradio deploy

      # - name: Upload to Hugging Face Spaces
      #   env:
      #     HF_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
      #   run: |
      #     git lfs install
      #     huggingface-cli lfs-enable-largefiles .
      #     huggingface-cli repo create reviewerarena/reviewer-arena --type=space
      #     huggingface-cli repo upload reviewerarena/reviewer-arena . --all-yes
      # - name: Login to Hugging Face
      #   env:
      #     HUGGINGFACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
      #   run: |
      #     echo "$HUGGINGFACE_TOKEN" | huggingface-cli login --token

      # - name: Deploy Gradio App
      #   env:
      #     HUGGINGFACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
      #   run: |
      #     gradio deploy --token $HUGGINGFACE_TOKEN
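The workflow assumes a repository secret named HUGGING_FACE_TOKEN. Before relying on the deploy step, the token can be sanity-checked locally; a minimal sketch using huggingface_hub (the HUGGINGFACE_TOKEN environment variable here is an assumption for illustration, not part of this repo):

# Hypothetical local check that the deploy token is valid before storing it
# as the HUGGING_FACE_TOKEN repository secret consumed by this workflow.
import os
from huggingface_hub import HfApi

api = HfApi(token=os.environ["HUGGINGFACE_TOKEN"])
print(api.whoami()["name"])  # prints the account the token authenticates as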
.gitignore
CHANGED
@@ -1,3 +1,3 @@
my-venv/
old/
arena.log
app.py
CHANGED
@@ -1,168 +1,254 @@
import gradio as gr
from utils import process_paper
import os
import logging
import html
from logging_config import setup_logging

setup_logging()
paper_dir = 'path_to_temp_storage'
prompt_dir = 'iclr2024'
api_keys = {
    'openai_api_key': os.environ.get('openai_api_key'),
    'claude_api_key': os.environ.get('anthropic_api_key'),
    'gemini_api_key': os.environ.get('google_api_key'),
    'commandr_api_key': os.environ.get('cohere_api_key')
}

use_real_api = False


def review_papers(pdf_file):
    logging.info(f"Received file type: {type(pdf_file)}")
    if use_real_api:
        reviews, selected_models = process_paper(
            pdf_file, paper_dir, prompt_dir, api_keys)
        processed_reviews = []
        for review in reviews:
            processed_review = {}
            for section in review:
                if ':' in section:
                    key, value = section.split(':', 1)
                    processed_value = value.strip().replace('\n', '<br>')
                    processed_review[key.strip()] = html.escape(
                        processed_value)
            processed_reviews.append(processed_review)
        reviews = processed_reviews
    else:
        reviews = [
            {
                "Summary": "This is a placeholder review for Model 1. The paper explores advanced methodologies in reinforcement learning applied to autonomous driving systems, proposing significant enhancements to decision-making algorithms that could improve safety and operational efficiency. The authors provide a detailed analysis of the current limitations of existing systems and suggest innovative solutions that could transform the field.",
                "Soundness": "The assumptions underlying the proposed enhancements are occasionally not fully justified, particularly concerning the scalability of the algorithms under varied and unpredictable traffic conditions. A more rigorous examination of these assumptions is necessary to solidify the paper's foundation.",
                "Presentation": "While the paper is structured adequately, some sections delve into technical details that are not sufficiently elucidated for a broader audience. This could potentially limit the paper's impact and accessibility, making it challenging for non-specialists to fully grasp the implications of the research.",
                "Contribution": "The paper makes a moderate contribution to the existing body of knowledge, offering incremental improvements over current methodologies rather than a completely novel approach. However, these improvements are significant and could lead to better practical implementations in the field of autonomous driving.",
                "Strengths": "The initial results presented in the paper are promising, showing potential for the proposed methods. The inclusion of real-world data in the preliminary experiments adds a layer of credibility and relevance to the results, showcasing the practical applicability of the research.",
                "Weaknesses": "The paper lacks detailed exposition on the methodology, particularly in how the algorithms adapt to unexpected or novel scenarios. This is a critical area that requires further development and testing to ensure the robustness and reliability of the proposed solutions.",
                "Questions/Suggestions": "The statistical analysis section could be enhanced by incorporating more robust statistical techniques and a wider array of metrics. Additionally, conducting tests in a variety of driving environments could help in substantiating the claims made and strengthen the overall findings of the research.",
                "Ethics Review": "The research complies with all ethical standards, addressing potential ethical issues related to autonomous driving comprehensively. Issues such as privacy concerns, decision-making in critical situations, and the overall impact on societal norms are discussed and handled with the utmost care.",
                "Overall Score": "3/5",
                "Confidence": "Confidence in the findings is moderate. While the initial results are encouraging, the limited scope of testing and some unresolved questions regarding scalability and robustness temper the confidence in these results.",
                "Code of Conduct": "There are no violations of the code of conduct noted. The research upholds ethical standards and maintains transparency in methodologies and data usage, contributing to its integrity and the trustworthiness of the findings."
            },
            {
                "Summary": "This is a placeholder review for Model 2. The paper explores advanced methodologies in reinforcement learning applied to autonomous driving systems, proposing significant enhancements to decision-making algorithms that could improve safety and operational efficiency. The authors provide a detailed analysis of the current limitations of existing systems and suggest innovative solutions that could transform the field.",
                "Soundness": "The assumptions underlying the proposed enhancements are occasionally not fully justified, particularly concerning the scalability of the algorithms under varied and unpredictable traffic conditions. A more rigorous examination of these assumptions is necessary to solidify the paper's foundation.",
                "Presentation": "While the paper is structured adequately, some sections delve into technical details that are not sufficiently elucidated for a broader audience. This could potentially limit the paper's impact and accessibility, making it challenging for non-specialists to fully grasp the implications of the research.",
                "Contribution": "The paper makes a moderate contribution to the existing body of knowledge, offering incremental improvements over current methodologies rather than a completely novel approach. However, these improvements are significant and could lead to better practical implementations in the field of autonomous driving.",
                "Strengths": "The initial results presented in the paper are promising, showing potential for the proposed methods. The inclusion of real-world data in the preliminary experiments adds a layer of credibility and relevance to the results, showcasing the practical applicability of the research.",
                "Weaknesses": "The paper lacks detailed exposition on the methodology, particularly in how the algorithms adapt to unexpected or novel scenarios. This is a critical area that requires further development and testing to ensure the robustness and reliability of the proposed solutions.",
                "Questions/Suggestions": "The statistical analysis section could be enhanced by incorporating more robust statistical techniques and a wider array of metrics. Additionally, conducting tests in a variety of driving environments could help in substantiating the claims made and strengthen the overall findings of the research.",
                "Ethics Review": "The research complies with all ethical standards, addressing potential ethical issues related to autonomous driving comprehensively. Issues such as privacy concerns, decision-making in critical situations, and the overall impact on societal norms are discussed and handled with the utmost care.",
                "Overall Score": "3/5",
                "Confidence": "Confidence in the findings is moderate. While the initial results are encouraging, the limited scope of testing and some unresolved questions regarding scalability and robustness temper the confidence in these results.",
                "Code of Conduct": "There are no violations of the code of conduct noted. The research upholds ethical standards and maintains transparency in methodologies and data usage, contributing to its integrity and the trustworthiness of the findings."
            }
        ]
        selected_models = ['model1-placeholder', 'model2-placeholder']

    review_texts = []
    for review in reviews:
        formatted_review = "<div class='review-container'>"
        for section, content in review.items():
            formatted_review += f"<div class='review-section'><strong>{section}:</strong> <span>{html.unescape(content)}</span></div>"
        formatted_review += "</div>"
        review_texts.append(formatted_review)

    model_a = selected_models[0]
    model_b = selected_models[1]

    logging.debug(f"Final formatted reviews: {review_texts}")
    return review_texts[0], review_texts[1], gr.update(visible=True), gr.update(visible=True), model_a, model_b


def setup_interface():
    logging.debug("Setting up Gradio interface.")
    css = """
    .review-container {
        padding: 10px;
        margin-bottom: 20px;
        border: 1px solid #ccc;
        background-color: #f9f9f9;
    }
    .review-section {
        margin-bottom: 12px;
        padding: 8px;
        background-color: #ffffff;
        border-left: 4px solid #007BFF;
        padding-left: 10px;
    }
    .review-section strong {
        color: #333;
        font-weight: bold;
        display: block;
        margin-bottom: 5px;
    }
    .review-section span, .gr-markdown {
        color: #000;
        font-size: 14px;
        line-height: 1.5;
        display: block;
        white-space: normal;
        opacity: 1;
    }
    .model-label {
        font-size: 18px;
        font-weight: bold;
        color: #007BFF;
        margin-bottom: 10px;
    }
    .gr-file, .gr-button, .gr-radio {
        width: 300px;
        margin: auto;
    }
    .gr-button-small {
        width: 150px;
        height: 40px;
        font-size: 16px;
    }
    """
    with gr.Blocks(css=css) as demo:
        with gr.Tabs():
            with gr.TabItem("Reviewer Arena"):
                gr.Markdown("## Reviewer Arena")
                gr.Markdown(
                    "Upload an academic paper to get reviews from two randomly selected LLMs.")
                with gr.Row():
                    file_input = gr.File(label="Upload Academic Paper")
                    submit_button = gr.Button(
                        "Submit!", elem_id="submit-button")
                with gr.Row():
                    with gr.Column():
                        gr.HTML("<div class='model-label'>Model A</div>")
                        review1 = gr.Markdown()
                    with gr.Column():
                        gr.HTML("<div class='model-label'>Model B</div>")
                        review2 = gr.Markdown()

                vote_options = ["👍 A is better",
                                "👍 B is better", "👔 Tie", "👎 Both are bad"]
                vote = gr.Radio(label="Vote on the best model",
                                choices=vote_options, value="Tie", visible=False)
                vote_button = gr.Button("Submit Vote", visible=False)
                vote_message = gr.HTML("", visible=False)
                another_paper_button = gr.Button(
                    "Review another paper", visible=False)

                model_identity_message = gr.HTML("", visible=False)

                def handle_vote(vote, model_a, model_b):
                    print(f"Vote received: {vote}")
                    message = f"<p>Thank you for your vote!</p><p>Model A: {model_a}</p><p>Model B: {model_b}</p>"
                    return gr.update(value=message, visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)

                vote_button.click(fn=handle_vote, inputs=[vote, model_identity_message, model_identity_message], outputs=[
                    vote_message, vote, vote_button, another_paper_button])

                submit_button.click(
                    fn=review_papers,
                    inputs=[file_input],
                    outputs=[review1, review2, vote, vote_button,
                             model_identity_message, model_identity_message]
                )

                another_paper_button.click(
                    fn=lambda: None, inputs=None, outputs=None, js="() => { location.reload(); }")
            with gr.TabItem("Leaderboard"):
                gr.Markdown("## Leaderboard")
                leaderboard_html = """
                <table style="width:100%; border: 1px solid #444; border-collapse: collapse; font-family: Arial, sans-serif; background-color: #2b2b2b;">
                    <thead>
                        <tr style="border: 1px solid #444; padding: 12px; background-color: #1a1a1a;">
                            <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Rank</th>
                            <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Model</th>
                            <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Arena Elo</th>
                            <th style="border: 1px solid #444; padding: 12px; color: #ddd;">95% CI</th>
                            <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Votes</th>
                            <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Organization</th>
                            <th style="border: 1px solid #444; padding: 12px; color: #ddd;">License</th>
                            <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Knowledge Cutoff</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr style="border: 1px solid #444; padding: 12px;">
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">1</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">GPT-4-Turbo-2024-04-09</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">1258</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">+3/-3</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">44592</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">OpenAI</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Proprietary</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">2023/12</td>
                        </tr>
                        <tr style="border: 1px solid #444; padding: 12px;">
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">2</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">GPT-4-1106-preview</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">1252</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">+2/-3</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">76173</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">OpenAI</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Proprietary</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">2023/4</td>
                        </tr>
                        <tr style="border: 1px solid #444; padding: 12px;">
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">2</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Gemini 1.5 Pro API-0409-Preview</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">1249</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">+3/-3</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">61011</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Google</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Proprietary</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">2023/11</td>
                        </tr>
                        <tr style="border: 1px solid #444; padding: 12px;">
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">2</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Claude 3 Opus</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">1248</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">+2/-2</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">101063</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Anthropic</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Proprietary</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">2023/8</td>
                        </tr>
                        <tr style="border: 1px solid #444; padding: 12px;">
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">3</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">GPT-4-0125-preview</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">1246</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">+3/-2</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">70239</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">OpenAI</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Proprietary</td>
                            <td style="border: 1px solid #444; padding: 12px; color: #ddd;">2023/12</td>
                        </tr>
                    </tbody>
                </table>
                """
                gr.HTML(leaderboard_html)

    logging.debug("Gradio interface setup complete.")
    return demo


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    demo = setup_interface()
    demo.launch()
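Because use_real_api is False on this commit, the placeholder branch can be exercised without any API keys; a minimal local smoke test, run from the repo root (the sample path is hypothetical and is only logged, never opened, on this branch):

# Hypothetical smoke test of the placeholder path (use_real_api = False).
from app import review_papers, setup_interface

review_a, review_b, *_, model_a, model_b = review_papers("sample.pdf")
print(model_a, model_b)      # model1-placeholder model2-placeholder
print(review_a[:80])         # start of the formatted HTML review

setup_interface().launch()   # serves the two-tab Gradio UI locally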
file_utils.py
CHANGED
@@ -1,3 +1,3 @@
def read_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        return f.read()
iclr2024/question11.txt
CHANGED
@@ -1,7 +1,7 @@
If there are no violations of the Code of Conduct with this paper, please respond with NO. Otherwise, if this paper violates the Code of Conduct, please indicate the relevant section(s) from the following options:

Yes, Harassment, bullying, or discrimination based on personal characteristics
Yes, Inappropriate physical contact, sexual harassment, or unwelcome sexual attention
Yes, Offensive comments related to gender, race, religion, or other protected characteristics
Yes, Disruption of talks or other events, or behavior interfering with participation
Yes, Inappropriate use of imagery, language, or personal attacks in virtual interactions
iclr2024/systemrole.txt
CHANGED
@@ -1,11 +1,11 @@
You are a very critical but fair peer reviewer. You will be provided with papers submitted to a conference/journal to review. The papers will be delimited with #### characters.

We are aiming for a 20-25% acceptance rate. Average score thresholds of 5.5-5.7 roughly correspond to acceptance rates of 25%-20%. It is certainly possible to both accept papers below this threshold and reject papers above it. But any such decision should be properly explained.

The statistics for the previous year was: A total of 3422 submissions were received. The average score of all submissions was 5.47 with standard deviation 1.30, with scores ranging from 1.00 to 9.00. Aim for a similar distribution of scores and use the full range of scores between 1-10.

Out of all submissions, 32% (1095 submissions) were accepted, with scores ranging from 4.50 to 9.00 and an average score of 6.61 with a standard deviation of 0.75. Only 2.1% (55 submissions) were accepted for oral presentation, with scores ranging from 5.00 to 9.00 and an average score of 7.80 with a standard deviation of 0.63.

6.64% (174 submissions) were selected for the spotlight, with scores ranging from 5.60 to 8.60 and an average score of 7.33 with a standard deviation of 0.58. 33.04% (866 submissions) were accepted for poster presentation, with scores ranging from 4.50 to 8.00 and an average score of 6.39 with a standard deviation of 0.61.

60.36% (1582 submissions) were rejected, with scores ranging from 1.00 to 7.50 and an average score of 4.69 with a standard deviation of 0.97. Additionally, 775 submissions were withdrawn and 26 were desk rejected.
logging_config.py
CHANGED
@@ -1,9 +1,9 @@
import logging

def setup_logging():
    logging.basicConfig(
        filename="arena.log",
        level=logging.DEBUG,  # Change to DEBUG level
        format='%(asctime)s - %(levelname)s - %(message)s'
    )
    logging.info("Logging setup complete.")
models.py
CHANGED
@@ -1,158 +1,158 @@
import os
import logging
import openai
import tiktoken
import re
import anthropic
import cohere
import google.generativeai as genai
import time
from file_utils import read_file
from openai import OpenAI

class Paper:
    def __init__(self, arxiv_id, tex_file):
        self.arxiv_id = arxiv_id
        self.tex_file = tex_file

class PaperProcessor:
    MAX_TOKENS = 127192
    encoding = tiktoken.encoding_for_model("gpt-4-0125-preview")

    def __init__(self, prompt_dir, model, openai_api_key, claude_api_key, gemini_api_key, commandr_api_key):
        self.prompt_dir = prompt_dir
        self.model = model
        self.openai_api_key = openai_api_key
        self.claude_api_key = claude_api_key
        self.gemini_api_key = gemini_api_key
        self.commandr_api_key = commandr_api_key

    def count_tokens(self, text):
        return len(self.encoding.encode(text))

    def truncate_content(self, content):
        token_count = self.count_tokens(content)
        logging.debug(f"Token count before truncation: {token_count}")
        if token_count > self.MAX_TOKENS:
            tokens = self.encoding.encode(content)
            truncated_tokens = tokens[:self.MAX_TOKENS]
            truncated_content = self.encoding.decode(truncated_tokens)
            logging.debug(f"Content truncated. Token count after truncation: {self.count_tokens(truncated_content)}")
            return truncated_content
        return content

    def prepare_base_prompt(self, paper):
        return paper.tex_file

    def call_model(self, prompt, model_type):
        system_role_file_path = os.path.join(self.prompt_dir, "systemrole.txt")
        if not os.path.exists(system_role_file_path):
            logging.error(f"System role file not found: {system_role_file_path}")
            return None

        system_role = read_file(system_role_file_path)
        logging.debug(f"Token count of full prompt: {self.count_tokens(prompt)}")
        logging.info(f"Sending the following prompt to {model_type}: {prompt}")

        try:
            if model_type == 'gpt':
                client = OpenAI(api_key=self.openai_api_key)
                messages = [{"role": "system", "content": system_role}, {"role": "user", "content": prompt}]
                completion = client.chat.completions.create(
                    model="gpt-4-turbo-2024-04-09",
                    messages=messages,
                    temperature=1
                )
                return completion.choices[0].message.content.strip()

            elif model_type == 'claude':
                client = anthropic.Anthropic(api_key=self.claude_api_key)
                response = client.messages.create(
                    model='claude-3-opus-20240229',
                    max_tokens=4096,
                    system=system_role,
                    temperature=0.5,
                    messages=[{"role": "user", "content": prompt}]
                )
                return response.content[0].text

            elif model_type == 'commandr':
                co = cohere.Client(self.commandr_api_key)
                response = co.chat(
                    model="command-r-plus",
                    message=prompt,
                    preamble=system_role
                )
                return response.text

            elif model_type == 'gemini':
                genai.configure(api_key=self.gemini_api_key)
                model = genai.GenerativeModel('gemini-pro')
                response = model.generate_content(prompt)
                return response.candidates[0].content.parts[0].text

        except Exception as e:
            logging.error(f"Exception occurred: {e}")
            return None

    def is_content_appropriate(self, content):
        try:
            response = openai.moderations.create(input=content)
            return not response["results"][0]["flagged"]
        except Exception as e:
            logging.error(f"Exception occurred while checking content appropriateness: {e}")
            return True  # In case of an error, default to content being appropriate

    def get_prompt_files(self, prompt_dir):
        return [f for f in os.listdir(prompt_dir) if f.endswith('.txt') and f.startswith('question')]

    def process_paper(self, paper):
        openai.api_key = self.openai_api_key
        start_time = time.time()

        base_prompt = self.prepare_base_prompt(paper)
        if base_prompt is None:
            return "Error: Base prompt could not be prepared."

        moderation_response = openai.moderations.create(input=base_prompt)
        if moderation_response.results[0].flagged:
            return ["Desk Rejected", "The paper contains inappropriate or harmful content."]

        review_output = []
        previous_responses = []
        header = ['Summary:', 'Soundness:', 'Presentation:', 'Contribution:', 'Strengths:', 'Weaknesses:', 'Questions:', 'Flag For Ethics Review:', 'Rating:', 'Confidence:', 'Code Of Conduct:']
        for i in range(1, 12):
            question_file = os.path.join(self.prompt_dir, f"question{i}.txt")
            question_text = read_file(question_file)

            if i == 1:
                prompt = f"{question_text}\n\n####\n{base_prompt}\n####"
            else:
                prompt = f"\nHere is your review so far:\n{' '.join(previous_responses)}\n\nHere are your reviewer instructions. Please answer the following question:\n{question_text}"

            truncated_prompt = self.truncate_content(prompt)
            logging.info(f"Processing prompt for question {i}")

            response = self.call_model(truncated_prompt, self.model)
            if response is None:
                response = "N/A"

            if i in [2, 3, 4, 10]:
                number_match = re.search(r'\b\d+\b', response)
                if number_match:
                    number = int(number_match.group(0))
                    response = '5/5' if number > 5 else number_match.group(0) + '/5'
            elif i == 9:
                number_match = re.search(r'\b\d+\b', response)
                if number_match:
                    response = number_match.group(0) + '/10'

            response_with_header = f"{header[i-1]} {response}"
            review_output.append(response_with_header)
            previous_responses.append(response)

        end_time = time.time()
        elapsed_time = end_time - start_time
        print(f"Time taken to process paper: {elapsed_time:.2f} seconds")
        return review_output
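PaperProcessor can also be driven outside the Gradio app; a minimal sketch for the 'gpt' backend, assuming an OpenAI key in the openai_api_key environment variable (the arXiv id and LaTeX string are placeholders, and the unused backend keys may be None):

# Hypothetical standalone run against a single backend.
import os
from models import Paper, PaperProcessor

paper = Paper(arxiv_id="0000.00000", tex_file=r"\documentclass{article} ...")
processor = PaperProcessor(
    prompt_dir="iclr2024",
    model="gpt",                # one of: 'gpt', 'claude', 'gemini', 'commandr'
    openai_api_key=os.environ["openai_api_key"],
    claude_api_key=None,        # unused when model='gpt'
    gemini_api_key=None,
    commandr_api_key=None,
)
for section in processor.process_paper(paper):
    print(section)              # 'Summary: ...', 'Soundness: ...', etc.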
requirements.txt
CHANGED
@@ -1,108 +1,108 @@
aiofiles==23.2.1
altair==5.3.0
annotated-types==0.6.0
anthropic==0.25.8
anyio==4.3.0
attrs==23.2.0
beautifulsoup4==4.12.3
boto3==1.34.103
botocore==1.34.103
cachetools==5.3.3
certifi==2024.2.2
charset-normalizer==3.3.2
click==8.1.7
cohere==5.4.0
colorama==0.4.6
contourpy==1.2.1
cycler==0.12.1
distro==1.9.0
dnspython==2.6.1
email_validator==2.1.1
fastapi==0.111.0
fastapi-cli==0.0.3
fastavro==1.9.4
ffmpy==0.3.2
filelock==3.14.0
fonttools==4.51.0
fsspec==2024.3.1
google==3.0.0
google-ai-generativelanguage==0.6.2
google-api-core==2.19.0
google-api-python-client==2.129.0
google-auth==2.29.0
google-auth-httplib2==0.2.0
google-generativeai==0.5.2
googleapis-common-protos==1.63.0
gradio==4.31.0
gradio_client==0.16.2
grpcio==1.63.0
grpcio-status==1.62.2
h11==0.14.0
httpcore==1.0.5
httplib2==0.22.0
httptools==0.6.1
httpx==0.27.0
httpx-sse==0.4.0
huggingface-hub==0.23.0
idna==3.7
importlib_resources==6.4.0
Jinja2==3.1.4
jmespath==1.0.1
jsonschema==4.22.0
jsonschema-specifications==2023.12.1
kiwisolver==1.4.5
markdown-it-py==3.0.0
MarkupSafe==2.1.5
matplotlib==3.8.4
mdurl==0.1.2
numpy==1.26.4
openai==1.28.1
orjson==3.10.3
packaging==24.0
pandas==2.2.2
pillow==10.3.0
proto-plus==1.23.0
protobuf==4.25.3
pyasn1==0.6.0
pyasn1_modules==0.4.0
pydantic==2.7.1
pydantic_core==2.18.2
pydub==0.25.1
Pygments==2.18.0
PyMuPDF==1.24.3
PyMuPDFb==1.24.3
pyparsing==3.1.2
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
python-multipart==0.0.9
pytz==2024.1
PyYAML==6.0.1
referencing==0.35.1
regex==2024.5.10
requests==2.31.0
rich==13.7.1
rpds-py==0.18.1
rsa==4.9
ruff==0.4.4
s3transfer==0.10.1
semantic-version==2.10.0
shellingham==1.5.4
six==1.16.0
sniffio==1.3.1
soupsieve==2.5
starlette==0.37.2
tiktoken==0.6.0
tokenizers==0.19.1
tomlkit==0.12.0
toolz==0.12.1
tqdm==4.66.4
typer==0.12.3
types-requests==2.31.0.20240406
typing_extensions==4.11.0
tzdata==2024.1
ujson==5.9.0
uritemplate==4.1.1
urllib3==2.2.1
uvicorn==0.29.0
watchfiles==0.21.0
websockets==11.0.3
utils.py
CHANGED
@@ -1,49 +1,45 @@
import fitz
import os
import logging
import random
from models import Paper, PaperProcessor


def extract_text_from_pdf(filename):
    with fitz.open(filename) as pdf_document:
        text = ""
        for page in pdf_document:
            text += page.get_text()
        return text.encode('latin-1', 'replace').decode('latin-1')


def process_paper(pdf_file, paper_dir, prompt_dir, api_keys):
    logging.info(f"Processing file type in process_paper: {type(pdf_file)}")
    logging.debug(f"Starting to process paper: {pdf_file}")
    os.makedirs(paper_dir, exist_ok=True)

    if isinstance(pdf_file, str):
        pdf_path = pdf_file
    elif hasattr(pdf_file, 'name') and hasattr(pdf_file, 'read'):
        pdf_path = os.path.join(paper_dir, pdf_file.name)
        with open(pdf_path, "wb") as f:
            f.write(pdf_file.read())
    else:
        logging.error(
            "Received object is neither a path nor a file-like object.")
        return [], []

    extracted_text = extract_text_from_pdf(pdf_path)
    paper = Paper(pdf_file.name if hasattr(pdf_file, 'name')
                  else os.path.basename(pdf_path), extracted_text)

    models = ['gpt', 'claude', 'gemini', 'commandr']
    selected_models = random.sample(models, 2)

    reviews = []
    for model in selected_models:
        processor = PaperProcessor(prompt_dir, model, **api_keys)
        review_text = processor.process_paper(paper)
        reviews.append(review_text)
    logging.debug(f"Reviews generated: {reviews}")
    return reviews, selected_models
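End to end, utils.process_paper needs only a PDF path (or file-like object), a scratch directory, the prompt directory, and the key dict from app.py; a minimal sketch, assuming the same environment variables are set (paper.pdf is a hypothetical local file):

# Hypothetical end-to-end call; two of the four backends are sampled at random.
import os
from utils import process_paper

api_keys = {
    'openai_api_key': os.environ.get('openai_api_key'),
    'claude_api_key': os.environ.get('anthropic_api_key'),
    'gemini_api_key': os.environ.get('google_api_key'),
    'commandr_api_key': os.environ.get('cohere_api_key'),
}
reviews, selected = process_paper("paper.pdf", "path_to_temp_storage", "iclr2024", api_keys)
print(selected)        # e.g. ['claude', 'gemini']
print(reviews[0][0])   # first section of the first review, e.g. 'Summary: ...'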