File size: 6,048 Bytes
9e62f85 38a86d9 13a9008 9e62f85 38a86d9 9e62f85 38a86d9 9e62f85 38a86d9 9e62f85 38a86d9 9e62f85 38a86d9 9e62f85 38a86d9 9e62f85 38a86d9 9e62f85 38a86d9 9e62f85 38a86d9 9e62f85 38a86d9 9e62f85 38a86d9 9e62f85 38a86d9 9e62f85 38a86d9 9e62f85 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
import boto3
import uuid
import datetime
import os
from decimal import Decimal, getcontext
from dotenv import load_dotenv
# Load AWS credentials and region from environment variables.
# os.environ.get returns None for any variable that is not set.
# NOTE(review): load_dotenv is imported at the top of the file, but no call to
# it is visible in this section -- confirm whether a .env file is meant to be
# loaded before these lookups.
aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID')
aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')
aws_region = os.environ.get('AWS_REGION')
# Initialize the DynamoDB service resource (boto3.resource, not a low-level client).
# NOTE(review): the argument list of this call is cut off in this copy of the
# file (no closing parenthesis is visible); presumably it receives the three
# values read above -- confirm against the complete source.
dynamodb = boto3.resource('dynamodb',
# Define the tables: one handle for the logged vote requests and one for the
# per-model leaderboard statistics.
requests_table = dynamodb.Table('reviewer_arena_requests')
leaderboards_table = dynamodb.Table('reviewer_arena_leaderboard')
# Function to write a request to the Requests table.
# Stores one vote via requests_table.put_item with the attributes RequestID,
# Timestamp, UserID, PaperID, ModelA, ModelB and Vote.
#   user_id, paper_id, model_a, model_b, vote: stored unchanged under the
#   attribute names listed above.
# Returns the response object produced by put_item.
# NOTE(review): in this copy of the file the timestamp expression and the
# put_item argument list are cut off (unclosed parentheses, no visible opening
# of the item mapping) -- confirm against the complete source.
def write_request(user_id, paper_id, model_a, model_b, vote):
# A fresh random UUID (uuid4), rendered as a string, identifies the request.
request_id = str(uuid.uuid4())
# The timestamp is stored as the string form of a Decimal; the value handed to
# Decimal is not visible here.
timestamp = str(Decimal(
response = requests_table.put_item(
'RequestID': request_id,
'Timestamp': timestamp,
'UserID': user_id,
'PaperID': paper_id,
'ModelA': model_a,
'ModelB': model_b,
'Vote': vote
return response
# Function to update leaderboard after a vote.
# Steps visible in the code: normalize the vote label, read both models'
# current stats from leaderboards_table, pick the counter update expressions
# for the outcome, then compute new Elo scores and 95% CI bounds for both
# models and prepare them for writing back.
#   model_a, model_b: values used as the 'ModelID' key of each leaderboard item.
#   vote: the vote label; labels not present in vote_mapping count as "Tie".
# NOTE(review): several lines are missing from this copy of the file (closing
# braces of the dict literals and the openers of the four update calls whose
# Key/UpdateExpression/ExpressionAttributeValues arguments remain) -- confirm
# against the complete source. No return statement is visible.
def update_leaderboard(model_a, model_b, vote):
# Map vote options to simpler keys
# NOTE(review): the leading character of these keys looks like a mis-encoded
# symbol; verify that the keys match the labels callers actually pass, because
# any unmatched label silently falls back to "Tie" below.
vote_mapping = {
"π A is better": "A is better",
"π B is better": "B is better",
"π Tie": "Tie",
"π Both are bad": "Tie" # Assuming "Both are bad" is treated as a tie
vote = vote_mapping.get(vote, "Tie") # Default to "Tie" if vote is not found
# Retrieve current stats for ModelA and ModelB.
# .get('Item', {}) yields an empty dict when the response carries no 'Item'.
model_a_stats = leaderboards_table.get_item(Key={'ModelID': model_a}).get('Item', {})
model_b_stats = leaderboards_table.get_item(Key={'ModelID': model_b}).get('Item', {})
# Initialize stats if they don't exist: zeroed counters and a starting Elo of 1200.
# NOTE(review): these defaults are only assigned to local variables here; no
# visible code writes them to the table before the counter updates -- confirm
# that leaderboard items are created elsewhere.
if not model_a_stats:
model_a_stats = {'ModelID': model_a, 'Wins': 0, 'Losses': 0, 'Ties': 0, 'EloScore': Decimal(1200), 'Votes': 0}
if not model_b_stats:
model_b_stats = {'ModelID': model_b, 'Wins': 0, 'Losses': 0, 'Ties': 0, 'EloScore': Decimal(1200), 'Votes': 0}
# Update stats based on the vote.
# For each outcome, one expression per model: the relevant Wins/Losses/Ties
# counter and the Votes counter are both increased by :inc.
# NOTE(review): the expressions read the existing attribute values
# (e.g. "Wins = Wins + :inc"); confirm the attributes already exist on the
# stored items, since DynamoDB is expected to reject operands that reference
# missing attributes.
update_expressions = {
"A is better": {
"model_a": "SET Wins = Wins + :inc, Votes = Votes + :inc",
"model_b": "SET Losses = Losses + :inc, Votes = Votes + :inc"
"B is better": {
"model_a": "SET Losses = Losses + :inc, Votes = Votes + :inc",
"model_b": "SET Wins = Wins + :inc, Votes = Votes + :inc"
"Tie": {
"model_a": "SET Ties = Ties + :inc, Votes = Votes + :inc",
"model_b": "SET Ties = Ties + :inc, Votes = Votes + :inc"
# vote is always one of the three keys above after the mapping step.
expression_a = update_expressions[vote]["model_a"]
expression_b = update_expressions[vote]["model_b"]
# Update ModelA stats (:inc is bound to 1)
Key={'ModelID': model_a},
ExpressionAttributeValues={':inc': 1}
# Update ModelB stats (:inc is bound to 1)
Key={'ModelID': model_b},
ExpressionAttributeValues={':inc': 1}
# Calculate new Elo scores (simple Elo calculation for illustration).
# The inputs are the scores read at the top of the function, i.e. before the
# counter updates above.
# NOTE(review): this is a read-then-write sequence; presumably concurrent
# votes for the same model could overwrite each other's score -- verify.
new_elo_a, new_elo_b = calculate_elo(model_a_stats['EloScore'], model_b_stats['EloScore'], vote)
# Calculate 95% CI for new Elo scores; "+ 1" counts the vote being processed.
ci_a_lower, ci_a_upper = calculate_95_ci(new_elo_a, model_a_stats['Votes'] + 1)
ci_b_lower, ci_b_upper = calculate_95_ci(new_elo_b, model_b_stats['Votes'] + 1)
# Update Elo scores and 95% CI (values are passed as Decimal)
Key={'ModelID': model_a},
UpdateExpression="SET EloScore = :new_elo, CI_Lower = :ci_lower, CI_Upper = :ci_upper",
ExpressionAttributeValues={':new_elo': Decimal(new_elo_a), ':ci_lower': Decimal(ci_a_lower), ':ci_upper': Decimal(ci_a_upper)}
Key={'ModelID': model_b},
UpdateExpression="SET EloScore = :new_elo, CI_Lower = :ci_lower, CI_Upper = :ci_upper",
ExpressionAttributeValues={':new_elo': Decimal(new_elo_b), ':ci_lower': Decimal(ci_b_lower), ':ci_upper': Decimal(ci_b_upper)}
# Set the precision for Decimal arithmetic to 28 significant digits.
# This runs at import time and changes the current decimal context, so it
# affects Decimal operations performed afterwards, not only the Elo helpers.
# 28 is also the decimal module's documented default; the line makes the
# setting explicit.
getcontext().prec = 28
# Function to calculate new Elo scores
def calculate_elo(elo_a, elo_b, vote, k=32):
    """Return the updated Elo ratings of two models after one comparison.

    Args:
        elo_a: Current rating of model A (Decimal, int, str or float).
        elo_b: Current rating of model B (Decimal, int, str or float).
        vote: Outcome label. "A is better" scores A=1/B=0, "B is better"
            scores A=0/B=1; any other value is scored as a tie (0.5 each).
        k: K-factor, the maximum rating change per comparison.

    Returns:
        A ``(new_elo_a, new_elo_b)`` tuple of Decimals rounded to two
        decimal places.
    """
    def _to_decimal(value):
        # Decimal(float) copies the float's binary expansion (5.35 becomes
        # 5.3499999...), which can flip the final rounding. Going through
        # str() keeps the value the caller wrote. Other types are unaffected.
        if isinstance(value, float):
            return Decimal(str(value))
        return Decimal(value)

    elo_a = _to_decimal(elo_a)
    elo_b = _to_decimal(elo_b)
    k = _to_decimal(k)

    # Standard logistic expectation on a 400-point scale.
    expected_a = 1 / (1 + Decimal(10) ** ((elo_b - elo_a) / Decimal(400)))
    expected_b = 1 / (1 + Decimal(10) ** ((elo_a - elo_b) / Decimal(400)))

    if vote == "A is better":
        actual_a, actual_b = Decimal(1), Decimal(0)
    elif vote == "B is better":
        actual_a, actual_b = Decimal(0), Decimal(1)
    else:  # Tie (also the fallback for unrecognised labels)
        actual_a = actual_b = Decimal("0.5")

    new_elo_a = elo_a + k * (actual_a - expected_a)
    new_elo_b = elo_b + k * (actual_b - expected_b)
    return round(new_elo_a, 2), round(new_elo_b, 2)
# Function to calculate 95% CI for Elo scores
def calculate_95_ci(elo, votes, z=1.96):
    """Return the lower and upper confidence bounds around an Elo score.

    The standard error is taken as ``400 / sqrt(votes)`` and the margin as
    ``z`` times that value.

    Args:
        elo: The Elo score to centre the interval on.
        votes: Number of votes the score is based on.
        z: Critical value for the interval; the default 1.96 corresponds to
            a 95% two-sided normal interval.

    Returns:
        A ``(lower, upper)`` tuple of Decimals rounded to two decimal
        places. When ``votes`` is 0 the result is ``(Decimal(0), Decimal(0))``
        rather than an interval around ``elo``.
    """
    if votes == 0:
        return Decimal(0), Decimal(0)

    def _to_decimal(value):
        # Decimal(float) copies the float's binary expansion (1.96 becomes
        # 1.95999999...), which can flip the final rounding. Going through
        # str() keeps the value the caller wrote. Other types are unaffected.
        if isinstance(value, float):
            return Decimal(str(value))
        return Decimal(value)

    elo = _to_decimal(elo)
    std_error = Decimal(400) / _to_decimal(votes).sqrt()
    margin = _to_decimal(z) * std_error
    return round(elo - margin, 2), round(elo + margin, 2)
# Function to query leaderboard
def get_leaderboard():
    """Return every leaderboard item, sorted by EloScore from high to low.

    A single DynamoDB scan call returns at most one page of results, so the
    scan is repeated with ``ExclusiveStartKey`` for as long as the response
    carries a ``LastEvaluatedKey``. Without this, models past the first page
    would be missing from the leaderboard.

    Returns:
        A list of item dicts as returned by the table scan.

    Raises:
        KeyError: If an item has no 'EloScore' attribute.
    """
    leaderboard = []
    scan_kwargs = {}
    while True:
        response = leaderboards_table.scan(**scan_kwargs)
        leaderboard.extend(response.get('Items', []))
        last_key = response.get('LastEvaluatedKey')
        if not last_key:
            break
        # Resume the scan where the previous page stopped.
        scan_kwargs['ExclusiveStartKey'] = last_key

    # Sort by EloScore in descending order
    leaderboard.sort(key=lambda x: x['EloScore'], reverse=True)
    return leaderboard