sandbox / judging.py
justinxzhao's picture
Some refactoring, judging responses for direct assessment.
577870e
raw
history blame contribute delete
660 Bytes
from pydantic import BaseModel, Field, conint
from typing import List, Optional, Literal, Union
class Criteria(BaseModel):
    """One named scoring criterion together with its integer score bounds.

    Both bounds are validated as integers that are greater than or equal
    to zero.
    """

    # Name of the criterion.
    name: str

    # Free-text description of the criterion.
    description: str

    # Presumably the lowest score that may be awarded; must be >= 0.
    min_score: conint(ge=0)

    # Presumably the highest score that may be awarded; must be >= 0.
    max_score: conint(ge=0)

    # NOTE(review): nothing in this model checks that max_score is at least
    # min_score -- confirm whether callers rely on that ordering.
class DirectAssessment(BaseModel):
    """Settings for the ``"direct_assessment"`` judging variant."""

    # Tag field: only the literal string "direct_assessment" validates.
    type: Literal["direct_assessment"]

    # The Criteria entries attached to this assessment.
    criteria: List[Criteria]

    # Prompt text; how it is consumed is not visible in this module.
    prompt: str
class PairwiseComparison(BaseModel):
    """Settings for the ``"pairwise_comparison"`` judging variant."""

    # Tag field: only the literal string "pairwise_comparison" validates.
    type: Literal["pairwise_comparison"]

    # One of exactly three accepted values: "coarse", "fine", "super fine".
    granularity: Literal["coarse", "fine", "super fine"]

    # Presumably whether a comparison may be declared a tie -- TODO confirm.
    ties_allowed: bool

    # Presumably whether each pair is also judged with the two responses in
    # swapped positions -- TODO confirm against the code that reads it.
    position_swapping: bool

    # Identifier of the reference model (plain string, not validated further).
    reference_model: str

    # Prompt text; how it is consumed is not visible in this module.
    prompt: str
class JudgingConfig(BaseModel):
    """Top-level judging configuration holding exactly one assessment variant.

    ``assessment`` is either a ``DirectAssessment`` or a
    ``PairwiseComparison``. Both variants carry a ``type`` field constrained
    to a distinct literal string, so the union is declared as a discriminated
    union keyed on ``type``: the tag selects which variant to validate, and
    any validation error is reported against that variant only rather than
    against every member of the union.
    """

    # Required field (``...``); the variant is selected by the "type" tag.
    assessment: Union[DirectAssessment, PairwiseComparison] = Field(
        ..., discriminator="type"
    )