Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 2,370 Bytes
2c24f05 156ef43 58b9de9 156ef43 58b9de9 156ef43 58b9de9 156ef43 58b9de9 156ef43 58b9de9 156ef43 d7b7dc6 156ef43 404587d 58b9de9 404587d 58b9de9 d7b7dc6 58b9de9 404587d 58b9de9 d7b7dc6 58b9de9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
def is_summary_valid(summary: str) -> bool:
    """
    Decide whether a summary is long enough to be considered valid.

    A summary passes when it is a string that splits (on whitespace) into
    five or more words. Anything that is not a string, as well as empty or
    shorter strings, is rejected.

    Args:
        summary (str): The candidate summary.

    Returns:
        bool: True when the summary has at least five words, False otherwise.
    """
    # The isinstance test short-circuits, so .split() is never called on a
    # non-string value.
    return isinstance(summary, str) and len(summary.split()) >= 5
def create_pairs(df):
    """
    Build [source, summary] pairs from a dataframe, one pair per row.

    The two columns are walked in lockstep instead of going through
    ``df.iterrows()``, which constructs a Series for every row and coerces
    each row to a single common dtype.

    Args:
        df (DataFrame): The dataframe containing 'source' and 'summary'
            columns.

    Returns:
        list: A list of two-element lists ``[source, summary]`` in row order.
        A dataframe without rows yields an empty list.
    """
    # A row-less frame produces no pairs; returning early keeps that true
    # even when the expected columns are absent.
    if len(df) == 0:
        return []
    return [
        [source, summary]
        for source, summary in zip(df['source'], df['summary'])
    ]
def format_results(model_name: str, revision: str, precision: str,
                   factual_consistency_rate: float, hallucination_rate: float,
                   answer_rate: float, avg_summary_len: float) -> dict:
    """
    Package evaluation metadata and metrics into one nested dictionary.

    Args:
        model_name (str): The name of the evaluated model.
        revision (str): The revision hash of the model.
        precision (str): The precision with which the evaluation was run.
        factual_consistency_rate (float): The factual consistency rate.
        hallucination_rate (float): The hallucination rate.
        answer_rate (float): The answer rate.
        avg_summary_len (float): The average summary length.

    Returns:
        dict: A dictionary with two keys. "config" holds the model dtype,
        name and sha; "results" holds one entry per metric, each nested
        under a key that repeats the metric's own name.
    """
    config = {
        "model_dtype": precision,  # precision the evaluation ran with
        "model_name": model_name,
        "model_sha": revision,  # revision hash of the model
    }
    # (metric name, value) pairs, kept in the order they appear in the output.
    metrics = (
        ("hallucination_rate", hallucination_rate),
        ("factual_consistency_rate", factual_consistency_rate),
        ("answer_rate", answer_rate),
        ("average_summary_length", avg_summary_len),
    )
    # Every metric is stored as {name: {name: value}}.
    scores = {metric: {metric: value} for metric, value in metrics}
    return {"config": config, "results": scores}
|