XufengDuan committed
Commit 9da8cd9
1 Parent(s): 63a1401

updated scripts

Files changed (4)
  1. .DS_Store +0 -0
  2. src/.DS_Store +0 -0
  3. src/backend/model_operations.py +147 -23
  4. src/envs.py +3 -2
.DS_Store ADDED
Binary file (6.15 kB).
 
src/.DS_Store CHANGED
Binary files a/src/.DS_Store and b/src/.DS_Store differ
 
src/backend/model_operations.py CHANGED
@@ -21,11 +21,19 @@ import cohere
 from openai import OpenAI
 # import google
 import google.generativeai as genai
-from huggingface_hub import InferenceClient
 
 import src.backend.util as util
 import src.envs as envs
 
+# import pandas as pd
+import scipy
+from scipy.spatial.distance import jensenshannon
+# import numpy as np
+
+
+
+
+
 # litellm.set_verbose=False
 litellm.set_verbose=True
 # Set up basic configuration for logging
@@ -196,8 +204,19 @@ class SummaryGenerator:
                     break
                 if i == 5:
                     print(_response)
+                if _response == None:
+                    _response1, _response2 = "", ""
+                else:
+                    try:
+                        import re
+                        _response1, _response2 = re.split(r'\n\s*\n', _response.strip())
+                    except:
+                        _response1 = _response.split('\n\n')
+                        if len(_response) == 2:
+                            _response1, _response2 = _response[0], _response[1]
+                        else:
+                            _response1, _response2 = _response[0], ""
 
-                _response1, _response2 = _response.split('\n\n')
                 Experiment_ID.append(ID)
                 Questions_ID.append(q_column[j])
                 User_prompt.append(_user_prompt)
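Note on the added fallback: the except branch assigns the '\n\n' split to _response1 but then checks len(_response) and indexes _response, i.e. the raw string rather than the split result, and the bare except hides parsing errors. A minimal sketch of what the branch presumably intends (the helper name is illustrative and not part of the repository):

import re

def split_two_part_response(response):
    # Split a model reply into two blocks separated by a blank line.
    if not response:
        return "", ""
    parts = re.split(r'\n\s*\n', response.strip(), maxsplit=1)
    if len(parts) == 2:
        return parts[0], parts[1]
    # Fall back to a plain double-newline split, checking the split result
    # rather than the original string.
    parts = response.split('\n\n')
    return (parts[0], parts[1]) if len(parts) >= 2 else (parts[0], "")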
@@ -261,15 +280,8 @@ class SummaryGenerator:
 
     def generate_summary(self, system_prompt: str, user_prompt: str):
         # Using Together AI API
-
-        client = InferenceClient(self.model_id, token = envs.TOKEN)
-        result = client.chat_completion(messages=[{"role": "system", "content": system_prompt},
-                                                  {"role": "user", "content": user_prompt}], max_tokens=50, stream=False)
-        print(result.choices[0].message.content)
-        return result.choices[0].message.content
-
         using_together_api = False
-        together_ai_api_models = ['mixtral', 'dbrx', 'wizardlm', 'llama-3']
+        together_ai_api_models = ['mixtral', 'dbrx', 'wizardlm']
         for together_ai_api_model in together_ai_api_models:
             if together_ai_api_model in self.model_id.lower():
                 using_together_api = True
@@ -335,6 +347,7 @@ class SummaryGenerator:
                 max_tokens=250,
             )
             result = response['choices'][0]['message']['content']
+            # print()
             print(result)
             return result
 
@@ -379,17 +392,31 @@ class SummaryGenerator:
 
         # Using HF API or download checkpoints
         elif self.local_model is None:
-            # print(self.model_id)
-            # exit()
+            # print(self.model_id)
+            # print(self.api_base)
+            # mistralai/Mistral-7B-Instruct-v0.1
+            # https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.1
             try: # try use HuggingFace API
+                # response = litellm.completion(
+                #     model="huggingface/"+'command-r-plus' if 'command' in self.model_id else self.model_id,
+                #     messages=[{"role": "system", "content": system_prompt},
+                #               {"role": "user", "content": user_prompt}],
+                #     temperature=0.0,
+                #     max_tokens=1024,
+                #     api_base= "https://api-inference.huggingface.co/models/" + self.model_id,
+                #     )
+                self.model_id = 'command-r-plus' if 'command' in self.model_id else self.model_id
                 response = litellm.completion(
-                    model='command-r-plus' if 'command' in self.model_id else self.model_id,
-                    messages=[{"role": "system", "content": system_prompt},
+                    model="huggingface/" + self.model_id,
+                    # mistralai/Mistral-7B-Instruct-v0.1",
+                    messages=[{"role": "system", "content": system_prompt},
                               {"role": "user", "content": user_prompt}],
-                    temperature=0.0,
-                    max_tokens=1024,
-                    api_base=self.api_base,
-                    )
+                    temperature=0.0,
+                    max_tokens=1024,
+                    api_base="https://api-inference.huggingface.co/models/" + self.model_id)
+                print("Model response:", response)
+                print("End of model response")
+                # exit()
                 result = response['choices'][0]['message']['content']
                 print(result)
                 return result
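Note on the HuggingFace path: the hunk reassigns self.model_id to resolve 'command' models to command-r-plus, so the rewritten id sticks for every later call on the same object. A minimal sketch of the same litellm call with the name resolved locally instead (names and the api_base pattern follow the diff; this is not the committed code):

# Inside generate_summary, assuming self.model_id, system_prompt and user_prompt as above.
hf_model_id = 'command-r-plus' if 'command' in self.model_id else self.model_id
response = litellm.completion(
    model="huggingface/" + hf_model_id,   # "huggingface/" prefix selects litellm's HF provider
    messages=[{"role": "system", "content": system_prompt},
              {"role": "user", "content": user_prompt}],
    temperature=0.0,
    max_tokens=1024,
    api_base="https://api-inference.huggingface.co/models/" + hf_model_id,
)
result = response['choices'][0]['message']['content']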
@@ -399,7 +426,7 @@ class SummaryGenerator:
             print("Tokenizer loaded")
             self.local_model = AutoModelForCausalLM.from_pretrained(self.model_id, trust_remote_code=True, device_map="auto", torch_dtype="auto", cache_dir='/home/paperspace/cache')
             print("Local model loaded")
-            # exit()
+            # exit()
         # Using local model
         if self.local_model: # cannot call API. using local model
             messages=[
@@ -1025,7 +1052,100 @@ class EvaluationModel:
 
 
 
-    '''Are there different questions, and how should they be computed?'''
+
+
+    def calculate_js_divergence(self, file_path_1, file_path_2):
+        """
+        Calculate the Jensen-Shannon divergence for response distributions between two datasets.
+        - Extracts E5 and E51 pairs, creates new data based on comparison,
+          removes the original E5 and E51, and then calculates the JS divergence between the datasets.
+
+        Parameters:
+        file_path_1 (str): Path to the first dataset file (Excel format).
+        file_path_2 (str): Path to the second dataset file (CSV format).
+
+        Returns:
+        float: The average JS divergence across all common Question_IDs.
+        """
+        # Load the datasets
+        human_df = pd.read_excel(file_path_1)
+        llm_df = pd.read_csv(file_path_2)
+
+        def create_e5_entries(df):
+            new_entries = []
+            for i in range(len(df) - 1):
+                if 'E51' in df.iloc[i]['Experiment']:
+                    priming_id = df.iloc[i][0] - 1
+                    priming_row_id = df[df.iloc[:, 0] == priming_id].index[0]
+                    new_question_id = df.iloc[priming_row_id]['Question_ID']
+                    label = 1 if df.iloc[i]['Coding'] == df.iloc[priming_row_id]['Coding'] else 0
+                    new_entries.append({
+                        'Question_ID': new_question_id,
+                        'Response': f'{df.iloc[i]["Coding"]}-{df.iloc[priming_row_id]["Coding"]}',
+                        'Coding': label
+                    })
+            return pd.DataFrame(new_entries)
+
+        # Create new E5 entries for both datasets
+        human_e5 = create_e5_entries(human_df)
+        llm_e5 = create_e5_entries(llm_df)
+
+        # Remove E5 and E51 entries from both datasets
+        human_df = human_df[~human_df['Question_ID'].str.contains('E5')]
+        llm_df = llm_df[~llm_df['Question_ID'].str.contains('E5')]
+
+        # Append new E5 entries to the cleaned dataframes
+        human_df = pd.concat([human_df, human_e5], ignore_index=True)
+        llm_df = pd.concat([llm_df, llm_e5], ignore_index=True)
+
+        ### Calculate Average JS Divergence ###
+
+        # Extract the relevant columns for JS divergence calculation
+        human_responses = human_df[['Question_ID', 'Coding']]
+        llm_responses = llm_df[['Question_ID', 'Coding']]
+
+        # Get unique Question_IDs present in both datasets
+        common_question_ids = set(human_responses['Question_ID']).intersection(set(llm_responses['Question_ID']))
+
+        # Initialize a list to store JS divergence for each Question_ID
+        js_divergence_list = []
+        js_divergence = {}
+
+        # Calculate JS divergence for each common Question_ID
+        for q_id in common_question_ids:
+            # Get response distributions for the current Question_ID in both datasets
+            human_dist = human_responses[human_responses['Question_ID'] == q_id]['Coding'].value_counts(normalize=True)
+            llm_dist = llm_responses[llm_responses['Question_ID'] == q_id]['Coding'].value_counts(normalize=True)
+
+            # Reindex the distributions to have the same index, filling missing values with 0
+            all_responses = set(human_dist.index).union(set(llm_dist.index))
+            human_dist = human_dist.reindex(all_responses, fill_value=0)
+            llm_dist = llm_dist.reindex(all_responses, fill_value=0)
+
+            # Calculate JS divergence and add to the list
+            js_div = jensenshannon(human_dist, llm_dist, base=2)
+            experiment_id = q_id.split('_')[1]
+            if experiment_id not in js_divergence:
+                js_divergence[experiment_id] = []
+            js_divergence[experiment_id].append(js_div)
+
+            js_divergence_list.append(js_div)
+            # js_divergence[q_id] = js_div
+
+        # Calculate the average JS divergence
+        # JS per experiment
+        avg_js_divergence_per_experiment = {exp: 1 - np.nanmean(divs) for exp, divs in js_divergence.items()}
+        print(avg_js_divergence_per_experiment)
+
+        # JS overall
+        avg_js_divergence = 1 - np.nanmean(js_divergence_list)
+        print("avg_js_divergence:", avg_js_divergence)
+
+        return avg_js_divergence
+
+
     def evaluate_humanlike(self, summaries_df, human_data_path, result_save_path):
         '''
         evaluate humanlike score
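How the new metric works: for every Question_ID the human and model codings are turned into probability vectors with value_counts(normalize=True), compared with scipy's jensenshannon (which returns the Jensen-Shannon distance, the square root of the divergence; with base=2 it lies in [0, 1]), and the reported score is 1 minus the mean distance, so higher means more human-like. A toy check with made-up numbers, not project data:

import numpy as np
from scipy.spatial.distance import jensenshannon

human_dist = np.array([0.7, 0.3])   # e.g. 70% of humans coded "1", 30% coded "0"
llm_dist = np.array([0.5, 0.5])     # the model split evenly between the two codings

js = jensenshannon(human_dist, llm_dist, base=2)   # JS distance, roughly 0.174 here
similarity = 1 - js                                # roughly 0.826; closer to 1 = more human-like
print(round(js, 3), round(similarity, 3))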
@@ -1036,13 +1156,15 @@
         '''coding human data'''
         # self.huamn_df = pd.read_csv(human_data_path)
         # self.data = self.code_results(self.huamn_df)
-        # save_path = human_data_path.replace('.csv','_coding.csv')
+        save_path = human_data_path.replace('.csv','_coding.csv')
+        human_save_path = "./src/datasets/coding_human.xlsx"
         # if save_path is not None:
         #     print(f'Save human coding results to {save_path}')
         #     fpath = Path(save_path)
         #     fpath.parent.mkdir(parents=True, exist_ok=True)
         #     self.data.to_csv(fpath)
 
+
         '''coding llm data'''
         save_path = result_save_path.replace('.csv','_coding.csv')
         self.llm_df = self.code_results_llm(summaries_df)
@@ -1051,9 +1173,11 @@
             fpath = Path(save_path)
             fpath.parent.mkdir(parents=True, exist_ok=True)
             self.llm_df.to_csv(fpath)
-        # exit()
+        # file_path_1 = '/Users/simon/Downloads/coding_human.xlsx'
+        # file_path_2 = '/Users/simon/Downloads/Meta-Llama-3.1-70B-Instruct_coding.csv'
+        avg_js_divergence = self.calculate_js_divergence("./src/datasets/coding_human.xlsx", save_path)
 
-        return 9.00
+        return avg_js_divergence
 
 
 
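With this hunk evaluate_humanlike returns the averaged 1 - JS score instead of the hard-coded 9.00 placeholder. A hedged usage sketch (evaluator, summaries_df and the result path are placeholders; only the coding_human.xlsx path appears in the diff):

score = evaluator.evaluate_humanlike(
    summaries_df,
    human_data_path="./src/datasets/coding_human.xlsx",   # matches the path hard-coded above
    result_save_path="./results/model_output.csv",        # hypothetical output location
)
print(f"human-likeness: {score:.3f}")   # closer to 1 means the response distributions match humans more closely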
src/envs.py CHANGED
@@ -4,7 +4,8 @@ from huggingface_hub import HfApi
 
 
 # replace this with our token
-TOKEN = os.environ.get("HF_TOKEN", None)
+# TOKEN = os.environ.get("HF_TOKEN", None)
+TOKEN = os.getenv("HF_TOKEN")
 # print(TOKEN)
 # OWNER = "vectara"
 # REPO_ID = f"{OWNER}/Humanlike"
@@ -12,7 +13,7 @@ TOKEN = os.environ.get("HF_TOKEN", None)
 # RESULTS_REPO = f"{OWNER}/results"
 
 
-OWNER = "Simondon" # Change to your org - don't forget to create a results and request dataset, with the correct format!
+OWNER = "tangtang1995" # Change to your org - don't forget to create a results and request dataset, with the correct format!
 # ----------------------------------
 
 REPO_ID = f"{OWNER}/Humanlike"
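Note: the token lookup change is behaviorally equivalent; os.getenv is a thin wrapper over os.environ.get, and with no default both return None when HF_TOKEN is unset, so callers must still handle a missing token. A quick illustrative check (not from the repository):

import os

# With no default argument, both lookups return None for an unset variable.
assert os.getenv("HF_TOKEN") == os.environ.get("HF_TOKEN", None)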