XufengDuan committed
Commit 9da8cd9
1 Parent(s): 63a1401

updated scripts

Files changed (4)
  1. .DS_Store +0 -0
  2. src/.DS_Store +0 -0
  3. src/backend/model_operations.py +147 -23
  4. src/envs.py +3 -2
.DS_Store ADDED
Binary file (6.15 kB).
 
src/.DS_Store CHANGED
Binary files a/src/.DS_Store and b/src/.DS_Store differ
 
src/backend/model_operations.py CHANGED
@@ -21,11 +21,19 @@ import cohere
 from openai import OpenAI
 # import google
 import google.generativeai as genai
-from huggingface_hub import InferenceClient
 
 import src.backend.util as util
 import src.envs as envs
 
+# import pandas as pd
+import scipy
+from scipy.spatial.distance import jensenshannon
+# import numpy as np
+
+
+
+
+
 # litellm.set_verbose=False
 litellm.set_verbose=True
 # Set up basic configuration for logging
@@ -196,8 +204,19 @@ class SummaryGenerator:
                     break
                 if i == 5:
                     print(_response)
+                if _response == None:
+                    _response1, _response2 = "", ""
+                else:
+                    try:
+                        import re
+                        _response1, _response2 = re.split(r'\n\s*\n', _response.strip())
+                    except:
+                        _response1 = _response.split('\n\n')
+                        if len(_response) == 2:
+                            _response1, _response2 = _response[0], _response[1]
+                        else:
+                            _response1, _response2 = _response[0], ""
 
-                _response1, _response2 = _response.split('\n\n')
                 Experiment_ID.append(ID)
                 Questions_ID.append(q_column[j])
                 User_prompt.append(_user_prompt)
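Note on the added fallback: the except branch assigns the '\n\n' split to _response1 but then checks len(_response) and indexes _response, i.e. the raw string rather than the split result, and the bare except hides parsing errors. A minimal sketch of what the branch presumably intends (the helper name is illustrative and not part of the repository):

import re

def split_two_part_response(response):
    # Split a model reply into two blocks separated by a blank line.
    if not response:
        return "", ""
    parts = re.split(r'\n\s*\n', response.strip(), maxsplit=1)
    if len(parts) == 2:
        return parts[0], parts[1]
    # Fall back to a plain double-newline split, checking the split result
    # rather than the original string.
    parts = response.split('\n\n')
    return (parts[0], parts[1]) if len(parts) >= 2 else (parts[0], "")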
@@ -261,15 +280,8 @@ class SummaryGenerator:
 
     def generate_summary(self, system_prompt: str, user_prompt: str):
         # Using Together AI API
-
-        client = InferenceClient(self.model_id, token = envs.TOKEN)
-        result = client.chat_completion(messages=[{"role": "system", "content": system_prompt},
-                                                  {"role": "user", "content": user_prompt}], max_tokens=50, stream=False)
-        print(result.choices[0].message.content)
-        return result.choices[0].message.content
-
         using_together_api = False
-        together_ai_api_models = ['mixtral', 'dbrx', 'wizardlm', 'llama-3']
+        together_ai_api_models = ['mixtral', 'dbrx', 'wizardlm']
         for together_ai_api_model in together_ai_api_models:
             if together_ai_api_model in self.model_id.lower():
                 using_together_api = True
@@ -335,6 +347,7 @@ class SummaryGenerator:
                 max_tokens=250,
             )
             result = response['choices'][0]['message']['content']
+            # print()
             print(result)
             return result
 
@@ -379,17 +392,31 @@ class SummaryGenerator:
 
         # Using HF API or download checkpoints
         elif self.local_model is None:
-            # print(self.model_id)
-            # exit()
+            # print(self.model_id)
+            # print(self.api_base)
+            # mistralai/Mistral-7B-Instruct-v0.1
+            # https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.1
             try: # try use HuggingFace API
+                # response = litellm.completion(
+                #     model="huggingface/"+'command-r-plus' if 'command' in self.model_id else self.model_id,
+                #     messages=[{"role": "system", "content": system_prompt},
+                #               {"role": "user", "content": user_prompt}],
+                #     temperature=0.0,
+                #     max_tokens=1024,
+                #     api_base= "https://api-inference.huggingface.co/models/" + self.model_id,
+                #     )
+                self.model_id = 'command-r-plus' if 'command' in self.model_id else self.model_id
                 response = litellm.completion(
-                    model='command-r-plus' if 'command' in self.model_id else self.model_id,
-                    messages=[{"role": "system", "content": system_prompt},
+                    model="huggingface/" + self.model_id,
+                    # mistralai/Mistral-7B-Instruct-v0.1",
+                    messages=[{"role": "system", "content": system_prompt},
                               {"role": "user", "content": user_prompt}],
-                    temperature=0.0,
-                    max_tokens=1024,
-                    api_base=self.api_base,
-                    )
+                    temperature=0.0,
+                    max_tokens=1024,
+                    api_base="https://api-inference.huggingface.co/models/" + self.model_id)
+                print("Model response:", response)
+                print("End of model response")
+                # exit()
                 result = response['choices'][0]['message']['content']
                 print(result)
                 return result
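Note on the HuggingFace path: the hunk reassigns self.model_id to resolve 'command' models to command-r-plus, so the rewritten id sticks for every later call on the same object. A minimal sketch of the same litellm call with the name resolved locally instead (names and the api_base pattern follow the diff; this is not the committed code):

# Inside generate_summary, assuming self.model_id, system_prompt and user_prompt as above.
hf_model_id = 'command-r-plus' if 'command' in self.model_id else self.model_id
response = litellm.completion(
    model="huggingface/" + hf_model_id,   # "huggingface/" prefix selects litellm's HF provider
    messages=[{"role": "system", "content": system_prompt},
              {"role": "user", "content": user_prompt}],
    temperature=0.0,
    max_tokens=1024,
    api_base="https://api-inference.huggingface.co/models/" + hf_model_id,
)
result = response['choices'][0]['message']['content']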
@@ -399,7 +426,7 @@ class SummaryGenerator:
             print("Tokenizer loaded")
             self.local_model = AutoModelForCausalLM.from_pretrained(self.model_id, trust_remote_code=True, device_map="auto", torch_dtype="auto", cache_dir='/home/paperspace/cache')
             print("Local model loaded")
-            # exit()
+            # exit()
         # Using local model
         if self.local_model: # cannot call API. using local model
             messages=[
@@ -1025,7 +1052,100 @@ class EvaluationModel:
 
 
 
-    '''Are there different questions, and how should they be computed?'''
+
+
+    def calculate_js_divergence(self, file_path_1, file_path_2):
+        """
+        Calculate the Jensen-Shannon divergence for response distributions between two datasets.
+        - Extracts E5 and E51 pairs, creates new data based on comparison,
+          removes the original E5 and E51, and then calculates the JS divergence between the datasets.
+
+        Parameters:
+        file_path_1 (str): Path to the first dataset file (Excel format).
+        file_path_2 (str): Path to the second dataset file (CSV format).
+
+        Returns:
+        float: The average JS divergence across all common Question_IDs.
+        """
+        # Load the datasets
+        human_df = pd.read_excel(file_path_1)
+        llm_df = pd.read_csv(file_path_2)
+
+        def create_e5_entries(df):
+            new_entries = []
+            for i in range(len(df) - 1):
+                if 'E51' in df.iloc[i]['Experiment']:
+                    priming_id = df.iloc[i][0] - 1
+                    priming_row_id = df[df.iloc[:, 0] == priming_id].index[0]
+                    new_question_id = df.iloc[priming_row_id]['Question_ID']
+                    label = 1 if df.iloc[i]['Coding'] == df.iloc[priming_row_id]['Coding'] else 0
+                    new_entries.append({
+                        'Question_ID': new_question_id,
+                        'Response': f'{df.iloc[i]["Coding"]}-{df.iloc[priming_row_id]["Coding"]}',
+                        'Coding': label
+                    })
+            return pd.DataFrame(new_entries)
+
+        # Create new E5 entries for both datasets
+        human_e5 = create_e5_entries(human_df)
+        llm_e5 = create_e5_entries(llm_df)
+
+        # Remove E5 and E51 entries from both datasets
+        human_df = human_df[~human_df['Question_ID'].str.contains('E5')]
+        llm_df = llm_df[~llm_df['Question_ID'].str.contains('E5')]
+
+        # Append new E5 entries to the cleaned dataframes
+        human_df = pd.concat([human_df, human_e5], ignore_index=True)
+        llm_df = pd.concat([llm_df, llm_e5], ignore_index=True)
+
+        ### Calculate Average JS Divergence ###
+
+        # Extract the relevant columns for JS divergence calculation
+        human_responses = human_df[['Question_ID', 'Coding']]
+        llm_responses = llm_df[['Question_ID', 'Coding']]
+
+        # Get unique Question_IDs present in both datasets
+        common_question_ids = set(human_responses['Question_ID']).intersection(set(llm_responses['Question_ID']))
+
+        # Initialize a list to store JS divergence for each Question_ID
+        js_divergence_list = []
+        js_divergence = {}
+
+        # Calculate JS divergence for each common Question_ID
+        for q_id in common_question_ids:
+            # Get response distributions for the current Question_ID in both datasets
+            human_dist = human_responses[human_responses['Question_ID'] == q_id]['Coding'].value_counts(normalize=True)
+            llm_dist = llm_responses[llm_responses['Question_ID'] == q_id]['Coding'].value_counts(normalize=True)
+
+            # Reindex the distributions to have the same index, filling missing values with 0
+            all_responses = set(human_dist.index).union(set(llm_dist.index))
+            human_dist = human_dist.reindex(all_responses, fill_value=0)
+            llm_dist = llm_dist.reindex(all_responses, fill_value=0)
+
+            # Calculate JS divergence and add to the list
+            js_div = jensenshannon(human_dist, llm_dist, base=2)
+            experiment_id = q_id.split('_')[1]
+            if experiment_id not in js_divergence:
+                js_divergence[experiment_id] = []
+            js_divergence[experiment_id].append(js_div)
+
+            js_divergence_list.append(js_div)
+            # js_divergence[q_id] = js_div
+
+        # Calculate the average JS divergence
+        # JS per experiment
+        avg_js_divergence_per_experiment = {exp: 1 - np.nanmean(divs) for exp, divs in js_divergence.items()}
+        print(avg_js_divergence_per_experiment)
+
+        # JS overall
+        avg_js_divergence = 1 - np.nanmean(js_divergence_list)
+        print("avg_js_divergence:", avg_js_divergence)
+
+        return avg_js_divergence
+
+
     def evaluate_humanlike(self, summaries_df, human_data_path, result_save_path):
         '''
         evaluate humanlike score
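How the new metric works: for every Question_ID the human and model codings are turned into probability vectors with value_counts(normalize=True), compared with scipy's jensenshannon (which returns the Jensen-Shannon distance, the square root of the divergence; with base=2 it lies in [0, 1]), and the reported score is 1 minus the mean distance, so higher means more human-like. A toy check with made-up numbers, not project data:

import numpy as np
from scipy.spatial.distance import jensenshannon

human_dist = np.array([0.7, 0.3])   # e.g. 70% of humans coded "1", 30% coded "0"
llm_dist = np.array([0.5, 0.5])     # the model split evenly between the two codings

js = jensenshannon(human_dist, llm_dist, base=2)   # JS distance, roughly 0.174 here
similarity = 1 - js                                # roughly 0.826; closer to 1 = more human-like
print(round(js, 3), round(similarity, 3))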
@@ -1036,13 +1156,15 @@
         '''coding human data'''
         # self.huamn_df = pd.read_csv(human_data_path)
         # self.data = self.code_results(self.huamn_df)
-        # save_path = human_data_path.replace('.csv','_coding.csv')
+        save_path = human_data_path.replace('.csv','_coding.csv')
+        human_save_path = "./src/datasets/coding_human.xlsx"
         # if save_path is not None:
         #     print(f'Save human coding results to {save_path}')
         #     fpath = Path(save_path)
         #     fpath.parent.mkdir(parents=True, exist_ok=True)
         #     self.data.to_csv(fpath)
 
+
         '''coding llm data'''
         save_path = result_save_path.replace('.csv','_coding.csv')
         self.llm_df = self.code_results_llm(summaries_df)
@@ -1051,9 +1173,11 @@
             fpath = Path(save_path)
             fpath.parent.mkdir(parents=True, exist_ok=True)
             self.llm_df.to_csv(fpath)
-        # exit()
+        # file_path_1 = '/Users/simon/Downloads/coding_human.xlsx'
+        # file_path_2 = '/Users/simon/Downloads/Meta-Llama-3.1-70B-Instruct_coding.csv'
+        avg_js_divergence = self.calculate_js_divergence("./src/datasets/coding_human.xlsx", save_path)
 
-        return 9.00
+        return avg_js_divergence
 
 
 
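With this hunk evaluate_humanlike returns the averaged 1 - JS score instead of the hard-coded 9.00 placeholder. A hedged usage sketch (evaluator, summaries_df and the result path are placeholders; only the coding_human.xlsx path appears in the diff):

score = evaluator.evaluate_humanlike(
    summaries_df,
    human_data_path="./src/datasets/coding_human.xlsx",   # matches the path hard-coded above
    result_save_path="./results/model_output.csv",        # hypothetical output location
)
print(f"human-likeness: {score:.3f}")   # closer to 1 means the response distributions match humans more closely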
src/envs.py CHANGED
@@ -4,7 +4,8 @@ from huggingface_hub import HfApi
 
 
 # replace this with our token
-TOKEN = os.environ.get("HF_TOKEN", None)
+# TOKEN = os.environ.get("HF_TOKEN", None)
+TOKEN = os.getenv("HF_TOKEN")
 # print(TOKEN)
 # OWNER = "vectara"
 # REPO_ID = f"{OWNER}/Humanlike"
@@ -12,7 +13,7 @@ TOKEN = os.environ.get("HF_TOKEN", None)
 # RESULTS_REPO = f"{OWNER}/results"
 
 
-OWNER = "Simondon" # Change to your org - don't forget to create a results and request dataset, with the correct format!
+OWNER = "tangtang1995" # Change to your org - don't forget to create a results and request dataset, with the correct format!
 # ----------------------------------
 
 REPO_ID = f"{OWNER}/Humanlike"
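Note: the token lookup change is behaviorally equivalent; os.getenv is a thin wrapper over os.environ.get, and with no default both return None when HF_TOKEN is unset, so callers must still handle a missing token. A quick illustrative check (not from the repository):

import os

# With no default argument, both lookups return None for an unset variable.
assert os.getenv("HF_TOKEN") == os.environ.get("HF_TOKEN", None)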