Spaces:
Runtime error
Runtime error
zetavg
committed on
Commit
•
02c87a8
1
Parent(s):
d189042
move sample data
Browse files
- llama_lora/utils/data.py +30 -12
- {datasets → sample_data/datasets}/alpaca_data_cleaned_first_100.json +0 -0
- {datasets → sample_data/datasets}/alpaca_data_cleaned_first_1000.json +0 -0
- {datasets → sample_data/datasets}/alpaca_data_cleaned_first_500.json +0 -0
- {datasets → sample_data/datasets}/stanford_alpaca_seed_tasks.jsonl +0 -0
- {datasets → sample_data/datasets}/unhelpful_ai.json +0 -0
- {datasets → sample_data/datasets}/yoda.json +0 -0
- {lora_models → sample_data/lora_models}/alpaca-lora-7b-yoda-v01/finetune_params.json +0 -0
- {lora_models → sample_data/lora_models}/alpaca-lora-7b-yoda-v01/info.json +0 -0
- {lora_models → sample_data/lora_models}/alpaca-lora-7b/finetune_params.json +0 -0
- {lora_models → sample_data/lora_models}/alpaca-lora-7b/info.json +0 -0
- {lora_models → sample_data/lora_models}/unhelpful-ai-on-alpaca-v01/finetune_params.json +0 -0
- {lora_models → sample_data/lora_models}/unhelpful-ai-on-alpaca-v01/info.json +0 -0
- {lora_models → sample_data/lora_models}/unhelpful-ai-v01/finetune_params.json +0 -0
- {lora_models → sample_data/lora_models}/unhelpful-ai-v01/info.json +0 -0
- {templates → sample_data/templates}/README.md +0 -0
- {templates → sample_data/templates}/alpaca.json +0 -0
- {templates → sample_data/templates}/alpaca_legacy.json +0 -0
- {templates → sample_data/templates}/alpaca_sample.json +0 -0
- {templates → sample_data/templates}/alpaca_short.json +0 -0
- {templates → sample_data/templates}/user_and_ai.json +0 -0
- {templates → sample_data/templates}/vigogne.json +0 -0
llama_lora/utils/data.py
CHANGED
@@ -4,7 +4,6 @@ import fnmatch
|
|
4 |
import json
|
5 |
|
6 |
from ..config import Config
|
7 |
-
from ..globals import Global
|
8 |
|
9 |
|
10 |
def init_data_dir():
|
@@ -13,12 +12,16 @@ def init_data_dir():
|
|
13 |
parent_directory_path = os.path.dirname(current_file_path)
|
14 |
project_dir_path = os.path.abspath(
|
15 |
os.path.join(parent_directory_path, "..", ".."))
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
copy_sample_data_if_not_exists(
|
21 |
-
|
|
|
|
|
|
|
|
|
22 |
|
23 |
|
24 |
def copy_sample_data_if_not_exists(source, destination):
|
@@ -32,21 +35,33 @@ def copy_sample_data_if_not_exists(source, destination):
|
|
32 |
def get_available_template_names():
|
33 |
templates_directory_path = os.path.join(Config.data_dir, "templates")
|
34 |
all_files = os.listdir(templates_directory_path)
|
35 |
-
names = [
|
|
|
|
|
|
|
|
|
36 |
return sorted(names)
|
37 |
|
38 |
|
39 |
def get_available_dataset_names():
|
40 |
datasets_directory_path = os.path.join(Config.data_dir, "datasets")
|
41 |
all_files = os.listdir(datasets_directory_path)
|
42 |
-
names = [
|
|
|
|
|
|
|
|
|
43 |
return sorted(names)
|
44 |
|
45 |
|
46 |
def get_available_lora_model_names():
|
47 |
lora_models_directory_path = os.path.join(Config.data_dir, "lora_models")
|
48 |
all_items = os.listdir(lora_models_directory_path)
|
49 |
-
names = [
|
|
|
|
|
|
|
|
|
50 |
return sorted(names)
|
51 |
|
52 |
|
@@ -67,7 +82,9 @@ def get_info_of_available_lora_model(name):
|
|
67 |
if not path_of_available_lora_model:
|
68 |
return None
|
69 |
|
70 |
-
with open(
|
|
|
|
|
71 |
return json.load(json_file)
|
72 |
|
73 |
except Exception as e:
|
@@ -95,4 +112,5 @@ def get_dataset_content(name):
|
|
95 |
return data
|
96 |
else:
|
97 |
raise ValueError(
|
98 |
-
f"Unknown file format: {file_name}. Expects '*.json' or '*.jsonl'"
|
|
|
|
4 |
import json
|
5 |
|
6 |
from ..config import Config
|
|
|
7 |
|
8 |
|
9 |
def init_data_dir():
|
|
|
12 |
parent_directory_path = os.path.dirname(current_file_path)
|
13 |
project_dir_path = os.path.abspath(
|
14 |
os.path.join(parent_directory_path, "..", ".."))
|
15 |
+
sample_data_dir_path = os.path.join(project_dir_path, "sample_data")
|
16 |
+
copy_sample_data_if_not_exists(
|
17 |
+
os.path.join(sample_data_dir_path, "templates"),
|
18 |
+
os.path.join(Config.data_dir, "templates"))
|
19 |
+
copy_sample_data_if_not_exists(
|
20 |
+
os.path.join(sample_data_dir_path, "datasets"),
|
21 |
+
os.path.join(Config.data_dir, "datasets"))
|
22 |
+
copy_sample_data_if_not_exists(
|
23 |
+
os.path.join(sample_data_dir_path, "lora_models"),
|
24 |
+
os.path.join(Config.data_dir, "lora_models"))
|
25 |
|
26 |
|
27 |
def copy_sample_data_if_not_exists(source, destination):
|
|
|
35 |
def get_available_template_names():
|
36 |
templates_directory_path = os.path.join(Config.data_dir, "templates")
|
37 |
all_files = os.listdir(templates_directory_path)
|
38 |
+
names = [
|
39 |
+
filename.rstrip(".json") for filename in all_files
|
40 |
+
if fnmatch.fnmatch(
|
41 |
+
filename, "*.json") or fnmatch.fnmatch(filename, "*.py")
|
42 |
+
]
|
43 |
return sorted(names)
|
44 |
|
45 |
|
46 |
def get_available_dataset_names():
|
47 |
datasets_directory_path = os.path.join(Config.data_dir, "datasets")
|
48 |
all_files = os.listdir(datasets_directory_path)
|
49 |
+
names = [
|
50 |
+
filename for filename in all_files
|
51 |
+
if fnmatch.fnmatch(filename, "*.json")
|
52 |
+
or fnmatch.fnmatch(filename, "*.jsonl")
|
53 |
+
]
|
54 |
return sorted(names)
|
55 |
|
56 |
|
57 |
def get_available_lora_model_names():
|
58 |
lora_models_directory_path = os.path.join(Config.data_dir, "lora_models")
|
59 |
all_items = os.listdir(lora_models_directory_path)
|
60 |
+
names = [
|
61 |
+
item for item in all_items
|
62 |
+
if os.path.isdir(
|
63 |
+
os.path.join(lora_models_directory_path, item))
|
64 |
+
]
|
65 |
return sorted(names)
|
66 |
|
67 |
|
|
|
82 |
if not path_of_available_lora_model:
|
83 |
return None
|
84 |
|
85 |
+
with open(
|
86 |
+
os.path.join(path_of_available_lora_model, "info.json"), "r"
|
87 |
+
) as json_file:
|
88 |
return json.load(json_file)
|
89 |
|
90 |
except Exception as e:
|
|
|
112 |
return data
|
113 |
else:
|
114 |
raise ValueError(
|
115 |
+
f"Unknown file format: {file_name}. Expects '*.json' or '*.jsonl'"
|
116 |
+
)
|
{datasets → sample_data/datasets}/alpaca_data_cleaned_first_100.json
RENAMED
File without changes
|
{datasets → sample_data/datasets}/alpaca_data_cleaned_first_1000.json
RENAMED
File without changes
|
{datasets → sample_data/datasets}/alpaca_data_cleaned_first_500.json
RENAMED
File without changes
|
{datasets → sample_data/datasets}/stanford_alpaca_seed_tasks.jsonl
RENAMED
File without changes
|
{datasets → sample_data/datasets}/unhelpful_ai.json
RENAMED
File without changes
|
{datasets → sample_data/datasets}/yoda.json
RENAMED
File without changes
|
{lora_models → sample_data/lora_models}/alpaca-lora-7b-yoda-v01/finetune_params.json
RENAMED
File without changes
|
{lora_models → sample_data/lora_models}/alpaca-lora-7b-yoda-v01/info.json
RENAMED
File without changes
|
{lora_models → sample_data/lora_models}/alpaca-lora-7b/finetune_params.json
RENAMED
File without changes
|
{lora_models → sample_data/lora_models}/alpaca-lora-7b/info.json
RENAMED
File without changes
|
{lora_models → sample_data/lora_models}/unhelpful-ai-on-alpaca-v01/finetune_params.json
RENAMED
File without changes
|
{lora_models → sample_data/lora_models}/unhelpful-ai-on-alpaca-v01/info.json
RENAMED
File without changes
|
{lora_models → sample_data/lora_models}/unhelpful-ai-v01/finetune_params.json
RENAMED
File without changes
|
{lora_models → sample_data/lora_models}/unhelpful-ai-v01/info.json
RENAMED
File without changes
|
{templates → sample_data/templates}/README.md
RENAMED
File without changes
|
{templates → sample_data/templates}/alpaca.json
RENAMED
File without changes
|
{templates → sample_data/templates}/alpaca_legacy.json
RENAMED
File without changes
|
{templates → sample_data/templates}/alpaca_sample.json
RENAMED
File without changes
|
{templates → sample_data/templates}/alpaca_short.json
RENAMED
File without changes
|
{templates → sample_data/templates}/user_and_ai.json
RENAMED
File without changes
|
{templates → sample_data/templates}/vigogne.json
RENAMED
File without changes
|