Commit 7c0d726 by loubnabnl
Parent: 05d8027

reformat code

Files changed (1): app.py (+62, -25)
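
The formatting changes below (double-quoted strings, long calls wrapped one argument per line with trailing commas, two blank lines between top-level definitions) are consistent with the output of the black code formatter, e.g. running black app.py over the file; the commit message doesn't name the tool, so treat that as an assumption.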
app.py CHANGED

@@ -7,7 +7,10 @@ import streamlit as st
 
 
 GITHUB_CODE = "https://huggingface.co/datasets/lvwerra/github-code"
-INCODER_IMG = "https://huggingface.co/datasets/loubnabnl/repo-images/raw/main/incoder.png"
+INCODER_IMG = (
+    "https://huggingface.co/datasets/loubnabnl/repo-images/raw/main/incoder.png"
+)
+
 
 @st.cache()
 def load_examples():
@@ -15,20 +18,34 @@ def load_examples():
         examples = json.load(f)
     return examples
 
+
 def generate_code(model_name, gen_prompt, max_new_tokens, temperature, seed):
-    url = f'https://hf.space/embed/loubnabnl/{model_name.lower()}-subspace/+/api/predict/'
-    r = requests.post(url=url, json={"data": [gen_prompt, max_new_tokens, temperature, seed]})
-    generated_text = r.json()['data'][0]
+    url = (
+        f"https://hf.space/embed/loubnabnl/{model_name.lower()}-subspace/+/api/predict/"
+    )
+    r = requests.post(
+        url=url, json={"data": [gen_prompt, max_new_tokens, temperature, seed]}
+    )
+    generated_text = r.json()["data"][0]
     return generated_text
-
+
+
 st.set_page_config(page_icon=":laptop:", layout="wide")
 
 st.sidebar.header("Models")
 models = ["CodeParrot", "InCoder"]
-selected_models = st.sidebar.multiselect("Select code generation models to compare", models, default=["CodeParrot"])
+selected_models = st.sidebar.multiselect(
+    "Select code generation models to compare", models, default=["CodeParrot"]
+)
 
 st.sidebar.header("Tasks")
-tasks = [" ", "Pretraining datasets", "Model architecture", "Model evaluation", "Code generation"]
+tasks = [
+    " ",
+    "Pretraining datasets",
+    "Model architecture",
+    "Model evaluation",
+    "Code generation",
+]
 selected_task = st.sidebar.selectbox("Select a task", tasks)
 
 
@@ -37,25 +54,27 @@ if selected_task == " ":
     with open("utils/intro.txt", "r") as f:
         intro = f.read()
     st.markdown(intro)
-
+
 elif selected_task == "Pretraining datasets":
     st.title("Pretraining datasets 📚")
-    st.markdown(f"Preview of some code files from Github repositories in [Github-code dataset]({GITHUB_CODE}):")
+    st.markdown(
+        f"Preview of some code files from Github repositories in [Github-code dataset]({GITHUB_CODE}):"
+    )
     df = pd.read_csv("utils/data_preview.csv")
     st.dataframe(df)
     for model in selected_models:
         with open(f"datasets/{model.lower()}.txt", "r") as f:
             text = f.read()
         st.markdown(f"### {model}")
-        st.markdown(text)
-
+        st.markdown(text)
+
 elif selected_task == "Model architecture":
     st.title("Model architecture")
     for model in selected_models:
         with open(f"architectures/{model.lower()}.txt", "r") as f:
             text = f.read()
         st.markdown(f"## {model}")
-        st.markdown(text)
+        st.markdown(text)
         if model == "InCoder":
             st.image(INCODER_IMG, caption="Figure 1: InCoder training", width=700)
 
@@ -64,31 +83,49 @@ elif selected_task == "Model evaluation":
     with open("evaluation/intro.txt", "r") as f:
         intro = f.read()
     st.markdown(intro)
-
+
 elif selected_task == "Code generation":
     st.title("Code generation 💻")
     st.sidebar.header("Examples")
     examples = load_examples()
     example_names = [example["name"] for example in examples]
     name2id = dict([(name, i) for i, name in enumerate(example_names)])
-    selected_example = st.sidebar.selectbox("Select one of the following examples or implement yours", example_names)
+    selected_example = st.sidebar.selectbox(
+        "Select one of the following examples or implement yours", example_names
+    )
     example_text = examples[name2id[selected_example]]["value"]
     default_length = examples[name2id[selected_example]]["length"]
     st.sidebar.header("Generation settings")
-    temperature = st.sidebar.slider("Temperature:", value=0.2, min_value=0.0, step=0.1, max_value=2.0)
-    max_new_tokens = st.sidebar.slider("Number of tokens to generate:", value=default_length, min_value=8, step=8, max_value=256)
-    seed = st.sidebar.slider("Random seed:", value=42, min_value=0, step=1, max_value=1000)
-    gen_prompt = st.text_area("Generate code with prompt:", value=example_text, height=220,).strip()
+    temperature = st.sidebar.slider(
+        "Temperature:", value=0.2, min_value=0.0, step=0.1, max_value=2.0
+    )
+    max_new_tokens = st.sidebar.slider(
+        "Number of tokens to generate:",
+        value=default_length,
+        min_value=8,
+        step=8,
+        max_value=256,
+    )
+    seed = st.sidebar.slider(
+        "Random seed:", value=42, min_value=0, step=1, max_value=1000
+    )
+    gen_prompt = st.text_area(
+        "Generate code with prompt:",
+        value=example_text,
+        height=220,
+    ).strip()
    if st.button("Generate code!"):
         with st.spinner("Generating code..."):
             # Create a multiprocessing Pool
-            pool = Pool()
-            generate_parallel=partial(generate_code,
-                                      gen_prompt=gen_prompt,
-                                      max_new_tokens=max_new_tokens,
-                                      temperature=temperature,
-                                      seed=seed)
+            pool = Pool()
+            generate_parallel = partial(
+                generate_code,
+                gen_prompt=gen_prompt,
+                max_new_tokens=max_new_tokens,
+                temperature=temperature,
+                seed=seed,
+            )
             output = pool.map(generate_parallel, selected_models)
             for i in range(len(output)):
                 st.markdown(f"**{selected_models[i]}**")
-                st.code(output[i])
+                st.code(output[i])
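
For reference, the reformatted generate_code helper and the Pool/partial fan-out can be exercised outside Streamlit. A minimal sketch, assuming the requests package is installed, the two Spaces are live at the URLs taken from the diff, and using a hypothetical example prompt:

from functools import partial
from multiprocessing import Pool

import requests


def generate_code(model_name, gen_prompt, max_new_tokens, temperature, seed):
    # Each model is served by its own Hugging Face Space; generation happens
    # by POSTing the prompt and sampling settings to the Space's predict API.
    url = (
        f"https://hf.space/embed/loubnabnl/{model_name.lower()}-subspace/+/api/predict/"
    )
    r = requests.post(
        url=url, json={"data": [gen_prompt, max_new_tokens, temperature, seed]}
    )
    return r.json()["data"][0]


if __name__ == "__main__":
    # partial() pins the generation settings so that Pool.map only varies
    # the model name, querying both Spaces in parallel.
    models = ["CodeParrot", "InCoder"]
    generate_parallel = partial(
        generate_code,
        gen_prompt="def print_hello_world():",  # hypothetical example prompt
        max_new_tokens=32,
        temperature=0.2,
        seed=42,
    )
    with Pool() as pool:
        outputs = pool.map(generate_parallel, models)
    for model, output in zip(models, outputs):
        print(f"=== {model} ===\n{output}")

Using processes rather than threads is workable here because each generation call is an independent, blocking HTTP request, so the Pool simply overlaps the waiting; a thread pool would serve this I/O-bound fan-out equally well.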