use multithreading instead of multiprocessing
Browse files
app.py
CHANGED
@@ -1,12 +1,10 @@
|
|
1 |
import json
|
2 |
import pandas as pd
|
3 |
import requests
|
4 |
-
|
5 |
-
from functools import partial
|
6 |
import streamlit as st
|
7 |
|
8 |
|
9 |
-
GITHUB_CODE = "https://huggingface.co/datasets/lvwerra/github-code"
|
10 |
MODELS = ["CodeParrot", "InCoder", "CodeGen", "PolyCoder"]
|
11 |
GENERATION_MODELS = ["CodeParrot", "InCoder", "CodeGen"]
|
12 |
|
@@ -17,7 +15,14 @@ def load_examples():
|
|
17 |
return examples
|
18 |
|
19 |
|
20 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
url = (
|
22 |
f"https://hf.space/embed/loubnabnl/{model_name.lower()}-subspace/+/api/predict/"
|
23 |
)
|
@@ -25,12 +30,21 @@ def generate_code(model_name, gen_prompt, max_new_tokens, temperature, seed):
|
|
25 |
url=url, json={"data": [gen_prompt, max_new_tokens, temperature, seed]}
|
26 |
)
|
27 |
generated_text = r.json()["data"][0]
|
28 |
-
|
29 |
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
st.set_page_config(page_icon=":laptop:", layout="wide")
|
36 |
with open("utils/table_contents.txt", "r") as f:
|
@@ -45,9 +59,11 @@ read_markdown("utils/intro.txt")
|
|
45 |
st.subheader("1 - Code datasets")
|
46 |
read_markdown("datasets/intro.txt")
|
47 |
read_markdown("datasets/github_code.txt")
|
|
|
48 |
#st.markdown(f"Preview of some code files from Github repositories in [Github-code dataset]({GITHUB_CODE}):")
|
49 |
#df = pd.read_csv("utils/data_preview.csv")
|
50 |
#st.dataframe(df)
|
|
|
51 |
col1, col2= st.columns([1,2])
|
52 |
with col1:
|
53 |
selected_model = st.selectbox("", MODELS, key=1)
|
@@ -107,19 +123,21 @@ gen_prompt = st.text_area(
|
|
107 |
).strip()
|
108 |
if st.button("Generate code!"):
|
109 |
with st.spinner("Generating code..."):
|
110 |
-
#
|
111 |
-
|
112 |
-
|
113 |
-
|
|
|
114 |
gen_prompt=gen_prompt,
|
115 |
max_new_tokens=max_new_tokens,
|
116 |
temperature=temperature,
|
117 |
seed=seed,
|
118 |
)
|
119 |
-
|
120 |
-
|
|
|
121 |
st.markdown(f"**{selected_models[i]}**")
|
122 |
-
st.code(
|
123 |
|
124 |
# Resources
|
125 |
st.subheader("Resources")
|
|
|
1 |
import json
|
2 |
import pandas as pd
|
3 |
import requests
|
4 |
+
import threading
|
|
|
5 |
import streamlit as st
|
6 |
|
7 |
|
|
|
8 |
MODELS = ["CodeParrot", "InCoder", "CodeGen", "PolyCoder"]
|
9 |
GENERATION_MODELS = ["CodeParrot", "InCoder", "CodeGen"]
|
10 |
|
|
|
15 |
return examples
|
16 |
|
17 |
|
18 |
+
def read_markdown(path):
    """Render the text file at ``path`` as Markdown in the Streamlit page.

    Args:
        path: Location of the text/Markdown file to display.

    The file content is handed to ``st.markdown`` with
    ``unsafe_allow_html=True``, so HTML contained in the file is rendered
    rather than escaped.
    """
    # Decode explicitly as UTF-8: without an encoding argument ``open`` uses
    # the locale's preferred encoding, which makes non-ASCII content files
    # fail or render garbled on hosts whose default is not UTF-8.
    with open(path, "r", encoding="utf-8") as f:
        output = f.read()
    st.markdown(output, unsafe_allow_html=True)
|
22 |
+
|
23 |
+
|
24 |
+
def generate_code(generations, model_name, gen_prompt, max_new_tokens, temperature, seed):
|
25 |
+
# call space using its API endpoint
|
26 |
url = (
|
27 |
f"https://hf.space/embed/loubnabnl/{model_name.lower()}-subspace/+/api/predict/"
|
28 |
)
|
|
|
30 |
url=url, json={"data": [gen_prompt, max_new_tokens, temperature, seed]}
|
31 |
)
|
32 |
generated_text = r.json()["data"][0]
|
33 |
+
generations.append(generated_text)
|
34 |
|
35 |
+
|
36 |
+
def generate_code_threads(generations, models, gen_prompt, max_new_tokens, temperature, seed):
    """Query every model in ``models`` concurrently and collect the outputs.

    One thread per model runs ``generate_code``. Each thread appends to its
    own private list; once all threads have been joined, the results are
    copied into ``generations`` in the order of ``models``. This guarantees
    that ``generations[i]`` belongs to ``models[i]`` no matter which request
    finished first (appending to one shared list from the workers would
    yield completion order instead).

    Args:
        generations: List that receives one entry per model (mutated in place).
        models: Names of the models to query.
        gen_prompt: Prompt forwarded unchanged to ``generate_code``.
        max_new_tokens: Forwarded unchanged to ``generate_code``.
        temperature: Forwarded unchanged to ``generate_code``.
        seed: Forwarded unchanged to ``generate_code``.

    If a worker finishes without producing a result (for instance because
    its request raised inside the thread), an empty string is stored in its
    slot so that later positions stay aligned with ``models``.
    """
    models = list(models)
    # One private result list per model keeps the output position independent
    # of thread scheduling.
    buckets = [[] for _ in models]
    threads = [
        threading.Thread(
            target=generate_code,
            args=(bucket, model_name, gen_prompt, max_new_tokens, temperature, seed),
        )
        for bucket, model_name in zip(buckets, models)
    ]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    generations.extend(bucket[0] if bucket else "" for bucket in buckets)
|
47 |
+
|
48 |
|
49 |
st.set_page_config(page_icon=":laptop:", layout="wide")
|
50 |
with open("utils/table_contents.txt", "r") as f:
|
|
|
59 |
st.subheader("1 - Code datasets")
|
60 |
read_markdown("datasets/intro.txt")
|
61 |
read_markdown("datasets/github_code.txt")
|
62 |
+
#GITHUB_CODE = "https://huggingface.co/datasets/lvwerra/github-code"
|
63 |
#st.markdown(f"Preview of some code files from Github repositories in [Github-code dataset]({GITHUB_CODE}):")
|
64 |
#df = pd.read_csv("utils/data_preview.csv")
|
65 |
#st.dataframe(df)
|
66 |
+
|
67 |
col1, col2= st.columns([1,2])
|
68 |
with col1:
|
69 |
selected_model = st.selectbox("", MODELS, key=1)
|
|
|
123 |
).strip()
|
124 |
# When the button is pressed, query all selected models in parallel and show
# each generation under the name of the model it was requested from.
if st.button("Generate code!"):
    with st.spinner("Generating code..."):
        # use threading
        generations = []
        generate_code_threads(
            generations,
            selected_models,
            gen_prompt=gen_prompt,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            seed=seed,
        )
        # Pair each output with its model name directly instead of indexing
        # both lists; the former debug loop that printed every generation to
        # stdout has been dropped.
        for model_label, generated_code in zip(selected_models, generations):
            st.markdown(f"**{model_label}**")
            st.code(generated_code)
|
141 |
|
142 |
# Resources
|
143 |
st.subheader("Resources")
|