import json
import threading

import pandas as pd
import requests
import streamlit as st
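
# Models presented in the app; only the first three have hosted generation
# endpoints and can be queried in the generation section below.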
MODELS = ["CodeParrot", "InCoder", "CodeGen", "PolyCoder"]
GENERATION_MODELS = ["CodeParrot", "InCoder", "CodeGen"]
@st.cache()
def load_examples():
    """Load the prompt examples shown in the generation section."""
    with open("utils/examples.json", "r") as f:
        examples = json.load(f)
    return examples
def read_markdown(path):
    """Render a markdown file inside the app."""
    with open(path, "r") as f:
        output = f.read()
    st.markdown(output, unsafe_allow_html=True)
def generate_code(generations, model_name, gen_prompt, max_new_tokens, temperature, seed):
    """Query one model's Space and append its output to the shared list."""
    # call space using its API endpoint
    url = (
        f"https://hf.space/embed/loubnabnl/{model_name.lower()}-subspace/+/api/predict/"
    )
    r = requests.post(
        url=url, json={"data": [gen_prompt, max_new_tokens, temperature, seed]}
    )
    generated_text = r.json()["data"][0]
    # keep the model name with its output so results stay correctly labeled
    # even when threads finish out of order
    generations.append((model_name, generated_text))
def generate_code_threads(generations, models, gen_prompt, max_new_tokens, temperature, seed):
    """Query all selected models in parallel, one thread per model."""
    threads = []
    for model_name in models:
        # create and start the thread for this model
        threads.append(
            threading.Thread(
                target=generate_code,
                args=(generations, model_name, gen_prompt, max_new_tokens, temperature, seed),
            )
        )
        threads[-1].start()

    # wait for all models to finish
    for t in threads:
        t.join()
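
# Page setup: wide layout with a table of contents in the sidebar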
st.set_page_config(page_icon=":laptop:", layout="wide")
with open("utils/table_contents.txt", "r") as f:
contents = f.read()
st.sidebar.markdown(contents)
# Introduction
st.title("Code generation with 🤗")
read_markdown("utils/intro.txt")
# Pretraining datasets
st.subheader("1 - Code datasets")
read_markdown("datasets/intro.txt")
read_markdown("datasets/github_code.txt")
#GITHUB_CODE = "https://huggingface.co/datasets/lvwerra/github-code"
#st.markdown(f"Preview of some code files from Github repositories in [Github-code dataset]({GITHUB_CODE}):")
#df = pd.read_csv("utils/data_preview.csv")
#st.dataframe(df)
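
# Model selector for the pretraining dataset descriptions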
col1, col2 = st.columns([1, 2])
with col1:
    selected_model = st.selectbox("", MODELS, key=1)
read_markdown(f"datasets/{selected_model.lower()}.txt")
# Model architecture
st.subheader("2 - Model architecture")
read_markdown("architectures/intro.txt")
col1, col2 = st.columns([1, 2])
with col1:
    selected_model = st.selectbox("", MODELS, key=2)
read_markdown(f"architectures/{selected_model.lower()}.txt")
# Model evaluation
st.subheader("3 - Code models evaluation")
read_markdown("evaluation/intro.txt")
read_markdown("evaluation/demo_humaneval.txt")
# Code generation
st.subheader("4 - Code generation ✨")
col1, col2, col3 = st.columns([7, 1, 6])
with col1:
    st.markdown("**Models**")
    selected_models = st.multiselect(
        "Select code generation models to compare:",
        GENERATION_MODELS,
        default=["CodeParrot"],
        key=3,
    )
    st.markdown(" ")
    st.markdown("**Examples**")
    examples = load_examples()
    example_names = [example["name"] for example in examples]
    name2id = {name: i for i, name in enumerate(example_names)}
    selected_example = st.selectbox(
        "Select one of the following examples or implement yours:", example_names
    )
    example_text = examples[name2id[selected_example]]["value"]
    default_length = examples[name2id[selected_example]]["length"]
with col3:
    st.markdown("**Generation settings**")
    temperature = st.slider(
        "Temperature:", value=0.2, min_value=0.0, step=0.1, max_value=2.0
    )
    max_new_tokens = st.slider(
        "Number of tokens to generate:",
        value=default_length,
        min_value=8,
        step=4,
        max_value=256,
    )
    seed = st.slider(
        "Random seed:", value=42, min_value=0, step=1, max_value=1000
    )

gen_prompt = st.text_area(
    "Generate code with prompt:",
    value=example_text,
    height=200,
).strip()
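
# Query the selected models in parallel and display each model's output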
if st.button("Generate code!"):
    with st.spinner("Generating code..."):
        # use threading to query all selected models at once
        generations = []
        generate_code_threads(
            generations,
            selected_models,
            gen_prompt=gen_prompt,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            seed=seed,
        )
        # log raw generations to the console for debugging
        for _, generated_text in generations:
            print(generated_text)
        # generations holds (model_name, text) pairs, so outputs stay correctly
        # labeled regardless of thread completion order
        for model_name, generated_text in generations:
            st.markdown(f"**{model_name}**")
            st.code(generated_text)
# Resources
st.subheader("Resources")
read_markdown("utils/resources.txt") |