File size: 4,282 Bytes
c9e8e4a 3bce3fb c9e8e4a 1e8c169 dc737c0 7982bc6 c9e8e4a 7982bc6 c9e8e4a 7982bc6 7cf1a13 fc158ea c9e8e4a 7cf1a13 1c7be52 7cf1a13 c3214e3 c9e8e4a 9684d46 7cf1a13 9684d46 c3214e3 a5d2ff2 7cf1a13 234ad28 33c3beb 3bce3fb 7982bc6 f25abd8 3bce3fb c9e8e4a 3bce3fb 7cf1a13 f25abd8 5b9eb09 f25abd8 7cf1a13 c3214e3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
from transformers import pipeline
import torch
import json
import pandas as pd
@st.cache(allow_output_mutation=True)
def load_tokenizer(model_ckpt):
    """Return the tokenizer for the checkpoint named by ``model_ckpt``.

    The call is wrapped in ``st.cache`` (with ``allow_output_mutation=True``)
    so the tokenizer is fetched through ``AutoTokenizer.from_pretrained``
    and then served from Streamlit's cache.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
    return tokenizer
@st.cache(allow_output_mutation=True)
def load_model(model_ckpt):
    """Return the causal language model for the checkpoint ``model_ckpt``.

    The model is created with ``AutoModelForCausalLM.from_pretrained`` and
    ``low_cpu_mem_usage=True``; the function is wrapped in ``st.cache``
    (with ``allow_output_mutation=True``).
    """
    return AutoModelForCausalLM.from_pretrained(
        model_ckpt,
        low_cpu_mem_usage=True,
    )
@st.cache()
def load_examples():
    """Load the prompt examples stored in ``examples.json``.

    Returns:
        The deserialized JSON content of ``examples.json`` (read from the
        current working directory).

    Raises:
        FileNotFoundError: if ``examples.json`` does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # Read as UTF-8 explicitly instead of relying on the platform's locale
    # default, which is not UTF-8 everywhere.
    with open("examples.json", "r", encoding="utf-8") as f:
        return json.load(f)
# Page-level configuration: icon and wide layout.
st.set_page_config(page_icon=":laptop:", layout="wide")

# Sidebar section 1: which models to compare (CodeParrot pre-selected).
st.sidebar.header("Models")
models = ["CodeParrot", "OPT", "InCoder"]
selected_models = st.sidebar.multiselect(
    "Select code generation models to compare:",
    models,
    default=["CodeParrot"],
)

# Sidebar section 2: which page/task to display. The first entry is a
# single-space placeholder and is the default selection.
st.sidebar.header("Tasks")
tasks = [
    " ",
    "Model architecture",
    "Model evaluation",
    "Pretraining dataset",
    "Code generation",
]
selected_task = st.sidebar.selectbox("Select a task:", tasks)
# Load the tokenizer/model pairs through the st.cache-wrapped loaders.
tokenizer1 = load_tokenizer("lvwerra/codeparrot")
model1 = load_model("lvwerra/codeparrot")
tokenizer2 = load_tokenizer("facebook/incoder-1B")
model2 = load_model("facebook/incoder-1B")

# Map each usable model's display name to its text-generation pipeline.
# NOTE: "OPT" (facebook/opt-1.3b) is listed in `models` but is currently
# disabled, so it intentionally has no entry here.
pipelines = {
    "CodeParrot": pipeline("text-generation", model=model1, tokenizer=tokenizer1),
    "InCoder": pipeline("text-generation", model=model2, tokenizer=tokenizer2),
}
# Prompt examples plus a name -> list-index lookup for the example selector.
examples = load_examples()
example_names = [entry["name"] for entry in examples]
name2id = {name: idx for idx, name in enumerate(example_names)}

# Fixed seed for generation.
set_seed(42)

# Keyword arguments forwarded to the generation pipelines; filled in by the
# "Code generation" page.
gen_kwargs = {}
# Render the page that matches the task chosen in the sidebar.
# NOTE(review): "Model evaluation" is offered in `tasks` but has no branch
# here, so selecting it renders an empty page — confirm whether intended.
if selected_task == " ":
    # Landing page: title plus the introduction text.
    st.title("Code Generation Models comparison")
    with open("intro.txt", "r", encoding="utf-8") as f:
        intro = f.read()
    st.markdown(intro)
elif selected_task == "Pretraining dataset":
    # NOTE(review): the trailing characters of this title look like a
    # mis-decoded emoji — confirm the intended glyph.
    st.title("Pretraining datasets π")
    # Fixed: Streamlit has no `st.Markdown`; the original call raised
    # AttributeError as soon as this page was selected.
    st.markdown("Preview of some code files from Github repositories")
    df = pd.read_csv("preview-github-data.csv")
    st.dataframe(df)
    # One dataset description per selected model.
    for model in selected_models:
        with open(f"datasets/{model.lower()}.txt", "r", encoding="utf-8") as f:
            text = f.read()
        st.markdown(f"### {model}:")
        st.markdown(text)
elif selected_task == "Model architecture":
    # NOTE(review): title suffix looks like a mis-decoded emoji — confirm.
    st.title("Model architecture π¨")
    # One architecture description per selected model.
    for model in selected_models:
        with open(f"architectures/{model.lower()}.txt", "r", encoding="utf-8") as f:
            text = f.read()
        st.markdown(f"## {model}:")
        st.markdown(text)
elif selected_task == "Code generation":
    # NOTE(review): title suffix looks like a mis-decoded emoji — confirm.
    st.title("Code generation π»")

    # Example picker: supplies the default prompt and default output length.
    st.sidebar.header("Examples")
    selected_example = st.sidebar.selectbox(
        "Select one of the following examples:", example_names
    )
    chosen_example = examples[name2id[selected_example]]
    example_text = chosen_example["value"]
    default_length = chosen_example["length"]

    # Generation settings collected into gen_kwargs for the pipelines.
    st.sidebar.header("Generation settings")
    gen_kwargs["do_sample"] = (
        st.sidebar.radio("Decoding strategy:", ["Greedy", "Sample"]) == "Sample"
    )
    gen_kwargs["max_new_tokens"] = st.sidebar.slider(
        "Number of tokens to generate:",
        value=default_length,
        min_value=8,
        step=8,
        max_value=256,
    )
    if gen_kwargs["do_sample"]:
        # Sampling parameters are fixed rather than user-configurable.
        gen_kwargs["temperature"] = 0.2
        gen_kwargs["top_k"] = 0
        gen_kwargs["top_p"] = 0.95

    gen_prompt = st.text_area(
        "Generate code with prompt:", value=example_text, height=220,
    ).strip()

    if st.button("Generate code!"):
        with st.spinner("Generating code..."):
            for model in selected_models:
                # Models without a pipeline (currently only "OPT") are
                # selectable in the sidebar but silently skipped.
                if model not in pipelines:
                    continue
                pipe = pipelines[model]
                generated_text = pipe(gen_prompt, **gen_kwargs)[0]['generated_text']
                st.markdown(f"{model}:")
                st.code(generated_text)
|