import os
import random
import re

import gradio as gr
import torch
from transformers import AutoTokenizer, CodeGenForCausalLM

# Check for a GPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
seed = 16
max_length = 2048
top_p = 0.95
num_return_sequences = 1
pad_token_id = 50256
prefix = "# Import libraries.\n\nimport numpy as np\n"

# Seed everything for reproducibility.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
random.seed(seed)
os.environ["PYTHONHASHSEED"] = str(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False  # benchmark mode would undo determinism


def truncate(completion):
    """Cut a completion at the first natural stopping point: a second top-level
    print/def, a top-level comment or docstring, the end-of-text token, or a
    triple newline."""

    def find_re(string, pattern, start_pos):
        m = pattern.search(string, start_pos)
        return m.start() if m else -1

    terminals = [
        re.compile(r, re.MULTILINE)
        for r in ["^#", re.escape("<|endoftext|>"), "^'''", '^"""', "\n\n\n"]
    ]

    prints = list(re.finditer("^print", completion, re.MULTILINE))
    if len(prints) > 1:
        completion = completion[: prints[1].start()]

    defs = list(re.finditer("^def", completion, re.MULTILINE))
    if len(defs) > 1:
        completion = completion[: defs[1].start()]

    start_pos = 0
    terminals_pos = [
        pos
        for pos in [find_re(completion, terminal, start_pos) for terminal in terminals]
        if pos != -1
    ]
    if len(terminals_pos) > 0:
        return completion[: min(terminals_pos)]
    else:
        return completion
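
# Illustration (not part of the app): the heuristic above keeps only the first
# top-level block of a completion, e.g.
#
#   >>> truncate("def add(a, b):\n    return a + b\n\n\ndef sub(a, b):")
#   'def add(a, b):\n    return a + b'
#
# because both the second "^def" match and the "\n\n\n" terminal cut the tail.
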

tokenizer = AutoTokenizer.from_pretrained("giulio98/codegen-2B-mono-xlcost")
if torch.cuda.is_available():
    model = CodeGenForCausalLM.from_pretrained(
        "giulio98/codegen-2B-mono-xlcost", torch_dtype=torch.float16
    )
else:
    model = CodeGenForCausalLM.from_pretrained("giulio98/codegen-2B-mono-xlcost")

# Create the inference pipeline.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"
model.to(device)

title = "Code Generator"


def sample_multi(prompt, max_gen_length, temp, force_word):
    force_words = []
    if force_word != "":
        force_words.append(tokenizer([force_word], add_special_tokens=False).input_ids)
    force_words = [item for sublist in force_words for item in sublist]

    input_ids = tokenizer(
        prompt,
        truncation=True,
        padding=True,
        return_tensors="pt",
    ).input_ids
    input_ids_len = input_ids.shape[1]
    assert input_ids_len < max_length

    with torch.no_grad():
        input_ids = input_ids.to(device)
        if len(force_words) != 0:
            # Forcing words requires beam search, so sampling is disabled.
            tokens = model.generate(
                input_ids,
                do_sample=False,
                num_return_sequences=num_return_sequences,
                temperature=temp,
                max_length=input_ids_len + max_gen_length,
                top_p=top_p,
                pad_token_id=pad_token_id,
                use_cache=True,
                num_beams=5,
                force_words_ids=force_words,
            )
        else:
            tokens = model.generate(
                input_ids,
                do_sample=True,
                num_return_sequences=num_return_sequences,
                temperature=temp,
                max_length=input_ids_len + max_gen_length,
                top_p=top_p,
                pad_token_id=pad_token_id,
                use_cache=True,
            )
    completion_txt = tokenizer.batch_decode(
        tokens[:, input_ids_len:], skip_special_tokens=True
    )
    return completion_txt[0]


def sample_completion(prompt, function, max_gen_length, temp, force_word, print_=False):
    print("prompt is: ", prompt)
    print("function is: ", function)
    prompt = prompt + "\n"
    original_text = prompt
    prompt = prefix + prompt
    print("prompt after is: ", prompt)

    bad_words = []
    force_words = []
    bad_words.append(tokenizer(["###"], add_special_tokens=False).input_ids)
    bad_words.append(tokenizer(["'"], add_special_tokens=False).input_ids)
    bad_words.append(tokenizer(["''"], add_special_tokens=False).input_ids)
    bad_words.append(tokenizer(["'''"], add_special_tokens=False).input_ids)
    if force_word != "":
        force_words.append(tokenizer([force_word], add_special_tokens=False).input_ids)
    if function:
        pass
        # force_words.append(tokenizer(['def'], add_special_tokens=False).input_ids)
    else:
        bad_words.append(tokenizer(["def"], add_special_tokens=False).input_ids)

    print("last prompt: ", prompt.split("#")[-1].lower())
    if "print" in prompt.split("#")[-1].lower():
        force_words.append(tokenizer(["print"], add_special_tokens=False).input_ids)
        print_ = True

    force_words = [item for sublist in force_words for item in sublist]
    bad_words = [item for sublist in bad_words for item in sublist]

    input_ids = tokenizer(
        prompt,
        truncation=True,
        padding=True,
        return_tensors="pt",
    ).input_ids
    print("force words", force_words)
    print("bad words", bad_words)
    input_ids_len = input_ids.shape[1]
    assert input_ids_len < max_length

    with torch.no_grad():
        input_ids = input_ids.to(device)
        gen_kwargs = dict(
            num_return_sequences=num_return_sequences,
            temperature=temp,
            max_length=input_ids_len + max_gen_length,
            top_p=top_p,
            pad_token_id=pad_token_id,
            use_cache=True,
        )
        if len(bad_words) != 0:
            gen_kwargs["bad_words_ids"] = bad_words
        if len(force_words) != 0:
            # Forcing words requires beam search, so sampling is disabled.
            gen_kwargs.update(do_sample=False, num_beams=5, force_words_ids=force_words)
        else:
            gen_kwargs["do_sample"] = True
        tokens = model.generate(input_ids, **gen_kwargs)

    completion_txt = tokenizer.batch_decode(tokens[:, input_ids_len:])
    print("before truncate:", completion_txt[0])
    print("after truncate:", truncate(completion_txt[0]))
    return truncate(completion_txt[0]), original_text


def complete_with_gpt(text, function, tokens_auto, temp_auto, force_word):
    # Use the last 1024 characters of the text as context.
    text = text.strip()
    completion, original_text = sample_completion(
        text[-1024:], function, tokens_auto, temp_auto, force_word
    )
    return original_text + completion


def make_prompt(gen_prompt):
    return '"""\n' + gen_prompt + '\n"""\n###\n'


def complete_multi(text, tokens_multi, temp_multi, force_word):
    text = make_prompt(text.strip())
    completion = sample_multi(text, tokens_multi, temp_multi, force_word)
    return completion
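
# Illustration (not part of the app): make_prompt wraps the English instructions
# in the docstring-plus-separator format this XLCoST fine-tune appears to expect:
#
#   >>> make_prompt("Print hello world")
#   '"""\nPrint hello world\n"""\n###\n'
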

def predict(input, history=[], temp=0.2, max_gen_length=256):
    # Tokenize the new user input.
    new_user_input_ids = tokenizer.encode(make_prompt(input), return_tensors="pt")

    # Append the new user input tokens to the chat history.
    bot_input_ids = torch.cat([torch.LongTensor(history), new_user_input_ids], dim=-1)
    bot_input_ids = bot_input_ids.to(device)

    # Generate a response.
    history = model.generate(
        bot_input_ids,
        do_sample=True,
        num_return_sequences=num_return_sequences,
        temperature=temp,
        max_new_tokens=max_gen_length,
        top_p=top_p,
        use_cache=True,
        pad_token_id=tokenizer.eos_token_id,
    ).tolist()
    print(history)

    # Convert the tokens to text, then split the output into turns.
    response = tokenizer.decode(history[0]).split('\n"""\n###\n')
    print("decoded_response-->>" + str(response))
    # Convert to a list of (user, bot) tuples.
    response = [
        (response[i].replace('"""\n', ""), response[i + 1].replace("<|endoftext|>", ""))
        for i in range(0, len(response) - 1, 2)
    ]
    # print('response-->>' + str(response))
    return response, history


with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.TabItem("Autocomplete"):
            with gr.Row():
                textbox = gr.Textbox(placeholder="Type here...", lines=16)
                checkbox = gr.Checkbox(label="Function implementation?")
            tokens_auto = gr.Slider(
                minimum=8,
                maximum=256,
                step=1,
                value=128,
                label="Number of tokens to generate",
            )
            temp_auto = gr.Slider(
                minimum=0,
                maximum=2.5,
                step=0.1,
                value=0.2,
                label="Temperature",
            )
            textbox_force = gr.Textbox(label="Insert force word...")
            btn_autocomplete = gr.Button("Generate")
        with gr.TabItem("Multisteps"):
            with gr.Row():
                textbox_input = gr.Textbox(lines=10, label="English instructions")
                textbox_output = gr.Textbox(label="Predicted Python code", lines=10)
            tokens_multi = gr.Slider(
                minimum=8,
                maximum=256,
                step=1,
                value=128,
                label="Number of tokens to generate",
            )
            temp_multi = gr.Slider(
                minimum=0,
                maximum=2.5,
                step=0.1,
                value=0.2,
                label="Temperature",
            )
            textbox_force_multi = gr.Textbox(label="Insert force word...")
            btn_multi = gr.Button("Generate")

    btn_autocomplete.click(
        complete_with_gpt,
        inputs=[textbox, checkbox, tokens_auto, temp_auto, textbox_force],
        outputs=[textbox],
    )
    btn_multi.click(
        complete_multi,
        inputs=[textbox_input, tokens_multi, temp_multi, textbox_force_multi],
        outputs=textbox_output,
    )
    # btn_chat.click(predict, inputs=[text_chat, state], outputs=[chatbot, state])

demo.launch()
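
# Usage sketch (illustrative, not part of the app): demo.launch() serves the UI
# at http://127.0.0.1:7860 by default. The pipeline can also be driven
# programmatically with the helpers defined above, e.g.
#
#   completion, _ = sample_completion("# Print hello world", False, 64, 0.2, "")
#   print(completion)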