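"""Interactive console chat with a causal language model.

Conversation history is kept in a plain text file; each time the user
presses Enter, the model extends the history with one generated line.
"""
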
print('Load libs')

import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

os.system('cls||clear')  # clear the console ('cls' on Windows, 'clear' elsewhere)

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'


def load_model(model_name_or_path):
    # Load the tokenizer and model, then gather stats for the startup banner.
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path)

    total_params = sum(p.numel() for p in model.parameters())
    # Context window size; fall back to the tokenizer's limit if the config
    # does not expose max_position_embeddings. (The original code used the
    # input-embedding row count here, which is the vocabulary size.)
    max_tokens = getattr(model.config, 'max_position_embeddings',
                         tokenizer.model_max_length)
    model = model.to(DEVICE)
    params = {
        'model': model_name_or_path,
        'size': f'{total_params / 1e9:.2f}B',
        'max_tokens': max_tokens,
        'device': DEVICE,
    }
    return model, tokenizer, params


def model_log(params):
    # Render the model stats as a '#'-framed banner.
    lines = [
        f'Model: {params["model"]}',
        f'Model size: {params["size"]}',
        f'Maximum tokens: {params["max_tokens"]}',
        f'Device: {params["device"]}',
    ]
    width = max(len(line) for line in lines)
    border = '#' * (width + 4)
    body = '\n'.join(f'# {line.ljust(width)} #' for line in lines)
    return f'{border}\n{body}\n{border}'
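
# Illustrative banner produced by model_log. The size, token count, and
# device below are assumptions for illustration, not measured values:
#
#   #######################################
#   # Model: ifmain/StableGPT4-Micro-1.6B #
#   # Model size: 1.60B                   #
#   # Maximum tokens: 4096                #
#   # Device: cuda                        #
#   #######################################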


def encode_ids(text, tokenizer):
    # Tokenize the text and move the token ids to the model's device.
    return tokenizer.encode(text, return_tensors="pt").to(DEVICE)


def generate_step_by_step(config, model, tokenizer, file):
    text_input = config['text']
    input_ids = encode_ids(text_input, tokenizer)
    current_length = len(input_ids[0])

    # Generate a few tokens at a time until the context limit is reached
    # or the model finishes a line.
    while current_length < config['maxsize']:
        output = model.generate(input_ids,
                                do_sample=config['do_sample'],
                                temperature=config['temperature'],
                                top_k=config['top_k'],
                                top_p=config['top_p'],
                                max_length=current_length + 6,
                                pad_token_id=tokenizer.eos_token_id,
                                num_return_sequences=config['num_return_sequences'])
        # Sample up to 6 new tokens but accept only 4 of them per step.
        current_length += 4
        text_output = tokenizer.decode(output[0][:current_length])
        generated_text = text_output[len(text_input):]

        # if generated_text.count('import ') > 2:
        #     generated_text += '\nSorry, but the bot cannot write code\n\n'
        # if '```python' in generated_text:
        #     generated_text += '\nSorry, but the bot cannot write code\n\n'

        # Stop at the first line break: the bot replies one line at a time.
        if '\n' in generated_text:
            return generated_text.split('\n')[0]

        # cleaned_text, found_repeats = remove_repeated_phrases(generated_text)
        # if found_repeats:
        #     return cleaned_text

        # Persist the partial generation so progress survives interruption.
        with open(file, 'w', encoding='utf-8') as f:
            f.write(text_input + generated_text)

        os.system('cls||clear')
        print((text_input + generated_text).replace('Me: ', 'User: '))

        # Feed the accepted tokens back in directly; re-encoding the decoded
        # text may not round-trip exactly.
        input_ids = output[0][:current_length].unsqueeze(0)

    # If the prompt already filled the context, the loop never ran and
    # generated_text is undefined.
    try:
        return generated_text
    except NameError:
        return ''


def botAw(text, model, tokenizer, params, file):
    # Sampling settings: moderate temperature with top-k/top-p filtering
    # keeps replies focused without being fully deterministic.
    config = {
        'text': text,
        'do_sample': True,
        'temperature': 0.5,
        'top_k': 20,
        'top_p': 0.9,
        'maxsize': params['max_tokens'],
        'num_return_sequences': 1,
    }
    return generate_step_by_step(config, model, tokenizer, file)
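
# Minimal usage sketch. The 'Me:'/'Bot:' prompt format is an assumption
# inferred from the 'Me: ' -> 'User: ' substitution in the display code:
#
#   reply = botAw('Me: Hello\nBot:', gpt_model, gpt_tokenizer,
#                 gpt_params, 'file.txt')
#   print(reply)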


print('Load GPT')
gpt_model_name = 'ifmain/StableGPT4-Micro-1.6B'
gpt_model, gpt_tokenizer, gpt_params = load_model(gpt_model_name)
print(model_log(gpt_params))
print()

# Conversation history lives in this plain text file.
file = 'file.txt'

# Create an empty history file on first run.
if not os.path.exists(file):
    open(file, 'w', encoding='utf-8').close()


def var2():
    # Each Enter press: read the history, generate one bot line, append it.
    while True:
        input('Press Enter to process')
        with open(file, 'r', encoding='utf-8') as f:
            text = f.read()

        answer = botAw(text, gpt_model, gpt_tokenizer, gpt_params, file)

        with open(file, 'w', encoding='utf-8') as f:
            f.write(text + answer)


var2()