# Hugging Face Space app (the Space's status page reported "Runtime error" when scraped).
import os
import time

import gradio as gr
from huggingface_hub import InferenceClient

# Model served through the Hugging Face Inference API.
bloom_repo = "bigscience/bloom"

# Completion-style prompt: <s>...</s> delimits the source text, and the model
# continues after the final "<s>" with the translated text.
bloom_template = """Text translation.
{source} text:
<s>{query}</s>
{target} translated text:
<s>"""

# Generation parameters forwarded to InferenceClient.text_generation().
bloom_model_kwargs = dict(
    max_new_tokens=1000,
    temperature=0.3,
    # truncate=1512,
    seed=42,
    stop_sequences=["</s>", "<|endoftext|>", "<|end|>"],
    top_p=0.95,
    repetition_penalty=1.1,
)

# Token is optional: anonymous calls work but are heavily rate-limited.
client = InferenceClient(model=bloom_repo, token=os.environ.get("HUGGINGFACEHUB_API_TOKEN", None))
def split_text_into_chunks(text, chunk_size=1000):
    """Split `text` into chunks of at most roughly `chunk_size` characters.

    The split happens on line boundaries; each consumed newline is replaced by
    the literal marker "<newline>" so callers can restore it after translation.
    Note the size check counts only the raw line lengths, not the markers, so
    a chunk's actual length may exceed `chunk_size` slightly; a single line
    longer than `chunk_size` becomes its own (oversized) chunk.
    """
    chunks = []
    chunk = ""
    for line in text.split('\n'):
        if len(chunk) + len(line) <= chunk_size:
            # Line still fits: fold it into the current chunk.
            chunk += line + "<newline>"
        else:
            # Flush the current chunk and start a new one with this line.
            # Skip the flush when the chunk is empty (first line already
            # oversized) — the original code appended a useless "" chunk here.
            if chunk:
                chunks.append(chunk)
            chunk = line + "<newline>"
    # The last chunk is always non-empty (every line appends the marker).
    chunks.append(chunk)
    return chunks
def translation(source, target, text):
    """Translate `text` from `source` to `target`, streaming partial results.

    Generator used by Gradio streaming: yields the cumulative translated text
    after each received token. Long inputs are split into chunks; a chunk that
    raises is logged and skipped (best-effort), and if every chunk fails the
    original input text is used as the fallback result.
    """
    result = ""  # translation accumulated over all *finished* chunks
    for chunk in split_text_into_chunks(text):
        # Reset per chunk. The original code never reset `output`, so each
        # `result += output` re-added every previous chunk's text (duplication).
        output = ""
        try:
            input_prompt = (
                bloom_template
                .replace("{source}", source)
                .replace("{target}", target)
                .replace("{query}", chunk)
            )
            stream = client.text_generation(
                input_prompt,
                stream=True,
                details=True,
                return_full_text=False,
                **bloom_model_kwargs,
            )
            for response in stream:
                output += response.token.text
                # Trim a trailing stop sequence and end this chunk's stream.
                stopped = False
                for stop_str in bloom_model_kwargs['stop_sequences']:
                    if output.endswith(stop_str):
                        output = output[:-len(stop_str)]
                        stopped = True
                        break
                # Yield the full text so far (finished chunks + current chunk).
                yield (result + output).replace("<newline>", "\n")
                if stopped:
                    break
            result += output
        except Exception as e:
            # Best-effort: log, back off briefly, continue with the next chunk.
            print(f"ERROR: LLM show {e}")
            time.sleep(1)
    if result == "":
        result = text  # every chunk failed — fall back to the input text
    # Generator return value is informational only; Gradio consumes the yields.
    return result.replace("<newline>", "\n").strip()
# Three text inputs (source language, target language, text) -> streamed text output.
# queue() is required for generator (streaming) outputs. NOTE(review): the
# `concurrency_count` kwarg exists only in Gradio 3.x (removed in 4.x in favor
# of `default_concurrency_limit`) — confirm the pinned Gradio version.
if __name__ == "__main__":
    gr.Interface(translation, inputs=["text", "text", "text"], outputs="text").queue(concurrency_count=100).launch()