import os
import time
import gradio as gr
from langchain.llms import HuggingFaceHub

llama_repo = os.getenv('HF_MODEL_LLAMA_REPO')
starchat_repo = os.getenv('HF_MODEL_STARCHAT_REPO')
bloom_repo = os.getenv('HF_MODEL_BLOOM_REPO')

llamma_template = """<s>[INST]<<SYS>>I want you to act as document language translator. You do translation {source} texts in document into then you return to me the translated document AND DO NOTHING ELSE.<</SYS>>[/INST]
[INST]Begin of the document:
{query}
End of the document.[/INST]
{target} translated document:
"""

starchat_template = """<|system|>I want you to act as document language translator. You do translation {source} texts in document into then you return to me the translated document AND DO NOTHING ELSE.<</SYS>>
Begin of the document:
{query}
End of the document<|end|>
<|assistant|>
{target} translated document:
"""

bloom_template = """Text translation.
{source} text:
<s>{query}</s>
{target} translated text:
<s>"""

model_kwargs={
            "max_new_tokens":2048,
            "temperature": 0.01,
            "truncate": 4096,
            "seed" : 42,
            "stop" : ["</s>","<|endoftext|>","<|end|>"],
            }

bloom_model_kwargs={
            "max_new_tokens":1000,
            "temperature": 0.01,
#            "truncate": 1512,
            "seed" : 42,
            "stop" : ["</s>","<|endoftext|>","<|end|>"],
            }

llm1 = HuggingFaceHub(repo_id=llama_repo, task="text-generation", model_kwargs=model_kwargs)
llm2 = HuggingFaceHub(repo_id=starchat_repo, task="text-generation", model_kwargs=model_kwargs)
llm3 = HuggingFaceHub(repo_id=bloom_repo, task="text-generation", model_kwargs=bloom_model_kwargs)

def split_text_into_chunks(text, chunk_size=1000):
    lines = text.split('\n')
    chunks = []
    chunk = ""
    for line in lines:
        # If adding the current line doesn't exceed the chunk size, add the line to the chunk
        if len(chunk) + len(line) <= chunk_size:
            chunk += line + "<newline>"
        else:
            # If adding the line exceeds chunk size, store the current chunk and start a new one
            chunks.append(chunk)
            chunk = line + "<newline>"
    # Don't forget the last chunk
    chunks.append(chunk)
    return chunks
    
def translation(source, target, text):
    response = ""
    chunks = split_text_into_chunks(text)
    for chunk in chunks:
        try:
            input_prompt = bloom_template.replace("{source}", source)
            input_prompt = input_prompt.replace("{target}", target)
            input_prompt = input_prompt.replace("{query}", chunk)
            stchunk = llm3(input_prompt)
            for eot in bloom_model_kwargs['stop']:
                stchunk = stchunk.replace(eot,"")
            response += stchunk
        except Exception as e:
            print(f"ERROR: LLM show {e}")
        time.sleep(5)
    if response == "": response = text
    return response.replace("<newline>","\n").strip()

gr.Interface(translation, inputs=["text","text","text"], outputs="text").launch()