futranbg committed on
Commit
e5db612
·
1 Parent(s): ff26b87

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -46
app.py CHANGED
@@ -1,26 +1,9 @@
1
  import os
2
  import time
3
  import gradio as gr
4
- from langchain.llms import HuggingFaceHub
5
 
6
- llama_repo = os.getenv('HF_MODEL_LLAMA_REPO')
7
- starchat_repo = os.getenv('HF_MODEL_STARCHAT_REPO')
8
- bloom_repo = os.getenv('HF_MODEL_BLOOM_REPO')
9
-
10
- llamma_template = """<s>[INST]<<SYS>>I want you to act as document language translator. You do translation {source} texts in document into then you return to me the translated document AND DO NOTHING ELSE.<</SYS>>[/INST]
11
- [INST]Begin of the document:
12
- {query}
13
- End of the document.[/INST]
14
- {target} translated document:
15
- """
16
-
17
- starchat_template = """<|system|>I want you to act as document language translator. You do translation {source} texts in document into then you return to me the translated document AND DO NOTHING ELSE.<</SYS>>
18
- Begin of the document:
19
- {query}
20
- End of the document<|end|>
21
- <|assistant|>
22
- {target} translated document:
23
- """
24
 
25
  bloom_template = """Text translation.
26
  {source} text:
@@ -28,25 +11,17 @@ bloom_template = """Text translation.
28
  {target} translated text:
29
  <s>"""
30
 
31
- model_kwargs={
32
- "max_new_tokens":2048,
33
- "temperature": 0.01,
34
- "truncate": 4096,
35
- "seed" : 42,
36
- "stop" : ["</s>","<|endoftext|>","<|end|>"],
37
- }
38
-
39
- bloom_model_kwargs={
40
- "max_new_tokens":1000,
41
- "temperature": 0.01,
42
- # "truncate": 1512,
43
- "seed" : 42,
44
- "stop" : ["</s>","<|endoftext|>","<|end|>"],
45
- }
46
 
47
- llm1 = HuggingFaceHub(repo_id=llama_repo, task="text-generation", model_kwargs=model_kwargs)
48
- llm2 = HuggingFaceHub(repo_id=starchat_repo, task="text-generation", model_kwargs=model_kwargs)
49
- llm3 = HuggingFaceHub(repo_id=bloom_repo, task="text-generation", model_kwargs=bloom_model_kwargs)
50
 
51
  def split_text_into_chunks(text, chunk_size=1000):
52
  lines = text.split('\n')
@@ -65,21 +40,28 @@ def split_text_into_chunks(text, chunk_size=1000):
65
  return chunks
66
 
67
  def translation(source, target, text):
68
- response = ""
 
69
  chunks = split_text_into_chunks(text)
70
  for chunk in chunks:
71
  try:
72
  input_prompt = bloom_template.replace("{source}", source)
73
  input_prompt = input_prompt.replace("{target}", target)
74
  input_prompt = input_prompt.replace("{query}", chunk)
75
- stchunk = llm3(input_prompt)
76
- for eot in bloom_model_kwargs['stop']:
77
- stchunk = stchunk.replace(eot,"")
78
- response += stchunk
 
 
 
 
 
79
  except Exception as e:
80
  print(f"ERROR: LLM show {e}")
81
- time.sleep(5)
82
- if response == "": response = text
83
- return response.replace("<newline>","\n").strip()
 
84
 
85
- gr.Interface(translation, inputs=["text","text","text"], outputs="text").launch()
 
1
  import os
2
  import time
3
  import gradio as gr
4
+ from huggingface_hub import InferenceClient
5
 
6
+ bloom_repo = "bigscience/bloom"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  bloom_template = """Text translation.
9
  {source} text:
 
11
  {target} translated text:
12
  <s>"""
13
 
14
+ bloom_model_kwargs=dict(
15
+ max_new_tokens=1000,
16
+ temperature=0.3,
17
+ # truncate=1512,
18
+ seed=42,
19
+ stop_sequences=["</s>","<|endoftext|>","<|end|>"],
20
+ top_p=0.95,
21
+ repetition_penalty=1.1,
22
+ )
 
 
 
 
 
 
23
 
24
+ client = InferenceClient(model=bloom_repo, token=os.environ.get("HUGGINGFACEHUB_API_TOKEN", None))
 
 
25
 
26
  def split_text_into_chunks(text, chunk_size=1000):
27
  lines = text.split('\n')
 
40
  return chunks
41
 
42
  def translation(source, target, text):
43
+ output = ""
44
+ result = ""
45
  chunks = split_text_into_chunks(text)
46
  for chunk in chunks:
47
  try:
48
  input_prompt = bloom_template.replace("{source}", source)
49
  input_prompt = input_prompt.replace("{target}", target)
50
  input_prompt = input_prompt.replace("{query}", chunk)
51
+ stream = client.text_generation(input_prompt, stream=True, details=True, return_full_text=False, **bloom_model_kwargs)
52
+ for response in stream:
53
+ output += response.token.text
54
+ for stop_str in bloom_model_kwargs['stop_sequences']:
55
+ if output.endswith(stop_str):
56
+ output = output[:-len(stop_str)]
57
+ yield output.replace("<newline>","\n")
58
+ #yield output.replace("<newline>","\n")
59
+ result += output
60
  except Exception as e:
61
  print(f"ERROR: LLM show {e}")
62
+ time.sleep(1)
63
+ #yield result.replace("<newline>","\n").strip()
64
+ if result == "": result = text
65
+ return result.replace("<newline>","\n").strip()
66
 
67
+ gr.Interface(translation, inputs=["text","text","text"], outputs="text").queue(concurrency_count=100).launch()