Spaces:

flax-community
/

gpt2-thai

Runtime error

App Files Files Community

sakares committed on Jul 17, 2021

Commit

c3d7797

1 Parent(s): 1450336

init app template with streamlit. credit to GPT2 Indonesian https://huggingface.co/spaces/flax-community/gpt2-indonesian

Browse files

Files changed (5) hide show

README.md +4 -4
app.py +118 -0
prompts.py +20 -0
requirements.txt +4 -0
start.sh +10 -0

README.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
-title: Gpt2 Thai
-emoji: 💩
-colorFrom: purple
-colorTo: blue
 sdk: streamlit
 app_file: app.py
 pinned: false

 ---
+title: GPT2 Thai
+emoji: 🐘
+colorFrom: indigo
+colorTo: indigo
 sdk: streamlit
 app_file: app.py
 pinned: false

app.py ADDED Viewed

	@@ -0,0 +1,118 @@

+import json
+import requests
+from mtranslate import translate
+from prompts import PROMPT_LIST
+import streamlit as st
+import random
+headers = {}  # HTTP headers passed along with every inference request; empty here
+MODELS = {  # model display name -> {"url": inference endpoint that query() POSTs to}
+    "GPT-2 Base": {
+        "url": "https://api-inference.huggingface.co/models/flax-community/gpt2-base-thai"
+    }
+}
+def query(payload, model_name):
+    data = json.dumps(payload)
+    print("model url:", MODELS[model_name]["url"])
+    response = requests.request("POST", MODELS[model_name]["url"], headers=headers, data=data)
+    return json.loads(response.content.decode("utf-8"))
+def process(text: str,
+            model_name: str,
+            max_len: int,
+            temp: float,
+            top_k: int,
+            top_p: float):
+    payload = {
+        "inputs": text,
+        "parameters": {
+            "max_new_tokens": max_len,
+            "top_k": top_k,
+            "top_p": top_p,
+            "temperature": temp,
+            "repetition_penalty": 2.0,
+        },
+        "options": {
+            "use_cache": True,
+        }
+    }
+    return query(payload, model_name)
+st.set_page_config(page_title="Thai GPT-2 Demo")
+st.title("Thai GPT-2")
+st.sidebar.subheader("Configurable parameters")
+max_len = st.sidebar.text_input(
+    "Maximum length",
+    value=100,
+    help="The maximum length of the sequence to be generated."
+)
+temp = st.sidebar.slider(
+    "Temperature",
+    value=1.0,
+    min_value=0.1,
+    max_value=100.0,
+    help="The value used to module the next token probabilities."
+)
+top_k = st.sidebar.text_input(
+    "Top k",
+    value=50,
+    help="The number of highest probability vocabulary tokens to keep for top-k-filtering."
+)
+top_p = st.sidebar.text_input(
+    "Top p",
+    value=0.95,
+    help=" If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation."
+)
+do_sample = st.sidebar.selectbox('Sampling?', (True, False), help="Whether or not to use sampling; use greedy decoding otherwise.")
+st.markdown(
+    """Thai GPT-2 demo. Part of the [Huggingface JAX/Flax event](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/)."""
+)
+model_name = st.selectbox('Model',(['GPT-2 Small', 'GPT-2 Medium']))
+ALL_PROMPTS = list(PROMPT_LIST.keys())+["Custom"]
+prompt = st.selectbox('Prompt', ALL_PROMPTS, index=len(ALL_PROMPTS)-1)
+if prompt == "Custom":
+    prompt_box = "Enter your text here"
+else:
+    prompt_box = random.choice(PROMPT_LIST[prompt])
+text = st.text_area("Enter text", prompt_box)
+if st.button("Run"):
+    with st.spinner(text="Getting results..."):
+        st.subheader("Result")
+        print(f"maxlen:{max_len}, temp:{temp}, top_k:{top_k}, top_p:{top_p}")
+        result = process(text=text,
+                         model_name=model_name,
+                         max_len=int(max_len),
+                         temp=temp,
+                         top_k=int(top_k),
+                         top_p=float(top_p))
+        print("result:", result)
+        if "error" in result:
+            if type(result["error"]) is str:
+                st.write(f'{result["error"]}. Please try it again in about {result["estimated_time"]:.0f} seconds')
+            else:
+                if type(result["error"]) is list:
+                    for error in result["error"]:
+                        st.write(f'{error}')
+        else:
+            result = result[0]["generated_text"]
+            st.write(result.replace("\n", "  \n"))
+            st.text("English translation")
+            st.write(translate(result, "en", "id").replace("\n", "  \n"))

prompts.py ADDED Viewed

	@@ -0,0 +1,20 @@

+PROMPT_LIST = {  # prompt category label -> list of example Thai prompts for that category
+    "บทสนทนาทั่วไป (conversation)": [
+        "สวัสดีตอนเช้า",
+        "สบายดีไหม",
+        "ขอบคุณ"
+    ],
+    "เรื่องสั้น (short story)": [
+        "เธอกับฉัน เราพบกันโดยบังเอิญที่ร้านกาแฟแห่งหนึ่ง",
+        "บ่ายสี่โมงแล้ว แสงอาทิตย์เริ่มจะอ่อนลงบ้าง",
+        "เธอใช้มือปาดน้ำตาที่ไหลลงมาตามใบหน้าเธอ"],
+    "ประวัติศาสตร์ (history)": [
+        "การปฏิวัติอุตสาหกรรมครั้งแรกซึ่งเริ่มในคริสต์ศตวรรษที่ 18",
+        "แนวคิดเรื่องเครื่องจักรที่คิดได้และสิ่งมีชีวิตเทียมนั้นมีมาตั้งแต่สมัยกรีกโบราณ",
+        "ช่วงต้นคริสต์ศตวรรษที่ 21 ปัญญาประดิษฐ์ประสบความสำเร็จอย่างสูง"],
+    "เนื้อเพลง (lyrics)": [
+    "โจ๊ะโจ๊ะ...มันทำลายสมองคน",
+    "รักของเธอมีจริงหรือเปล่า",
+    "ก็รู้ว่าฉันไม่มีความหมาย"
+    ]
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+streamlit
+requests==2.24.0
+requests-toolbelt==0.9.1
+mtranslate

start.sh ADDED Viewed

	@@ -0,0 +1,10 @@

+#!/usr/bin/env bash
+set -e
+if [ "$DEBUG" = true ] ; then
+    echo 'Debugging - ON'
+    nodemon --exec streamlit run main.py
+else
+    echo 'Debugging - OFF'
+    streamlit run main.py
+fi