"""Gradio chat demo for manually testing the ``tevykuch/sftsl0th`` model.

Loads a Hugging Face text-generation pipeline at import time and exposes it
through a ``gr.ChatInterface`` embedded in a ``gr.Blocks`` page.
"""

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Disclaimer rendered as Markdown underneath the chat widget.
sys_message = """
This model can generate untruths, lies or inappropriate things. Only for testing and validation.
"""

# Use the first CUDA device when one is present; -1 selects the CPU in the
# pipeline API, so the demo still starts on hosts without a GPU.
pipe = pipeline(
    "text-generation",
    model="tevykuch/sftsl0th",
    device=0 if torch.cuda.is_available() else -1,
    framework="pt",
)

# Configuration settings for model generation (example)
generation_config = {
    "max_new_tokens": 2048,
    # temperature / top_p / top_k only take effect when sampling is enabled.
    "do_sample": True,
    "temperature": 0.50,
    "top_p": 0.95,
    "top_k": 30,
    "repetition_penalty": 1.1,
    "eos_token_id": pipe.tokenizer.eos_token_id,
}


def stream(prompt: str, history=None) -> str:
    """Generate a single reply for ``prompt`` with the module-level pipeline.

    Despite the name, the reply is returned in one piece, not streamed.

    Args:
        prompt: The latest user message.
        history: Previous chat turns. ``gr.ChatInterface`` passes this as the
            second positional argument; it is accepted for compatibility but
            not used, so every reply is generated from ``prompt`` alone.

    Returns:
        The generated continuation, without the prompt text.
    """
    # return_full_text=False keeps the prompt from being echoed back in the
    # chat bubble (the pipeline otherwise returns prompt + completion).
    outputs = pipe(prompt, return_full_text=False, **generation_config)
    return outputs[0]["generated_text"]


chat_interface = gr.ChatInterface(
    fn=stream,
    stop_btn=None,
    examples=[
        ["តើពណ៌ចម្បងទាំងបីមានអ្វីខ្លះ?"],
        ["តើយើងអាចកាត់បន្ថយការបំពុលបរិយាកាសយ៉ាងដូចម្តេច?"],
        ["រៀបរាប់ពីពេលវេលាដែលអ្នកត្រូវធ្វើការសម្រេចចិត្តលំបាក។"],
        ["កំណត់អត្តសញ្ញាណមួយសេសចេញ។"],
        ["សរសេររឿងខ្លីមួយក្នុងការធ្វើវិសោធនកម្មរបស់បុគ្គលទីបីអំពីតួឯកដែលត្រូវធ្វើការសម្រេចចិត្តអាជីពដ៏សំខាន់មួយ។"],
        ["វាយតម្លៃប្រយោគនេះសម្រាប់កំហុសអក្ខរាវិរុទ្ធនិងវេយ្យាករណ៍"],
    ],
)

with gr.Blocks() as demo:
    # Closing tags fixed: the originals re-opened <h1>/<center> and
    # <p>/<center> instead of closing them.
    gr.HTML("<h1><center> sl0th inference tester only (not final) </center></h1>")
    gr.HTML(
        "<h4 style='text-align: center'>"
        "<a href='https://huggingface.co/tevykuch/sl0th' target='_blank'>Model: Sl0th Mistral 7b 0.2</a> | "
        "</h4>"
    )
    gr.HTML(
        "<p><center>Finetune here "
        "<a href='https://huggingface.co/unsloth/mistral-7b-bnb-4bit' target='_blank'>Mistral 7b</a> "
        "thanks dataset maker (my coworker) "
        "<a href='https://huggingface.co/datasets/metythorn/khmerllm-dataset-alpaca-52k-v1'>Alpaca-data-pt-br</a>."
        "</center></p>"
    )
    chat_interface.render()
    gr.Markdown(sys_message)
    gr.DuplicateButton(value="Duplicate the Magic", elem_id="duplicate-button")

if __name__ == "__main__":
    demo.queue(max_size=10).launch()