dkhatate committed
Commit 5b0f86d
1 Parent(s): 64071f5

modified code

Files changed (2)
  1. app.py +26 -12
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,17 +1,31 @@
-import gradio as gr
+from accelerate import init_empty_weights
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
-# Load the model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained("Cognitive-Lab/LLama3-Gaja-Hindi-8B-v0.1")
-model = AutoModelForCausalLM.from_pretrained("Cognitive-Lab/LLama3-Gaja-Hindi-8B-v0.1").to("cuda")
+# Initialize the tokenizer and model with empty weights
+with init_empty_weights():
+    tokenizer = AutoTokenizer.from_pretrained("Cognitive-Lab/LLama3-Gaja-Hindi-8B-v0.1")
+    model = AutoModelForCausalLM.from_pretrained("Cognitive-Lab/LLama3-Gaja-Hindi-8B-v0.1")
 
-def generate_text(prompt):
-    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
-    with torch.no_grad():
-        output = model.generate(inputs["input_ids"], max_new_tokens=50)
-    return tokenizer.decode(output[0], skip_special_tokens=True)
+# Move the model to the GPU if available; otherwise, keep it on the CPU
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = model.to(device)
 
-# Launch Gradio app
-interface = gr.Interface(fn=generate_text, inputs="text", outputs="text")
-interface.launch()
+# Define a function to run inference
+def generate_response(prompt, max_new_tokens=30):
+    # Tokenize the input
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
+
+    # Generate response using the model
+    with torch.no_grad():  # Disable gradient calculation
+        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
+
+    # Decode the generated tokens to get the output text
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return response
+
+# Example usage
+if __name__ == "__main__":
+    prompt = "आपका नाम क्या है?"  # Example Hindi prompt
+    response = generate_response(prompt)
+    print(response)
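
Review note on the new loading path: init_empty_weights() creates parameters on PyTorch's meta device, so wrapping from_pretrained in it typically leaves the model without materialized weights, and the later model.to(device) then fails with a meta-tensor copy error; the tokenizer holds no weights at all, so the context manager is a no-op for it. A minimal alternative sketch, assuming the goal is memory-efficient loading of the same checkpoint, is to let from_pretrained place weights itself via device_map="auto" (which uses accelerate under the hood); the torch_dtype=torch.float16 choice is an assumption, not part of this commit.

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_id = "Cognitive-Lab/LLama3-Gaja-Hindi-8B-v0.1"

# Tokenizers have no weights to defer, so this loads normally.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# device_map="auto" (backed by accelerate) materializes weights directly on the
# available device(s); float16 is an assumed memory saving, not the commit's choice.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)

def generate_response(prompt, max_new_tokens=30):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)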
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 transformers
 torch # If you are using PyTorch
 gradio
+accelerate
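
One more observation on the dependency list: gradio is still installed, but the rewritten app.py no longer builds or launches an interface, so a Space running this commit serves no UI. If the UI is meant to stay, a minimal re-wiring sketch follows; it is intended to be appended to the new app.py, where generate_response is already defined, and everything else in it is assumed rather than taken from the commit.

import gradio as gr

# Append to app.py: reuses the generate_response function defined above.
demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="Prompt"),
    outputs=gr.Textbox(label="Response"),
)

if __name__ == "__main__":
    demo.launch()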