Spaces:

AIModels24
/

Indian_Law_Justice

Sleeping

AIModels24 committed on Nov 28, 2024

Commit

e0051f5

verified ·

1 Parent(s): d9e9b95

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,26 +1,50 @@
 import torch
 from unsloth import FastLanguageModel
-# Hub repo id of the fine-tuned model loaded below
-model_name = "AIModels24/Indian_Constitution"  # Replace with your actual model path
-# Device handle used for the model and for the tokenized inputs in generate_text
 device = torch.device('cpu')
-# Load the weights unquantized in float32. NOTE(review): unsloth documents from_pretrained as returning (model, tokenizer); the whole return value is bound to `model` here and no `tokenizer` assignment is visible in this hunk - confirm
-model = FastLanguageModel.from_pretrained(
-    model_name=model_name,
-    max_seq_length=2048,
-    load_in_4bit=False,  # Disable 4-bit quantization (required for CPU)
-    dtype=torch.float32,  # Use float32 (default for CPU)
-)
-model = model.to(device)  # Ensure the model is loaded to CPU
-# Example inference function
 def generate_text(prompt):
     """Generate a completion for ``prompt`` and return it as plain text.

     The prompt is tokenized into PyTorch tensors, moved to ``device``, and
     passed to ``model.generate`` with gradient tracking disabled. The first
     returned sequence is decoded with special tokens skipped.

     Args:
         prompt: The user-supplied text to complete.

     Returns:
         The decoded text of the first generated sequence.
     """
     inputs = tokenizer(prompt, return_tensors="pt")
     inputs = inputs.to(device)  # Move inputs to CPU
     with torch.no_grad():
         outputs = model.generate(
             input_ids=inputs["input_ids"],
             # Forward the tokenizer's attention mask instead of dropping it;
             # .get() yields None when the tokenizer does not produce one.
             attention_mask=inputs.get("attention_mask"),
             # Per the transformers docs, max_length caps the total sequence
             # length (prompt tokens plus newly generated tokens).
             max_length=200,
         )
     return tokenizer.decode(outputs[0], skip_special_tokens=True)

+import os
 import torch
+import streamlit as st
+from transformers import AutoTokenizer
 from unsloth import FastLanguageModel
+# Set CUDA_VISIBLE_DEVICES to "-1" so no GPU is visible to this process. NOTE(review): this runs after `import torch` - confirm it still takes effect
+os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 device = torch.device('cpu')
+# Hub repo id passed to the loaders inside load_model()
+model_name = "your-username/Indian_law_500Epochs"  # Placeholder namespace - replace with your actual model path
+@st.cache_resource
+def load_model():
+    # Load the tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    # Load the model without GPU-specific settings
+    model = FastLanguageModel.from_pretrained(
+        model_name=model_name,
+        max_seq_length=2048,
+        load_in_4bit=False,  # Disable 4-bit quantization for CPU
+        dtype=torch.float32,  # Use float32 for CPU
+    )
+    # Move model to CPU
+    model = model.to(device)
+    return model, tokenizer
+model, tokenizer = load_model()
+# Inference function
 def generate_text(prompt):
     inputs = tokenizer(prompt, return_tensors="pt")
     inputs = inputs.to(device)  # Move inputs to CPU
     with torch.no_grad():
         outputs = model.generate(inputs['input_ids'], max_length=200)
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
+# Streamlit UI
+st.title("Indian Law Fine-Tuned Model Inference")
+prompt = st.text_area("Enter your prompt:")
+if st.button("Generate Response"):
+    if prompt:
+        response = generate_text(prompt)
+        st.write(response)
+    else:
+        st.write("Please enter a prompt!")