AIModels24 committed
Commit e2a5d47 · verified · 1 Parent(s): e0051f5

Update app.py

Files changed (1)
  1. app.py +69 -34
app.py CHANGED
@@ -1,50 +1,85 @@
- import os
  import torch
  import streamlit as st
- from transformers import AutoTokenizer
- from unsloth import FastLanguageModel

- # Disable CUDA and force CPU
- os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
- device = torch.device('cpu')

- # Load the model and tokenizer
- model_name = "your-username/Indian_law_500Epochs"  # Replace with your actual model path

  @st.cache_resource
- def load_model():
      # Load the tokenizer
-     tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-     # Load the model without GPU-specific settings
-     model = FastLanguageModel.from_pretrained(
-         model_name=model_name,
-         max_seq_length=2048,
-         load_in_4bit=False,  # Disable 4-bit quantization for CPU
-         dtype=torch.float32,  # Use float32 for CPU
      )
-     # Move model to CPU
-     model = model.to(device)

      return model, tokenizer

- model, tokenizer = load_model()

- # Inference function
- def generate_text(prompt):
-     inputs = tokenizer(prompt, return_tensors="pt")
-     inputs = inputs.to(device)  # Move inputs to CPU
-     with torch.no_grad():
-         outputs = model.generate(inputs['input_ids'], max_length=200)
-     return tokenizer.decode(outputs[0], skip_special_tokens=True)

- # Streamlit UI
- st.title("Indian Law Fine-Tuned Model Inference")
- prompt = st.text_area("Enter your prompt:")

  if st.button("Generate Response"):
-     if prompt:
-         response = generate_text(prompt)
-         st.write(response)
      else:
-         st.write("Please enter a prompt!")
 
  import torch
  import streamlit as st
+ from peft import PeftModel
+ # from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ # Load the model and tokenizer
+ # def load_model_and_tokenizer():
+ #     model_name = "AIModels24/Indian_Constitution"  # Replace with your model name

+ #     # Define quantization configuration for 4-bit quantization
+ #     # quant_config = BitsAndBytesConfig(load_in_4bit=True)  # 4-bit quantization

+ #     # Load the tokenizer
+ #     tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+ #     # Load the model with 4-bit quantization
+ #     model = AutoModelForCausalLM.from_pretrained(
+ #         model_name,
+ #         # quantization_config=quant_config,
+ #         device_map=None,
+ #         low_cpu_mem_usage=True
+ #     )
+
+ #     return model, tokenizer

  @st.cache_resource
+ def load_model_and_tokenizer():
+     # Base model
+     base_model_name = "unsloth/llama-3-8b-bnb-4bit"
+     adapter_name = "AIModels24/Indian_Constitution"
+
      # Load the tokenizer
+     tokenizer = AutoTokenizer.from_pretrained(base_model_name)
+
+     # Load the base model
+     model = AutoModelForCausalLM.from_pretrained(
+         base_model_name,
+         device_map=None,
+         low_cpu_mem_usage=True,
+         use_cache=True
+
      )
+
+     # Load the LoRA adapter
+     model = PeftModel.from_pretrained(model, adapter_name)

      return model, tokenizer


+ # Load model and tokenizer using the function
+ model, tokenizer = load_model_and_tokenizer()
+
+ ## prompt function
+ alpaca_prompt = "### Instruction:\n{}\n\n### Response:\n"
+

+ # Streamlit User Interface
+ st.title("भारतीय कानून व्यवस्था")
+ st.subheader("AI-powered responses for legal questions in Indian law")

+ # Input text box for user question
+ instruction = st.text_area("Enter your question:", placeholder="Ask a question about Indian law...")
+
+ # Generate response button
  if st.button("Generate Response"):
+     if instruction.strip():
+         with st.spinner("Generating response..."):
+             # Prepare the prompt for the model
+             inputs = tokenizer(
+                 [alpaca_prompt.format(instruction)],
+                 return_tensors="pt"
+             ).to("cuda")
+
+             # Generate the response
+             outputs = model.generate(**inputs, max_new_tokens=150, use_cache=True)
+             response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
+
+             # Extract the clean response
+             response_cleaned = response.split("### Response:\n")[-1].strip()
+
+             # Display the response
+             st.success("Response:")
+             st.write(response_cleaned)
      else:
+         st.error("Please enter a question to generate a response.")
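
For reference, a minimal sketch of the same base-model-plus-LoRA-adapter load-and-generate path outside Streamlit, assuming a CUDA-capable environment with bitsandbytes installed (the 4-bit base checkpoint and the app's .to("cuda") call both imply GPU inference). The device_map="auto" setting, the model.eval() call, and the sample question are illustrative additions and not taken from the commit.

import torch
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM

# Same base model and LoRA adapter as in the updated app.py
base_model_name = "unsloth/llama-3-8b-bnb-4bit"
adapter_name = "AIModels24/Indian_Constitution"

tokenizer = AutoTokenizer.from_pretrained(base_model_name)
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",        # illustrative: place the 4-bit weights on the available GPU
    low_cpu_mem_usage=True,
)
model = PeftModel.from_pretrained(model, adapter_name)
model.eval()

# Same Alpaca-style prompt template as the app
alpaca_prompt = "### Instruction:\n{}\n\n### Response:\n"
question = "What does Article 21 of the Constitution of India guarantee?"  # hypothetical example

inputs = tokenizer([alpaca_prompt.format(question)], return_tensors="pt").to("cuda")
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=150, use_cache=True)

response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
print(response.split("### Response:\n")[-1].strip())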