AIModels24 committed
Commit e2a5d47 · verified · 1 Parent(s): e0051f5

Update app.py

Files changed (1)
  1. app.py +69 -34
app.py CHANGED
@@ -1,50 +1,85 @@
- import os
  import torch
  import streamlit as st
- from transformers import AutoTokenizer
- from unsloth import FastLanguageModel

- # Disable CUDA and force CPU
- os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
- device = torch.device('cpu')

- # Load the model and tokenizer
- model_name = "your-username/Indian_law_500Epochs"  # Replace with your actual model path

  @st.cache_resource
- def load_model():
      # Load the tokenizer
-     tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-     # Load the model without GPU-specific settings
-     model = FastLanguageModel.from_pretrained(
-         model_name=model_name,
-         max_seq_length=2048,
-         load_in_4bit=False,  # Disable 4-bit quantization for CPU
-         dtype=torch.float32,  # Use float32 for CPU
      )
-     # Move model to CPU
-     model = model.to(device)

      return model, tokenizer

- model, tokenizer = load_model()

- # Inference function
- def generate_text(prompt):
-     inputs = tokenizer(prompt, return_tensors="pt")
-     inputs = inputs.to(device)  # Move inputs to CPU
-     with torch.no_grad():
-         outputs = model.generate(inputs['input_ids'], max_length=200)
-     return tokenizer.decode(outputs[0], skip_special_tokens=True)

- # Streamlit UI
- st.title("Indian Law Fine-Tuned Model Inference")
- prompt = st.text_area("Enter your prompt:")

  if st.button("Generate Response"):
-     if prompt:
-         response = generate_text(prompt)
-         st.write(response)
      else:
-         st.write("Please enter a prompt!")
 
  import torch
  import streamlit as st
+ from peft import PeftModel
+ # from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ # Load the model and tokenizer
+ # def load_model_and_tokenizer():
+ #     model_name = "AIModels24/Indian_Constitution"  # Replace with your model name

+ #     # Define quantization configuration for 4-bit quantization
+ #     # quant_config = BitsAndBytesConfig(load_in_4bit=True)  # 4-bit quantization

+ #     # Load the tokenizer
+ #     tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+ #     # Load the model with 4-bit quantization
+ #     model = AutoModelForCausalLM.from_pretrained(
+ #         model_name,
+ #         # quantization_config=quant_config,
+ #         device_map=None,
+ #         low_cpu_mem_usage=True
+ #     )
+
+ #     return model, tokenizer

  @st.cache_resource
+ def load_model_and_tokenizer():
+     # Base model
+     base_model_name = "unsloth/llama-3-8b-bnb-4bit"
+     adapter_name = "AIModels24/Indian_Constitution"
+
      # Load the tokenizer
+     tokenizer = AutoTokenizer.from_pretrained(base_model_name)
+
+     # Load the base model
+     model = AutoModelForCausalLM.from_pretrained(
+         base_model_name,
+         device_map=None,
+         low_cpu_mem_usage=True,
+         use_cache=True
+
      )
+
+     # Load the LoRA adapter
+     model = PeftModel.from_pretrained(model, adapter_name)

      return model, tokenizer


+ # Load model and tokenizer using the function
+ model, tokenizer = load_model_and_tokenizer()
+
+ ## prompt function
+ alpaca_prompt = "### Instruction:\n{}\n\n### Response:\n"
+

+ # Streamlit User Interface
+ st.title("भारतीय कानून व्यवस्था")
+ st.subheader("AI-powered responses for legal questions in Indian law")

+ # Input text box for user question
+ instruction = st.text_area("Enter your question:", placeholder="Ask a question about Indian law...")
+
+ # Generate response button
  if st.button("Generate Response"):
+     if instruction.strip():
+         with st.spinner("Generating response..."):
+             # Prepare the prompt for the model
+             inputs = tokenizer(
+                 [alpaca_prompt.format(instruction)],
+                 return_tensors="pt"
+             ).to("cuda")
+
+             # Generate the response
+             outputs = model.generate(**inputs, max_new_tokens=150, use_cache=True)
+             response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
+
+             # Extract the clean response
+             response_cleaned = response.split("### Response:\n")[-1].strip()
+
+             # Display the response
+             st.success("Response:")
+             st.write(response_cleaned)
      else:
+         st.error("Please enter a question to generate a response.")
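
For reference, a minimal sketch of the same base-model-plus-LoRA-adapter load-and-generate path outside Streamlit, assuming a CUDA-capable environment with bitsandbytes installed (the 4-bit base checkpoint and the app's .to("cuda") call both imply GPU inference). The device_map="auto" setting, the model.eval() call, and the sample question are illustrative additions and not taken from the commit.

import torch
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM

# Same base model and LoRA adapter as in the updated app.py
base_model_name = "unsloth/llama-3-8b-bnb-4bit"
adapter_name = "AIModels24/Indian_Constitution"

tokenizer = AutoTokenizer.from_pretrained(base_model_name)
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",        # illustrative: place the 4-bit weights on the available GPU
    low_cpu_mem_usage=True,
)
model = PeftModel.from_pretrained(model, adapter_name)
model.eval()

# Same Alpaca-style prompt template as the app
alpaca_prompt = "### Instruction:\n{}\n\n### Response:\n"
question = "What does Article 21 of the Constitution of India guarantee?"  # hypothetical example

inputs = tokenizer([alpaca_prompt.format(question)], return_tensors="pt").to("cuda")
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=150, use_cache=True)

response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
print(response.split("### Response:\n")[-1].strip())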