AIModels24 committed (verified)
Commit e0051f5 · 1 Parent(s): d9e9b95

Update app.py

Files changed (1):
  1. app.py +37 -13
app.py CHANGED
@@ -1,26 +1,50 @@
+import os
 import torch
+import streamlit as st
+from transformers import AutoTokenizer
 from unsloth import FastLanguageModel
 
-# Load the model on the CPU
-model_name = "AIModels24/Indian_Constitution"  # Replace with your actual model path
-
-# Explicitly set the device to CPU
+# Disable CUDA and force CPU
+os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 device = torch.device('cpu')
 
-# Load model without GPU dependencies
-model = FastLanguageModel.from_pretrained(
-    model_name=model_name,
-    max_seq_length=2048,
-    load_in_4bit=False,  # Disable 4-bit quantization (required for CPU)
-    dtype=torch.float32,  # Use float32 (default for CPU)
-)
+# Load the model and tokenizer
+model_name = "your-username/Indian_law_500Epochs"  # Replace with your actual model path
+
+@st.cache_resource
+def load_model():
+    # Load the tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+    # Load the model without GPU-specific settings
+    model = FastLanguageModel.from_pretrained(
+        model_name=model_name,
+        max_seq_length=2048,
+        load_in_4bit=False,  # Disable 4-bit quantization for CPU
+        dtype=torch.float32,  # Use float32 for CPU
+    )
+    # Move model to CPU
+    model = model.to(device)
 
-model = model.to(device)  # Ensure the model is loaded to CPU
+    return model, tokenizer
 
-# Example inference function
+model, tokenizer = load_model()
+
+# Inference function
 def generate_text(prompt):
     inputs = tokenizer(prompt, return_tensors="pt")
     inputs = inputs.to(device)  # Move inputs to CPU
     with torch.no_grad():
         outputs = model.generate(inputs['input_ids'], max_length=200)
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+# Streamlit UI
+st.title("Indian Law Fine-Tuned Model Inference")
+prompt = st.text_area("Enter your prompt:")
+
+if st.button("Generate Response"):
+    if prompt:
+        response = generate_text(prompt)
+        st.write(response)
+    else:
+        st.write("Please enter a prompt!")
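
For reference, below is a minimal sketch of how the same CPU-only load/generate path could be smoke-tested outside Streamlit. It is an assumption-laden sketch, not part of the commit: the repo id "your-username/Indian_law_500Epochs" is the placeholder from the diff, it assumes your unsloth build allows CPU execution at all (many releases expect a CUDA GPU, which is worth checking first), and it assumes FastLanguageModel.from_pretrained returns a (model, tokenizer) pair, which is the behaviour in the unsloth versions I have used; if yours returns only a model, drop the tuple unpacking.

    # Hypothetical smoke test mirroring the committed load/generate logic on CPU.
    import torch
    from unsloth import FastLanguageModel

    MODEL_NAME = "your-username/Indian_law_500Epochs"  # placeholder repo id from the diff
    device = torch.device("cpu")

    # Assumes from_pretrained returns (model, tokenizer); adjust if your version differs.
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=MODEL_NAME,
        max_seq_length=2048,
        load_in_4bit=False,   # 4-bit kernels need a GPU, so keep this off on CPU
        dtype=torch.float32,  # float32 is the safe default on CPU
    )
    model = model.to(device)

    prompt = "Explain Article 21 of the Indian Constitution."
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(inputs["input_ids"], max_length=200)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))

If this runs, "streamlit run app.py" should expose the same flow behind the text area and Generate Response button, with st.cache_resource keeping the model loaded across reruns.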