Praveen0309 committed
Commit 3aef1dd
Parent(s): 387e963

Files changed (2):
  1. Dockerfile +1 -4
  2. app.py +14 -14
Dockerfile CHANGED
@@ -1,8 +1,5 @@
  FROM python:3.10

- # Use the official Python 3.9 image
- # FROM python:3.9
-
  # Set the working directory to /code
  WORKDIR /code

@@ -26,4 +23,4 @@ WORKDIR $HOME/app
  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
  COPY --chown=user . $HOME/app

- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+ CMD ["flask", "run", "--host", "0.0.0.0", "--port", "7860"]
app.py CHANGED
@@ -8,7 +8,7 @@ from transformers import AutoProcessor, LlavaForConditionalGeneration, BitsAndBytesConfig
  from deep_translator import GoogleTranslator
  from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
  import warnings
- # from flask import Flask
+ from flask import Flask

  # from flask_ngrok import run_with_ngrok
  app = Flask(__name__)
@@ -16,16 +16,16 @@ app = Flask(__name__)

  warnings.filterwarnings('ignore')

- # model_id = "HuggingFaceH4/vsft-llava-1.5-7b-hf-trl"
- # quantization_config = BitsAndBytesConfig(load_in_4bit=True)
- # base_model = LlavaForConditionalGeneration.from_pretrained(model_id, quantization_config=quantization_config, torch_dtype=torch.float16)
+ model_id = "HuggingFaceH4/vsft-llava-1.5-7b-hf-trl"
+ quantization_config = BitsAndBytesConfig(load_in_4bit=True)
+ base_model = LlavaForConditionalGeneration.from_pretrained(model_id, quantization_config=quantization_config, torch_dtype=torch.float16)

- # # Load the PEFT Lora adapter
- # peft_lora_adapter_path = "Praveen0309/llava-1.5-7b-hf-ft-mix-vsft-3"
- # peft_lora_adapter = PeftModel.from_pretrained(base_model, peft_lora_adapter_path, adapter_name="lora_adapter")
- # base_model.load_adapter(peft_lora_adapter_path, adapter_name="lora_adapter")
+ # Load the PEFT Lora adapter
+ peft_lora_adapter_path = "Praveen0309/llava-1.5-7b-hf-ft-mix-vsft-3"
+ peft_lora_adapter = PeftModel.from_pretrained(base_model, peft_lora_adapter_path, adapter_name="lora_adapter")
+ base_model.load_adapter(peft_lora_adapter_path, adapter_name="lora_adapter")

- # processor = AutoProcessor.from_pretrained("HuggingFaceH4/vsft-llava-1.5-7b-hf-trl")
+ processor = AutoProcessor.from_pretrained("HuggingFaceH4/vsft-llava-1.5-7b-hf-trl")
  # model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
  # tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
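This hunk activates the model stack that was previously commented out: the LLaVA base model is loaded in 4-bit precision and the fine-tuned LoRA adapter is attached on top of it. A minimal self-contained sketch of the same pattern (the import names are assumed from the file's header, which the diff only shows in truncated form):

    import torch
    from peft import PeftModel
    from transformers import AutoProcessor, BitsAndBytesConfig, LlavaForConditionalGeneration

    model_id = "HuggingFaceH4/vsft-llava-1.5-7b-hf-trl"

    # Quantize the 7B base model to 4-bit so it fits in modest GPU memory.
    quantization_config = BitsAndBytesConfig(load_in_4bit=True)
    base_model = LlavaForConditionalGeneration.from_pretrained(
        model_id,
        quantization_config=quantization_config,
        torch_dtype=torch.float16,  # compute dtype for the non-quantized parts
    )

    # Attach the fine-tuned LoRA weights on top of the frozen base model.
    adapter_path = "Praveen0309/llava-1.5-7b-hf-ft-mix-vsft-3"
    model = PeftModel.from_pretrained(base_model, adapter_path, adapter_name="lora_adapter")

    processor = AutoProcessor.from_pretrained(model_id)

Note that the committed code both wraps the model with PeftModel.from_pretrained and then calls base_model.load_adapter on the same path, which loads the same adapter twice; either call alone is normally enough.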
 
@@ -49,7 +49,7 @@ warnings.filterwarnings('ignore')
  def inference(image_prompt, image):
      prompt = f"USER: <image>\n{image_prompt} ASSISTANT:"
      inputs = processor(text=prompt, images=image, return_tensors="pt")
-     generate_ids = base_model.generate(**inputs, max_new_tokens=1024)
+     generate_ids = base_model.generate(**inputs, max_new_tokens=15)
      decoded_response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

      # prompt = "USER: <image>\nWhat's the content of the image? ASSISTANT:"
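The only change in this hunk lowers max_new_tokens from 1024 to 15, capping replies at a handful of tokens; that keeps latency down on small hardware but will truncate most answers. A hedged sketch of the same step with the cap exposed as a parameter (the .to(...) call and the ASSISTANT: split are defensive additions, not in the committed code):

    def inference(image_prompt, image, max_new_tokens=15):
        # Build the LLaVA chat prompt around the user's question.
        prompt = f"USER: <image>\n{image_prompt} ASSISTANT:"
        # Keep the input tensors on whatever device the (quantized) model lives on.
        inputs = processor(text=prompt, images=image, return_tensors="pt").to(base_model.device)
        # max_new_tokens bounds the length of the generated reply.
        generate_ids = base_model.generate(**inputs, max_new_tokens=max_new_tokens)
        decoded = processor.batch_decode(
            generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )[0]
        # Keep only the text after the final "ASSISTANT:" marker.
        return decoded.split("ASSISTANT:")[-1].strip()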
@@ -144,8 +144,8 @@ def get_bot_response():
          image = image_cache['image']
          # print(image)
          query = request.args.get('msg')
-         output = query
-         # output = google_response(image, query)
+         # output = query
+         output = google_response(image, query)
          return output
      else:
          return "Please upload an image to continue"
@@ -154,8 +154,8 @@ def get_bot_response():


  # Run the Flask app
- # if __name__ == "__main__":
- app.run(debug = True)
+ if __name__ == "__main__":
+     app.run(host="0.0.0.0", port=5000, debug=True)


  # from pymongo import MongoClient
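The server start is now wrapped in the standard if __name__ == "__main__" guard and binds explicitly to 0.0.0.0:5000. Under the Dockerfile's flask run entrypoint this block never executes, because the CLI imports the module and serves it on port 7860 itself; the port 5000 here only applies when running python app.py directly. debug=True enables the auto-reloader and the interactive debugger, which is convenient locally but should not be exposed in production.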
 