import os

from flask import Flask, request, jsonify
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig

# Read the Hugging Face API token from the environment; fail early with a
# clear message instead of crashing on .strip() when the variable is unset.
api_token = os.getenv("HF_TOKEN")
if api_token is None:
    raise RuntimeError("HF_TOKEN environment variable is not set")
api_token = api_token.strip()

# 4-bit NF4 quantization with double quantization; computation in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the quantized multimodal model and its tokenizer. The token is passed
# so that access to the repository is authenticated.
model = AutoModel.from_pretrained(
    "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1",
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    attn_implementation="flash_attention_2",
    token=api_token,
)
tokenizer = AutoTokenizer.from_pretrained(
    "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1",
    trust_remote_code=True,
    token=api_token,
)

app = Flask(__name__)

@app.route('/analyze', methods=['POST'])
def analyze():
    # Read the uploaded image and the question from the multipart form data.
    image_file = request.files['image']
    question = request.form['question']

    # Decode the upload into an RGB PIL image.
    image = Image.open(image_file).convert('RGB')

    # Build the chat-style message expected by the model's custom chat() API
    # (provided via trust_remote_code).
    msgs = [{'role': 'user', 'content': [image, question]}]

    # Generate a response; with stream=True the model yields text chunks
    # as they are produced.
    res = model.chat(
        image=image,
        msgs=msgs,
        tokenizer=tokenizer,
        sampling=True,
        temperature=0.95,
        stream=True,
    )

    # Concatenate the streamed chunks into the full answer.
    generated_text = "".join(res)

    return jsonify({'response': generated_text})

if __name__ == '__main__':
    app.run(debug=True)
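
# --- Usage example ----------------------------------------------------------
# A minimal client sketch, assuming the server above is running on
# localhost:5000 (Flask's default) and that a local file named
# "chest_xray.png" exists; the filename and the question are illustrative,
# not part of the original code. Run from a separate process with the
# `requests` package installed:
#
#   import requests
#
#   with open("chest_xray.png", "rb") as f:
#       resp = requests.post(
#           "http://localhost:5000/analyze",
#           files={"image": f},
#           data={"question": "Describe any abnormalities in this image."},
#       )
#   print(resp.json()["response"])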