Spaces:

sounar
/

ContactDoctor-API

Paused

File size: 1,614 Bytes

5be90eb
72ff248
8e90fc6
5be90eb
832ce7b
8e90fc6
 
2bf9d03
5be90eb
8e90fc6
5be90eb
 
 
 
8e90fc6
0b2a88c
5be90eb
8e90fc6
5be90eb
 
 
 
 
d16c5f3
8e90fc6
 
5be90eb
 
8e90fc6
65272a9
4f5fa66
2629ae5
5be90eb

import os

import torch
from flask import Flask, request, jsonify
from PIL import Image
from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig

# Hugging Face access token, read from the HF_TOKEN environment variable.
# Falls back to None when the variable is unset or blank, so a missing token
# no longer aborts the import with AttributeError on `None.strip()`.
api_token = os.getenv("HF_TOKEN", "").strip() or None

# Single source of truth for the checkpoint shared by the model and tokenizer.
MODEL_ID = "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1"

# Quantization settings: 4-bit NF4 weights with double quantization, and
# float16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# The model and tokenizer are loaded once, at import time, and reused by every
# request. `token` is forwarded so the token read above is actually used for
# the download; passing None keeps the library's default credential lookup.
# NOTE(review): trust_remote_code=True runs Python code shipped in the model
# repository -- confirm the repository is trusted (consider pinning a revision).
model = AutoModel.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    attn_implementation="flash_attention_2",
    token=api_token,
)
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    token=api_token,
)


app = Flask(__name__)

# The model and tokenizer are loaded above, at import time, and shared by every request.

@app.route('/analyze', methods=['POST'])
def analyze():
    """Answer a question about an uploaded image using the loaded model.

    Expects a multipart/form-data POST with two fields:
        image: the image file to analyze.
        question: the text prompt to ask about the image.

    Returns:
        A JSON object ``{'response': <generated text>}`` on success, or
        ``({'error': <message>}, 400)`` when a field is missing or the upload
        cannot be decoded as an image.
    """
    upload = request.files.get('image')
    question = request.form.get('question')
    if upload is None or question is None:
        return jsonify(
            {'error': "Both 'image' and 'question' fields are required."}
        ), 400

    # Decode and convert up front so a corrupt or non-image upload is reported
    # as a client error instead of surfacing as an unhandled exception (500).
    try:
        image = Image.open(upload).convert('RGB')
    except (OSError, Image.DecompressionBombError):
        return jsonify(
            {'error': 'The uploaded file is not a readable image.'}
        ), 400

    # Single-turn conversation: the user message carries the image and question.
    msgs = [{'role': 'user', 'content': [image, question]}]

    # With stream=True the result is consumed as an iterable of text chunks.
    res = model.chat(
        image=image,
        msgs=msgs,
        tokenizer=tokenizer,
        sampling=True,
        temperature=0.95,
        stream=True,
    )

    # Collect the streamed chunks into the full answer.
    generated_text = "".join(res)

    return jsonify({'response': generated_text})

if __name__ == '__main__':
    # Debug mode enables the Werkzeug interactive debugger, which lets a
    # client execute arbitrary code, so it is opt-in through FLASK_DEBUG
    # rather than always on.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {
        "1", "true", "yes", "on",
    }
    # The reloader re-executes this module in a child process, which would
    # load the model a second time; keep it off even when debugging.
    app.run(debug=debug_enabled, use_reloader=False)