# palmyra-vision

## usage

```py
from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
from PIL import Image
import requests
import torch

processor = AutoProcessor.from_pretrained(
    "Writer/palmyra-vision-dummy-weights",
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="auto",
    use_fast=False,
)

model = AutoModelForCausalLM.from_pretrained(
    "Writer/palmyra-vision-dummy-weights",
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="auto",
)

inputs = processor.process(
    images=[
        Image.open(
            requests.get("https://picsum.photos/seed/picsum/200/300", stream=True).raw
        )
    ],
    text="what is this image about?",
)

inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}

output = model.generate_from_batch(
    inputs,
    GenerationConfig(max_new_tokens=200, stop_strings="<|endoftext|>"),
    tokenizer=processor.tokenizer,
)

generated_tokens = output[0, inputs["input_ids"].size(1) :]
generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
print(generated_text)
```