Tevatron usage
See the Tevatron multimodal examples: https://github.com/texttron/tevatron/tree/main/examples/multimodal
Load the model
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
from peft import PeftModel, PeftConfig

def get_model(peft_model_name):
    # Load the LoRA adapter on top of its Qwen2.5-VL base model and merge it for inference.
    config = PeftConfig.from_pretrained(peft_model_name)
    base_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(config.base_model_name_or_path)
    model = PeftModel.from_pretrained(base_model, peft_model_name)
    model = model.merge_and_unload()
    model.eval()
    return model

model = get_model('Tevatron/unified-retriever-v0.1').to('cuda:0')
processor = AutoProcessor.from_pretrained('Tevatron/unified-retriever-v0.1')
Encode text query
import torch
from qwen_vl_utils import process_vision_info

def get_embedding(last_hidden_state: torch.Tensor) -> torch.Tensor:
    # The embedding is the hidden state of the last token, L2-normalized.
    reps = last_hidden_state[:, -1]
    reps = torch.nn.functional.normalize(reps, p=2, dim=-1)
    return reps

queries = ["Where can we see Llama?", "What is the LLaMA AI model?"]

query_messages = []
for query in queries:
    message = [
        {
            'role': 'user',
            'content': [
                {'type': 'text', 'text': f'Query: {query}'},
            ]
        }
    ]
    query_messages.append(message)

query_texts = [
    processor.apply_chat_template(msg, tokenize=False, add_generation_prompt=False) + "<|endoftext|>"
    for msg in query_messages
]

query_image_inputs, query_video_inputs = process_vision_info(query_messages)
query_inputs = processor(text=query_texts, images=query_image_inputs, videos=query_video_inputs, padding='longest', return_tensors='pt').to('cuda:0')

with torch.no_grad():
    output = model(**query_inputs, return_dict=True, output_hidden_states=True)
    query_embeddings = get_embedding(output.hidden_states[-1])
To encode textual documents, use the same code as the query encoding above, but without the 'Query: ' prefix. A minimal sketch is shown below.
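The following sketch assumes the model, processor, and get_embedding from above; the document strings in text_docs are illustrative placeholders, not part of the original example.

# Minimal sketch: encode plain-text documents (no 'Query: ' prefix).
text_docs = [
    "Llamas are domesticated South American camelids.",
    "LLaMA is a family of large language models released by Meta AI.",
]

text_doc_messages = []
for doc in text_docs:
    message = [
        {
            'role': 'user',
            'content': [
                {'type': 'text', 'text': doc},  # document text goes in directly, without 'Query: '
            ]
        }
    ]
    text_doc_messages.append(message)

text_doc_texts = [
    processor.apply_chat_template(msg, tokenize=False, add_generation_prompt=False) + "<|endoftext|>"
    for msg in text_doc_messages
]

text_doc_image_inputs, text_doc_video_inputs = process_vision_info(text_doc_messages)
text_doc_inputs = processor(text=text_doc_texts, images=text_doc_image_inputs, videos=text_doc_video_inputs, padding='longest', return_tensors='pt').to('cuda:0')

with torch.no_grad():
    output = model(**text_doc_inputs, return_dict=True, output_hidden_states=True)
    text_doc_embeddings = get_embedding(output.hidden_states[-1])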
Encode Document Screenshot
import requests
from io import BytesIO
from PIL import Image

# URLs of the document screenshot images
url1 = "https://huggingface.co/Tevatron/dse-phi3-docmatix-v2/resolve/main/animal-llama.png"
url2 = "https://huggingface.co/Tevatron/dse-phi3-docmatix-v2/resolve/main/meta-llama.png"

response1 = requests.get(url1)
response2 = requests.get(url2)

doc_image1 = Image.open(BytesIO(response1.content))
doc_image2 = Image.open(BytesIO(response2.content))
doc_images = [doc_image1, doc_image2]

doc_messages = []
for doc in doc_images:
    message = [
        {
            'role': 'user',
            'content': [
                {'type': 'text', 'text': ''},
                {'type': 'image', 'image': doc, 'resized_height': 784, 'resized_width': 784}
            ]
        }
    ]
    doc_messages.append(message)

doc_texts = [
    processor.apply_chat_template(msg, tokenize=False, add_generation_prompt=False) + "<|endoftext|>"
    for msg in doc_messages
]

doc_image_inputs, doc_video_inputs = process_vision_info(doc_messages)
doc_inputs = processor(text=doc_texts, images=doc_image_inputs, videos=doc_video_inputs, padding='longest', return_tensors='pt').to('cuda:0')

with torch.no_grad():
    output = model(**doc_inputs, return_dict=True, output_hidden_states=True)
    doc_embeddings = get_embedding(output.hidden_states[-1])
Compute Similarity
from torch.nn.functional import cosine_similarity

num_queries = query_embeddings.size(0)
num_passages = doc_embeddings.size(0)

for i in range(num_queries):
    query_embedding = query_embeddings[i].unsqueeze(0)
    similarities = cosine_similarity(query_embedding, doc_embeddings)
    print(f"Similarities for Query {i+1}: {similarities.cpu().float().numpy()}")

# Similarities for Query 1: [0.3282001  0.17449486]
# Similarities for Query 2: [0.08133292 0.30867738]
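Because the embeddings are L2-normalized, a plain dot product gives the same scores as cosine similarity. The sketch below (not part of the original example) ranks all documents for every query in one batched operation; the variable names are illustrative.

# Minimal ranking sketch over the embeddings computed above.
scores = query_embeddings @ doc_embeddings.T  # shape: [num_queries, num_docs]
top_scores, top_idx = torch.topk(scores, k=min(2, doc_embeddings.size(0)), dim=-1)
for i, (idx_row, score_row) in enumerate(zip(top_idx.tolist(), top_scores.tolist())):
    print(f"Query {i+1}: best documents {idx_row} with scores {score_row}")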
Model tree for Tevatron/unified-retriever-v0.1
Base model: Qwen/Qwen2.5-VL-3B-Instruct