init commit
Browse files- README.md +7 -7
- modeling.py +4 -6
README.md
CHANGED
@@ -27,24 +27,24 @@ library_name: transformers
|
|
27 |
[Blog](https://jina.ai/) | [API](https://jina.ai/reranker) | [AWS](#) | [Azure](#) | arXiv (coming soon)
|
28 |
|
29 |
|
30 |
-
# jina-reranker-
|
31 |
|
32 |
## Intended Usage & Model Info
|
33 |
|
34 |
-
The **Jina Reranker
|
35 |
|
36 |
|
37 |
# Usage
|
38 |
|
39 |
_This model repository is licensed for research and evaluation purposes under CC-BY-NC-4.0. For commercial usage, please refer to Jina AI's APIs, AWS SageMaker or Azure Marketplace offerings. Please [contact us](https://jina.ai/contact-sales) for any further clarifications._
|
40 |
-
1. The easiest way to use `jina-reranker-
|
41 |
|
42 |
```bash
|
43 |
curl https://api.jina.ai/v1/rerank \
|
44 |
-H "Content-Type: application/json" \
|
45 |
-H "Authorization: Bearer YOUR_API_KEY" \
|
46 |
-d '{
|
47 |
-
"model": "jina-reranker-
|
48 |
"query": "Organic skincare products for sensitive skin",
|
49 |
"documents": [
|
50 |
{"text": "Organic skincare for sensitive skin with aloe vera and chamomile."},
|
@@ -76,12 +76,12 @@ And then:
|
|
76 |
from transformers import AutoModel
|
77 |
|
78 |
model = AutoModel.from_pretrained(
|
79 |
-
'jinaai/jina-reranker-
|
80 |
torch_dtype="auto",
|
81 |
trust_remote_code=True,
|
82 |
)
|
83 |
|
84 |
-
model.to('cuda')
|
85 |
model.eval()
|
86 |
|
87 |
# Example query and documents
|
@@ -102,7 +102,7 @@ documents = [
|
|
102 |
# construct sentence pairs
|
103 |
sentence_pairs = [[query, doc] for doc in documents]
|
104 |
|
105 |
-
scores = model.compute_score(sentence_pairs, max_length=
|
106 |
```
|
107 |
|
108 |
The scores will be a list of floats, where each float represents the relevance score of the corresponding document to the query. Higher scores indicate higher relevance.
|
|
|
27 |
[Blog](https://jina.ai/) | [API](https://jina.ai/reranker) | [AWS](#) | [Azure](#) | arXiv (coming soon)
|
28 |
|
29 |
|
30 |
+
# jina-reranker-m0
|
31 |
|
32 |
## Intended Usage & Model Info
|
33 |
|
34 |
+
The **Jina Reranker M0** (`jina-reranker-m0`) is a multilingual, multimodal model that has been fine-tuned for text and visual document reranking, a crucial component of many information retrieval systems. It takes a query-document pair as input and outputs a score indicating the relevance of the document to the query. The model is trained on a large dataset of query-document pairs and is capable of reranking documents in multiple languages with high accuracy.
|
35 |
|
36 |
|
37 |
# Usage
|
38 |
|
39 |
_This model repository is licensed for research and evaluation purposes under CC-BY-NC-4.0. For commercial usage, please refer to Jina AI's APIs, AWS SageMaker or Azure Marketplace offerings. Please [contact us](https://jina.ai/contact-sales) for any further clarifications._
|
40 |
+
1. The easiest way to use `jina-reranker-m0` is to call Jina AI's [Reranker API](https://jina.ai/reranker/).
|
41 |
|
42 |
```bash
|
43 |
curl https://api.jina.ai/v1/rerank \
|
44 |
-H "Content-Type: application/json" \
|
45 |
-H "Authorization: Bearer YOUR_API_KEY" \
|
46 |
-d '{
|
47 |
+
"model": "jina-reranker-m0",
|
48 |
"query": "Organic skincare products for sensitive skin",
|
49 |
"documents": [
|
50 |
{"text": "Organic skincare for sensitive skin with aloe vera and chamomile."},
|
|
|
76 |
from transformers import AutoModel
|
77 |
|
78 |
model = AutoModel.from_pretrained(
|
79 |
+
'jinaai/jina-reranker-m0',
|
80 |
torch_dtype="auto",
|
81 |
trust_remote_code=True,
|
82 |
)
|
83 |
|
84 |
+
model.to('cuda') # or 'cpu' if no GPU is available
|
85 |
model.eval()
|
86 |
|
87 |
# Example query and documents
|
|
|
102 |
# construct sentence pairs
|
103 |
sentence_pairs = [[query, doc] for doc in documents]
|
104 |
|
105 |
+
scores = model.compute_score(sentence_pairs, max_length=10240)
|
106 |
```
|
107 |
|
108 |
The scores will be a list of floats, where each float represents the relevance score of the corresponding document to the query. Higher scores indicate higher relevance.
|
modeling.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import torch
|
2 |
from torch import nn
|
3 |
-
from typing import Optional, Tuple, List, Union
|
4 |
from transformers import Qwen2VLForConditionalGeneration
|
5 |
import logging
|
6 |
import warnings
|
@@ -70,6 +70,7 @@ def formatting_prompts_func(
|
|
70 |
|
71 |
return prompt
|
72 |
|
|
|
73 |
class JinaVLForRanking(Qwen2VLForConditionalGeneration):
|
74 |
def __init__(self, config):
|
75 |
super().__init__(config)
|
@@ -129,6 +130,7 @@ class JinaVLForRanking(Qwen2VLForConditionalGeneration):
|
|
129 |
|
130 |
if not hasattr(self, "_processor"):
|
131 |
from transformers import AutoProcessor
|
|
|
132 |
self._processor = AutoProcessor.from_pretrained(self.name_or_path, trust_remote_code=True)
|
133 |
|
134 |
assert isinstance(pairs, list)
|
@@ -173,11 +175,7 @@ class JinaVLForRanking(Qwen2VLForConditionalGeneration):
|
|
173 |
if len(tokens['input_ids']) >= max_doc_length:
|
174 |
d = self._processor.tokenizer.decode(tokens['input_ids'])
|
175 |
|
176 |
-
batch_inputs.append(
|
177 |
-
formatting_prompts_func(
|
178 |
-
q, d, query_type=query_type, doc_type=doc_type
|
179 |
-
)
|
180 |
-
)
|
181 |
|
182 |
batch_images = None
|
183 |
if doc_type == 'image':
|
|
|
1 |
import torch
|
2 |
from torch import nn
|
3 |
+
from typing import Optional, Tuple, List, Union
|
4 |
from transformers import Qwen2VLForConditionalGeneration
|
5 |
import logging
|
6 |
import warnings
|
|
|
70 |
|
71 |
return prompt
|
72 |
|
73 |
+
|
74 |
class JinaVLForRanking(Qwen2VLForConditionalGeneration):
|
75 |
def __init__(self, config):
|
76 |
super().__init__(config)
|
|
|
130 |
|
131 |
if not hasattr(self, "_processor"):
|
132 |
from transformers import AutoProcessor
|
133 |
+
|
134 |
self._processor = AutoProcessor.from_pretrained(self.name_or_path, trust_remote_code=True)
|
135 |
|
136 |
assert isinstance(pairs, list)
|
|
|
175 |
if len(tokens['input_ids']) >= max_doc_length:
|
176 |
d = self._processor.tokenizer.decode(tokens['input_ids'])
|
177 |
|
178 |
+
batch_inputs.append(formatting_prompts_func(q, d, query_type=query_type, doc_type=doc_type))
|
|
|
|
|
|
|
|
|
179 |
|
180 |
batch_images = None
|
181 |
if doc_type == 'image':
|