numb3r3 committed on
Commit
030f959
·
verified ·
1 Parent(s): f0146d5

init commit

Browse files
Files changed (2) hide show
  1. README.md +7 -7
  2. modeling.py +4 -6
README.md CHANGED
@@ -27,24 +27,24 @@ library_name: transformers
27
  [Blog](https://jina.ai/) | [API](https://jina.ai/reranker) | [AWS](#) | [Azure](#) | arXiv (coming soon)
28
 
29
 
30
- # jina-reranker-v3
31
 
32
  ## Intended Usage & Model Info
33
 
34
- The **Jina Reranker v3** (`jina-reranker-v3`) is a multilingual, multimodal model that has been fine-tuned for text and visual document reranking tasks, a crucial component of many information retrieval systems. It takes a query and a document pair as input and outputs a score indicating the relevance of the document to the query. The model is trained on a large dataset of query-document pairs and is capable of reranking documents in multiple languages with high accuracy.
35
 
36
 
37
  # Usage
38
 
39
  _This model repository is licensed for research and evaluation purposes under CC-BY-NC-4.0. For commercial usage, please refer to Jina AI's APIs, AWS SageMaker or Azure Marketplace offerings. Please [contact us](https://jina.ai/contact-sales) for any further clarifications._
40
- 1. The easiest way to use `jina-reranker-v3` is to call Jina AI's [Reranker API](https://jina.ai/reranker/).
41
 
42
  ```bash
43
  curl https://api.jina.ai/v1/rerank \
44
  -H "Content-Type: application/json" \
45
  -H "Authorization: Bearer YOUR_API_KEY" \
46
  -d '{
47
- "model": "jina-reranker-v3",
48
  "query": "Organic skincare products for sensitive skin",
49
  "documents": [
50
  {"text": "Organic skincare for sensitive skin with aloe vera and chamomile."},
@@ -76,12 +76,12 @@ And then:
76
  from transformers import AutoModel
77
 
78
  model = AutoModel.from_pretrained(
79
- 'jinaai/jina-reranker-v3',
80
  torch_dtype="auto",
81
  trust_remote_code=True,
82
  )
83
 
84
- model.to('cuda') # or 'cpu' if no GPU is available
85
  model.eval()
86
 
87
  # Example query and documents
@@ -102,7 +102,7 @@ documents = [
102
  # construct sentence pairs
103
  sentence_pairs = [[query, doc] for doc in documents]
104
 
105
- scores = model.compute_score(sentence_pairs, max_length=1024)
106
  ```
107
 
108
  The scores will be a list of floats, where each float represents the relevance score of the corresponding document to the query. Higher scores indicate higher relevance.
 
27
  [Blog](https://jina.ai/) | [API](https://jina.ai/reranker) | [AWS](#) | [Azure](#) | arXiv (coming soon)
28
 
29
 
30
+ # jina-reranker-m0
31
 
32
  ## Intended Usage & Model Info
33
 
34
+ The **Jina Reranker M0** (`jina-reranker-m0`) is a multilingual, multimodal model that has been fine-tuned for text and visual document reranking tasks, a crucial component of many information retrieval systems. It takes a query and a document pair as input and outputs a score indicating the relevance of the document to the query. The model is trained on a large dataset of query-document pairs and is capable of reranking documents in multiple languages with high accuracy.
35
 
36
 
37
  # Usage
38
 
39
  _This model repository is licensed for research and evaluation purposes under CC-BY-NC-4.0. For commercial usage, please refer to Jina AI's APIs, AWS SageMaker or Azure Marketplace offerings. Please [contact us](https://jina.ai/contact-sales) for any further clarifications._
40
+ 1. The easiest way to use `jina-reranker-m0` is to call Jina AI's [Reranker API](https://jina.ai/reranker/).
41
 
42
  ```bash
43
  curl https://api.jina.ai/v1/rerank \
44
  -H "Content-Type: application/json" \
45
  -H "Authorization: Bearer YOUR_API_KEY" \
46
  -d '{
47
+ "model": "jina-reranker-m0",
48
  "query": "Organic skincare products for sensitive skin",
49
  "documents": [
50
  {"text": "Organic skincare for sensitive skin with aloe vera and chamomile."},
 
76
  from transformers import AutoModel
77
 
78
  model = AutoModel.from_pretrained(
79
+ 'jinaai/jina-reranker-m0',
80
  torch_dtype="auto",
81
  trust_remote_code=True,
82
  )
83
 
84
+ model.to('cuda') # or 'cpu' if no GPU is available
85
  model.eval()
86
 
87
  # Example query and documents
 
102
  # construct sentence pairs
103
  sentence_pairs = [[query, doc] for doc in documents]
104
 
105
+ scores = model.compute_score(sentence_pairs, max_length=10240)
106
  ```
107
 
108
  The scores will be a list of floats, where each float represents the relevance score of the corresponding document to the query. Higher scores indicate higher relevance.
modeling.py CHANGED
@@ -1,6 +1,6 @@
1
  import torch
2
  from torch import nn
3
- from typing import Optional, Tuple, List, Union, Any
4
  from transformers import Qwen2VLForConditionalGeneration
5
  import logging
6
  import warnings
@@ -70,6 +70,7 @@ def formatting_prompts_func(
70
 
71
  return prompt
72
 
 
73
  class JinaVLForRanking(Qwen2VLForConditionalGeneration):
74
  def __init__(self, config):
75
  super().__init__(config)
@@ -129,6 +130,7 @@ class JinaVLForRanking(Qwen2VLForConditionalGeneration):
129
 
130
  if not hasattr(self, "_processor"):
131
  from transformers import AutoProcessor
 
132
  self._processor = AutoProcessor.from_pretrained(self.name_or_path, trust_remote_code=True)
133
 
134
  assert isinstance(pairs, list)
@@ -173,11 +175,7 @@ class JinaVLForRanking(Qwen2VLForConditionalGeneration):
173
  if len(tokens['input_ids']) >= max_doc_length:
174
  d = self._processor.tokenizer.decode(tokens['input_ids'])
175
 
176
- batch_inputs.append(
177
- formatting_prompts_func(
178
- q, d, query_type=query_type, doc_type=doc_type
179
- )
180
- )
181
 
182
  batch_images = None
183
  if doc_type == 'image':
 
1
  import torch
2
  from torch import nn
3
+ from typing import Optional, Tuple, List, Union
4
  from transformers import Qwen2VLForConditionalGeneration
5
  import logging
6
  import warnings
 
70
 
71
  return prompt
72
 
73
+
74
  class JinaVLForRanking(Qwen2VLForConditionalGeneration):
75
  def __init__(self, config):
76
  super().__init__(config)
 
130
 
131
  if not hasattr(self, "_processor"):
132
  from transformers import AutoProcessor
133
+
134
  self._processor = AutoProcessor.from_pretrained(self.name_or_path, trust_remote_code=True)
135
 
136
  assert isinstance(pairs, list)
 
175
  if len(tokens['input_ids']) >= max_doc_length:
176
  d = self._processor.tokenizer.decode(tokens['input_ids'])
177
 
178
+ batch_inputs.append(formatting_prompts_func(q, d, query_type=query_type, doc_type=doc_type))
 
 
 
 
179
 
180
  batch_images = None
181
  if doc_type == 'image':