Update README.md
README.md CHANGED
@@ -38,6 +38,8 @@ sentences = [
 
 tokenized_query = ViTokenizer.tokenize(query)
 tokenized_sentences = [ViTokenizer.tokenize(sent) for sent in sentences]
+
+tokenized_pairs = [[tokenized_query, sent] for sent in tokenized_sentences]
 ```
 
 ## Usage with sentence-transformers
@@ -45,22 +47,28 @@ tokenized_sentences = [ViTokenizer.tokenize(sent) for sent in sentences]
 ```python
 from sentence_transformers import CrossEncoder
 model = CrossEncoder('itdainb/vietnamese-cross-encoder', max_length=256)
-scores = model.predict(
+scores = model.predict(tokenized_pairs)
 ```
 
 ## Usage with transformers
 
 ```python
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
 import torch
 
 model = AutoModelForSequenceClassification.from_pretrained('itdainb/vietnamese-cross-encoder')
 tokenizer = AutoTokenizer.from_pretrained('itdainb/vietnamese-cross-encoder')
 
-
+activation_fct = torch.nn.Identity()
+
+features = tokenizer([q for q, p in tokenized_pairs], [p for q, p in tokenized_pairs], padding=True, truncation="longest_first", return_tensors="pt", max_length=256)
 
 model.eval()
 with torch.no_grad():
-
+    model_predictions = model(**features, return_dict=True)
+    logits = activation_fct(model_predictions.logits)
+
+    scores = [score[0] for score in logits]
 print(scores)
 ```
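
For reference, the added lines compose into one runnable snippet. The sketch below is a minimal end-to-end version of the sentence-transformers path, assuming `pyvi` and `sentence-transformers` are installed; the `query` and `sentences` values are hypothetical placeholders for the ones the README defines just above the first hunk.

```python
from pyvi import ViTokenizer
from sentence_transformers import CrossEncoder

# Hypothetical inputs; the README defines its own `query` and `sentences`.
query = "trái cây tốt cho sức khỏe"
sentences = [
    "Ăn nhiều trái cây giúp tăng cường sức khỏe.",
    "Hà Nội là thủ đô của Việt Nam.",
]

# Word-segment the Vietnamese text with pyvi, as the README does above.
tokenized_query = ViTokenizer.tokenize(query)
tokenized_sentences = [ViTokenizer.tokenize(sent) for sent in sentences]
tokenized_pairs = [[tokenized_query, sent] for sent in tokenized_sentences]

model = CrossEncoder('itdainb/vietnamese-cross-encoder', max_length=256)
scores = model.predict(tokenized_pairs)  # one relevance score per (query, sentence) pair
print(scores)
```

The transformers path in the diff yields the same ranking: with a single-logit sequence-classification head, `torch.nn.Identity()` simply passes the raw logit through as the score, and any monotonic activation (e.g. `torch.sigmoid`) would leave the ordering of the pairs unchanged.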