from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the DistilBART-CNN-12-6 summarization model.
# Loading the tokenizer and model at module level (rather than inside the
# function) avoids reloading them on every call.
SUMMARIZATION_MODEL = "sshleifer/distilbart-cnn-12-6"
tokenizer = AutoTokenizer.from_pretrained(SUMMARIZATION_MODEL)
model = AutoModelForSeq2SeqLM.from_pretrained(SUMMARIZATION_MODEL, device_map="cuda:0")
def summarize(text, max_len=20):
    """
    Summarizes the given text using the DistilBART-CNN-12-6 model.

    Args:
        text (str): The text to be summarized.
        max_len (int, optional): Maximum number of input tokens passed to the
            model; longer inputs are truncated. Defaults to 20.

    Returns:
        str: The summarized text.
    """
    # Tokenize the input, truncating it to max_len tokens.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        max_length=max_len,
        truncation=True,
    ).input_ids
    # Move the input tensor to the same device as the model.
    inputs = inputs.cuda()
    # Generate the summary with beam search.
    outputs = model.generate(
        inputs,
        max_new_tokens=100,
        num_beams=8,
        length_penalty=0.2,
        early_stopping=False,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
def summarizePipeline(text):
    from transformers import pipeline

    # Note: the pipeline is rebuilt on every call, but it reuses the model and
    # tokenizer already loaded at module level above.
    pipe = pipeline(
        "summarization",
        model=model,
        tokenizer=tokenizer,
    )
    return pipe(text)[0]["summary_text"]
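
# Example usage: a minimal sketch of calling both entry points. The sample text
# and the max_len value are illustrative assumptions (not part of the original
# file), and a CUDA GPU is assumed to be available, matching the device
# settings above.
if __name__ == "__main__":
    sample = (
        "The James Webb Space Telescope is the largest optical telescope in space. "
        "Its high resolution and sensitivity allow it to view objects too old, "
        "distant, or faint for the Hubble Space Telescope."
    )
    # Beam-search summary via the explicit tokenizer/model path; max_len=512
    # keeps more of the input than the default truncation of 20 tokens.
    print(summarize(sample, max_len=512))
    # The same model wrapped in the high-level summarization pipeline.
    print(summarizePipeline(sample))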