Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 5,678 Bytes
77d5469 87e5c9c 21c203b 77d5469 87e5c9c c006617 87e5c9c 8312087 87e5c9c c006617 87e5c9c c006617 87e5c9c c006617 87e5c9c 9350787 87e5c9c 21c203b 9350787 2956200 21c203b 87e5c9c 9350787 2956200 8312087 87e5c9c 7813441 c006617 87e5c9c 9350787 c006617 87e5c9c c006617 87e5c9c 9350787 87e5c9c 77d5469 87e5c9c c006617 87e5c9c c006617 87e5c9c c006617 87e5c9c 55b49e6 87e5c9c 55b49e6 87e5c9c c006617 87e5c9c c006617 87e5c9c 77d5469 c006617 77d5469 c006617 77d5469 87e5c9c 5dfe75e 87e5c9c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 |
"""
summarize - a module for summarizing text using a model from the Hugging Face model hub
"""
import logging
import os
import pprint as pp
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(message)s")
import torch
from tqdm.auto import tqdm
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from utils import validate_pytorch2
def load_model_and_tokenizer(model_name: str) -> tuple:
"""
load_model_and_tokenizer - load a model and tokenizer from a model name/ID on the hub
:param str model_name: the model name/ID on the hub
:return tuple: a tuple containing the model and tokenizer
"""
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForSeq2SeqLM.from_pretrained(
model_name,
use_auth_token=os.environ.get("HF_TOKEN", None),
).to(device)
model = model.eval()
tokenizer = AutoTokenizer.from_pretrained(
model_name,
use_auth_token=os.environ.get("HF_TOKEN", None),
)
logging.info(f"Loaded model {model_name} to {device}")
if validate_pytorch2():
try:
logging.info("Compiling model with Torch 2.0")
model = torch.compile(model)
except Exception as e:
logging.warning(f"Could not compile model with Torch 2.0: {e}")
else:
logging.info("Torch 2.0 not detected, skipping compilation")
return model, tokenizer
def summarize_and_score(
ids, mask, model, tokenizer, is_general_attention_model=True, **kwargs
) -> tuple:
"""
summarize_and_score - given a batch of ids and a mask, return a summary and a score for the summary
Args:
ids (): the batch of ids
mask (): the attention mask for the batch
model (): the model to use for summarization
tokenizer (): the tokenizer to use for summarization
is_general_attention_model (bool, optional): whether the model is a general attention model. Defaults to True.
**kwargs: any additional arguments to pass to the model
Returns:
tuple (str, float): the summary, the score for the summary
"""
ids = ids[None, :]
mask = mask[None, :]
input_ids = ids.to("cuda") if torch.cuda.is_available() else ids
attention_mask = mask.to("cuda") if torch.cuda.is_available() else mask
global_attention_mask = torch.zeros_like(attention_mask)
# put global attention on <s> token
global_attention_mask[:, 0] = 1
if is_general_attention_model:
summary_pred_ids = model.generate(
input_ids,
attention_mask=attention_mask,
output_scores=True,
return_dict_in_generate=True,
**kwargs,
)
else:
summary_pred_ids = model.generate(
input_ids,
attention_mask=attention_mask,
global_attention_mask=global_attention_mask,
output_scores=True,
return_dict_in_generate=True,
**kwargs,
)
summary = tokenizer.batch_decode(
summary_pred_ids.sequences,
skip_special_tokens=True,
remove_invalid_values=True,
)
score = round(summary_pred_ids.sequences_scores.cpu().numpy()[0], 4)
return summary, score
def summarize_via_tokenbatches(
input_text: str,
model,
tokenizer,
batch_length=2048,
batch_stride=16,
min_batch_length=512,
**kwargs,
) -> list:
"""
summarize_via_tokenbatches - summarize a long string via batches of tokens
Args:
input_text (str): the text to summarize
model (): the model to use for summarization
tokenizer (): the tokenizer to use for summarization
batch_length (int, optional): the length of each batch. Defaults to 2048.
batch_stride (int, optional): the stride of each batch. Defaults to 16. The stride is the number of tokens that overlap between batches.
min_batch_length (int, optional): the minimum length of each batch. Defaults to 512.
**kwargs: any additional arguments to pass to the model for inference
Returns:
list: a list of dictionaries containing the input tokens, the summary, and the summary score
"""
logger = logging.getLogger(__name__)
# log all input parameters
if batch_length < min_batch_length:
logger.warning(
f"batch_length must be at least {min_batch_length}. Setting batch_length to {min_batch_length}"
)
batch_length = min_batch_length
logger.info(f"input parameters:\n{pp.pformat(kwargs)}")
logger.info(f"batch_length: {batch_length}, batch_stride: {batch_stride}")
encoded_input = tokenizer(
input_text,
padding="max_length",
truncation=True,
max_length=batch_length,
stride=batch_stride,
return_overflowing_tokens=True,
add_special_tokens=False,
return_tensors="pt",
)
in_id_arr, att_arr = encoded_input.input_ids, encoded_input.attention_mask
gen_summaries = []
pbar = tqdm(total=len(in_id_arr))
for _id, _mask in zip(in_id_arr, att_arr):
result, score = summarize_and_score(
ids=_id,
mask=_mask,
model=model,
tokenizer=tokenizer,
**kwargs,
)
score = round(float(score), 4)
_sum = {
"input_tokens": _id,
"summary": result,
"summary_score": score,
}
gen_summaries.append(_sum)
logger.debug(f"Score for batch: {score}. num chars: {len(repr(result))}")
logger.debug(f"Summary:\n\t{result}")
pbar.update()
pbar.close()
return gen_summaries
|