from typing import Optional, Union

import torch
import transformers


class Generator:
    """Wraps a causal LM and switches between a CAIF sampler and an
    ordinary sampler, either on a fixed period or based on the entropy
    of the next-token distribution."""

    def __init__(self, lm_model_name, device, entropy=None):
        self.device = device
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(
            lm_model_name
        )
        self.lm = transformers.AutoModelForCausalLM.from_pretrained(
            lm_model_name
        ).to(device)
        self.lm.eval()

        # Reuse EOS as the padding token (GPT-like models ship without one).
        self.lm.config.pad_token_id = self.lm.config.eos_token_id
        self.tokenizer.add_special_tokens(
            {"pad_token": self.tokenizer.decode(self.lm.config.eos_token_id)}
        )

        self.caif_sampler = None
        self.ordinary_sampler = None
        self.entropy_based_stats = {
            "skips": 0,
            "avg_entropy": 0,
            "count": 0,
        }
        self.entropy = entropy

    def set_caif_sampler(self, sampler):
        self.caif_sampler = sampler

    def set_ordinary_sampler(self, sampler):
        self.ordinary_sampler = sampler

    def sample_sequences(
        self,
        num_samples: int,
        input_prompt: Optional[str],
        max_length: int,
        caif_period: int,
        caif_tokens_num: Optional[int] = None,
        entropy: Optional[float] = None,
        **sampler_kwargs
    ):
        # Note: this overrides any entropy threshold set in __init__.
        self.entropy = entropy

        input_ids, past, ended_sequences = self.get_input_ids(
            input_prompt,
            num_samples,
        )

        for i in range(max_length):
            # Without an entropy threshold, the CAIF sampler fires every
            # `caif_period` steps; with one, generation_step decides per
            # sequence and this flag is ignored.
            is_caif_step = (
                i % caif_period == 0 and self.caif_sampler is not None
            )
            input_ids, past, ended_sequences = self.generation_step(
                input_ids,
                past,
                ended_sequences,
                is_caif_step,
                caif_tokens_num=caif_tokens_num,
                **sampler_kwargs
            )
            if ended_sequences.all():
                break

        return (
            [
                self.tokenizer.decode(sequence, skip_special_tokens=True)
                for sequence in input_ids
            ],
            input_ids,
        )

    def generation_step(
        self,
        input_ids,
        past,
        ended_sequences,
        is_caif_step: bool,
        caif_tokens_num=None,
        **sampler_kwargs
    ):
        # With a cache, the model trims the input down to the newest token.
        prepared_inputs = self.lm.prepare_inputs_for_generation(
            input_ids, past, use_cache=True
        )
        outputs = self.lm(
            **prepared_inputs,
            output_attentions=False,
            output_hidden_states=False,
            return_dict=True
        )
        past = outputs.past_key_values

        if self.entropy is not None:
            # Entropy-based switching: sequences whose next-token entropy
            # reaches the threshold are routed to the CAIF sampler, the
            # rest to the ordinary sampler.
            log_probs = torch.nn.functional.log_softmax(
                outputs.logits, dim=-1
            )
            probs = torch.exp(log_probs)
            # Shannon entropy of the last position's distribution.
            output_entropy = (-probs * log_probs).sum(-1)[:, -1]
            batch_size = output_entropy.shape[0]
            caif_mask = torch.ge(output_entropy, self.entropy)
            ordinary_mask = ~caif_mask

            # `.item()` detaches the stats from the graph and the device.
            self.entropy_based_stats["skips"] += (
                caif_mask.sum() / batch_size
            ).item()
            self.entropy_based_stats["count"] += 1
            self.entropy_based_stats["avg_entropy"] += (
                output_entropy.sum() / batch_size
            ).item()
            flatten_entropy = output_entropy.view(-1).cpu().tolist()
            if "entropy" not in self.entropy_based_stats:
                self.entropy_based_stats["entropy"] = flatten_entropy
            else:
                self.entropy_based_stats["entropy"] += flatten_entropy

            if caif_mask.sum() == 0:
                # No sequence crossed the threshold: ordinary sampling only.
                next_tokens = self.ordinary_sampler(
                    input_ids,
                    outputs.logits,
                    caif_tokens_num=caif_tokens_num,
                    **sampler_kwargs
                )
                # Finished sequences keep emitting EOS.
                next_tokens = (
                    next_tokens * (1 - ended_sequences.long())
                    + self.lm.config.eos_token_id * ended_sequences.long()
                ).long()
            elif caif_mask.sum() == batch_size:
                # Every sequence crossed the threshold: CAIF sampling only.
                next_tokens = self.caif_sampler(
                    input_ids,
                    outputs.logits,
                    caif_tokens_num=caif_tokens_num,
                    **sampler_kwargs
                )
                next_tokens = (
                    next_tokens * (1 - ended_sequences.long())
                    + self.lm.config.eos_token_id * ended_sequences.long()
                ).long()
            else:
                # Mixed batch: run each sampler on its own slice.
                next_tokens_caif = self.caif_sampler(
                    input_ids[caif_mask],
                    outputs.logits[caif_mask],
                    caif_tokens_num=caif_tokens_num,
                    **sampler_kwargs
                )
                next_tokens_ordinary = self.ordinary_sampler(
                    input_ids[ordinary_mask],
                    outputs.logits[ordinary_mask],
                    caif_tokens_num=caif_tokens_num,
                    **sampler_kwargs
                )
                # Finished sequences keep emitting EOS in both slices.
                next_tokens_caif = (
                    next_tokens_caif
                    * (1 - ended_sequences[caif_mask].long())
                    + self.lm.config.eos_token_id
                    * ended_sequences[caif_mask].long()
                ).long()
                next_tokens_ordinary = (
                    next_tokens_ordinary
                    * (1 - ended_sequences[ordinary_mask].long())
                    + self.lm.config.eos_token_id
                    * ended_sequences[ordinary_mask].long()
                ).long()
                # Scatter both slices back into a single batch.
                next_tokens = torch.ones(
                    batch_size, dtype=torch.long, device=self.device
                )
                next_tokens[caif_mask] = next_tokens_caif
                next_tokens[ordinary_mask] = next_tokens_ordinary
        else:
            # Periodic switching: the step index decides which sampler runs.
            if is_caif_step:
                next_tokens_sampler = self.caif_sampler
            else:
                next_tokens_sampler = self.ordinary_sampler
            next_tokens = next_tokens_sampler(
                input_ids,
                outputs.logits,
                caif_tokens_num=caif_tokens_num,
                **sampler_kwargs
            )
            next_tokens = (
                next_tokens * (1 - ended_sequences.long())
                + self.lm.config.eos_token_id * ended_sequences.long()
            ).long()

        input_ids = torch.cat(
            [input_ids, next_tokens[:, None].to(self.device)], dim=-1
        )
        # Mark sequences that just produced EOS as finished.
        ended_sequences = ended_sequences | (
            next_tokens == self.lm.config.eos_token_id
        )

        return input_ids, past, ended_sequences

    def get_input_ids(self, input_prompt, num_samples):
        # Start every sequence from BOS (assumes the model defines one),
        # optionally followed by the encoded prompt.
        input_ids = torch.tensor([[self.lm.config.bos_token_id]])
        if input_prompt is not None:
            input_prompt = self.tokenizer(
                input_prompt, return_tensors="pt"
            ).input_ids
            input_ids = torch.cat([input_ids, input_prompt], 1)
        input_ids = input_ids.repeat(num_samples, 1).to(self.device)
        past = None
        ended_sequences = torch.zeros(
            input_ids.shape[0], device=self.device
        ).bool()
        return input_ids, past, ended_sequences

    @staticmethod
    def sample(unscaled_probs, values):
        # Draw one index per row from (possibly unnormalized) probabilities
        # and gather the corresponding values.
        samples = torch.multinomial(unscaled_probs, 1)
        return torch.take_along_dim(values, samples, dim=1)
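

# ---------------------------------------------------------------------------
# Usage sketch (an illustration, not part of the original module). It assumes
# samplers are plain callables taking (input_ids, logits,
# caif_tokens_num=None, **kwargs) and returning one next-token id per
# sequence, which is the interface Generator calls them with above. The
# top-k sampler and the "gpt2" model name are illustrative stand-ins for the
# project's real CAIF/ordinary samplers.
# ---------------------------------------------------------------------------
if __name__ == "__main__":

    def top_k_sampler(
        input_ids, logits, caif_tokens_num=None, top_k=50, **kwargs
    ):
        # Sample from the top-k next-token distribution at the last position.
        values, indices = torch.topk(logits[:, -1, :], k=top_k, dim=-1)
        probs = torch.softmax(values, dim=-1)
        return Generator.sample(probs, indices).squeeze(-1)

    generator = Generator("gpt2", device="cpu")
    generator.set_ordinary_sampler(top_k_sampler)
    # Stand-in: a real CAIF sampler would reweight logits with a classifier.
    generator.set_caif_sampler(top_k_sampler)
    texts, _ = generator.sample_sequences(
        num_samples=2,
        input_prompt="The movie was",
        max_length=20,
        caif_period=5,
    )
    print(texts)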