Implement Min P as a sampler option in HF loaders (#4449)

2023-11-02 14:32:51 -05:00 · 2023-11-02 14:32:51 -05:00 · 367e5e6e43
commit 367e5e6e43
parent fcb7017b7a
7 changed files with 43 additions and 1 deletions
--- a/modules/sampler_hijack.py
+++ b/modules/sampler_hijack.py
@ -12,6 +12,34 @@ from transformers.generation.logits_process import (

 global_scores = None

+class MinPLogitsWarper(LogitsWarper):
+    def __init__(self, min_p: float, filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1):
+        if min_p < 0 or min_p > 1.0:
+            raise ValueError(f"`min_p` has to be a float >= 0 and <= 1, but is {min_p}")
+        self.min_p = min_p
+        self.filter_value = filter_value
+        self.min_tokens_to_keep = min_tokens_to_keep
+
+    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
+        # Convert logits to probabilities
+        probs = torch.softmax(scores, dim=-1)
+        # Get the probability of the top token for each sequence in the batch
+        top_probs, _ = probs.max(dim=-1, keepdim=True)
+        # Calculate the actual min_p threshold by scaling min_p with the top token's probability
+        scaled_min_p = self.min_p * top_probs
+        # Create a mask for tokens that have a probability less than the scaled min_p
+        tokens_to_remove = probs < scaled_min_p
+
+        sorted_indices = torch.argsort(scores, descending=True, dim=-1)
+        sorted_indices_to_remove = torch.gather(tokens_to_remove, dim=-1, index=sorted_indices)
+
+        if self.min_tokens_to_keep > 1:
+            # Keep at least min_tokens_to_keep
+            sorted_indices_to_remove[..., : self.min_tokens_to_keep] = False
+
+        indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
+        scores = scores.masked_fill(indices_to_remove, self.filter_value)
+        return scores

 class TailFreeLogitsWarper(LogitsWarper):
    def __init__(self, tfs: float, filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1):
@ -190,6 +218,8 @@ def get_logits_warper_patch(self, generation_config):
            warpers_to_add.append(TailFreeLogitsWarper(tfs=generation_config.tfs, min_tokens_to_keep=min_tokens_to_keep))
        if generation_config.top_a is not None and 0.0 <= generation_config.top_a <= 1.0:
            warpers_to_add.append(TopALogitsWarper(top_a=generation_config.top_a, min_tokens_to_keep=min_tokens_to_keep))
+        if generation_config.min_p is not None and 0.0 <= generation_config.min_p <= 1.0:
+            warpers_to_add.append(MinPLogitsWarper(min_p=generation_config.min_p, min_tokens_to_keep=min_tokens_to_keep))

    if warpers and isinstance(warpers[-1], LogitNormalization):
        warpers = warpers[:-1] + warpers_to_add + [warpers[-1]]
@ -223,6 +253,7 @@ def get_logits_processor_patch(self, **kwargs):
 def generation_config_init_patch(self, **kwargs):
    self.__init___old(**kwargs)
    self.tfs = kwargs.pop("tfs", 1.0)
+    self.min_p = kwargs.pop("min_p", 0.0)
    self.top_a = kwargs.pop("top_a", 0.0)
    self.mirostat_mode = kwargs.pop("mirostat_mode", 0)
    self.mirostat_eta = kwargs.pop("mirostat_eta", 0.1)