Implement CFG for ExLlama_HF (#3666)

2023-08-24 16:27:36 -03:00 · 2023-08-24 16:27:36 -03:00 · d6934bc7bc
commit d6934bc7bc
parent 2b675533f7
8 changed files with 122 additions and 26 deletions
--- a/modules/exllama_hf.py
+++ b/modules/exllama_hf.py
@ -29,10 +29,16 @@ class ExllamaHF(PreTrainedModel):
        super().__init__(PretrainedConfig())
        self.ex_config = config
        self.ex_model = ExLlama(self.ex_config)
-        self.ex_cache = ExLlamaCache(self.ex_model)
        self.generation_config = GenerationConfig()
        self.lora = None

+        self.ex_cache = ExLlamaCache(self.ex_model)
+        self.past_seq = None
+
+        if shared.args.cfg_cache:
+            self.ex_cache_negative = ExLlamaCache(self.ex_model)
+            self.past_seq_negative = None
+
    def _validate_model_class(self):
        pass

@ -47,25 +53,46 @@ class ExllamaHF(PreTrainedModel):
        return torch.device(0)

    def __call__(self, *args, **kwargs):
-        input_ids = args[0] if len(args) > 0 else kwargs['input_ids']
        use_cache = kwargs.get('use_cache', True)
        labels = kwargs.get('labels', None)
-        cache = kwargs.get('past_key_values', None)
-        seq = input_ids[0].tolist()
+        past_key_values = kwargs.get('past_key_values', None)

-        if labels is None:
-            if cache is None:
-                self.ex_cache.current_seq_len = 0
-                cache = self.ex_cache
-                self.ex_model.forward(torch.tensor([seq[:-1]], dtype=torch.long), cache, preprocess_only=True, lora=self.lora)
+        if len(args) > 0:
+            if not shared.args.cfg_cache:
+                logger.error("Please enable the cfg-cache option to use CFG with ExLlama_HF.")
+                return

-            logits = self.ex_model.forward(torch.tensor([seq[-1:]], dtype=torch.long), cache, lora=self.lora).to(input_ids.device)
+            input_ids = args[0]
+            is_negative = True
+            past_seq = self.past_seq_negative
+            ex_cache = self.ex_cache_negative
        else:
-            if cache is None:
-                self.ex_cache.current_seq_len = 0
-                cache = self.ex_cache
+            input_ids = kwargs['input_ids']
+            is_negative = False
+            past_seq = self.past_seq
+            ex_cache = self.ex_cache

-            logits = self.ex_model.forward(torch.tensor([seq], dtype=torch.long), cache, last_id_only=False, lora=self.lora)
+        seq = input_ids[0].tolist()
+        if is_negative and past_key_values is not None:
+            seq = past_key_values + seq
+
+        seq_tensor = torch.tensor(seq)
+
+        # Make the forward call
+        if labels is None:
+            if past_seq is None or not torch.equal(past_seq, seq_tensor[:-1]):
+                ex_cache.current_seq_len = 0
+                self.ex_model.forward(torch.tensor([seq[:-1]], dtype=torch.long), ex_cache, preprocess_only=True, lora=self.lora)
+
+            logits = self.ex_model.forward(torch.tensor([seq[-1:]], dtype=torch.long), ex_cache, lora=self.lora).to(input_ids.device)
+        else:
+            ex_cache.current_seq_len = 0
+            logits = self.ex_model.forward(torch.tensor([seq], dtype=torch.long), ex_cache, last_id_only=False, lora=self.lora)
+
+        if is_negative:
+            self.past_seq_negative = seq_tensor
+        else:
+            self.past_seq = seq_tensor

        loss = None
        if labels is not None:
@ -80,7 +107,7 @@ class ExllamaHF(PreTrainedModel):
            shift_labels = shift_labels.to(shift_logits.device)
            loss = loss_fct(shift_logits, shift_labels)

-        return CausalLMOutputWithPast(logits=logits, past_key_values=cache if use_cache else None, loss=loss)
+        return CausalLMOutputWithPast(logits=logits, past_key_values=seq if use_cache else None, loss=loss)

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], *model_args, **kwargs):