Implement CFG for ExLlama_HF (#3666)

This commit is contained in:
oobabooga 2023-08-24 16:27:36 -03:00 committed by GitHub
parent 2b675533f7
commit d6934bc7bc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 122 additions and 26 deletions

View file

@@ -147,6 +147,7 @@ parser.add_argument('--disable_exllama', action='store_true', help='Disable ExLl
# ExLlama
parser.add_argument('--gpu-split', type=str, help="Comma-separated list of VRAM (in GB) to use per GPU device for model layers, e.g. 20,7,7")
parser.add_argument('--max_seq_len', type=int, default=2048, help="Maximum sequence length.")
parser.add_argument('--cfg-cache', action='store_true', help="ExLlama_HF: Create an additional cache for CFG negative prompts. Necessary to use CFG with that loader, but not necessary for CFG with base ExLlama.")
# DeepSpeed
parser.add_argument('--deepspeed', action='store_true', help='Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.')