Add Support for Static NTK RoPE scaling for exllama/exllama_hf (#2955)

This commit is contained in:
Panchovix 2023-07-04 00:13:16 -04:00 committed by GitHub
parent 1610d5ffb2
commit 10c8c197bf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 18 additions and 2 deletions

View file

@@ -150,6 +150,7 @@ parser.add_argument('--desc_act', action='store_true', help='For models that don
# ExLlama / ExLlama_HF loader options.
parser.add_argument('--gpu-split', type=str, help="Comma-separated list of VRAM (in GB) to use per GPU device for model layers, e.g. 20,7,7")
parser.add_argument('--max_seq_len', type=int, default=2048, help="Maximum sequence length.")
parser.add_argument('--compress_pos_emb', type=int, default=1, help="Positional embeddings compression factor. Should typically be set to max_seq_len / 2048.")
# NTK RoPE alpha is meaningfully fractional (e.g. 1.75), so accept floats.
# type=float is backward-compatible: integer command-line values like "2"
# still parse; the default is 1.0 (no scaling) to match the float type.
parser.add_argument('--alpha_value', type=float, default=1.0, help="Positional embeddings alpha factor for NTK RoPE scaling. Same as above. Use either this or compress_pos_emb, not both.")
# FlexGen
parser.add_argument('--flexgen', action='store_true', help='DEPRECATED')