Add RoPE scaling support for transformers (including dynamic NTK)

https://github.com/huggingface/transformers/pull/24653
oobabooga 2023-08-08 21:24:28 -07:00
parent f4caaf337a
commit d8fb506aff
5 changed files with 16 additions and 9 deletions


@@ -164,7 +164,7 @@ parser.add_argument('--rwkv-cuda-on', action='store_true', help='RWKV: Compile t
 # RoPE
 parser.add_argument('--compress_pos_emb', type=int, default=1, help="Positional embeddings compression factor. Should typically be set to max_seq_len / 2048.")
-parser.add_argument('--alpha_value', type=int, default=1, help="Positional embeddings alpha factor for NTK RoPE scaling. Scaling is not identical to embedding compression. Use either this or compress_pos_emb, not both.")
+parser.add_argument('--alpha_value', type=int, default=1, help="Positional embeddings alpha factor for NTK RoPE scaling. Use either this or compress_pos_emb, not both.")
 # Gradio
 parser.add_argument('--listen', action='store_true', help='Make the web UI reachable from your local network.')
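
For context, a minimal sketch of how these two mutually exclusive flags could be translated into the rope_scaling argument that the linked transformers PR introduces. The helper function and the model id below are hypothetical illustrations, not this repository's loader code; only the rope_scaling dict format ({"type": "linear" | "dynamic", "factor": ...}) comes from the PR.

# Sketch only: maps the two CLI flags onto transformers' rope_scaling.
# build_rope_scaling and the model id are hypothetical, not repo code.
from transformers import AutoModelForCausalLM

def build_rope_scaling(compress_pos_emb: int, alpha_value: int):
    """Translate the two mutually exclusive flags into a rope_scaling dict."""
    if compress_pos_emb > 1:
        # Linear scaling: position indices are divided by the factor.
        return {"type": "linear", "factor": float(compress_pos_emb)}
    if alpha_value > 1:
        # Dynamic NTK: the rotary base is rescaled as the context grows.
        return {"type": "dynamic", "factor": float(alpha_value)}
    return None  # both defaults of 1 mean no scaling

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",  # placeholder model id
    rope_scaling=build_rope_scaling(compress_pos_emb=2, alpha_value=1),
)

Passing rope_scaling=None leaves the model's default rotary embeddings untouched, which is why both flags default to 1 and the help text warns against setting both at once.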