Add RoPE scaling support for transformers (including dynamic NTK)

https://github.com/huggingface/transformers/pull/24653
oobabooga 2023-08-08 21:24:28 -07:00
parent f4caaf337a
commit d8fb506aff
5 changed files with 16 additions and 9 deletions


@@ -164,7 +164,7 @@ parser.add_argument('--rwkv-cuda-on', action='store_true', help='RWKV: Compile t
 # RoPE
 parser.add_argument('--compress_pos_emb', type=int, default=1, help="Positional embeddings compression factor. Should typically be set to max_seq_len / 2048.")
-parser.add_argument('--alpha_value', type=int, default=1, help="Positional embeddings alpha factor for NTK RoPE scaling. Scaling is not identical to embedding compression. Use either this or compress_pos_emb, not both.")
+parser.add_argument('--alpha_value', type=int, default=1, help="Positional embeddings alpha factor for NTK RoPE scaling. Use either this or compress_pos_emb, not both.")
 # Gradio
 parser.add_argument('--listen', action='store_true', help='Make the web UI reachable from your local network.')
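
For context, a minimal sketch of how these two mutually exclusive flags could be translated into the rope_scaling argument that the linked transformers PR introduces. The helper function and the model id below are hypothetical illustrations, not this repository's loader code; only the rope_scaling dict format ({"type": "linear" | "dynamic", "factor": ...}) comes from the PR.

# Sketch only: maps the two CLI flags onto transformers' rope_scaling.
# build_rope_scaling and the model id are hypothetical, not repo code.
from transformers import AutoModelForCausalLM

def build_rope_scaling(compress_pos_emb: int, alpha_value: int):
    """Translate the two mutually exclusive flags into a rope_scaling dict."""
    if compress_pos_emb > 1:
        # Linear scaling: position indices are divided by the factor.
        return {"type": "linear", "factor": float(compress_pos_emb)}
    if alpha_value > 1:
        # Dynamic NTK: the rotary base is rescaled as the context grows.
        return {"type": "dynamic", "factor": float(alpha_value)}
    return None  # both defaults of 1 mean no scaling

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",  # placeholder model id
    rope_scaling=build_rope_scaling(compress_pos_emb=2, alpha_value=1),
)

Passing rope_scaling=None leaves the model's default rotary embeddings untouched, which is why both flags default to 1 and the help text warns against setting both at once.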