Add RoPE scaling support for transformers (including dynamic NTK)
https://github.com/huggingface/transformers/pull/24653
parent f4caaf337a · commit d8fb506aff
5 changed files with 16 additions and 9 deletions
@@ -164,7 +164,7 @@ parser.add_argument('--rwkv-cuda-on', action='store_true', help='RWKV: Compile t
 
 # RoPE
 parser.add_argument('--compress_pos_emb', type=int, default=1, help="Positional embeddings compression factor. Should typically be set to max_seq_len / 2048.")
-parser.add_argument('--alpha_value', type=int, default=1, help="Positional embeddings alpha factor for NTK RoPE scaling. Scaling is not identical to embedding compression. Use either this or compress_pos_emb, not both.")
+parser.add_argument('--alpha_value', type=int, default=1, help="Positional embeddings alpha factor for NTK RoPE scaling. Use either this or compress_pos_emb, not both.")
 
 # Gradio
 parser.add_argument('--listen', action='store_true', help='Make the web UI reachable from your local network.')
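For context, the transformers PR referenced above exposes RoPE scaling through a rope_scaling dict on the model config, with "type" set to "linear" or "dynamic" and a "factor" greater than 1. Below is a minimal sketch of how the two flags in this diff could be translated into that dict; rope_scaling_from_flags and the checkpoint name are illustrative assumptions, not this repository's actual wiring.

# Sketch: map the webui flags onto the transformers rope_scaling config.
# The helper name and model checkpoint are assumptions for illustration.
from transformers import AutoModelForCausalLM

def rope_scaling_from_flags(compress_pos_emb=1, alpha_value=1):
    # Mirror the mutual exclusivity stated in the help text above.
    if compress_pos_emb > 1 and alpha_value > 1:
        raise ValueError("Use either compress_pos_emb or alpha_value, not both.")
    if compress_pos_emb > 1:
        # Linear position interpolation: positions are divided by `factor`.
        return {"type": "linear", "factor": float(compress_pos_emb)}
    if alpha_value > 1:
        # Dynamic NTK: the RoPE base is rescaled as the sequence grows
        # past the trained context length.
        return {"type": "dynamic", "factor": float(alpha_value)}
    return None  # No scaling requested; keep the model's native RoPE.

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",  # assumption: any RoPE-based model works
    rope_scaling=rope_scaling_from_flags(alpha_value=2),
)

Linear scaling compresses positions by a constant factor (hence the max_seq_len / 2048 guidance for --compress_pos_emb), while dynamic NTK instead grows the rotary base with sequence length, which is why the help text says not to combine the two.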