Add Support for Static NTK RoPE scaling for exllama/exllama_hf (#2955)
This commit is contained in:
parent
1610d5ffb2
commit
10c8c197bf
7 changed files with 18 additions and 2 deletions
|
@@ -63,9 +63,11 @@ def list_model_elements():
|
|||
'llama_cpp_seed',
|
||||
'gpu_split',
|
||||
'max_seq_len',
|
||||
'compress_pos_emb'
|
||||
'compress_pos_emb',
|
||||
'alpha_value'
|
||||
]
|
||||
|
||||
|
||||
for i in range(torch.cuda.device_count()):
|
||||
elements.append(f'gpu_memory_{i}')
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue