README updates and improvements (#3198)

This commit is contained in:
Eve 2023-07-25 17:58:13 -04:00 committed by GitHub
parent b09e4f10fd
commit f653546484
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 38 additions and 37 deletions

View file

@ -153,8 +153,6 @@ parser.add_argument('--desc_act', action='store_true', help='For models that don
# ExLlama
# Command-line options registered on the shared `parser` for the ExLlama loader.
# NOTE(review): --compress_pos_emb and --alpha_value below are registered a second
# time under the "# RoPE" heading later in this listing. With argparse's default
# conflict handling a duplicate option string raises an error, so presumably only
# one of the two registrations exists in the real file (this looks like the
# pre-move copy from a diff hunk) -- confirm before relying on this section.
parser.add_argument('--gpu-split', type=str, help="Comma-separated list of VRAM (in GB) to use per GPU device for model layers, e.g. 20,7,7")
parser.add_argument('--max_seq_len', type=int, default=2048, help="Maximum sequence length.")
# Integer factor; per its help text it is normally max_seq_len / 2048.
parser.add_argument('--compress_pos_emb', type=int, default=1, help="Positional embeddings compression factor. Should typically be set to max_seq_len / 2048.")
# Parsed as an int here; the help text asks that it not be combined with --compress_pos_emb.
parser.add_argument('--alpha_value', type=int, default=1, help="Positional embeddings alpha factor for NTK RoPE scaling. Same as above. Use either this or compress_pos_emb, not both.")
# FlexGen
# Boolean flag whose help text marks it as deprecated; it is still accepted by the parser.
parser.add_argument('--flexgen', action='store_true', help='DEPRECATED')
@ -171,6 +169,10 @@ parser.add_argument('--local_rank', type=int, default=0, help='DeepSpeed: Option
# RWKV loader options: a free-form strategy string (unset by default) and an
# opt-in boolean switch for compiling the CUDA kernel.
parser.add_argument(
    '--rwkv-strategy',
    default=None,
    type=str,
    help='RWKV: The strategy to use while loading the model. Examples: "cpu fp32", "cuda fp16", "cuda fp16i8".',
)
parser.add_argument(
    '--rwkv-cuda-on',
    action='store_true',
    help='RWKV: Compile the CUDA kernel for better performance.',
)
# RoPE
# Options for extending context via rotary position embeddings. Per the help
# text, set either --compress_pos_emb or --alpha_value, not both.

# Integer compression factor; the help text ties it to max_seq_len / 2048.
parser.add_argument('--compress_pos_emb', type=int, default=1, help="Positional embeddings compression factor. Should typically be set to max_seq_len / 2048.")

# The alpha factor is a continuous scaling value, so it is parsed as a float:
# with type=int argparse rejects inputs such as "2.5" outright. Integer inputs
# ("2") are still accepted. The default stays the literal 1 -- argparse does not
# run `type` on non-string defaults -- so behaviour is unchanged when the flag
# is omitted.
parser.add_argument('--alpha_value', type=float, default=1, help="Positional embeddings alpha factor for NTK RoPE scaling. Scaling is not identical to embedding compression. Use either this or compress_pos_emb, not both.")
# Gradio
# Web UI network options: a boolean switch for LAN reachability and an
# optional hostname string for the server.
parser.add_argument(
    '--listen',
    action='store_true',
    help='Make the web UI reachable from your local network.',
)
parser.add_argument(
    '--listen-host',
    type=str,
    help='The hostname that the server will use.',
)