Remove flexgen 2
This commit is contained in:
parent
75c2dd38cf
commit
77d2e9f060
4 changed files with 1 addition and 16 deletions
|
|
@@ -155,11 +155,6 @@ parser.add_argument('--desc_act', action='store_true', help='For models that don
|
|||
parser.add_argument('--gpu-split', type=str, help="Comma-separated list of VRAM (in GB) to use per GPU device for model layers, e.g. 20,7,7")
|
||||
parser.add_argument('--max_seq_len', type=int, default=2048, help="Maximum sequence length.")
|
||||
|
||||
# FlexGen
|
||||
parser.add_argument('--percent', type=int, nargs="+", default=[0, 100, 100, 0, 100, 0], help='FlexGen: allocation percentages. Must be 6 numbers separated by spaces (default: 0, 100, 100, 0, 100, 0).')
|
||||
parser.add_argument("--compress-weight", action="store_true", help="FlexGen: activate weight compression.")
|
||||
parser.add_argument("--pin-weight", type=str2bool, nargs="?", const=True, default=True, help="FlexGen: whether to pin weights (setting this to False reduces CPU memory by 20%%).")
|
||||
|
||||
# DeepSpeed
|
||||
parser.add_argument('--deepspeed', action='store_true', help='Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.')
|
||||
parser.add_argument('--nvme-offload-dir', type=str, help='DeepSpeed: Directory to use for ZeRO-3 NVME offloading.')
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue