Add dynatemp_low, dynatemp_high, dynatemp_exponent parameters (#5209)
This commit is contained in:
parent
dc1df22a2b
commit
29c2693ea0
9 changed files with 51 additions and 27 deletions
|
@ -156,7 +156,9 @@ def transformers_samplers():
|
|||
'temperature',
|
||||
'temperature_last',
|
||||
'dynamic_temperature',
|
||||
'dynamic_temperature_low',
|
||||
'dynatemp_low',
|
||||
'dynatemp_high',
|
||||
'dynatemp_exponent',
|
||||
'top_p',
|
||||
'min_p',
|
||||
'top_k',
|
||||
|
@ -223,7 +225,9 @@ loaders_samplers = {
|
|||
'temperature',
|
||||
'temperature_last',
|
||||
'dynamic_temperature',
|
||||
'dynamic_temperature_low',
|
||||
'dynatemp_low',
|
||||
'dynatemp_high',
|
||||
'dynatemp_exponent',
|
||||
'top_p',
|
||||
'min_p',
|
||||
'top_k',
|
||||
|
@ -277,7 +281,9 @@ loaders_samplers = {
|
|||
'temperature',
|
||||
'temperature_last',
|
||||
'dynamic_temperature',
|
||||
'dynamic_temperature_low',
|
||||
'dynatemp_low',
|
||||
'dynatemp_high',
|
||||
'dynatemp_exponent',
|
||||
'top_p',
|
||||
'min_p',
|
||||
'top_k',
|
||||
|
|
|
@ -6,6 +6,7 @@ import yaml
|
|||
|
||||
from modules import shared
|
||||
from modules.loaders import loaders_samplers
|
||||
from modules.logging_colors import logger
|
||||
|
||||
|
||||
def default_preset():
|
||||
|
@ -13,7 +14,9 @@ def default_preset():
|
|||
'temperature': 1,
|
||||
'temperature_last': False,
|
||||
'dynamic_temperature': False,
|
||||
'dynamic_temperature_low': 0.1,
|
||||
'dynatemp_low': 1,
|
||||
'dynatemp_high': 1,
|
||||
'dynatemp_exponent': 1,
|
||||
'top_p': 1,
|
||||
'min_p': 0,
|
||||
'top_k': 0,
|
||||
|
@ -48,11 +51,15 @@ def presets_params():
|
|||
def load_preset(name):
|
||||
generate_params = default_preset()
|
||||
if name not in ['None', None, '']:
|
||||
with open(Path(f'presets/{name}.yaml'), 'r') as infile:
|
||||
preset = yaml.safe_load(infile)
|
||||
path = Path(f'presets/{name}.yaml')
|
||||
if path.exists():
|
||||
with open(path, 'r') as infile:
|
||||
preset = yaml.safe_load(infile)
|
||||
|
||||
for k in preset:
|
||||
generate_params[k] = preset[k]
|
||||
for k in preset:
|
||||
generate_params[k] = preset[k]
|
||||
else:
|
||||
logger.error(f"The preset \"{name}\" does not exist under \"{path}\". Using the default parameters.")
|
||||
|
||||
return generate_params
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@ global_scores = None
|
|||
|
||||
|
||||
class TemperatureLogitsWarperWithDynatemp(LogitsWarper):
|
||||
def __init__(self, temperature: float, dynamic_temperature: bool, dynamic_temperature_low: float):
|
||||
def __init__(self, temperature: float, dynamic_temperature: bool, dynatemp_low: float, dynatemp_high: float, dynatemp_exponent: float):
|
||||
if not isinstance(temperature, float) or not (temperature > 0):
|
||||
except_msg = (
|
||||
f"`temperature` (={temperature}) has to be a strictly positive float, otherwise your next token "
|
||||
|
@ -29,7 +29,9 @@ class TemperatureLogitsWarperWithDynatemp(LogitsWarper):
|
|||
|
||||
self.temperature = temperature
|
||||
self.dynamic_temperature = dynamic_temperature
|
||||
self.dynamic_temperature_low = dynamic_temperature_low
|
||||
self.dynatemp_low = dynatemp_low
|
||||
self.dynatemp_high = dynatemp_high
|
||||
self.dynatemp_exponent = dynatemp_exponent
|
||||
|
||||
def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
|
||||
|
||||
|
@ -40,9 +42,9 @@ class TemperatureLogitsWarperWithDynatemp(LogitsWarper):
|
|||
|
||||
# Dynamic temperature
|
||||
else:
|
||||
min_temp = self.dynamic_temperature_low
|
||||
max_temp = self.temperature
|
||||
exponent_val = 1.0
|
||||
min_temp = self.dynatemp_low
|
||||
max_temp = self.dynatemp_high
|
||||
exponent_val = self.dynatemp_exponent
|
||||
|
||||
# Convert logits to probabilities
|
||||
probs = torch.softmax(scores, dim=-1)
|
||||
|
@ -82,7 +84,7 @@ class TemperatureLogitsWarperWithDynatemp(LogitsWarper):
|
|||
|
||||
# max_prob_token_id = torch.argmax(scores, dim=-1) # Get the token ID with the highest probability
|
||||
# max_prob_token = shared.tokenizer.convert_ids_to_tokens(int(max_prob_token_id)) # Convert ID to token
|
||||
# print("--- T=", float(dyn_temp), "token=", max_prob_token, "min=", min_temp, "max=", max_temp)
|
||||
# print("--- T=", float(dyn_temp), "token=", max_prob_token, "min=", min_temp, "max=", max_temp, "exponent=", exponent_val)
|
||||
|
||||
return scores
|
||||
|
||||
|
@ -292,7 +294,13 @@ def get_logits_warper_patch(self, generation_config):
|
|||
warpers = self._get_logits_warper_old(generation_config)
|
||||
for i in range(len(warpers)):
|
||||
if warpers[i].__class__.__name__ == 'TemperatureLogitsWarper':
|
||||
warpers[i] = TemperatureLogitsWarperWithDynatemp(temperature, generation_config.dynamic_temperature, generation_config.dynamic_temperature_low)
|
||||
warpers[i] = TemperatureLogitsWarperWithDynatemp(
|
||||
temperature,
|
||||
generation_config.dynamic_temperature,
|
||||
generation_config.dynatemp_low,
|
||||
generation_config.dynatemp_high,
|
||||
generation_config.dynatemp_exponent
|
||||
)
|
||||
|
||||
warpers_to_add = LogitsProcessorList()
|
||||
min_tokens_to_keep = 2 if generation_config.num_beams > 1 else 1
|
||||
|
@ -361,7 +369,9 @@ def generation_config_init_patch(self, **kwargs):
|
|||
self.__init___old(**kwargs)
|
||||
self.min_p = kwargs.pop("min_p", 0.0)
|
||||
self.dynamic_temperature = kwargs.pop("dynamic_temperature", False)
|
||||
self.dynamic_temperature_low = kwargs.pop("dynamic_temperature_low", 0.1)
|
||||
self.dynatemp_low = kwargs.pop("dynatemp_low", 1)
|
||||
self.dynatemp_high = kwargs.pop("dynatemp_high", 1)
|
||||
self.dynatemp_exponent = kwargs.pop("dynatemp_exponent", 1)
|
||||
self.tfs = kwargs.pop("tfs", 1.0)
|
||||
self.top_a = kwargs.pop("top_a", 0.0)
|
||||
self.mirostat_mode = kwargs.pop("mirostat_mode", 0)
|
||||
|
|
|
@ -285,7 +285,7 @@ def get_reply_from_output_ids(output_ids, state, starting_from=0):
|
|||
|
||||
def generate_reply_HF(question, original_question, seed, state, stopping_strings=None, is_chat=False):
|
||||
generate_params = {}
|
||||
for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynamic_temperature_low', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'min_length', 'num_beams', 'length_penalty', 'early_stopping']:
|
||||
for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'min_length', 'num_beams', 'length_penalty', 'early_stopping']:
|
||||
generate_params[k] = state[k]
|
||||
|
||||
if state['negative_prompt'] != '':
|
||||
|
|
|
@ -116,7 +116,9 @@ def list_interface_input_elements():
|
|||
'temperature',
|
||||
'temperature_last',
|
||||
'dynamic_temperature',
|
||||
'dynamic_temperature_low',
|
||||
'dynatemp_low',
|
||||
'dynatemp_high',
|
||||
'dynatemp_exponent',
|
||||
'top_p',
|
||||
'min_p',
|
||||
'top_k',
|
||||
|
|
|
@ -49,8 +49,10 @@ def create_ui(default_preset):
|
|||
shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=generate_params['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.')
|
||||
shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=generate_params['mirostat_tau'], label='mirostat_tau')
|
||||
shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=generate_params['mirostat_eta'], label='mirostat_eta')
|
||||
shared.gradio['dynamic_temperature_low'] = gr.Slider(0.01, 5, value=generate_params['dynamic_temperature_low'], step=0.01, label='dynamic_temperature_low', info='Only used when dynamic_temperature is checked.')
|
||||
shared.gradio['dynamic_temperature'] = gr.Checkbox(value=generate_params['dynamic_temperature'], label='dynamic_temperature')
|
||||
shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_low'], step=0.01, label='dynatemp_low', visible=generate_params['dynamic_temperature'])
|
||||
shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_high'], step=0.01, label='dynatemp_high', visible=generate_params['dynamic_temperature'])
|
||||
shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_exponent'], step=0.01, label='dynatemp_exponent', visible=generate_params['dynamic_temperature'])
|
||||
shared.gradio['temperature_last'] = gr.Checkbox(value=generate_params['temperature_last'], label='temperature_last', info='Makes temperature the last sampler instead of the first.')
|
||||
shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample')
|
||||
shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)')
|
||||
|
@ -97,6 +99,7 @@ def create_event_handlers():
|
|||
shared.gradio['preset_menu'].change(presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params()))
|
||||
shared.gradio['random_preset'].click(presets.random_preset, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params()))
|
||||
shared.gradio['grammar_file'].change(load_grammar, gradio('grammar_file'), gradio('grammar_string'))
|
||||
shared.gradio['dynamic_temperature'].change(lambda x: [gr.update(visible=x)] * 3, gradio('dynamic_temperature'), gradio('dynatemp_low', 'dynatemp_high', 'dynatemp_exponent'))
|
||||
|
||||
|
||||
def get_truncation_length():
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue