Improve the UI tokenizer

2023-09-15 19:30:44 -07:00 · 2023-09-15 19:30:44 -07:00 · ef04138bc0
commit ef04138bc0
parent c3e4c9fdc2
4 changed files with 21 additions and 6 deletions
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -144,6 +144,17 @@ def get_encoded_length(prompt):
    return len(encode(prompt)[0])


+def get_token_ids(prompt):
+    """Return one 'id  -  text' line per token of the encoded prompt."""
+    tokens = encode(prompt)[0]
+    # Each id is decoded on its own so a row shows only that token's text.
+    rows = (
+        f"{str(int(token)).ljust(5)}  -  {shared.tokenizer.decode(token)}\n"
+        for token in tokens
+    )
+    return ''.join(rows)
+
+
 def get_max_prompt_length(state):
    return state['truncation_length'] - state['max_new_tokens']