diff --git a/README.md b/README.md
index e993e33..7968a56 100644
--- a/README.md
+++ b/README.md
@@ -139,9 +139,9 @@ Optionally, you can use the following command-line flags:
| `--load-in-8bit` | Load the model with 8-bit precision.|
| `--auto-devices` | Automatically split the model across the available GPU(s) and CPU.|
| `--disk` | If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk. |
-| `--disk-cache-dir DISK_CACHE_DIR` | Directory which you want the disk cache to load to. |
+| `--disk-cache-dir DISK_CACHE_DIR` | Directory to save the disk cache to. Defaults to `cache/`. |
| `--gpu-memory GPU_MEMORY` | Maximum GPU memory in GiB to allocate. This is useful if you get out of memory errors while trying to generate text. Must be an integer number. |
-| `--cpu-memory CPU_MEMORY` | Maximum CPU memory in GiB to allocate for offloaded weights. Must be an integer number. |
+| `--cpu-memory CPU_MEMORY` | Maximum CPU memory in GiB to allocate for offloaded weights. Must be an integer number. Defaults to 99 GiB.|
| `--no-stream` | Don't stream the text output in real time. This slightly improves the text generation performance.|
| `--settings SETTINGS_FILE` | Load the default interface settings from this json file. See `settings-template.json` for an example.|
| `--listen` | Make the web UI reachable from your local network.|
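+
+For example, to split a large model across one GPU (10 GiB), the CPU (30 GiB), and the disk cache (illustrative values):
+
+```
+python server.py --auto-devices --gpu-memory 10 --cpu-memory 30 --disk
+```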
diff --git a/html_generator.py b/html_generator.py
index 9b0ed3b..93540ff 100644
--- a/html_generator.py
+++ b/html_generator.py
@@ -6,6 +6,7 @@ This is a library for formatting GPT-4chan and chat outputs as nice HTML.
import re
from pathlib import Path
+import copy
def generate_basic_html(s):
    s = '\n'.join([f'<p>{line}</p>' for line in s.split('\n')])
@@ -160,7 +161,7 @@ def generate_4chan_html(f):
    return output
-def generate_chat_html(history, name1, name2, character):
+def generate_chat_html(_history, name1, name2, character):
css = """
.chat {
margin-left: auto;
@@ -233,6 +234,13 @@ def generate_chat_html(history, name1, name2, character):
            img = f'<img src="file/{i}">'
            break
+    # work on a copy and hide everything before the first visible message, so
+    # example dialogue preceding the <|BEGIN-VISIBLE-CHAT|> marker is not rendered
+    history = copy.deepcopy(_history)
+    for i in range(len(history)):
+        if '<|BEGIN-VISIBLE-CHAT|>' in history[i][0]:
+            history[i][0] = history[i][0].replace('<|BEGIN-VISIBLE-CHAT|>', '')
+            history = history[i:]
+            break
+
    for i,_row in enumerate(history[::-1]):
        row = _row.copy()
        row[0] = re.sub(r"[\\]*\*", r"*", row[0])
diff --git a/server.py b/server.py
index b850ba2..69234ed 100644
--- a/server.py
+++ b/server.py
@@ -26,9 +26,9 @@ parser.add_argument('--cpu', action='store_true', help='Use the CPU to generate
parser.add_argument('--load-in-8bit', action='store_true', help='Load the model with 8-bit precision.')
parser.add_argument('--auto-devices', action='store_true', help='Automatically split the model across the available GPU(s) and CPU.')
parser.add_argument('--disk', action='store_true', help='If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk.')
-parser.add_argument('--disk-cache-dir', type=str, help='Directory which you want the disk cache to load to.')
+parser.add_argument('--disk-cache-dir', type=str, help='Directory to save the disk cache to. Defaults to "cache/".')
parser.add_argument('--gpu-memory', type=int, help='Maximum GPU memory in GiB to allocate. This is useful if you get out of memory errors while trying to generate text. Must be an integer number.')
-parser.add_argument('--cpu-memory', type=int, help='Maximum CPU memory in GiB to allocate for offloaded weights. Must be an integer number.')
+parser.add_argument('--cpu-memory', type=int, help='Maximum CPU memory in GiB to allocate for offloaded weights. Must be an integer number. Defaults to 99 GiB.')
parser.add_argument('--no-stream', action='store_true', help='Don\'t stream the text output in real time. This slightly improves the text generation performance.')
parser.add_argument('--settings', type=str, help='Load the default interface settings from this json file. See settings-template.json for an example.')
parser.add_argument('--listen', action='store_true', help='Make the web UI reachable from your local network.')
@@ -262,6 +262,7 @@ if args.chat or args.cai_chat:
            rows.pop(1)
        question = ''.join(rows)
+        # strip the visibility marker so it never appears in the model prompt
+        question = question.replace('<|BEGIN-VISIBLE-CHAT|>', '')
        return question

    def chatbot_wrapper(text, tokens, inference_settings, selected_model, name1, name2, context, check, history_size):
@@ -336,6 +337,26 @@ if args.chat or args.cai_chat:
        global history
        history = json.loads(file.decode('utf-8'))['data']
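+
+    # The helper below parses a character card's example_dialogue string into
+    # [user_message, bot_message] pairs so the examples can be replayed as real
+    # chat history. For instance, with name1='You' and name2='Bot':
+    #     "<START>\nAnon: Hi\nBot: Hello!"  ->  [['Hi', 'Hello!']]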
+    def tokenize_example_dialogue(dialogue, name1, name2):
+        # drop the <START> separators and rewrite "Anon:" to "You:", the default user name
+        dialogue = re.sub('<START>', '', dialogue)
+        dialogue = re.sub('(\n|^)[Aa]non:', '\\1You:', dialogue)
+
+        idx = [m.start() for m in re.finditer(f"(^|\n)({name1}|{name2}):", dialogue)]
+        if len(idx) == 0:
+            return []
+        messages = []
+        for i in range(len(idx)-1):
+            messages.append(dialogue[idx[i]:idx[i+1]].strip())
+        messages.append(dialogue[idx[-1]:].strip())  # keep the final message too
+        history = []
+        entry = ['', '']
+        for i in messages:
+            if i.startswith(f'{name1}:'):
+                entry[0] = i[len(f'{name1}:'):].strip()
+            elif i.startswith(f'{name2}:'):
+                entry[1] = i[len(f'{name2}:'):].strip()
+                if not (len(entry[0]) == 0 and len(entry[1]) == 0):
+                    history.append(entry)
+                entry = ['', '']
+        return history
+
    def load_character(_character, name1, name2):
        global history, character
        context = ""
@@ -351,9 +372,11 @@ if args.chat or args.cai_chat:
context += f"Scenario: {data['world_scenario']}\n"
context = f"{context.strip()}\n\n"
if 'example_dialogue' in data and data['example_dialogue'] != '':
- context += f"{data['example_dialogue'].strip()}\n"
- if 'char_greeting' in data:
- history = [['', data['char_greeting']]]
+ history = tokenize_example_dialogue(data['example_dialogue'], name1, name2)
+ if 'char_greeting' in data and len(data['char_greeting'].strip()) > 0:
+ history += [['<|BEGIN-VISIBLE-CHAT|>', data['char_greeting']]]
+ else:
+ history += [['<|BEGIN-VISIBLE-CHAT|>', "Hello there!"]]
else:
character = None
context = settings['context_pygmalion']