diff --git a/llm_server/routes/v1/generate_stream.py b/llm_server/routes/v1/generate_stream.py
index 6b96d99..43511db 100644
--- a/llm_server/routes/v1/generate_stream.py
+++ b/llm_server/routes/v1/generate_stream.py
@@ -63,29 +63,14 @@ def stream(ws):
             except:
                 response_status_code = 0
 
-            # details = {}
-
-            # Initialize an empty byte string to store parts of the response
             partial_response = b''
 
-            # Process each part of the response as it's received
             for chunk in response.iter_content(chunk_size=1):
-                # Add the chunk to the partial response
                 partial_response += chunk
-
-                # If the partial response ends with a null character, parse it as JSON
                 if partial_response.endswith(b'\x00'):
-                    # Remove the null character and decode the byte string to a string
-                    json_str = partial_response[:-1].decode()
-
-                    # Parse the string as JSON
+                    json_str = partial_response[:-1].decode()  # Remove the null character and decode the byte string to a string
                     json_obj = json.loads(json_str)
-
-                    # Strip the input prompt from the response
-                    if generated_text:
-                        new = json_obj['text'][0].split(generated_text)[1]
-                    else:
-                        new = json_obj['text'][0].split(input_prompt)[1]
+                    new = json_obj['text'][0].split(input_prompt + generated_text)[1]
 
                     ws.send(json.dumps({
                         'event': 'text_stream',
@@ -95,9 +80,7 @@ def stream(ws):
                     message_num += 1
 
                     generated_text = generated_text + new
-
-                    # Reset the partial response
-                    partial_response = b''
+                    partial_response = b''  # Reset the partial response
 
                 # If there is no more data, break the loop
                 if not chunk:
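
For reference, below is a minimal, self-contained sketch of the parsing loop this diff arrives at. The endpoint URL, the prompt value, and the response shape (a 'text' field holding a one-element list) are assumptions for illustration; the real backend address and variable setup live elsewhere in generate_stream.py.

# A minimal sketch of the null-delimited streaming parse, under the
# assumptions stated above; not the project's exact code.
import json

import requests

input_prompt = 'Once upon a time'  # hypothetical prompt
generated_text = ''

response = requests.post('http://localhost:7000/api/v1/generate',  # hypothetical backend
                         json={'prompt': input_prompt}, stream=True)

partial_response = b''
for chunk in response.iter_content(chunk_size=1):
    partial_response += chunk
    # Each message from the backend is a null-terminated JSON blob.
    if partial_response.endswith(b'\x00'):
        json_obj = json.loads(partial_response[:-1].decode())
        # The backend returns the prompt plus everything generated so far, so
        # splitting on input_prompt + generated_text leaves only the new text.
        # This single expression covers both the first message
        # (generated_text == '') and every later one, which is what lets the
        # diff collapse the old if/else on generated_text.
        new = json_obj['text'][0].split(input_prompt + generated_text)[1]
        generated_text += new
        partial_response = b''  # Reset for the next message
    if not chunk:
        break  # No more data

print(generated_text)

One caveat on the split-based approach: split(...)[1] assumes the prompt-plus-generated prefix occurs exactly once in the returned text. If the model happens to repeat that prefix verbatim, the split produces more than two pieces and [1] yields only the segment between the first two occurrences.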