108 changes: 68 additions & 40 deletions src/osprey/interfaces/cli/direct_conversation.py
@@ -680,6 +680,12 @@ async def _process_user_input(self, user_input: str) -> bool:
         # Create typed event handler for resume execution
         handler = CLIEventHandler(console=self.console, verbose=self.show_streaming_updates)
 
+        # Streaming display mode configuration
+        from osprey.utils.config import get_streaming_mode
+
+        respond_streaming = get_streaming_mode("cli", "respond")
+        codegen_streaming = get_streaming_mode("cli", "python_code_generator")
+
         # Track if we've streamed LLM response tokens
         streamed_response = False
         # Track code generation streaming
@@ -734,29 +740,37 @@ async def _process_user_input(self, user_input: str) -> bool:

                     # Route tokens by source node
                     if node_name == "python_code_generator":
-                        # Handle code generation streaming
-                        if not code_gen_active:
-                            # Add role prefix (like respond does)
-                            self.console.print(
-                                "\n[bold yellow]🤖 Assistant (Code Generator):[/bold yellow] ",
-                                end="",
-                            )
-                            code_gen_active = True
-
-                        # Stream token with dim color (shows it's thinking/intermediate)
-                        self.console.print(f"[dim]{message_chunk.content}[/dim]", end="")
-
-                        # Buffer for final panel (keep for potential future use)
-                        code_gen_buffer += message_chunk.content
+                        if codegen_streaming == "disabled":
+                            pass  # Skip code gen tokens entirely
+                        else:
+                            # Handle code generation streaming (show mode)
+                            if not code_gen_active:
+                                # Add role prefix (like respond does)
+                                self.console.print(
+                                    "\n[bold yellow]🤖 Assistant (Code Generator):[/bold yellow] ",
+                                    end="",
+                                )
+                                code_gen_active = True
+
+                            # Stream token with dim color (shows it's thinking/intermediate)
+                            self.console.print(
+                                f"[dim]{message_chunk.content}[/dim]", end=""
+                            )
+
+                            # Buffer for final panel (keep for potential future use)
+                            code_gen_buffer += message_chunk.content
                     else:
-                        # Handle response streaming (respond node)
-                        if not streamed_response:
-                            self.console.print(
-                                "\n[bold cyan]🤖 Assistant:[/bold cyan] ", end=""
-                            )
-                            handler.start_response_streaming()
-                            streamed_response = True
-                        print(message_chunk.content, end="", flush=True)
+                        if respond_streaming == "disabled":
+                            pass  # Skip — full response shown from state after completion
+                        else:
+                            # Handle response streaming (respond node)
+                            if not streamed_response:
+                                self.console.print(
+                                    "\n[bold cyan]🤖 Assistant:[/bold cyan] ", end=""
+                                )
+                                handler.start_response_streaming()
+                                streamed_response = True
+                            print(message_chunk.content, end="", flush=True)
 
                     # Track current node for next iteration
                     previous_node = node_name
@@ -882,6 +896,12 @@ async def _execute_result(self, input_data: Any):
         original_level = root_logger.level
         root_logger.setLevel(logging.WARNING)
 
+        # Streaming display mode configuration
+        from osprey.utils.config import get_streaming_mode
+
+        respond_streaming = get_streaming_mode("cli", "respond")
+        codegen_streaming = get_streaming_mode("cli", "python_code_generator")
+
         # Track if we've streamed LLM response tokens
         streamed_response = False
         # Track code generation streaming
@@ -942,31 +962,39 @@ async def _execute_result(self, input_data: Any):

                     # Route tokens by source node
                     if node_name == "python_code_generator":
-                        # Handle code generation streaming
-                        if not code_gen_active:
-                            # Add role prefix (like respond does)
-                            self.console.print(
-                                "\n[bold yellow]🤖 Assistant (Code Generator):[/bold yellow] ",
-                                end="",
-                            )
-                            code_gen_active = True
-
-                        # Stream token with dim color (shows it's thinking/intermediate)
-                        self.console.print(f"[dim]{message_chunk.content}[/dim]", end="")
-
-                        # Buffer for final panel (keep for potential future use)
-                        code_gen_buffer += message_chunk.content
+                        if codegen_streaming == "disabled":
+                            pass  # Skip code gen tokens entirely
+                        else:
+                            # Handle code generation streaming (show mode)
+                            if not code_gen_active:
+                                # Add role prefix (like respond does)
+                                self.console.print(
+                                    "\n[bold yellow]🤖 Assistant (Code Generator):[/bold yellow] ",
+                                    end="",
+                                )
+                                code_gen_active = True
+
+                            # Stream token with dim color (shows it's thinking/intermediate)
+                            self.console.print(
+                                f"[dim]{message_chunk.content}[/dim]", end=""
+                            )
+
+                            # Buffer for final panel (keep for potential future use)
+                            code_gen_buffer += message_chunk.content
                     else:
-                        # Handle response streaming (respond node)
-                        if not streamed_response:
-                            # Print header before first token
-                            self.console.print(
-                                "\n[bold cyan]🤖 Assistant:[/bold cyan] ", end=""
-                            )
-                            handler.start_response_streaming()
-                            streamed_response = True
-                        # Print token directly to console (no newline, immediate flush)
-                        print(message_chunk.content, end="", flush=True)
+                        if respond_streaming == "disabled":
+                            pass  # Skip — full response shown from state after completion
+                        else:
+                            # Handle response streaming (respond node)
+                            if not streamed_response:
+                                # Print header before first token
+                                self.console.print(
+                                    "\n[bold cyan]🤖 Assistant:[/bold cyan] ", end=""
+                                )
+                                handler.start_response_streaming()
+                                streamed_response = True
+                            # Print token directly to console (no newline, immediate flush)
+                            print(message_chunk.content, end="", flush=True)
 
                     # Track current node for next iteration
                     previous_node = node_name
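Note: `get_streaming_mode` is imported from `osprey.utils.config` at each call site above, but its implementation is not part of this diff. Below is a minimal sketch of the resolution logic it plausibly implements, judging from the call sites and the config template later in this PR; the optional `config` parameter and the `_DEFAULTS` table are assumptions for illustration, not the real API.

# Hypothetical sketch — the real implementation lives in osprey/utils/config.py
# and is not shown in this diff.
_DEFAULTS = {"respond": "show", "python_code_generator": "hide"}

def get_streaming_mode(interface: str, node: str, config: dict | None = None) -> str:
    """Resolve the streaming display mode ("show" | "hide" | "disabled")."""
    cfg = config or {}  # in Osprey this would be the parsed config.yml
    mode = (
        cfg.get(interface, {})
        .get("streaming", {})
        .get(node, _DEFAULTS.get(node, "show"))
    )
    # The CLI cannot collapse output, so "hide" degrades to "disabled"
    # (mirroring the comment in the config template).
    if interface == "cli" and mode == "hide":
        return "disabled"
    return mode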
17 changes: 16 additions & 1 deletion src/osprey/interfaces/tui/app.py
@@ -806,6 +806,12 @@ async def process_with_agent(self, user_input: str) -> None:
         # Start event consumer before streaming
         consumer_task = asyncio.create_task(self._consume_events(user_input, chat_display))
 
+        # Streaming display mode configuration
+        from osprey.utils.config import get_streaming_mode
+
+        respond_streaming = get_streaming_mode("tui", "respond")
+        codegen_streaming = get_streaming_mode("tui", "python_code_generator")
+
         # Track if we've streamed LLM response tokens (to avoid duplicate display)
         streamed_response = False
         streamed_code = False  # Track code generation streaming
@@ -872,6 +878,9 @@ async def process_with_agent(self, user_input: str) -> None:

                 # Route based on source node
                 if node_name == "python_code_generator":
+                    if codegen_streaming == "disabled":
+                        continue  # Skip code gen tokens entirely
+
                     # CODE GENERATION STREAMING - Route to chat flow
                     # Widget creation is now handled by CodeGenerationStartEvent
                     # This section only appends tokens to the current widget
@@ -885,7 +894,10 @@ async def process_with_agent(self, user_input: str) -> None:
                     python_block = chat_display.get_python_execution_block()
                     if python_block:
                         python_block.set_partial_output("Generating code...")
-                    await chat_display.start_code_generation_message(attempt=1)
+                    start_collapsed = codegen_streaming == "hide"
+                    await chat_display.start_code_generation_message(
+                        attempt=1, start_collapsed=start_collapsed
+                    )
                     streamed_code = True
                     _previous_code_attempt = 1
 
@@ -894,6 +906,9 @@ async def process_with_agent(self, user_input: str) -> None:
                         message_chunk.content
                     )
                 else:
+                    if respond_streaming == "disabled":
+                        continue  # Skip — full response shown from state
+
                     # Response streaming (respond node or unknown source)
                     # Start streaming message widget if not already started
                     if not streamed_response:
12 changes: 11 additions & 1 deletion src/osprey/interfaces/tui/event_handler.py
@@ -413,6 +413,13 @@ async def _handle_code_generation_start(self, attempt: int, is_retry: bool) -> N
             attempt: The attempt number (1-based)
             is_retry: Whether this is a retry attempt
         """
+        # Check streaming config — skip widget creation if disabled
+        from osprey.utils.config import get_streaming_mode
+
+        codegen_mode = get_streaming_mode("tui", "python_code_generator")
+        if codegen_mode == "disabled":
+            return
+
         # Finalize previous code generation widget if it exists
         if self.display._code_gen_message:
             full_code = await self.display.finalize_code_generation_message()
@@ -430,7 +437,10 @@ async def _handle_code_generation_start(self, attempt: int, is_retry: bool) -> N
                 python_block.set_partial_output(status_text)
 
         # Create new collapsible code message
-        await self.display.start_code_generation_message(attempt=attempt)
+        start_collapsed = codegen_mode == "hide"
+        await self.display.start_code_generation_message(
+            attempt=attempt, start_collapsed=start_collapsed
+        )
 
     async def _handle_code_generated(self, code: str, attempt: int, success: bool) -> None:
         """Handle code generation completion - finalize widget.
9 changes: 7 additions & 2 deletions src/osprey/interfaces/tui/widgets/chat_display.py
@@ -211,7 +211,9 @@ async def finalize_streaming_message(self) -> None:

     # --- Code Generation Streaming Methods ---
 
-    async def start_code_generation_message(self, attempt: int = 1) -> CollapsibleCodeMessage:
+    async def start_code_generation_message(
+        self, attempt: int = 1, start_collapsed: bool = False
+    ) -> CollapsibleCodeMessage:
         """Create and mount a new collapsible code message for streaming.
 
         Similar to start_streaming_message, but creates a CollapsibleCodeMessage
@@ -221,11 +223,14 @@ async def start_code_generation_message(self, attempt: int = 1) -> CollapsibleCo

         Args:
             attempt: The retry attempt number (1 for first, 2+ for retries).
+            start_collapsed: If True, start with content hidden (hide mode).
 
         Returns:
             The newly created CollapsibleCodeMessage widget.
         """
-        self._code_gen_message = CollapsibleCodeMessage(attempt=attempt)
+        self._code_gen_message = CollapsibleCodeMessage(
+            attempt=attempt, start_collapsed=start_collapsed
+        )
         await self.mount(self._code_gen_message)
         self.scroll_end(animate=False)
         return self._code_gen_message
14 changes: 13 additions & 1 deletion src/osprey/interfaces/tui/widgets/messages.py
@@ -98,17 +98,21 @@ class CollapsibleCodeMessage(Static):
     the generated code anytime.
     """
 
-    def __init__(self, attempt: int = 1, **kwargs):
+    def __init__(self, attempt: int = 1, start_collapsed: bool = False, **kwargs):
         """Initialize a collapsible code message.
 
         Args:
             attempt: The retry attempt number (1 for first attempt, 2+ for retries).
+            start_collapsed: If True, start with content hidden (hide mode).
+                When False (default/show mode), content is visible during streaming
+                and auto-collapses after finalization.
 
         The message starts with content visible during streaming and
         transitions to collapsed state after finalization.
         """
         super().__init__(**kwargs)
         self._attempt = attempt
+        self._start_collapsed = start_collapsed
         self._content_buffer: list[str] = []
         self._markdown_stream: Any = None
         self._is_collapsed = False
@@ -136,6 +140,14 @@ def on_mount(self) -> None:
         toggle = self.query_one("#code-toggle", Static)
         toggle.can_focus = True
 
+        # Hide mode: start collapsed immediately
+        if self._start_collapsed:
+            content = self.query_one("#code-content", Markdown)
+            content.display = False
+            self._is_collapsed = True
+            label = f"code #{self._attempt}" if self._attempt > 1 else "code"
+            toggle.update(f"{label} (click to show)")
+
     def get_markdown_widget(self) -> Markdown:
         """Get the Markdown widget for streaming.
 
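The "(click to show)" label set in on_mount above implies a toggle handler on #code-toggle, which is not part of this diff. A hypothetical sketch of that handler, inverting the same display/label logic as the hide-mode branch (the method name and label wording are assumed):

# Hypothetical method of CollapsibleCodeMessage — not taken from the diff.
def _toggle_collapsed(self) -> None:
    content = self.query_one("#code-content", Markdown)
    self._is_collapsed = not self._is_collapsed
    content.display = not self._is_collapsed  # hide or reveal the code body
    label = f"code #{self._attempt}" if self._attempt > 1 else "code"
    action = "show" if self._is_collapsed else "hide"
    self.query_one("#code-toggle", Static).update(f"{label} (click to {action})")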
38 changes: 38 additions & 0 deletions src/osprey/templates/project/config.yml.j2
@@ -337,6 +337,44 @@ cli:
   # banner: |
   #   Your custom ASCII art here
 
+  # Streaming display configuration
+  # Controls how LLM streaming output is displayed per node
+  # Options: show | disabled (CLI has no collapse, so "hide" maps to "disabled")
+  # streaming:
+  #   respond: show                    # Final response (show | disabled)
+  #   python_code_generator: disabled  # Code generation (show | disabled)
+
+# ============================================================
+# TUI CONFIGURATION
+# ============================================================
+# Customize the terminal UI (Textual) appearance and behavior
+
+tui:
+  # Streaming display configuration
+  # Controls how LLM streaming output is displayed per node
+  # Options: show | hide | disabled
+  #   show     - expanded during streaming, auto-collapses after completion
+  #   hide     - collapsed from the start, user can expand to watch live
+  #   disabled - not displayed at all
+  streaming:
+    respond: show                # Final response (show | disabled; hide not allowed)
+    python_code_generator: hide  # Code generation collapsed by default
+
+# ============================================================
+# OPEN WEBUI CONFIGURATION
+# ============================================================
+# Customize the Open WebUI pipeline behavior
+
+openwebui:
+  # Streaming display configuration
+  # Controls how LLM streaming output is displayed per node
+  # Options: show | hide | disabled
+  #   show     - tokens stream live, code block visible
+  #   hide     - buffered during streaming, collapsed <details> block after completion
+  #   disabled - not displayed at all
+  streaming:
+    respond: show                # Final response (show | disabled; hide not allowed)
+    python_code_generator: hide  # Code generation collapsed by default
+
 # Note: Theme system logging uses the existing 'base' logger color
 # No additional logging configuration needed
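The openwebui "hide" behavior described above (buffered during streaming, then a collapsed <details> block after completion) could be rendered roughly as follows. The helper name and exact markup are assumptions for illustration, not taken from the pipeline code:

# Hypothetical rendering helper for the Open WebUI "hide" mode described above.
def render_hidden_code_block(code: str, attempt: int = 1) -> str:
    label = f"code #{attempt}" if attempt > 1 else "code"
    return (
        f"<details>\n<summary>{label} (click to show)</summary>\n\n"
        f"```python\n{code}\n```\n\n</details>"
    )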