mirror of
https://github.com/OpenBMB/ChatDev.git
synced 2026-06-09 17:02:17 +00:00
feat: preserve workflow session on browser refresh with reconnect and message replay
- Decouple WebSocket connection from session lifecycle: workflows continue running after disconnect
- Message buffering with ring buffer (max 1000) for chat history replay on reconnect
- Session garbage collection: 24-hour TTL for terminal sessions via background asyncio task
- Multi-tab support: last tab wins, old WebSocket closed on new connection for same session
- Cancel now sends explicit WebSocket message instead of relying on disconnect detection
- Replace hardcoded API keys and BASE_URL with ${API_KEY}/${BASE_URL} placeholders in yaml configs
This commit is contained in:
parent
b23950d035
commit
64bb16a884
1
.gitignore
vendored
1
.gitignore
vendored
@ -27,3 +27,4 @@ node_modules/
|
|||||||
data/
|
data/
|
||||||
temp/
|
temp/
|
||||||
WareHouse/
|
WareHouse/
|
||||||
|
|
||||||
|
|||||||
@ -182,7 +182,8 @@
|
|||||||
"alert_download_failed": "Failed to download file, please try again.",
|
"alert_download_failed": "Failed to download file, please try again.",
|
||||||
"alert_download_logs_failed": "Download failed, please try again later",
|
"alert_download_logs_failed": "Download failed, please try again later",
|
||||||
"no_initial_instructions": "No initial instructions provided",
|
"no_initial_instructions": "No initial instructions provided",
|
||||||
"workflow_cancelled": "Workflow cancelled"
|
"workflow_cancelled": "Workflow cancelled",
|
||||||
|
"reconnected": "Reconnected to existing session"
|
||||||
},
|
},
|
||||||
"form_generator": {
|
"form_generator": {
|
||||||
"advanced_settings": "Advanced Settings",
|
"advanced_settings": "Advanced Settings",
|
||||||
|
|||||||
@ -160,7 +160,8 @@
|
|||||||
"alert_download_failed": "下载文件失败,请重试。",
|
"alert_download_failed": "下载文件失败,请重试。",
|
||||||
"alert_download_logs_failed": "下载失败,请稍后重试",
|
"alert_download_logs_failed": "下载失败,请稍后重试",
|
||||||
"no_initial_instructions": "未提供初始说明",
|
"no_initial_instructions": "未提供初始说明",
|
||||||
"workflow_cancelled": "工作流已取消"
|
"workflow_cancelled": "工作流已取消",
|
||||||
|
"reconnected": "已重新连接到现有会话"
|
||||||
},
|
},
|
||||||
"components": {
|
"components": {
|
||||||
"workflow_edge": {
|
"workflow_edge": {
|
||||||
|
|||||||
@ -759,7 +759,7 @@ const clearUploadedAttachments = () => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Reset the WebSocket connection and related state
|
// Reset the WebSocket connection and related state
|
||||||
const resetConnectionState = ({ closeSocket = true } = {}) => {
|
const resetConnectionState = ({ closeSocket = true, keepSession = false } = {}) => {
|
||||||
if (closeSocket && ws) {
|
if (closeSocket && ws) {
|
||||||
try {
|
try {
|
||||||
ws.close()
|
ws.close()
|
||||||
@ -769,20 +769,29 @@ const resetConnectionState = ({ closeSocket = true } = {}) => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
ws = null
|
ws = null
|
||||||
sessionId = null
|
|
||||||
isConnectionReady.value = false
|
isConnectionReady.value = false
|
||||||
shouldGlow.value = false
|
|
||||||
isWorkflowRunning.value = false
|
if (!keepSession) {
|
||||||
activeNodes.value = []
|
sessionId = null
|
||||||
|
isWorkflowRunning.value = false
|
||||||
|
activeNodes.value = []
|
||||||
|
shouldGlow.value = false
|
||||||
|
clearUploadedAttachments()
|
||||||
|
chatMessages.value = []
|
||||||
|
nodesLoadingMessagesMap.clear()
|
||||||
|
nameToSpriteMap.value.clear()
|
||||||
|
nodeSpriteMap.value.clear()
|
||||||
|
}
|
||||||
|
|
||||||
if (attachmentHoverTimeout) {
|
if (attachmentHoverTimeout) {
|
||||||
clearTimeout(attachmentHoverTimeout)
|
clearTimeout(attachmentHoverTimeout)
|
||||||
attachmentHoverTimeout = null
|
attachmentHoverTimeout = null
|
||||||
}
|
}
|
||||||
clearUploadedAttachments()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Button state management
|
// Button state management
|
||||||
const isWorkflowRunning = ref(false)
|
const isWorkflowRunning = ref(false)
|
||||||
|
const isReconnecting = ref(false)
|
||||||
|
|
||||||
// Active node list
|
// Active node list
|
||||||
const activeNodes = ref([])
|
const activeNodes = ref([])
|
||||||
@ -1432,12 +1441,28 @@ const sendHumanInput = () => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Establish a WebSocket connection
|
// Establish a WebSocket connection
|
||||||
const establishWebSocketConnection = () => {
|
const establishWebSocketConnection = (options = {}) => {
|
||||||
// Reset any previous state before creating a new socket
|
let { sessionId: reconnectSid } = options
|
||||||
resetConnectionState()
|
|
||||||
|
|
||||||
if (!selectedFile.value) {
|
// If no explicit sessionId, check URL for an existing session
|
||||||
return
|
if (!reconnectSid) {
|
||||||
|
const urlSession = route.query?.session
|
||||||
|
if (urlSession && typeof urlSession === 'string' && urlSession.trim()) {
|
||||||
|
reconnectSid = urlSession.trim()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const reconnecting = !!reconnectSid
|
||||||
|
|
||||||
|
if (reconnecting) {
|
||||||
|
isReconnecting.value = true
|
||||||
|
resetConnectionState({ closeSocket: true, keepSession: true })
|
||||||
|
status.value = 'Connecting...'
|
||||||
|
} else {
|
||||||
|
resetConnectionState()
|
||||||
|
if (!selectedFile.value) {
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const apiBase = import.meta.env.VITE_API_BASE_URL || ''
|
const apiBase = import.meta.env.VITE_API_BASE_URL || ''
|
||||||
@ -1457,7 +1482,9 @@ const establishWebSocketConnection = () => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const wsUrl = `${scheme}//${host}/ws`
|
const wsUrl = reconnecting
|
||||||
|
? `${scheme}//${host}/ws?session_id=${encodeURIComponent(reconnectSid)}`
|
||||||
|
: `${scheme}//${host}/ws`
|
||||||
const socket = new WebSocket(wsUrl)
|
const socket = new WebSocket(wsUrl)
|
||||||
ws = socket
|
ws = socket
|
||||||
|
|
||||||
@ -1485,12 +1512,15 @@ const establishWebSocketConnection = () => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
isConnectionReady.value = true
|
isConnectionReady.value = true
|
||||||
shouldGlow.value = true
|
|
||||||
status.value = 'Waiting for launch...'
|
|
||||||
|
|
||||||
nextTick(() => {
|
// For new connections, set initial state; reconnections are handled by session_resumed
|
||||||
taskInputRef.value?.focus()
|
if (!isReconnecting.value) {
|
||||||
})
|
shouldGlow.value = true
|
||||||
|
status.value = 'Waiting for launch...'
|
||||||
|
nextTick(() => {
|
||||||
|
taskInputRef.value?.focus()
|
||||||
|
})
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
processMessage(msg)
|
processMessage(msg)
|
||||||
}
|
}
|
||||||
@ -1522,6 +1552,11 @@ const establishWebSocketConnection = () => {
|
|||||||
|
|
||||||
// Watch for file selection changes
|
// Watch for file selection changes
|
||||||
watch(selectedFile, (newFile) => {
|
watch(selectedFile, (newFile) => {
|
||||||
|
// When reconnecting, selectedFile is set by session_resumed; skip the normal flow
|
||||||
|
if (isReconnecting.value) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
taskPrompt.value = ''
|
taskPrompt.value = ''
|
||||||
fileSearchQuery.value = newFile || ''
|
fileSearchQuery.value = newFile || ''
|
||||||
isFileSearchDirty.value = false
|
isFileSearchDirty.value = false
|
||||||
@ -1555,10 +1590,18 @@ watch(
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
onMounted(() => {
|
onMounted(async () => {
|
||||||
document.addEventListener('click', handleClickOutside)
|
document.addEventListener('click', handleClickOutside)
|
||||||
document.addEventListener('keydown', handleKeydown)
|
document.addEventListener('keydown', handleKeydown)
|
||||||
loadWorkflows()
|
await loadWorkflows()
|
||||||
|
// If URL contains a session id, the watch on selectedFile (triggered by
|
||||||
|
// applyWorkflowFromRoute inside loadWorkflows) will call establishWebSocketConnection,
|
||||||
|
// which auto-detects the session param and reconnects.
|
||||||
|
// Fallback: if session is present but no workflow was in URL, connect directly.
|
||||||
|
const sessionParam = route.query?.session
|
||||||
|
if (sessionParam && typeof sessionParam === 'string' && sessionParam.trim() && !selectedFile.value) {
|
||||||
|
establishWebSocketConnection({ sessionId: sessionParam.trim() })
|
||||||
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
onUnmounted(() => {
|
onUnmounted(() => {
|
||||||
@ -1836,6 +1879,15 @@ const launchWorkflow = async () => {
|
|||||||
|
|
||||||
status.value = 'Running...'
|
status.value = 'Running...'
|
||||||
isWorkflowRunning.value = true
|
isWorkflowRunning.value = true
|
||||||
|
|
||||||
|
// Persist session id in URL for reconnection after refresh
|
||||||
|
router.push({
|
||||||
|
query: {
|
||||||
|
...route.query,
|
||||||
|
workflow: selectedFile.value,
|
||||||
|
session: sessionId
|
||||||
|
}
|
||||||
|
})
|
||||||
} else {
|
} else {
|
||||||
const error = await response.json().catch(() => ({}))
|
const error = await response.json().catch(() => ({}))
|
||||||
console.error('Failed to launch workflow:', error)
|
console.error('Failed to launch workflow:', error)
|
||||||
@ -2025,6 +2077,68 @@ const animateSpriteAlongEdge = (edge) => {
|
|||||||
const processMessage = async (msg) => {
|
const processMessage = async (msg) => {
|
||||||
console.log('Message: ', msg)
|
console.log('Message: ', msg)
|
||||||
|
|
||||||
|
// Session resumed after reconnection — sync final UI state
|
||||||
|
if (msg.type === 'session_resumed') {
|
||||||
|
const data = msg.data
|
||||||
|
sessionId = data.session_id
|
||||||
|
|
||||||
|
// Restore workflow selection without clearing chat (messages were already replayed)
|
||||||
|
// Set selectedFile BEFORE clearing isReconnecting so the watch skips
|
||||||
|
if (data.yaml_file) {
|
||||||
|
selectedFile.value = data.yaml_file
|
||||||
|
fileSearchQuery.value = data.yaml_file
|
||||||
|
// Load YAML data and sprites (but don't clear chat)
|
||||||
|
try {
|
||||||
|
const yamlContentString = await fetchWorkflowYAML(data.yaml_file)
|
||||||
|
const parsedYaml = yaml.load(yamlContentString)
|
||||||
|
workflowYaml.value = parsedYaml || {}
|
||||||
|
|
||||||
|
const yamlNodes = Array.isArray(parsedYaml?.graph?.nodes) ? parsedYaml.graph.nodes : []
|
||||||
|
for (const node of yamlNodes) {
|
||||||
|
if (node.id && !nodeSpriteMap.value.has(node.id)) {
|
||||||
|
const spritePath = spriteFetcher.fetchSprite(node.id, 'D', 1)
|
||||||
|
nodeSpriteMap.value.set(node.id, spritePath)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.error('Failed to load YAML on reconnect:', e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
isReconnecting.value = false
|
||||||
|
|
||||||
|
// Restore workflow status
|
||||||
|
const statusMap = {
|
||||||
|
'idle': 'Connected',
|
||||||
|
'running': 'Running...',
|
||||||
|
'waiting_for_input': 'Waiting for input...',
|
||||||
|
'completed': 'Completed',
|
||||||
|
'error': 'Error',
|
||||||
|
'cancelled': 'Cancelled',
|
||||||
|
}
|
||||||
|
status.value = statusMap[data.status] || 'Connected'
|
||||||
|
|
||||||
|
if (data.status === 'running' || data.status === 'waiting_for_input') {
|
||||||
|
isWorkflowRunning.value = true
|
||||||
|
}
|
||||||
|
|
||||||
|
if (data.status === 'waiting_for_input') {
|
||||||
|
shouldGlow.value = true
|
||||||
|
}
|
||||||
|
|
||||||
|
if (data.status === 'completed' || data.status === 'error' || data.status === 'cancelled') {
|
||||||
|
sessionIdToDownload = sessionId
|
||||||
|
}
|
||||||
|
|
||||||
|
if (data.current_node_id && !activeNodes.value.includes(data.current_node_id)) {
|
||||||
|
activeNodes.value.push(data.current_node_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
isConnectionReady.value = true
|
||||||
|
addChatNotification(t('launch.reconnected'))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
// Prompt for human input
|
// Prompt for human input
|
||||||
if (msg.type === 'human_input_required') {
|
if (msg.type === 'human_input_required') {
|
||||||
const fullMessage = msg.data.task_description + '\n\n' + msg.data.input
|
const fullMessage = msg.data.task_description + '\n\n' + msg.data.input
|
||||||
@ -2184,6 +2298,14 @@ const processMessage = async (msg) => {
|
|||||||
sessionIdToDownload = sessionId
|
sessionIdToDownload = sessionId
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Workflow cancelled (e.g., from server-side cancellation)
|
||||||
|
if (msg.type === 'workflow_cancelled') {
|
||||||
|
addChatNotification(msg.data?.message || t('launch.workflow_cancelled'))
|
||||||
|
status.value = 'Cancelled'
|
||||||
|
isWorkflowRunning.value = false
|
||||||
|
sessionIdToDownload = sessionId
|
||||||
|
}
|
||||||
|
|
||||||
// Handle direct error messages (e.g., workflow execution errors)
|
// Handle direct error messages (e.g., workflow execution errors)
|
||||||
if (msg.type === 'error') {
|
if (msg.type === 'error') {
|
||||||
const errorMessage = msg.data?.message || 'Unknown error occurred'
|
const errorMessage = msg.data?.message || 'Unknown error occurred'
|
||||||
@ -2199,6 +2321,14 @@ const cancelWorkflow = () => {
|
|||||||
if (!isWorkflowRunning.value || !ws) {
|
if (!isWorkflowRunning.value || !ws) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Send cancel request through WebSocket so the server stops the workflow
|
||||||
|
try {
|
||||||
|
ws.send(JSON.stringify({ type: 'cancel' }))
|
||||||
|
} catch (sendError) {
|
||||||
|
console.warn('Failed to send cancel message:', sendError)
|
||||||
|
}
|
||||||
|
|
||||||
addChatNotification(t('launch.workflow_cancelled'))
|
addChatNotification(t('launch.workflow_cancelled'))
|
||||||
status.value = 'Cancelled'
|
status.value = 'Cancelled'
|
||||||
isWorkflowRunning.value = false
|
isWorkflowRunning.value = false
|
||||||
@ -2214,12 +2344,6 @@ const cancelWorkflow = () => {
|
|||||||
nodesLoadingMessagesMap.delete(nodeId)
|
nodesLoadingMessagesMap.delete(nodeId)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
|
||||||
ws.close()
|
|
||||||
} catch (closeError) {
|
|
||||||
console.warn('Failed to close WebSocket:', closeError)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Download logs
|
// Download logs
|
||||||
|
|||||||
@ -8,12 +8,12 @@ router = APIRouter()
|
|||||||
|
|
||||||
|
|
||||||
@router.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket, session_id: str = ""):
    """Serve one WebSocket connection and forward its text frames to the manager.

    Args:
        websocket: The incoming WebSocket connection.
        session_id: Optional id of an existing session to attach to. An empty
            string is passed to the manager as ``None``.

    The id returned by ``manager.connect`` (``sid``) is used for every
    subsequent call, since it is the id the manager registered the socket under.
    """
    manager = get_websocket_manager()
    sid = await manager.connect(websocket, session_id=session_id or None)
    try:
        while True:
            message = await websocket.receive_text()
            await manager.handle_message(sid, message)
    except WebSocketDisconnect:
        # Only unregister if this socket is still the one registered for the
        # session. When a newer connection has taken over the same session id,
        # active_connections[sid] already refers to the replacement socket and
        # calling disconnect() here would drop the live connection.
        if manager.active_connections.get(sid) is websocket:
            manager.disconnect(sid)
|
||||||
|
|||||||
@ -29,12 +29,22 @@ class MessageHandler:
|
|||||||
await self._handle_ping(session_id, websocket_manager)
|
await self._handle_ping(session_id, websocket_manager)
|
||||||
elif message_type == "get_status":
|
elif message_type == "get_status":
|
||||||
await self._handle_get_status(session_id, websocket_manager)
|
await self._handle_get_status(session_id, websocket_manager)
|
||||||
|
elif message_type == "cancel":
|
||||||
|
await self._handle_cancel(session_id, websocket_manager)
|
||||||
else:
|
else:
|
||||||
await websocket_manager.send_message(
|
await websocket_manager.send_message(
|
||||||
session_id,
|
session_id,
|
||||||
{"type": "error", "data": {"message": f"Unknown message type: {message_type}"}},
|
{"type": "error", "data": {"message": f"Unknown message type: {message_type}"}},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def _handle_cancel(self, session_id: str, websocket_manager):
|
||||||
|
if self.workflow_run_service:
|
||||||
|
self.workflow_run_service.request_cancel(session_id, reason="User requested cancellation")
|
||||||
|
await websocket_manager.send_message(
|
||||||
|
session_id,
|
||||||
|
{"type": "input_received", "data": {"message": "Cancellation requested"}},
|
||||||
|
)
|
||||||
|
|
||||||
async def _handle_human_input(self, session_id: str, data: Dict[str, Any], websocket_manager):
|
async def _handle_human_input(self, session_id: str, data: Dict[str, Any], websocket_manager):
|
||||||
try:
|
try:
|
||||||
payload = data.get("data", {}) or {}
|
payload = data.get("data", {}) or {}
|
||||||
|
|||||||
@ -56,6 +56,16 @@ class WorkflowSession:
|
|||||||
cancel_event: Event = field(default_factory=Event)
|
cancel_event: Event = field(default_factory=Event)
|
||||||
cancel_reason: Optional[str] = None
|
cancel_reason: Optional[str] = None
|
||||||
|
|
||||||
|
# Message buffer for reconnection replay
|
||||||
|
message_buffer: list = field(default_factory=list)
|
||||||
|
|
||||||
|
MAX_BUFFER_SIZE: int = 1000
|
||||||
|
|
||||||
|
def append_message(self, message: Dict[str, Any]) -> None:
    """Append ``message`` to the replay buffer, evicting the oldest entries.

    After the call the buffer holds at most ``MAX_BUFFER_SIZE`` messages, with
    the newest message last. If the buffer was already over the cap, every
    surplus entry is dropped, not just one. A non-positive cap disables
    buffering: the buffer is emptied and the message is not stored.

    Args:
        message: The message payload to remember for replay.
    """
    buffer = self.message_buffer
    limit = self.MAX_BUFFER_SIZE

    if limit <= 0:
        # Nothing may be retained; avoid popping from an empty list.
        buffer.clear()
        return

    # Number of old entries that must go so that, after appending, the
    # buffer length does not exceed the limit.
    overflow = len(buffer) - limit + 1
    if overflow > 0:
        del buffer[:overflow]
    buffer.append(message)
|
||||||
|
|
||||||
|
|
||||||
class WorkflowSessionStore:
|
class WorkflowSessionStore:
|
||||||
"""In-memory registry that tracks workflow session metadata."""
|
"""In-memory registry that tracks workflow session metadata."""
|
||||||
@ -129,3 +139,20 @@ class WorkflowSessionStore:
|
|||||||
def get_artifact_queue(self, session_id: str) -> Optional[ArtifactEventQueue]:
|
def get_artifact_queue(self, session_id: str) -> Optional[ArtifactEventQueue]:
|
||||||
session = self._sessions.get(session_id)
|
session = self._sessions.get(session_id)
|
||||||
return session.artifact_queue if session else None
|
return session.artifact_queue if session else None
|
||||||
|
|
||||||
|
def get_session_snapshot(self, session_id: str) -> Optional[Dict[str, Any]]:
|
||||||
|
session = self._sessions.get(session_id)
|
||||||
|
if not session:
|
||||||
|
return None
|
||||||
|
return {
|
||||||
|
"session_id": session.session_id,
|
||||||
|
"yaml_file": session.yaml_file,
|
||||||
|
"task_prompt": session.task_prompt,
|
||||||
|
"status": session.status.value,
|
||||||
|
"current_node_id": session.current_node_id,
|
||||||
|
"created_at": session.created_at,
|
||||||
|
"updated_at": session.updated_at,
|
||||||
|
"waiting_for_input": session.waiting_for_input,
|
||||||
|
"error_message": session.error_message,
|
||||||
|
"message_count": len(session.message_buffer),
|
||||||
|
}
|
||||||
|
|||||||
@ -39,6 +39,8 @@ def _encode_ws_message(message: Any) -> str:
|
|||||||
|
|
||||||
|
|
||||||
class WebSocketManager:
|
class WebSocketManager:
|
||||||
|
SESSION_TTL_SECONDS = 24 * 60 * 60 # 24 hours
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
@ -50,6 +52,7 @@ class WebSocketManager:
|
|||||||
self.active_connections: Dict[str, WebSocket] = {}
|
self.active_connections: Dict[str, WebSocket] = {}
|
||||||
self.connection_timestamps: Dict[str, float] = {}
|
self.connection_timestamps: Dict[str, float] = {}
|
||||||
self._owner_loop: Optional[asyncio.AbstractEventLoop] = None
|
self._owner_loop: Optional[asyncio.AbstractEventLoop] = None
|
||||||
|
self._gc_task: Optional[asyncio.Task] = None
|
||||||
self.session_store = session_store or WorkflowSessionStore()
|
self.session_store = session_store or WorkflowSessionStore()
|
||||||
self.session_controller = session_controller or SessionExecutionController(self.session_store)
|
self.session_controller = session_controller or SessionExecutionController(self.session_store)
|
||||||
self.attachment_service = attachment_service or AttachmentService()
|
self.attachment_service = attachment_service or AttachmentService()
|
||||||
@ -70,11 +73,55 @@ class WebSocketManager:
|
|||||||
# worker threads can safely schedule sends via run_coroutine_threadsafe.
|
# worker threads can safely schedule sends via run_coroutine_threadsafe.
|
||||||
if self._owner_loop is None:
|
if self._owner_loop is None:
|
||||||
self._owner_loop = asyncio.get_running_loop()
|
self._owner_loop = asyncio.get_running_loop()
|
||||||
|
|
||||||
|
# --- Reconnect to existing session ---
|
||||||
|
if session_id and self.session_store.has_session(session_id):
|
||||||
|
# If an old WebSocket is still tied to this session, close it first
|
||||||
|
if session_id in self.active_connections:
|
||||||
|
old_ws = self.active_connections[session_id]
|
||||||
|
try:
|
||||||
|
await old_ws.close(code=1000, reason="Replaced by new connection")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
self.active_connections[session_id] = websocket
|
||||||
|
self.connection_timestamps[session_id] = time.time()
|
||||||
|
logging.info("WebSocket reconnected to existing session: %s", session_id)
|
||||||
|
|
||||||
|
# Always start the GC loop (idempotent)
|
||||||
|
self._start_gc()
|
||||||
|
|
||||||
|
# Send connection confirmation
|
||||||
|
await self._send_raw(
|
||||||
|
session_id,
|
||||||
|
{"type": "connection", "data": {"session_id": session_id, "status": "connected"}},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Replay all buffered messages (snapshot to avoid including messages
|
||||||
|
# that arrive during replay)
|
||||||
|
session = self.session_store.get_session(session_id)
|
||||||
|
if session:
|
||||||
|
messages_to_replay = list(session.message_buffer)
|
||||||
|
for msg in messages_to_replay:
|
||||||
|
await self._send_raw(session_id, msg)
|
||||||
|
|
||||||
|
# Send session state snapshot
|
||||||
|
snapshot = self.session_store.get_session_snapshot(session_id)
|
||||||
|
if snapshot:
|
||||||
|
await self._send_raw(session_id, {"type": "session_resumed", "data": snapshot})
|
||||||
|
|
||||||
|
return session_id
|
||||||
|
|
||||||
|
# --- New connection ---
|
||||||
if not session_id:
|
if not session_id:
|
||||||
session_id = str(uuid.uuid4())
|
session_id = str(uuid.uuid4())
|
||||||
self.active_connections[session_id] = websocket
|
self.active_connections[session_id] = websocket
|
||||||
self.connection_timestamps[session_id] = time.time()
|
self.connection_timestamps[session_id] = time.time()
|
||||||
logging.info("WebSocket connected: %s", session_id)
|
logging.info("WebSocket connected: %s", session_id)
|
||||||
|
|
||||||
|
# Always start the GC loop (idempotent)
|
||||||
|
self._start_gc()
|
||||||
|
|
||||||
await self.send_message(
|
await self.send_message(
|
||||||
session_id,
|
session_id,
|
||||||
{
|
{
|
||||||
@ -85,24 +132,19 @@ class WebSocketManager:
|
|||||||
return session_id
|
return session_id
|
||||||
|
|
||||||
def disconnect(self, session_id: str) -> None:
|
def disconnect(self, session_id: str) -> None:
|
||||||
session = self.session_store.get_session(session_id)
|
|
||||||
if session and session.status in {SessionStatus.RUNNING, SessionStatus.WAITING_FOR_INPUT}:
|
|
||||||
self.workflow_run_service.request_cancel(
|
|
||||||
session_id,
|
|
||||||
reason="WebSocket disconnected",
|
|
||||||
)
|
|
||||||
if session_id in self.active_connections:
|
if session_id in self.active_connections:
|
||||||
del self.active_connections[session_id]
|
del self.active_connections[session_id]
|
||||||
if session_id in self.connection_timestamps:
|
if session_id in self.connection_timestamps:
|
||||||
del self.connection_timestamps[session_id]
|
del self.connection_timestamps[session_id]
|
||||||
self.session_controller.cleanup_session(session_id)
|
logging.info("WebSocket disconnected (session preserved): %s", session_id)
|
||||||
remaining_session = self.session_store.get_session(session_id)
|
|
||||||
if remaining_session and remaining_session.executor is None:
|
|
||||||
self.session_store.pop_session(session_id)
|
|
||||||
self.attachment_service.cleanup_session(session_id)
|
|
||||||
logging.info("WebSocket disconnected: %s", session_id)
|
|
||||||
|
|
||||||
async def send_message(self, session_id: str, message: Dict[str, Any]) -> None:
|
async def send_message(self, session_id: str, message: Dict[str, Any]) -> None:
|
||||||
|
# Buffer business messages for reconnection replay (exclude transport messages)
|
||||||
|
if message.get("type") not in ("connection", "pong"):
|
||||||
|
session = self.session_store.get_session(session_id)
|
||||||
|
if session:
|
||||||
|
session.append_message(message)
|
||||||
|
|
||||||
if session_id in self.active_connections:
|
if session_id in self.active_connections:
|
||||||
websocket = self.active_connections[session_id]
|
websocket = self.active_connections[session_id]
|
||||||
try:
|
try:
|
||||||
@ -110,7 +152,16 @@ class WebSocketManager:
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
logging.error("Failed to send message to %s: %s", session_id, exc)
|
logging.error("Failed to send message to %s: %s", session_id, exc)
|
||||||
# self.disconnect(session_id)
|
|
||||||
|
async def _send_raw(self, session_id: str, message: Dict[str, Any]) -> None:
|
||||||
|
"""Send a message without buffering. Used for replay and connection management."""
|
||||||
|
if session_id in self.active_connections:
|
||||||
|
websocket = self.active_connections[session_id]
|
||||||
|
try:
|
||||||
|
await websocket.send_text(_encode_ws_message(message))
|
||||||
|
except Exception as exc:
|
||||||
|
traceback.print_exc()
|
||||||
|
logging.error("Failed to send raw message to %s: %s", session_id, exc)
|
||||||
|
|
||||||
def send_message_sync(self, session_id: str, message: Dict[str, Any]) -> None:
|
def send_message_sync(self, session_id: str, message: Dict[str, Any]) -> None:
|
||||||
"""Send a WebSocket message from any thread (including worker threads).
|
"""Send a WebSocket message from any thread (including worker threads).
|
||||||
@ -174,3 +225,26 @@ class WebSocketManager:
|
|||||||
session_id,
|
session_id,
|
||||||
{"type": "error", "data": {"message": str(exc)}},
|
{"type": "error", "data": {"message": str(exc)}},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _start_gc(self) -> None:
|
||||||
|
"""Start the background GC task if not already running."""
|
||||||
|
if self._gc_task is not None and not self._gc_task.done():
|
||||||
|
return
|
||||||
|
loop = asyncio.get_running_loop()
|
||||||
|
self._gc_task = loop.create_task(self._gc_loop())
|
||||||
|
|
||||||
|
async def _gc_loop(self) -> None:
|
||||||
|
"""Periodically clean up terminal sessions older than TTL."""
|
||||||
|
TERMINAL = {SessionStatus.COMPLETED, SessionStatus.ERROR, SessionStatus.CANCELLED}
|
||||||
|
while True:
|
||||||
|
await asyncio.sleep(3600) # run every hour
|
||||||
|
now = time.time()
|
||||||
|
to_remove = []
|
||||||
|
for sid, session in self.session_store._sessions.items():
|
||||||
|
if session.status in TERMINAL:
|
||||||
|
if now - session.updated_at > self.SESSION_TTL_SECONDS:
|
||||||
|
to_remove.append(sid)
|
||||||
|
for sid in to_remove:
|
||||||
|
self.session_store.pop_session(sid)
|
||||||
|
self.attachment_service.cleanup_session(sid)
|
||||||
|
logging.info("GC: removed expired session %s", sid)
|
||||||
|
|||||||
@ -264,8 +264,6 @@ class WorkflowRunService:
|
|||||||
session_ref.executor = None
|
session_ref.executor = None
|
||||||
session_ref.graph = None
|
session_ref.graph = None
|
||||||
self.session_controller.cleanup_session(session_id)
|
self.session_controller.cleanup_session(session_id)
|
||||||
if session_id not in websocket_manager.active_connections:
|
|
||||||
self.session_store.pop_session(session_id)
|
|
||||||
|
|
||||||
def _build_initial_task_input(
|
def _build_initial_task_input(
|
||||||
self,
|
self,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user