#!/usr/bin/env python3
"""
Test for tool call response handling in GLM-5.1.

Tests the multi-turn flow:
1. Send a prompt that triggers a tool call
2. Send back the tool result
3. Verify the model can see and use the tool response

This reproduces the issue where tool responses appear blank to the model.
"""

import os
import json

import httpx

from datetime import datetime

# Endpoint/model configuration, overridable via environment for other deployments.
API_BASE = os.environ.get("VLLM_API_BASE", "http://95.179.247.150/v1")
API_KEY = os.environ.get("VLLM_API_KEY", "none")
MODEL = os.environ.get("VLLM_MODEL", "HuggingFaceTB/SmolLM3-3B")


def timestamp() -> str:
    """Return the current wall-clock time as HH:MM:SS.mmm for log prefixes."""
    return datetime.now().strftime("%H:%M:%S.%f")[:-3]


def test_tool_call_response_flow(streaming: bool = True) -> dict:
    """
    Test the full tool call -> response -> follow-up flow.

    This simulates:
    1. User asks for weather
    2. Model calls get_weather tool
    3. We send back the weather data
    4. Model should see and use that data

    Args:
        streaming: Use SSE streaming for both requests when True,
            plain POSTs when False.

    Returns:
        A result dict: {"success": bool, ...} with either a "reason"
        (no tool call was produced) or "issues"/"final_response".
    """
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather for a location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "City and state, e.g. 'New York, NY'"
                        }
                    },
                    "required": ["location"]
                }
            }
        }
    ]

    # Initial request that should trigger a tool call
    messages = [
        {
            "role": "user",
            "content": "What's the weather like in Tokyo right now?"
        }
    ]

    mode = "STREAMING" if streaming else "NON-STREAMING"
    print(f"\n{'='*60}")
    print(f"TEST: Tool call response flow ({mode})")
    print(f"API: {API_BASE}")
    print(f"Model: {MODEL}")
    print(f"{'='*60}\n")

    with httpx.Client(timeout=120.0) as client:
        # Step 1: Send initial request, expect tool call
        print(f"[{timestamp()}] Step 1: Sending initial request...")

        if streaming:
            tool_calls = []
            tool_call_id = None
            tool_call_name = None
            accumulated_args = ""

            with client.stream(
                "POST",
                f"{API_BASE}/chat/completions",
                headers={
                    "Authorization": f"Bearer {API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": MODEL,
                    "messages": messages,
                    "tools": tools,
                    "tool_choice": "auto",
                    "stream": True,
                    "max_tokens": 512,
                    "chat_template_kwargs": {"enable_thinking": False},
                    "logprobs": True,
                    "top_logprobs": 5
                }
            ) as response:
                print(f"[{timestamp()}] Response status: {response.status_code}")
                for line in response.iter_lines():
                    if not line or line == "data: [DONE]":
                        continue
                    if line.startswith("data: "):
                        try:
                            chunk = json.loads(line[6:])
                            if chunk.get("choices"):
                                delta = chunk["choices"][0].get("delta", {})
                                if delta.get("tool_calls"):
                                    # Accumulate the (fragmented) tool call from
                                    # deltas. NOTE(review): only a single tool
                                    # call is reconstructed; the "index" field is
                                    # ignored, so parallel tool calls would be
                                    # merged into one argument string.
                                    for tc in delta["tool_calls"]:
                                        if tc.get("id"):
                                            tool_call_id = tc["id"]
                                        if tc.get("function", {}).get("name"):
                                            tool_call_name = tc["function"]["name"]
                                            print(f"[{timestamp()}] Tool call: {tool_call_name}")
                                        if tc.get("function", {}).get("arguments"):
                                            accumulated_args += tc["function"]["arguments"]
                                if delta.get("content"):
                                    print(f"[{timestamp()}] Content: {delta['content'][:100]}")
                        except json.JSONDecodeError as e:
                            print(f"[{timestamp()}] JSON error: {e}")

            if tool_call_name:
                tool_calls.append({
                    "id": tool_call_id or "call_0",
                    "type": "function",
                    "function": {
                        "name": tool_call_name,
                        "arguments": accumulated_args
                    }
                })
        else:
            # Non-streaming
            response = client.post(
                f"{API_BASE}/chat/completions",
                headers={
                    "Authorization": f"Bearer {API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": MODEL,
                    "messages": messages,
                    "tools": tools,
                    "tool_choice": "auto",
                    "stream": False,
                    "max_tokens": 512,
                    "chat_template_kwargs": {"enable_thinking": False},
                    "logprobs": True,
                    "top_logprobs": 5
                }
            )
            result = response.json()
            print(f"[{timestamp()}] Response status: {response.status_code}")

            tool_calls = []
            if result.get("choices"):
                message = result["choices"][0].get("message", {})
                if message.get("tool_calls"):
                    tool_calls = message["tool_calls"]
                    for tc in tool_calls:
                        print(f"[{timestamp()}] Tool call: {tc['function']['name']}")
                        print(f"[{timestamp()}] Args: {tc['function']['arguments']}")

        # Check if we got a tool call
        if not tool_calls:
            print(f"\n[{timestamp()}] No tool call received - model didn't call the tool")
            return {"success": False, "reason": "no_tool_call"}

        # Step 2: Parse tool call and prepare response
        tc = tool_calls[0]
        tc_id = tc.get("id", "call_0")
        tc_name = tc["function"]["name"]
        # Guard against empty/blank arguments (the exact symptom this script
        # probes for) so the parse degrades to {} instead of raising.
        tc_args = json.loads(tc["function"]["arguments"] or "{}")

        print(f"\n[{timestamp()}] Step 2: Tool call received")
        print(f"  Name: {tc_name}")
        print(f"  Args: {tc_args}")

        # Simulate tool execution
        tool_result = {
            "location": tc_args.get("location", "Unknown"),
            "temperature": "22°C",
            "condition": "Partly cloudy",
            "humidity": "65%",
            "wind": "15 km/h NE"
        }

        # Step 3: Send the tool response back.
        # NOTE(review): the assistant turn carries no "content" key; some
        # chat templates expect an explicit null/empty content — confirm
        # against the server if tool responses still appear blank.
        messages.append({
            "role": "assistant",
            "tool_calls": tool_calls
        })
        messages.append({
            "role": "tool",
            "tool_call_id": tc_id,
            "content": json.dumps(tool_result)
        })

        print(f"\n[{timestamp()}] Step 3: Sending tool response...")
        print(f"  Tool call ID: {tc_id}")
        print(f"  Tool result: {json.dumps(tool_result, indent=2)}")

        # Step 4: Get the model's follow-up response
        if streaming:
            final_response = ""
            print(f"\n[{timestamp()}] Step 4: Receiving model's follow-up (streaming)...")
            with client.stream(
                "POST",
                f"{API_BASE}/chat/completions",
                headers={
                    "Authorization": f"Bearer {API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": MODEL,
                    "messages": messages,
                    "tools": tools,
                    "stream": True,
                    "max_tokens": 512,
                    "chat_template_kwargs": {"enable_thinking": False},
                    "logprobs": True,
                    "top_logprobs": 5
                }
            ) as response:
                for line in response.iter_lines():
                    if not line or line == "data: [DONE]":
                        continue
                    if line.startswith("data: "):
                        try:
                            chunk = json.loads(line[6:])
                            if chunk.get("choices"):
                                delta = chunk["choices"][0].get("delta", {})
                                if delta.get("content"):
                                    content = delta["content"]
                                    final_response += content
                                    print(f"[{timestamp()}] Content: {content}", end="", flush=True)
                        except json.JSONDecodeError:
                            pass
            print()  # newline after streaming output
        else:
            print(f"\n[{timestamp()}] Step 4: Receiving model's follow-up (non-streaming)...")
            response = client.post(
                f"{API_BASE}/chat/completions",
                headers={
                    "Authorization": f"Bearer {API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": MODEL,
                    "messages": messages,
                    "tools": tools,
                    "stream": False,
                    "max_tokens": 512,
                    "chat_template_kwargs": {"enable_thinking": False},
                    "logprobs": True,
                    "top_logprobs": 5
                }
            )
            result = response.json()
            final_response = ""
            if result.get("choices"):
                # "or ''" guards against an explicit null content in the JSON,
                # which .get(..., "") does NOT cover and which would make the
                # substring checks below raise TypeError.
                final_response = result["choices"][0].get("message", {}).get("content", "") or ""

        print(f"\n[{timestamp()}] Final response:\n{final_response}")

        # Check if the model used the tool data
        success = True
        issues = []

        # The response should mention the weather data.
        # ("22" is a substring of "22°C", so one check suffices.)
        if "22" not in final_response:
            issues.append("Temperature (22°C) not mentioned in response")
            success = False

        # ("cloudy" is a substring of "partly cloudy", so one check suffices.)
        if "cloudy" not in final_response.lower():
            issues.append("Condition (Partly cloudy) not mentioned in response")
            success = False

        # Check for signs the model didn't see the data
        blank_indicators = [
            "i don't have",
            "i cannot access",
            "i'm unable to",
            "i am unable to",
            "don't have access",
            "don't have real-time",
            "cannot provide real-time"
        ]
        for indicator in blank_indicators:
            if indicator in final_response.lower():
                issues.append(f"Model seems unaware of tool result (found: '{indicator}')")
                success = False
                break

        print(f"\n{'='*60}")
        if success:
            print("✓ PASS: Model correctly used tool response data")
        else:
            print("✗ FAIL: Model did not use tool response correctly")
            for issue in issues:
                print(f"  - {issue}")
        print(f"{'='*60}\n")

        return {
            "success": success,
            "issues": issues,
            "final_response": final_response
        }


def test_tool_response_with_debug_info() -> None:
    """
    Test with detailed logging to capture exactly what the model sees.
    """
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_time",
                "description": "Get the current time",
                "parameters": {
                    "type": "object",
                    "properties": {},
                    "required": []
                }
            }
        }
    ]

    print(f"\n{'='*60}")
    print(f"TEST: Tool response with debug info (non-streaming)")
    print(f"{'='*60}\n")

    messages = [
        {"role": "user", "content": "What time is it?"}
    ]

    with httpx.Client(timeout=120.0) as client:
        # Get tool call
        print(f"[{timestamp()}] Sending initial request...")
        response = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json"
            },
            json={
                "model": MODEL,
                "messages": messages,
                "tools": tools,
                "tool_choice": "auto",
                "stream": False,
                "max_tokens": 256,
                "chat_template_kwargs": {"enable_thinking": False},
                "logprobs": True,
                "top_logprobs": 5
            }
        )
        result = response.json()

        if not result.get("choices") or not result["choices"][0].get("message", {}).get("tool_calls"):
            print("No tool call - skipping test")
            return

        tool_call = result["choices"][0]["message"]["tool_calls"][0]
        tc_id = tool_call["id"]
        print(f"[{timestamp()}] Tool call: {tool_call['function']['name']}")
        print(f"[{timestamp()}] Tool call ID: {tc_id}")

        # Add tool response
        messages.append({
            "role": "assistant",
            "tool_calls": [tool_call]
        })
        messages.append({
            "role": "tool",
            "tool_call_id": tc_id,
            "content": "The current time is 3:45 PM on Thursday, April 9, 2026."
        })

        # Debug: print the full messages array we're about to send
        print(f"\n[{timestamp()}] Sending follow-up with these messages:")
        print(json.dumps(messages, indent=2))

        # Get follow-up
        response2 = client.post(
            f"{API_BASE}/chat/completions",
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json"
            },
            json={
                "model": MODEL,
                "messages": messages,
                "tools": tools,
                "stream": False,
                "max_tokens": 256,
                "chat_template_kwargs": {"enable_thinking": False},
                "logprobs": True,
                "top_logprobs": 5
            }
        )
        result2 = response2.json()

        print(f"\n[{timestamp()}] Full response:")
        print(json.dumps(result2, indent=2))

        if result2.get("choices"):
            # "or ''" guards an explicit null content (would crash the "in" check).
            content = result2["choices"][0].get("message", {}).get("content", "") or ""
            print(f"\n[{timestamp()}] Model response content: {content}")

            # Check if time is mentioned ("3:45" already covers "3:45 PM")
            if "3:45" in content:
                print("\n✓ Model used the tool response (time mentioned)")
            else:
                print("\n✗ Model may not have seen the tool response (time not mentioned)")


def main() -> None:
    """Run the three tool-response tests in order, simplest first."""
    print("\n" + "="*60)
    print("GLM-5.1 Tool Call Response Tests")
    print("="*60)

    # Test non-streaming first (simpler to debug)
    print("\n--- Test 1: Non-streaming tool response flow ---")
    test_tool_call_response_flow(streaming=False)

    # Test streaming
    print("\n--- Test 2: Streaming tool response flow ---")
    test_tool_call_response_flow(streaming=True)

    # Debug test
    print("\n--- Test 3: Debug info test ---")
    test_tool_response_with_debug_info()

    print("\nAll tests complete.")


if __name__ == "__main__":
    main()