#!/bin/bash
#
# Conversation Runner - replays the turns of a conversation file against the
# vLLM facade, sending the accumulated message history with every request.
#
# Usage:
#   ./conversation-runner.sh [conversation-file] [model] [api-url]
#   MODEL=model_name API_URL=http://... ./conversation-runner.sh [conversation-file]
#   START_TURN=3 ./conversation-runner.sh [conversation-file]
#
# Arguments:
#   $1 - Conversation file (default: conversations/coding-session.json)
#   $2 - Model name (optional, overrides JSON/env)
#   $3 - API URL (optional, overrides env)
#
# Environment variables:
#   MODEL      - Model name (overrides JSON, overridden by arg)
#   API_URL    - Endpoint URL (overridden by arg)
#   START_TURN - 0-indexed turn to start from (default: 0). When > 0 and
#                output/history.json exists, that history is reused.
#
# Files written:
#   output/history.json   - accumulated message history
#   output/turn_NNN.json  - raw facade response of each completed turn
#   output/.payload.json  - body of the most recent request
#
# NOTE(review): the source this file was recovered from had lost the head of
# the main loop (everything between the `for` header and the `.error` check).
# That part is reconstructed below and marked; confirm it against the
# conversation file schema before relying on it.

set -euo pipefail

# Config
readonly FACADE_URL="http://localhost:42069/v1/chat/completions"
readonly CONVERSATION_FILE="${1:-conversations/coding-session.json}"
readonly OUTPUT_DIR="output"
readonly HISTORY_FILE="$OUTPUT_DIR/history.json"
readonly PREVIEW_CHARS=300
START_TURN="${START_TURN:-0}" # Optional: start from a specific turn (0-indexed)

# Colors. ANSI-C quoting stores the real escape bytes, so output can go
# through printf '%s' and backslashes in model output or file names are
# never interpreted as escape sequences.
readonly RED=$'\033[0;31m'
readonly GREEN=$'\033[0;32m'
readonly YELLOW=$'\033[1;33m'
readonly BLUE=$'\033[0;34m'
readonly CYAN=$'\033[0;36m'
readonly NC=$'\033[0m' # No Color

#######################################
# Print an error message to stderr and exit with status 1.
# Arguments: $* - message text
#######################################
die() {
  printf '%s\n' "${RED}Error: $*${NC}" >&2
  exit 1
}

#######################################
# Print the current time in milliseconds since the epoch.
# Falls back to whole seconds * 1000 when `date` does not support %N.
#######################################
now_ms() {
  local stamp
  stamp=$(date +%s%3N 2>/dev/null) || stamp=""
  if [[ "$stamp" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$stamp"
  else
    printf '%s\n' "$(($(date +%s) * 1000))"
  fi
}

#######################################
# Print a text preview limited to PREVIEW_CHARS characters, with "..."
# appended when the text was cut.
# Arguments: $1 - text
#######################################
preview() {
  local text="$1"
  if ((${#text} > PREVIEW_CHARS)); then
    printf '%s\n' "${text:0:PREVIEW_CHARS}..."
  else
    printf '%s\n' "$text"
  fi
}

#######################################
# Append one message to HISTORY_FILE.
# The content is streamed to jq on stdin rather than passed as an argument,
# so very large messages cannot hit the "argument list too long" limit.
# Globals:   HISTORY_FILE, OUTPUT_DIR (read)
# Arguments: $1 - role, $2 - content
# Returns:   non-zero if the history could not be rewritten
#######################################
add_to_history() {
  local role="$1"
  local content="$2"
  local temp_file

  # Same directory as the target so the final mv is a plain rename.
  temp_file=$(mktemp "$OUTPUT_DIR/.history.XXXXXX") || return 1

  if ! printf '%s' "$content" \
    | jq -Rs --arg role "$role" --slurpfile history "$HISTORY_FILE" \
      '$history[0] + [{"role": $role, "content": .}]' > "$temp_file"; then
    rm -f -- "$temp_file"
    return 1
  fi

  mv -- "$temp_file" "$HISTORY_FILE"
}

#######################################
# Send the message history to the facade and print the raw response body.
# Globals:   MODEL, FACADE_URL, OUTPUT_DIR (read)
# Arguments: $1 - path of the history JSON file
# Outputs:   response body on stdout; curl errors on stderr
# Returns:   exit status of jq or curl
#######################################
call_facade() {
  local history_file="$1"
  local payload_file="$OUTPUT_DIR/.payload.json"

  # Build the request payload and write to file
  # (avoids "argument list too long" error for large histories)
  jq -n \
    --arg model "$MODEL" \
    --slurpfile messages "$history_file" \
    '{
      model: $model,
      messages: $messages[0],
      max_tokens: 4096,
      stream: false
    }' > "$payload_file"

  # -S keeps curl quiet on success but lets it explain transport failures.
  curl -sS -X POST "$FACADE_URL" \
    -H "Content-Type: application/json" \
    --data-binary "@$payload_file"
}

# Create output directory
mkdir -p "$OUTPUT_DIR"

# Check required tools
command -v jq > /dev/null 2>&1 || die "jq is required"
command -v curl > /dev/null 2>&1 || die "curl is required"

# Check conversation file
[[ -f "$CONVERSATION_FILE" ]] || die "Conversation file not found: $CONVERSATION_FILE"

# Validate START_TURN; 10# avoids octal interpretation of values like "08".
[[ "$START_TURN" =~ ^[0-9]+$ ]] \
  || die "START_TURN must be a non-negative integer: $START_TURN"
START_TURN=$((10#$START_TURN))

# Extract model - priority: arg > env > JSON
if [[ -n "${2:-}" ]]; then
  MODEL="$2"
elif [[ -n "${MODEL:-}" ]]; then
  : # Use MODEL from environment
else
  # `// empty` keeps a missing field from becoming the literal string "null".
  MODEL=$(jq -r '.model // empty' "$CONVERSATION_FILE")
  [[ -n "$MODEL" ]] \
    || die "No model given (argument, MODEL, or .model in $CONVERSATION_FILE)"
fi

# Extract API URL - priority: arg > env
# NOTE(review): in the code visible here this value is only displayed in the
# banner; nothing sends it to the facade. Confirm whether that is intended.
if [[ -n "${3:-}" ]]; then
  API_URL_OVERRIDE="$3"
elif [[ -n "${API_URL:-}" ]]; then
  API_URL_OVERRIDE="$API_URL"
else
  API_URL_OVERRIDE=""
fi

TURN_COUNT=$(jq '.turns | length' "$CONVERSATION_FILE")

printf '%s\n' "${CYAN}═══════════════════════════════════════════════════════════════${NC}"
printf '%s\n' "${CYAN} Conversation Runner${NC}"
printf '%s\n' "${CYAN}═══════════════════════════════════════════════════════════════${NC}"
printf '%s\n' " Model: ${YELLOW}$MODEL${NC}"
printf '%s\n' " Turns: ${YELLOW}$TURN_COUNT${NC}"
printf '%s\n' " Facade URL: ${YELLOW}$FACADE_URL${NC}"
if [[ -n "$API_URL_OVERRIDE" ]]; then
  printf '%s\n' " Backend URL: ${YELLOW}$API_URL_OVERRIDE${NC}"
fi
printf '%s\n' " Conversation: ${YELLOW}$CONVERSATION_FILE${NC}"
printf '%s\n' "${CYAN}───────────────────────────────────────────────────────────────${NC}"
echo

# Initialize or resume conversation history
if ((START_TURN > 0)) && [[ -f "$HISTORY_FILE" ]]; then
  printf '%s\n' "${YELLOW}Resuming from turn $((START_TURN + 1)) with existing history${NC}"
  echo
else
  if ((START_TURN > 0)); then
    # Starting mid-conversation without earlier context is probably a mistake.
    printf '%s\n' "${YELLOW}Warning: START_TURN=$START_TURN but $HISTORY_FILE not found; starting with empty history${NC}" >&2
  fi
  # Fresh start
  echo '[]' > "$HISTORY_FILE"
fi

# Stats
TOTAL_PROMPT_TOKENS=0
TOTAL_COMPLETION_TOKENS=0
TOTAL_TOKENS=0
START_TIME=$(date +%s)

# Main loop
for ((i = START_TURN; i < TURN_COUNT; i++)); do
  # ---- begin reconstructed section (lost in the recovered source) ----
  TURN_NUM=$((i + 1))

  # NOTE(review): the turn schema is an assumption. A turn is accepted either
  # as a plain string or as an object with a "content" field - confirm
  # against the real conversation files and adjust the filter if needed.
  USER_CONTENT=$(jq -r --argjson i "$i" \
    '.turns[$i]
     | if type == "string" then .
       elif type == "object" then (.content // empty)
       else empty end' \
    "$CONVERSATION_FILE")
  [[ -n "$USER_CONTENT" ]] \
    || die "Turn $TURN_NUM has no user content in $CONVERSATION_FILE"

  printf '%s\n' "${BLUE}Turn $TURN_NUM/$TURN_COUNT${NC}"
  printf '%s\n' "${GREEN}User:${NC} $(preview "$USER_CONTENT")"
  echo

  # call_facade builds its payload from the history file, so the user message
  # has to be recorded before the request is made.
  # NOTE(review): if the request then fails, the history ends with this user
  # message; resuming with START_TURN=$i would append it a second time.
  add_to_history "user" "$USER_CONTENT"

  TURN_START_MS=$(now_ms)
  if ! RESPONSE=$(call_facade "$HISTORY_FILE"); then
    die "Request to $FACADE_URL failed"
  fi
  TURN_TIME_MS=$(($(now_ms) - TURN_START_MS))
  # ---- end reconstructed section ----

  # A non-JSON body (proxy error page, empty reply) gets a clear message
  # instead of a raw jq parse error.
  if ! jq empty > /dev/null 2>&1 <<< "$RESPONSE"; then
    die "Facade returned a non-JSON response"
  fi

  # Check for error
  if jq -e '.error' > /dev/null 2>&1 <<< "$RESPONSE"; then
    ERROR_MESSAGE=$(jq -r \
      '.error | if type == "object" then (.message // tostring) else tostring end' \
      <<< "$RESPONSE")
    printf '%s\n' "${RED}Error:${NC} $ERROR_MESSAGE" >&2
    printf '%s\n' "Stopping due to error." >&2
    exit 1
  fi

  # Extract assistant response. Refuse a missing/non-string content so the
  # literal text "null" is never appended to the history.
  if ! jq -e '.choices[0].message.content | type == "string"' \
    > /dev/null 2>&1 <<< "$RESPONSE"; then
    die "Response for turn $TURN_NUM has no assistant message content"
  fi
  ASSISTANT_CONTENT=$(jq -r '.choices[0].message.content' <<< "$RESPONSE")

  # Extract usage stats
  PROMPT_TOKENS=$(jq -r '.usage.prompt_tokens // 0' <<< "$RESPONSE")
  COMPLETION_TOKENS=$(jq -r '.usage.completion_tokens // 0' <<< "$RESPONSE")
  TOTAL_TURN_TOKENS=$((PROMPT_TOKENS + COMPLETION_TOKENS))

  # Accumulate stats
  TOTAL_PROMPT_TOKENS=$((TOTAL_PROMPT_TOKENS + PROMPT_TOKENS))
  TOTAL_COMPLETION_TOKENS=$((TOTAL_COMPLETION_TOKENS + COMPLETION_TOKENS))
  TOTAL_TOKENS=$((TOTAL_TOKENS + TOTAL_TURN_TOKENS))

  # Print assistant response (truncated by characters, so multi-byte text is
  # never cut in the middle of a character)
  printf '%s\n' "${YELLOW}Assistant:${NC} $(preview "$ASSISTANT_CONTENT")"
  echo

  # Print stats
  printf '%s\n' "${CYAN}Stats:${NC} prompt_tokens=${PROMPT_TOKENS}, completion_tokens=${COMPLETION_TOKENS}, time=${TURN_TIME_MS}ms"
  echo

  # Add assistant response to history
  add_to_history "assistant" "$ASSISTANT_CONTENT"

  # Save individual turn output
  printf -v TURN_LABEL '%03d' "$TURN_NUM"
  TURN_FILE="$OUTPUT_DIR/turn_${TURN_LABEL}.json"
  printf '%s\n' "$RESPONSE" > "$TURN_FILE"

  printf '%s\n' "${CYAN}───────────────────────────────────────────────────────────────${NC}"
  echo
done

# Final stats
END_TIME=$(date +%s)
ELAPSED=$((END_TIME - START_TIME))

printf '%s\n' "${GREEN}═══════════════════════════════════════════════════════════════${NC}"
printf '%s\n' "${GREEN} Conversation Complete${NC}"
printf '%s\n' "${GREEN}═══════════════════════════════════════════════════════════════${NC}"
printf '%s\n' " Total Turns: ${YELLOW}$TURN_COUNT${NC}"
printf '%s\n' " Total Prompt Tokens: ${YELLOW}$TOTAL_PROMPT_TOKENS${NC}"
printf '%s\n' " Total Completion: ${YELLOW}$TOTAL_COMPLETION_TOKENS${NC}"
printf '%s\n' " Total Tokens: ${YELLOW}$TOTAL_TOKENS${NC}"
printf '%s\n' " Elapsed Time: ${YELLOW}${ELAPSED}s${NC}"
printf '%s\n' " History saved to: ${YELLOW}$HISTORY_FILE${NC}"
printf '%s\n' "${GREEN}───────────────────────────────────────────────────────────────${NC}"

# Print token usage breakdown
echo
printf '%s\n' "${CYAN}Token Usage Summary:${NC}"
printf '%s\n' " Messages in history: $(jq 'length' "$HISTORY_FILE")"
printf '%s\n' " User messages: $(jq '[.[] | select(.role == "user")] | length' "$HISTORY_FILE")"
printf '%s\n' " Assistant messages: $(jq '[.[] | select(.role == "assistant")] | length' "$HISTORY_FILE")"