#!/bin/bash
# Conversation Runner - Loops through turns and calls the vLLM facade
#
# Usage:
#   ./conversation-runner.sh [conversation-file]
#   ./conversation-runner.sh <conversation-file> <model> <api-url>
#   MODEL=model_name API_URL=http://... ./conversation-runner.sh [conversation-file]
#
# Arguments:
#   $1 - Conversation file (default: conversations/coding-session.json)
#   $2 - Model name (optional, overrides JSON/env)
#   $3 - API URL (optional, overrides JSON/env)
#
# Environment variables:
#   MODEL      - Model name (overrides JSON, overridden by arg)
#   API_URL    - Endpoint URL (overrides JSON, overridden by arg)
#   START_TURN - 0-indexed turn to start from (default: 0)

set -e

# Config
# FACADE_URL may be supplied through the environment; when it is unset the
# local facade endpoint below is used.
FACADE_URL="${FACADE_URL:-http://localhost:42069/v1/chat/completions}"
CONVERSATION_FILE="${1:-conversations/coding-session.json}"
OUTPUT_DIR="output"
HISTORY_FILE="$OUTPUT_DIR/history.json"
START_TURN="${START_TURN:-0}" # Optional: start from a specific turn (0-indexed)

# Colors (kept as literal escape text; they are expanded by `echo -e` / `%b`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color

# START_TURN is later fed to arithmetic contexts ([[ -gt ]], $(( )), for (( ))),
# where a non-numeric value would be evaluated as an expression. Reject anything
# that is not a plain non-negative integer before it gets that far.
if [[ ! "$START_TURN" =~ ^[0-9]+$ ]]; then
  printf '%bError: START_TURN must be a non-negative integer, got: %s%b\n' \
    "$RED" "$START_TURN" "$NC" >&2
  exit 1
fi
# Force base 10 so values such as "08" are not rejected as invalid octal.
START_TURN=$((10#$START_TURN))

# Create output directory
mkdir -p "$OUTPUT_DIR"

# Check required tools: jq builds and parses the JSON, curl sends the request.
for required_tool in jq curl; do
  if ! command -v "$required_tool" &> /dev/null; then
    echo -e "${RED}Error: $required_tool is required${NC}" >&2
    exit 1
  fi
done

# Check conversation file
if [[ ! -f "$CONVERSATION_FILE" ]]; then
  # %s keeps backslashes in the path from being treated as escape sequences.
  printf '%bError: Conversation file not found: %s%b\n' \
    "$RED" "$CONVERSATION_FILE" "$NC" >&2
  exit 1
fi

# The file is queried with .model / .turns later on, so it has to parse as a
# JSON object; fail here with a clear message instead of a raw jq error later.
if ! jq -e 'type == "object"' "$CONVERSATION_FILE" > /dev/null 2>&1; then
  printf '%bError: Conversation file is not a valid JSON object: %s%b\n' \
    "$RED" "$CONVERSATION_FILE" "$NC" >&2
  exit 1
fi

# Extract model - priority: arg > env > JSON
if [[ -n "${2:-}" ]]; then
  MODEL="$2"
elif [[ -n "${MODEL:-}" ]]; then
  : # Use MODEL from environment
else
  # `// empty` makes a missing or null .model come back as an empty string
  # rather than the literal text "null", which would be sent as the model name.
  MODEL=$(jq -r '.model // empty' "$CONVERSATION_FILE")
  if [[ -z "$MODEL" ]]; then
    printf '%bError: No model given (argument, MODEL env, or .model in %s)%b\n' \
      "$RED" "$CONVERSATION_FILE" "$NC" >&2
    exit 1
  fi
fi

# Extract API URL - priority: arg > env > JSON
if [[ -n "${3:-}" ]]; then
  API_URL_OVERRIDE="$3"
elif [[ -n "${API_URL:-}" ]]; then
  API_URL_OVERRIDE="$API_URL"
else
  API_URL_OVERRIDE=""
fi

# `length` also succeeds on strings and objects, which would produce a
# meaningless turn count, so insist on an array first.
if ! jq -e '.turns | type == "array"' "$CONVERSATION_FILE" > /dev/null 2>&1; then
  printf '%bError: .turns must be an array in %s%b\n' \
    "$RED" "$CONVERSATION_FILE" "$NC" >&2
  exit 1
fi
TURN_COUNT=$(jq '.turns | length' "$CONVERSATION_FILE")

# Startup banner. Colour variables hold literal escape text and are expanded
# with %b; user-supplied values go through %s so that backslashes in a model
# name, URL or path are printed verbatim instead of being interpreted.
printf '%b\n' "${CYAN}═══════════════════════════════════════════════════════════════${NC}"
printf '%b\n' "${CYAN} Conversation Runner${NC}"
printf '%b\n' "${CYAN}═══════════════════════════════════════════════════════════════${NC}"
printf ' Model: %b%s%b\n' "$YELLOW" "$MODEL" "$NC"
printf ' Turns: %b%s%b\n' "$YELLOW" "$TURN_COUNT" "$NC"
printf ' Facade URL: %b%s%b\n' "$YELLOW" "$FACADE_URL" "$NC"
if [[ -n "$API_URL_OVERRIDE" ]]; then
  printf ' Backend URL: %b%s%b\n' "$YELLOW" "$API_URL_OVERRIDE" "$NC"
fi
printf ' Conversation: %b%s%b\n' "$YELLOW" "$CONVERSATION_FILE" "$NC"
printf '%b\n' "${CYAN}───────────────────────────────────────────────────────────────${NC}"
echo

# Initialize or resume conversation history
if [[ "$START_TURN" -gt 0 ]] && [[ -f "$HISTORY_FILE" ]]; then
  # Every later append assumes the history is a JSON array; check it now so a
  # corrupt file is reported before any request is made.
  if ! jq -e 'type == "array"' "$HISTORY_FILE" > /dev/null 2>&1; then
    printf '%bError: Existing history is not a JSON array: %s%b\n' \
      "$RED" "$HISTORY_FILE" "$NC" >&2
    exit 1
  fi
  echo -e "${YELLOW}Resuming from turn $((START_TURN + 1)) with existing history${NC}"
  echo
else
  if [[ "$START_TURN" -gt 0 ]]; then
    # Turns before START_TURN are skipped, so without a history file the
    # conversation continues with no prior context. Say so explicitly.
    printf '%bWarning: START_TURN=%s but no history found at %s; starting with empty history%b\n' \
      "$YELLOW" "$START_TURN" "$HISTORY_FILE" "$NC" >&2
  fi
  # Fresh start
  echo '[]' > "$HISTORY_FILE"
fi

# Stats (accumulated over the turns executed in this run)
TOTAL_PROMPT_TOKENS=0
TOTAL_COMPLETION_TOKENS=0
TOTAL_TOKENS=0
START_TIME=$(date +%s)

#######################################
# Append one chat message to the JSON array stored in HISTORY_FILE.
# Globals:   HISTORY_FILE (read, rewritten)
# Arguments: $1 - message role (e.g. "user", "assistant")
#            $2 - message content
# Returns:   0 on success; non-zero if the temp file cannot be created or jq
#            fails, in which case HISTORY_FILE is left untouched.
#######################################
add_to_history() {
  local role="$1"
  local content="$2"
  local temp_file

  # Declared separately from the assignment so a mktemp failure is not masked
  # by `local`. The temp file lives next to the history file so the final mv
  # is a rename within one directory rather than a cross-filesystem copy.
  temp_file=$(mktemp "${HISTORY_FILE}.XXXXXX") || return 1

  # The content is streamed on stdin (-R raw, -s slurp into one string) rather
  # than passed with --arg, so long messages cannot exceed the argv size limit.
  if ! printf '%s' "$content" \
    | jq -Rs --arg role "$role" --slurpfile history "$HISTORY_FILE" \
      '$history[0] + [{"role": $role, "content": .}]' > "$temp_file"; then
    rm -f -- "$temp_file"
    return 1
  fi

  mv -- "$temp_file" "$HISTORY_FILE"
}

#######################################
# Send the conversation history to the facade as a chat-completion request.
# Globals:   OUTPUT_DIR, MODEL, FACADE_URL (read)
# Arguments: $1 - path to a file holding the JSON array of messages
# Outputs:   the raw HTTP response body on stdout
# Returns:   non-zero if the payload cannot be built or curl fails
#######################################
call_facade() {
  local history_file="$1"
  local payload_file="$OUTPUT_DIR/.payload.json"

  # Build the request payload and write to file
  # (avoids "argument list too long" error for large histories)
  # The explicit `|| return` matters: this function is called inside $(...),
  # where bash clears errexit, so a jq failure would otherwise go unnoticed
  # and curl would post a truncated or empty payload.
  jq -n \
    --arg model "$MODEL" \
    --slurpfile messages "$history_file" \
    '{
      model: $model,
      messages: $messages[0],
      max_tokens: 4096,
      stream: false
    }' > "$payload_file" || return 1

  # Make the request using the file.
  # -S keeps transport errors (connection refused, DNS, ...) visible on stderr
  # while -s still hides the progress meter. --fail is deliberately not used:
  # the caller reads the JSON body of error responses.
  curl -sS -X POST "$FACADE_URL" \
    -H "Content-Type: application/json" \
    --data-binary "@$payload_file"
}

# Main loop

# Current time in nanoseconds. Where `date` does not support %N the output is
# not purely numeric; in that case fall back to whole-second resolution.
now_ns() {
  local stamp
  stamp=$(date +%s%N)
  if [[ ! "$stamp" =~ ^[0-9]+$ ]]; then
    stamp="$(date +%s)000000000"
  fi
  printf '%s' "$stamp"
}

# Remove the newest entry from HISTORY_FILE. Used when a turn fails after its
# user message was already appended, so that re-running with START_TURN set to
# the failed turn does not send that user message twice.
rollback_last_message() {
  local tmp
  tmp=$(mktemp "${HISTORY_FILE}.XXXXXX") || return 1
  if jq '.[:-1]' "$HISTORY_FILE" > "$tmp"; then
    mv -- "$tmp" "$HISTORY_FILE"
  else
    rm -f -- "$tmp"
    return 1
  fi
}

# Report a failed turn ($1 = message), undo the pending user message, stop.
fail_turn() {
  printf '%bError:%b %s\n' "$RED" "$NC" "$1" >&2
  rollback_last_message || true # best effort; we are exiting anyway
  echo "Stopping due to error." >&2
  exit 1
}

for ((i = START_TURN; i < TURN_COUNT; i++)); do
  TURN_NUM=$((i + 1))

  # Get the turn content
  TURN_CONTENT=$(jq -r --argjson idx "$i" '.turns[$idx]' "$CONVERSATION_FILE")

  # Print turn header
  echo -e "${BLUE}┌─────────────────────────────────────────────────────────────┐${NC}"
  echo -e "${BLUE}│ Turn ${YELLOW}$TURN_NUM${BLUE} of ${YELLOW}$TURN_COUNT${BLUE} │${NC}"
  echo -e "${BLUE}└─────────────────────────────────────────────────────────────┘${NC}"

  # Print user prompt (truncated if too long). Substring expansion counts
  # characters, matching the ${#...} test, and never splits a multibyte char.
  PROMPT_PREVIEW="${TURN_CONTENT:0:200}"
  if [[ ${#TURN_CONTENT} -gt 200 ]]; then
    PROMPT_PREVIEW="${PROMPT_PREVIEW}..."
  fi
  # %s prints the prompt verbatim; echo -e would expand "\n", "\t", ... in it.
  printf '%bUser:%b %s\n' "$GREEN" "$NC" "$PROMPT_PREVIEW"
  echo

  # Add user message to history
  add_to_history "user" "$TURN_CONTENT"

  # Call the facade. Testing the assignment directly lets a transport failure
  # be reported instead of silently terminating the script under `set -e`.
  TURN_START=$(now_ns)
  if ! RESPONSE=$(call_facade "$HISTORY_FILE"); then
    fail_turn "Request to $FACADE_URL failed"
  fi
  TURN_END=$(now_ns)
  TURN_TIME_MS=$(((TURN_END - TURN_START) / 1000000))

  # An empty or non-JSON body would otherwise slip past the .error check and
  # break the token arithmetic below.
  if ! jq -e 'type == "object"' <<< "$RESPONSE" > /dev/null 2>&1; then
    fail_turn "Facade returned an empty or non-JSON response"
  fi

  # Check for error (.error may be an object with .message or a bare value)
  if jq -e '.error' <<< "$RESPONSE" > /dev/null 2>&1; then
    fail_turn "$(jq -r '.error | if type == "object" then (.message // tostring) else tostring end' <<< "$RESPONSE")"
  fi

  # A response with neither an error nor a message cannot be recorded.
  if ! jq -e '.choices[0].message | type == "object"' <<< "$RESPONSE" > /dev/null 2>&1; then
    fail_turn "Response contains no choices[0].message"
  fi

  # Extract assistant response; a null content becomes an empty string rather
  # than the literal text "null".
  ASSISTANT_CONTENT=$(jq -r '.choices[0].message.content // empty' <<< "$RESPONSE")

  # Extract usage stats
  PROMPT_TOKENS=$(jq -r '.usage.prompt_tokens // 0' <<< "$RESPONSE")
  COMPLETION_TOKENS=$(jq -r '.usage.completion_tokens // 0' <<< "$RESPONSE")
  TOTAL_TURN_TOKENS=$((PROMPT_TOKENS + COMPLETION_TOKENS))

  # Accumulate stats
  TOTAL_PROMPT_TOKENS=$((TOTAL_PROMPT_TOKENS + PROMPT_TOKENS))
  TOTAL_COMPLETION_TOKENS=$((TOTAL_COMPLETION_TOKENS + COMPLETION_TOKENS))
  TOTAL_TOKENS=$((TOTAL_TOKENS + TOTAL_TURN_TOKENS))

  # Print assistant response (truncated)
  RESPONSE_PREVIEW="${ASSISTANT_CONTENT:0:300}"
  if [[ ${#ASSISTANT_CONTENT} -gt 300 ]]; then
    RESPONSE_PREVIEW="${RESPONSE_PREVIEW}..."
  fi
  printf '%bAssistant:%b %s\n' "$YELLOW" "$NC" "$RESPONSE_PREVIEW"
  echo

  # Print stats
  echo -e "${CYAN}Stats:${NC} prompt_tokens=${PROMPT_TOKENS}, completion_tokens=${COMPLETION_TOKENS}, time=${TURN_TIME_MS}ms"
  echo

  # Add assistant response to history
  add_to_history "assistant" "$ASSISTANT_CONTENT"

  # Save individual turn output
  TURN_FILE="$OUTPUT_DIR/turn_$(printf '%03d' "$TURN_NUM").json"
  printf '%s\n' "$RESPONSE" > "$TURN_FILE"

  echo -e "${CYAN}───────────────────────────────────────────────────────────────${NC}"
  echo
done

# Final stats: wall-clock duration of the run, in whole seconds
END_TIME=$(date +%s)
ELAPSED=$((END_TIME - START_TIME))

# Closing report. Each array element is one output line; %b expands the
# colour escape text stored in the colour variables.
summary_lines=(
  "${GREEN}═══════════════════════════════════════════════════════════════${NC}"
  "${GREEN} Conversation Complete${NC}"
  "${GREEN}═══════════════════════════════════════════════════════════════${NC}"
  " Total Turns: ${YELLOW}$TURN_COUNT${NC}"
  " Total Prompt Tokens: ${YELLOW}$TOTAL_PROMPT_TOKENS${NC}"
  " Total Completion: ${YELLOW}$TOTAL_COMPLETION_TOKENS${NC}"
  " Total Tokens: ${YELLOW}$TOTAL_TOKENS${NC}"
  " Elapsed Time: ${YELLOW}${ELAPSED}s${NC}"
  " History saved to: ${YELLOW}$HISTORY_FILE${NC}"
  "${GREEN}───────────────────────────────────────────────────────────────${NC}"
)
printf '%b\n' "${summary_lines[@]}"

# Message counts, read back from the saved history file
printf '\n'
printf '%b\n' "${CYAN}Token Usage Summary:${NC}"
printf '%b\n' " Messages in history: $(jq 'length' "$HISTORY_FILE")"
printf '%b\n' " User messages: $(jq '[.[] | select(.role == "user")] | length' "$HISTORY_FILE")"
printf '%b\n' " Assistant messages: $(jq '[.[] | select(.role == "assistant")] | length' "$HISTORY_FILE")"