#!/bin/bash
#
# Analyze Context Size
#
# Loops through history.json and calculates what the actual token count would
# be without context paging compression, then compares that raw total against
# the prompt_tokens of the most recent request (output/turn_*.json) to show
# how effective context paging was.
#
# Usage: analyze-context.sh [history.json]
#   history.json defaults to output/history.json
#
# Requires: jq, and the ./token-counter binary (reads text on stdin, takes the
# encoding name as $1, prints a token count).

set -euo pipefail

HISTORY_FILE="${1:-output/history.json}"
TOKEN_COUNTER="./token-counter"
ENCODING="cl100k_base"
readonly CONTEXT_LIMIT=65536

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'

# Check dependencies
if ! command -v jq >/dev/null 2>&1; then
  echo -e "${RED}Error: jq is required but not installed${NC}"
  exit 1
fi
if [[ ! -f "$HISTORY_FILE" ]]; then
  echo -e "${RED}Error: History file not found: $HISTORY_FILE${NC}"
  exit 1
fi
if [[ ! -x "$TOKEN_COUNTER" ]]; then
  echo -e "${RED}Error: Token counter not found: $TOKEN_COUNTER${NC}"
  exit 1
fi

# Scratch file holding "role|tokens" per message. mktemp avoids a predictable
# /tmp name; the EXIT trap guarantees cleanup on every exit path, including
# set -e aborts (the old fixed /tmp path leaked on early failure).
ANALYSIS_FILE=$(mktemp)
cleanup() { rm -f -- "$ANALYSIS_FILE"; }
trap cleanup EXIT

# Count messages
MSG_COUNT=$(jq 'length' "$HISTORY_FILE")
USER_MSGS=$(jq '[.[] | select(.role == "user")] | length' "$HISTORY_FILE")
ASST_MSGS=$(jq '[.[] | select(.role == "assistant")] | length' "$HISTORY_FILE")

echo -e "${CYAN}═══════════════════════════════════════════════════════════════${NC}"
echo -e "${CYAN}  Context Analysis${NC}"
echo -e "${CYAN}═══════════════════════════════════════════════════════════════${NC}"
echo -e "  History file:   ${YELLOW}$HISTORY_FILE${NC}"
echo -e "  Total messages: ${YELLOW}$MSG_COUNT${NC} (User: $USER_MSGS, Assistant: $ASST_MSGS)"
echo -e "${CYAN}───────────────────────────────────────────────────────────────${NC}"
echo

echo -e "${BLUE}Processing messages...${NC}"
echo

# Pass 1: tokenize each message and record "role|tokens" lines.
# Process substitution (not a pipe) keeps the loop in the current shell; the
# loop's stdout is redirected as a whole into the scratch file.
while IFS= read -r msg; do
  ROLE=$(jq -r '.role' <<<"$msg")
  CONTENT=$(jq -r '.content' <<<"$msg")

  # Count tokens using the rust binary. printf (not echo) so backslash
  # sequences in the content are passed through literally.
  TOKEN_COUNT=$(printf '%s\n' "$CONTENT" | "$TOKEN_COUNTER" "$ENCODING" 2>/dev/null || echo "0")
  # Defend the arithmetic below against empty/garbage tokenizer output.
  [[ "$TOKEN_COUNT" =~ ^[0-9]+$ ]] || TOKEN_COUNT=0

  echo "$ROLE|$TOKEN_COUNT"
done < <(jq -c '.[]' "$HISTORY_FILE") > "$ANALYSIS_FILE"

# Pass 2: accumulate totals and print a sampled per-message table.
TOTAL_TOKENS=0
USER_TOKENS=0
ASST_TOKENS=0
MSG_NUM=0

echo -e "${BLUE}┌────────────────────────────────────────────────────────────────────────────┐${NC}"
echo -e "${BLUE}│ Msg # │ Role     │   Tokens │  Running Total │ % of Limit │${NC}"
echo -e "${BLUE}├────────────────────────────────────────────────────────────────────────────┤${NC}"

while IFS='|' read -r ROLE TOKEN_COUNT; do
  MSG_NUM=$((MSG_NUM + 1))
  TOTAL_TOKENS=$((TOTAL_TOKENS + TOKEN_COUNT))

  if [[ "$ROLE" == "user" ]]; then
    USER_TOKENS=$((USER_TOKENS + TOKEN_COUNT))
  else
    ASST_TOKENS=$((ASST_TOKENS + TOKEN_COUNT))
  fi

  # Percentage of the context limit this single message consumes.
  # (The column was previously labeled "% of Total" but has always been
  # computed against the limit — header fixed to match.)
  PCT=$(echo "scale=2; $TOKEN_COUNT * 100 / $CONTEXT_LIMIT" | bc 2>/dev/null || echo "0")

  # Print every 10th message and the last few
  if [[ $((MSG_NUM % 10)) -eq 0 ]] || [[ $MSG_NUM -gt $((MSG_COUNT - 5)) ]]; then
    printf "${CYAN}│ %5d │ %-8s │ %8d │ %14d │ %6.2f%% │${NC}\n" \
      "$MSG_NUM" "$ROLE" "$TOKEN_COUNT" "$TOTAL_TOKENS" "$PCT"
  fi
done < "$ANALYSIS_FILE"

echo -e "${BLUE}└────────────────────────────────────────────────────────────────────────────┘${NC}"
echo

# Final stats
WOULD_OVERFLOW=0
if [[ $TOTAL_TOKENS -gt $CONTEXT_LIMIT ]]; then
  WOULD_OVERFLOW=1
fi
OVERAGE=$((TOTAL_TOKENS - CONTEXT_LIMIT))
if [[ $OVERAGE -lt 0 ]]; then
  OVERAGE=0
fi

echo -e "${GREEN}═══════════════════════════════════════════════════════════════${NC}"
echo -e "${GREEN}  Summary${NC}"
echo -e "${GREEN}═══════════════════════════════════════════════════════════════${NC}"
echo -e "  Total Tokens:      ${YELLOW}$TOTAL_TOKENS${NC}"
echo -e "  User Tokens:       ${YELLOW}$USER_TOKENS${NC}"
echo -e "  Assistant Tokens:  ${YELLOW}$ASST_TOKENS${NC}"
echo -e "  Context Limit:     ${YELLOW}$CONTEXT_LIMIT${NC}"
echo

if [[ $WOULD_OVERFLOW -eq 1 ]]; then
  echo -e "  ${RED}⚠ WOULD HAVE OVERFLOWED!${NC}"
  echo -e "  Overage:        ${RED}$OVERAGE tokens${NC}"
  echo -e "  Over limit by:  ${RED}$(echo "scale=1; $OVERAGE * 100 / $CONTEXT_LIMIT" | bc)%${NC}"
else
  echo -e "  ${GREEN}✓ Within context limit${NC}"
  echo -e "  Headroom:       ${GREEN}$((CONTEXT_LIMIT - TOTAL_TOKENS)) tokens${NC}"
fi
echo

# Calculate what the facade actually sent (last turn's prompt_tokens).
# Find the most recently modified turn file via glob + -nt instead of
# parsing `ls -t` output (ls parsing breaks on unusual filenames).
LAST_TURN_FILE=""
for f in output/turn_*.json; do
  [[ -e "$f" ]] || continue
  if [[ -z "$LAST_TURN_FILE" || "$f" -nt "$LAST_TURN_FILE" ]]; then
    LAST_TURN_FILE="$f"
  fi
done

if [[ -n "$LAST_TURN_FILE" && -f "$LAST_TURN_FILE" ]]; then
  LAST_PROMPT_TOKENS=$(jq -r '.usage.prompt_tokens // 0' "$LAST_TURN_FILE")
  SAVED=$((TOTAL_TOKENS - LAST_PROMPT_TOKENS))

  echo -e "${CYAN}───────────────────────────────────────────────────────────────${NC}"
  echo -e "${CYAN}  Context Paging Effectiveness${NC}"
  echo -e "${CYAN}───────────────────────────────────────────────────────────────${NC}"
  echo -e "  Raw history tokens:   ${YELLOW}$TOTAL_TOKENS${NC}"
  echo -e "  Last request tokens:  ${YELLOW}$LAST_PROMPT_TOKENS${NC}"

  if [[ $SAVED -gt 0 ]]; then
    # SAVED > 0 implies TOTAL_TOKENS > 0, so the division is safe.
    SAVED_PCT=$(echo "scale=1; $SAVED * 100 / $TOTAL_TOKENS" | bc)
    echo -e "  Tokens saved:         ${GREEN}$SAVED (${SAVED_PCT}%)${NC}"
  else
    echo -e "  Tokens saved:         ${YELLOW}$SAVED${NC}"
  fi
fi

echo -e "${GREEN}───────────────────────────────────────────────────────────────${NC}"

# Scratch-file removal is handled by the EXIT trap above.