Files
context-paging/analyze-context.sh
2026-03-28 09:01:07 +00:00

159 lines
6.6 KiB
Bash
Executable File

#!/bin/bash
#
# analyze-context.sh — walk a history.json transcript and report what the
# raw token count would be WITHOUT context-paging compression.
#
# Usage: analyze-context.sh [history-file]
#   history-file defaults to output/history.json
set -e

# Inputs / knobs
HISTORY_FILE="${1:-output/history.json}"  # conversation transcript (JSON array of {role, content})
TOKEN_COUNTER="./token-counter"           # external tokenizer binary (rust)
ENCODING="cl100k_base"                    # tokenizer encoding name

# ANSI color codes, stored as literal backslash sequences and expanded
# later by `echo -e` / printf format strings.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
# ── Dependency checks ────────────────────────────────────────────
# Fail fast if inputs are missing. Diagnostics go to stderr (>&2) so
# they are not lost when stdout is piped or redirected.
if [[ ! -f "$HISTORY_FILE" ]]; then
  echo -e "${RED}Error: History file not found: $HISTORY_FILE${NC}" >&2
  exit 1
fi
if [[ ! -x "$TOKEN_COUNTER" ]]; then
  echo -e "${RED}Error: Token counter not found: $TOKEN_COUNTER${NC}" >&2
  exit 1
fi
# The rest of the script silently depends on jq and bc — verify up front.
for tool in jq bc; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo -e "${RED}Error: required tool not found: $tool${NC}" >&2
    exit 1
  fi
done
# ── Message counts ───────────────────────────────────────────────
# One jq pass computes all three counts (total / user / assistant)
# instead of re-reading the history file three times.
read -r MSG_COUNT USER_MSGS ASST_MSGS < <(jq -r '
  [ length,
    ([.[] | select(.role == "user")]      | length),
    ([.[] | select(.role == "assistant")] | length) ]
  | @tsv' "$HISTORY_FILE")
echo -e "${CYAN}═══════════════════════════════════════════════════════════════${NC}"
echo -e "${CYAN} Context Analysis${NC}"
echo -e "${CYAN}═══════════════════════════════════════════════════════════════${NC}"
echo -e " History file: ${YELLOW}$HISTORY_FILE${NC}"
echo -e " Total messages: ${YELLOW}$MSG_COUNT${NC} (User: $USER_MSGS, Assistant: $ASST_MSGS)"
echo -e "${CYAN}───────────────────────────────────────────────────────────────${NC}"
echo
# Running totals for the tally pass (re-initialized again before the
# pass itself, below — kept here so the variables always exist).
TOTAL_TOKENS=0
USER_TOKENS=0
ASST_TOKENS=0
TURN_NUM=0
# Per-turn tracking arrays.
# NOTE(review): TURN_NUM / TURN_TOKENS / TURN_ROLES are never read in the
# visible part of this file — confirm they are unused further down before
# removing them.
declare -a TURN_TOKENS TURN_ROLES
echo -e "${BLUE}Processing messages...${NC}"
echo
# ── Tokenize pass ────────────────────────────────────────────────
# Emit one "role|tokens" line per message into a temp file; the tally
# pass below reads it back (the pipeline's while-loop runs in a
# subshell, so accumulating directly here would lose the totals).
# NOTE(review): /tmp/token-analysis.txt is a predictable path shared
# with the tally/cleanup steps — consider switching all three to mktemp.
jq -c '.[]' "$HISTORY_FILE" | while IFS= read -r msg; do
  ROLE=$(jq -r '.role' <<<"$msg")
  # '// ""' keeps a null/missing content field from being counted as
  # the literal text "null" (jq -r prints null as "null").
  CONTENT=$(jq -r '.content // ""' <<<"$msg")
  # printf, not echo: content beginning with "-n"/"-e" would be eaten
  # by echo as an option. Fall back to 0 if the counter fails.
  TOKEN_COUNT=$(printf '%s\n' "$CONTENT" | "$TOKEN_COUNTER" "$ENCODING" 2>/dev/null || echo "0")
  echo "$ROLE|$TOKEN_COUNT"
done > /tmp/token-analysis.txt
# ── Tally pass ───────────────────────────────────────────────────
# Context window size in tokens. Hoisted above the loop so the per-message
# percentage uses the same constant as the overflow check (previously the
# loop hard-coded 65536 while the check used this variable).
CONTEXT_LIMIT=65536
TOTAL_TOKENS=0
USER_TOKENS=0
ASST_TOKENS=0
MSG_NUM=0
echo -e "${BLUE}┌────────────────────────────────────────────────────────────────────────────┐${NC}"
echo -e "${BLUE}│ Msg # │ Role │ Tokens │ Running Total │ % of Total │${NC}"
echo -e "${BLUE}├────────────────────────────────────────────────────────────────────────────┤${NC}"
while IFS='|' read -r ROLE TOKEN_COUNT; do
  MSG_NUM=$((MSG_NUM + 1))
  TOTAL_TOKENS=$((TOTAL_TOKENS + TOKEN_COUNT))
  if [[ "$ROLE" == "user" ]]; then
    USER_TOKENS=$((USER_TOKENS + TOKEN_COUNT))
  else
    ASST_TOKENS=$((ASST_TOKENS + TOKEN_COUNT))
  fi
  # Message size as a percentage of the context window; if bc is
  # unavailable/fails, fall back to 0 rather than aborting.
  PCT=$(echo "scale=2; $TOKEN_COUNT * 100 / $CONTEXT_LIMIT" | bc 2>/dev/null || echo "0")
  # Keep the table short: print every 10th message plus the last few.
  if [[ $((MSG_NUM % 10)) -eq 0 ]] || [[ $MSG_NUM -gt $((MSG_COUNT - 5)) ]]; then
    printf "${CYAN}│ %5d │ %-8s │ %8d │ %14d │ %6.2f%% │${NC}\n" \
      "$MSG_NUM" "$ROLE" "$TOKEN_COUNT" "$TOTAL_TOKENS" "$PCT"
  fi
done < /tmp/token-analysis.txt
echo -e "${BLUE}└────────────────────────────────────────────────────────────────────────────┘${NC}"
echo

# ── Overflow check ───────────────────────────────────────────────
WOULD_OVERFLOW=0
if [[ $TOTAL_TOKENS -gt $CONTEXT_LIMIT ]]; then
  WOULD_OVERFLOW=1
fi
# Tokens over the limit; clamped at 0 when within budget.
OVERAGE=$((TOTAL_TOKENS - CONTEXT_LIMIT))
if [[ $OVERAGE -lt 0 ]]; then
  OVERAGE=0
fi
# ── Summary ──────────────────────────────────────────────────────
echo -e "${GREEN}═══════════════════════════════════════════════════════════════${NC}"
echo -e "${GREEN} Summary${NC}"
echo -e "${GREEN}═══════════════════════════════════════════════════════════════${NC}"
echo -e " Total Tokens: ${YELLOW}$TOTAL_TOKENS${NC}"
echo -e " User Tokens: ${YELLOW}$USER_TOKENS${NC}"
echo -e " Assistant Tokens: ${YELLOW}$ASST_TOKENS${NC}"
echo -e " Context Limit: ${YELLOW}$CONTEXT_LIMIT${NC}"
echo
if [[ $WOULD_OVERFLOW -eq 1 ]]; then
  echo -e " ${RED}⚠ WOULD HAVE OVERFLOWED!${NC}"
  echo -e " Overage: ${RED}$OVERAGE tokens${NC}"
  # Guard the bc call the same way the tally pass does, so a missing/failing
  # bc prints "0%" instead of a blank percentage.
  echo -e " Over limit by: ${RED}$(echo "scale=1; $OVERAGE * 100 / $CONTEXT_LIMIT" | bc 2>/dev/null || echo "0")%${NC}"
else
  echo -e " ${GREEN}✓ Within context limit${NC}"
  echo -e " Headroom: ${GREEN}$((CONTEXT_LIMIT - TOTAL_TOKENS)) tokens${NC}"
fi
echo
# ── Context paging effectiveness ─────────────────────────────────
# Compare the raw history size against what the facade actually sent on
# the most recent turn (the newest output/turn_*.json's prompt_tokens).
# Newest-by-mtime is found with a glob + `-nt` comparison instead of
# parsing `ls -t` output, which breaks on unusual filenames.
LAST_TURN_FILE=""
for f in output/turn_*.json; do
  [[ -e "$f" ]] || continue   # glob matched nothing: skip the literal pattern
  if [[ -z "$LAST_TURN_FILE" || "$f" -nt "$LAST_TURN_FILE" ]]; then
    LAST_TURN_FILE="$f"
  fi
done
if [[ -f "$LAST_TURN_FILE" ]]; then
  LAST_PROMPT_TOKENS=$(jq -r '.usage.prompt_tokens // 0' "$LAST_TURN_FILE")
  SAVED=$((TOTAL_TOKENS - LAST_PROMPT_TOKENS))
  echo -e "${CYAN}───────────────────────────────────────────────────────────────${NC}"
  echo -e "${CYAN} Context Paging Effectiveness${NC}"
  echo -e "${CYAN}───────────────────────────────────────────────────────────────${NC}"
  echo -e " Raw history tokens: ${YELLOW}$TOTAL_TOKENS${NC}"
  echo -e " Last request tokens: ${YELLOW}$LAST_PROMPT_TOKENS${NC}"
  if [[ $SAVED -gt 0 ]]; then
    # SAVED > 0 implies TOTAL_TOKENS > 0, so no division by zero; still
    # guard bc like the other call sites so a failure prints 0, not blank.
    SAVED_PCT=$(echo "scale=1; $SAVED * 100 / $TOTAL_TOKENS" | bc 2>/dev/null || echo "0")
    echo -e " Tokens saved: ${GREEN}$SAVED (${SAVED_PCT}%)${NC}"
  else
    echo -e " Tokens saved: ${YELLOW}$SAVED${NC}"
  fi
fi
echo -e "${GREEN}───────────────────────────────────────────────────────────────${NC}"
# Cleanup the intermediate file written by the tokenize pass.
rm -f /tmp/token-analysis.txt