# Source code for toksum.cli

"""
Command-line interface for toksum.

This module provides a comprehensive command-line interface for the toksum library,
allowing users to count tokens and estimate costs for various LLM models directly
from the terminal.

The CLI supports:
    - Token counting for text input or files
    - Cost estimation with detailed breakdowns
    - Listing all supported models by provider
    - Verbose output with detailed information
    - Support for both input and output token pricing

Examples:
    Basic token counting:
    
    .. code-block:: bash
    
        toksum "Hello, world!" gpt-4
        toksum --file input.txt claude-3-opus-20240229
    
    Cost estimation:
    
    .. code-block:: bash
    
        toksum --cost "Your text here" gpt-4
        toksum --cost --output-tokens "Response text" gpt-4
    
    List supported models:
    
    .. code-block:: bash
    
        toksum --list-models
    
    Verbose output:
    
    .. code-block:: bash
    
        toksum --verbose --cost --file large_document.txt gpt-4

Functions:
    main: Main CLI entry point that handles argument parsing and execution
    list_models: Display all supported models organized by provider

The CLI provides comprehensive error handling and user-friendly output formatting
for both simple token counting and detailed cost analysis workflows.
"""

import argparse
import sys
from typing import List, Dict, Any

from .core import TokenCounter, count_tokens, get_supported_models, estimate_cost
from .exceptions import UnsupportedModelError, TokenizationError


def _build_parser() -> argparse.ArgumentParser:
    """Create the argument parser that describes the toksum command line."""
    parser = argparse.ArgumentParser(
        description="Count tokens for various LLM models",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  toksum "Hello, world!" gpt-4
  toksum --file input.txt claude-3-opus-20240229
  toksum --list-models
  toksum --cost "Your text here" gpt-4
        """
    )

    parser.add_argument(
        "text",
        nargs="?",
        help="Text to count tokens for (use --file to read from file)"
    )
    parser.add_argument(
        "model",
        nargs="?",
        help="Model name (e.g., gpt-4, claude-3-opus-20240229)"
    )
    parser.add_argument(
        "--file", "-f",
        help="Read text from file instead of command line argument"
    )
    parser.add_argument(
        "--list-models", "-l",
        action="store_true",
        help="List all supported models"
    )
    parser.add_argument(
        "--cost", "-c",
        action="store_true",
        help="Show cost estimation along with token count"
    )
    parser.add_argument(
        "--output-tokens",
        action="store_true",
        help="Calculate cost for output tokens instead of input tokens"
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Show verbose output"
    )
    return parser


def _read_text_file(path: str, verbose: bool) -> str:
    """Return the UTF-8 contents of *path*; report the error and exit(1) on failure."""
    try:
        with open(path, 'r', encoding='utf-8') as f:
            text = f.read()
        if verbose:
            print(f"Read {len(text)} characters from {path}")
        return text
    except FileNotFoundError:
        print(f"Error: File '{path}' not found", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"Error reading file: {e}", file=sys.stderr)
        sys.exit(1)


def _print_cost(token_count: int, model: str, args: argparse.Namespace) -> None:
    """Print the cost estimate for *token_count* tokens of *model*."""
    input_cost = estimate_cost(token_count, model, input_tokens=True)
    output_cost = estimate_cost(token_count, model, input_tokens=False)

    if input_cost > 0 or output_cost > 0:
        if args.verbose:
            # Verbose mode always shows both prices, regardless of --output-tokens.
            print(f"Estimated input cost: ${input_cost:.6f}")
            print(f"Estimated output cost: ${output_cost:.6f}")
        else:
            cost = output_cost if args.output_tokens else input_cost
            print(f"${cost:.6f}")
    elif args.verbose:
        print("Cost estimation not available for this model")


def main() -> None:
    """
    Main CLI entry point.

    Parses command-line arguments and executes the requested toksum
    functionality: token counting, optional cost estimation, or model listing.

    Command-line Arguments:
        text (str, optional): Text to count tokens for
        model (str, optional): Model name (required unless using --list-models)
        --file, -f (str): Read text from file instead of command line
        --list-models, -l: List all supported models by provider
        --cost, -c: Show cost estimation along with token count
        --output-tokens: Calculate cost for output tokens instead of input
        --verbose, -v: Show detailed output with additional information

    When ``--file`` is given together with a single positional argument, that
    argument is interpreted as the model name, so
    ``toksum --file input.txt gpt-4`` works as documented.

    Exit Codes:
        0: Success
        1: Error (unsupported model, file not found, tokenization failure,
           interruption, unexpected error)
        2: Invalid command-line usage (reported through ``parser.error``)

    Raises:
        SystemExit: On error conditions or user interruption

    Examples:
        .. code-block:: bash

            toksum "Hello, world!" gpt-4
            toksum --file document.txt claude-3-opus-20240229
            toksum --cost --verbose "Long text content" gpt-4
            toksum --cost --output-tokens "Response text" gpt-4
            toksum --list-models
    """
    parser = _build_parser()
    args = parser.parse_args()

    try:
        if args.list_models:
            list_models()
            return

        # Both positionals are optional, so argparse binds a lone positional to
        # ``text``. With --file the text comes from the file, which means that
        # lone positional is really the model name.
        model = args.model
        if model is None and args.file and args.text is not None:
            model = args.text

        if not model:
            parser.error("Model name is required unless using --list-models")

        # Get text input
        if args.file:
            text = _read_text_file(args.file, args.verbose)
        elif args.text:
            text = args.text
        else:
            parser.error("Either provide text as argument or use --file option")

        # Count tokens
        try:
            token_count = count_tokens(text, model)

            if args.verbose:
                print(f"Model: {model}")
                print(f"Text length: {len(text)} characters")
                print(f"Token count: {token_count}")
            else:
                print(token_count)

            # Show cost estimation if requested
            if args.cost:
                _print_cost(token_count, model, args)

        except UnsupportedModelError as e:
            print(f"Error: {e}", file=sys.stderr)
            if args.verbose:
                print("\nUse --list-models to see supported models", file=sys.stderr)
            sys.exit(1)
        except TokenizationError as e:
            print(f"Error: {e}", file=sys.stderr)
            sys.exit(1)

    except KeyboardInterrupt:
        print("\nInterrupted by user", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report anything unforeseen instead of a raw traceback.
        print(f"Unexpected error: {e}", file=sys.stderr)
        if args.verbose:
            import traceback
            traceback.print_exc()
        sys.exit(1)
def list_models() -> None:
    """
    Print every supported model, grouped by provider.

    The provider-to-models mapping is obtained from ``get_supported_models()``.
    For each provider a header with the upper-cased provider name and its
    model count is printed, followed by that provider's models in sorted
    order. A final line reports the total number of models across all
    providers.

    Note:
        This is the function ``main`` calls when the ``--list-models`` flag
        is given.
    """
    catalog = get_supported_models()
    heavy_rule = "=" * 50
    light_rule = "-" * 30

    print("Supported models:")
    print(heavy_rule)

    # Keep a running total while walking the providers instead of making a
    # second pass over the mapping afterwards.
    total = 0
    for provider, names in catalog.items():
        count = len(names)
        total += count
        print(f"\n{provider.upper()} ({count} models):")
        print(light_rule)
        for name in sorted(names):
            print(f" {name}")

    print(f"\nTotal: {total} models")
# Run the CLI only when this module is executed as a script, not on import.
if __name__ == "__main__": main()