# Source code for toksum.cli

"""
Command-line interface for toksum.

This module provides a comprehensive command-line interface for the toksum library,
allowing users to count tokens and estimate costs for various LLM models directly
from the terminal.

The CLI supports:
    - Token counting for text input or files
    - Cost estimation with detailed breakdowns
    - Listing all supported models by provider
    - Verbose output with detailed information
    - Support for both input and output token pricing

Examples:
    Basic token counting:
    
    .. code-block:: bash
    
        toksum "Hello, world!" gpt-4
        toksum --file input.txt claude-3-opus-20240229
    
    Cost estimation:
    
    .. code-block:: bash
    
        toksum --cost "Your text here" gpt-4
        toksum --cost --output-tokens "Response text" gpt-4
    
    List supported models:
    
    .. code-block:: bash
    
        toksum --list-models
    
    Verbose output:
    
    .. code-block:: bash
    
        toksum --verbose --cost --file large_document.txt gpt-4

Functions:
    main: Main CLI entry point that handles argument parsing and execution
    list_models: Display all supported models organized by provider

The CLI provides comprehensive error handling and user-friendly output formatting
for both simple token counting and detailed cost analysis workflows.
"""

import argparse
import sys
from typing import List, Dict, Any

from .core import TokenCounter, count_tokens, get_supported_models, estimate_cost
from .exceptions import UnsupportedModelError, TokenizationError



# [docs]
def main() -> None:
    """
    Main CLI entry point.
    
    Parses command-line arguments and executes the appropriate toksum functionality.
    Supports token counting, cost estimation, model listing, and file input processing.
    
    The function handles:
        - Argument parsing and validation
        - Text input from command line or file
        - Token counting for specified models
        - Cost estimation with input/output token differentiation
        - Model listing with provider organization
        - Comprehensive error handling and user feedback
        - Verbose output formatting
    
    Command-line Arguments:
        text (str, optional): Text to count tokens for
        model (str, optional): Model name (required unless using --list-models)
        --file, -f (str): Read text from file instead of command line
        --list-models, -l: List all supported models by provider
        --cost, -c: Show cost estimation along with token count
        --output-tokens: Calculate cost for output tokens instead of input
            (only affects non-verbose output; verbose output prints both costs)
        --verbose, -v: Show detailed output with additional information
    
    When ``--file`` is given together with a single positional argument, that
    argument is interpreted as the model name, so
    ``toksum --file input.txt gpt-4`` works as documented.
    
    Exit Codes:
        0: Success
        1: Error (unsupported model, file not found, tokenization failure, etc.)
        2: Usage error reported through ``parser.error`` (missing model or text)
    
    Raises:
        SystemExit: On error conditions or user interruption
    
    Examples:
        Basic usage:
        
        .. code-block:: bash
        
            toksum "Hello, world!" gpt-4
            toksum --file document.txt claude-3-opus-20240229
        
        With cost estimation:
        
        .. code-block:: bash
        
            toksum --cost --verbose "Long text content" gpt-4
            toksum --cost --output-tokens "Response text" gpt-4
        
        List models:
        
        .. code-block:: bash
        
            toksum --list-models
    """
    parser = argparse.ArgumentParser(
        description="Count tokens for various LLM models",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  toksum "Hello, world!" gpt-4
  toksum --file input.txt claude-3-opus-20240229
  toksum --list-models
  toksum --cost "Your text here" gpt-4
        """
    )
    
    parser.add_argument(
        "text",
        nargs="?",
        help="Text to count tokens for (use --file to read from file)"
    )
    
    parser.add_argument(
        "model",
        nargs="?",
        help="Model name (e.g., gpt-4, claude-3-opus-20240229)"
    )
    
    parser.add_argument(
        "--file", "-f",
        help="Read text from file instead of command line argument"
    )
    
    parser.add_argument(
        "--list-models", "-l",
        action="store_true",
        help="List all supported models"
    )
    
    parser.add_argument(
        "--cost", "-c",
        action="store_true",
        help="Show cost estimation along with token count"
    )
    
    parser.add_argument(
        "--output-tokens",
        action="store_true",
        help="Calculate cost for output tokens instead of input tokens"
    )
    
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Show verbose output"
    )
    
    args = parser.parse_args()
    
    try:
        if args.list_models:
            list_models()
            return
        
        inline_text = args.text
        model = args.model
        
        # Both positionals are optional, so argparse fills them left to right:
        # ``toksum --file input.txt gpt-4`` binds "gpt-4" to ``text`` and leaves
        # ``model`` empty. With --file the text comes from the file, so a lone
        # positional can only be the model name.
        if args.file and model is None and inline_text is not None:
            model, inline_text = inline_text, None
        
        if not model:
            parser.error("Model name is required unless using --list-models")
        
        # Get text input
        if args.file:
            try:
                with open(args.file, 'r', encoding='utf-8') as f:
                    text = f.read()
                if args.verbose:
                    print(f"Read {len(text)} characters from {args.file}")
            except FileNotFoundError:
                print(f"Error: File '{args.file}' not found", file=sys.stderr)
                sys.exit(1)
            except Exception as e:
                print(f"Error reading file: {e}", file=sys.stderr)
                sys.exit(1)
        elif inline_text is not None:
            # Compare against None rather than truthiness so an explicitly
            # supplied empty string is still treated as provided input.
            text = inline_text
        else:
            parser.error("Either provide text as argument or use --file option")
        
        # Count tokens
        try:
            token_count = count_tokens(text, model)
            
            if args.verbose:
                print(f"Model: {model}")
                print(f"Text length: {len(text)} characters")
                print(f"Token count: {token_count}")
            else:
                print(token_count)
            
            # Show cost estimation if requested
            if args.cost:
                # The same token count is priced both ways; which figure is
                # shown depends on --verbose / --output-tokens below.
                input_cost = estimate_cost(token_count, model, input_tokens=True)
                output_cost = estimate_cost(token_count, model, input_tokens=False)
                
                if input_cost > 0 or output_cost > 0:
                    if args.verbose:
                        print(f"Estimated input cost: ${input_cost:.6f}")
                        print(f"Estimated output cost: ${output_cost:.6f}")
                    else:
                        cost = output_cost if args.output_tokens else input_cost
                        print(f"${cost:.6f}")
                else:
                    # Both estimates are zero: stay silent unless verbose.
                    if args.verbose:
                        print("Cost estimation not available for this model")
        
        except UnsupportedModelError as e:
            print(f"Error: {e}", file=sys.stderr)
            if args.verbose:
                print("\nUse --list-models to see supported models", file=sys.stderr)
            sys.exit(1)
        
        except TokenizationError as e:
            print(f"Error: {e}", file=sys.stderr)
            sys.exit(1)
    
    except KeyboardInterrupt:
        print("\nInterrupted by user", file=sys.stderr)
        sys.exit(1)
    
    except Exception as e:
        # Last-resort boundary: SystemExit is not an Exception subclass, so the
        # sys.exit()/parser.error() calls above pass through untouched.
        print(f"Unexpected error: {e}", file=sys.stderr)
        if args.verbose:
            import traceback
            traceback.print_exc()
        sys.exit(1)




# [docs]
def list_models() -> None:
    """
    Print every supported model, grouped by provider.
    
    The provider-to-models mapping is obtained from ``get_supported_models()``.
    For each provider, in the mapping's own iteration order, the function prints
    a header with the upper-cased provider name and the number of its models,
    a dashed rule, and then the model names in sorted order, each indented by
    two spaces. A final line reports the total number of models across all
    providers.
    
    Output Format:
        .. code-block:: text
        
            Supported models:
            ==================================================
            
            <PROVIDER> (<count> models):
            ------------------------------
              <model name>
              <model name>
              ...
            
            Total: <sum of all counts> models
    
    Note:
        ``main`` calls this function when the ``--list-models`` flag is set.
        All output goes to standard output; nothing is returned.
    """
    catalog = get_supported_models()
    
    print("Supported models:")
    print("=" * 50)
    
    # Accumulate the grand total while walking the providers.
    grand_total = 0
    for provider_name, names in catalog.items():
        provider_count = len(names)
        grand_total += provider_count
        
        print(f"\n{provider_name.upper()} ({provider_count} models):")
        print("-" * 30)
        
        for name in sorted(names):
            print(f"  {name}")
    
    print(f"\nTotal: {grand_total} models")



# Run the CLI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()