Created
October 29, 2024 11:15
-
-
Save PriNova/e09301dcd53d556980e05c378fd1c92a to your computer and use it in GitHub Desktop.
TokenCounting with chunking and debouncing strategy
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Types for our token counter

/** The pair of token counts produced for one piece of text. */
interface TokenCount {
  /** Number of pieces obtained by splitting the trimmed text on whitespace runs. */
  whitespaceCount: number;
  /** Count returned by the tiktoken counting step. */
  tiktokenCount: number;
}
/**
 * Snapshot of a text selection handed to the token counter.
 *
 * In the usage example in this file, `start` and `end` are filled from a DOM
 * Range's `startOffset` / `endOffset`, and `text` from `range.toString()`.
 */
interface SelectionState {
  /** Offset at which the selection begins. */
  start: number;
  /** Offset at which the selection ends. */
  end: number;
  /** The selected text itself; this is what gets tokenized. */
  text: string;
}
/**
 * Bounded cache for token counts, keyed by the exact text that was counted.
 *
 * Eviction is first-in-first-out: when a new key would push the cache past
 * `maxSize`, the entry that was inserted earliest is dropped. Reads do not
 * refresh an entry's position.
 */
class TokenCache {
  private readonly cache = new Map<string, TokenCount>();
  private readonly maxSize: number;

  /**
   * @param maxSize Maximum number of entries to keep. A value of 0 or less
   *   disables caching entirely (nothing is stored).
   */
  constructor(maxSize: number = 1000) {
    this.maxSize = maxSize;
  }

  /**
   * @returns The cached counts for `key`, or `undefined` when absent.
   */
  get(key: string): TokenCount | undefined {
    return this.cache.get(key);
  }

  /**
   * Stores `value` under `key`, evicting the oldest entry if a new key would
   * exceed the capacity.
   */
  set(key: string, value: TokenCount): void {
    if (this.maxSize <= 0) {
      // No capacity: behave as a no-op cache instead of keeping one entry.
      return;
    }

    // Overwriting an existing key does not grow the map, so it must not
    // evict an unrelated entry.
    if (!this.cache.has(key) && this.cache.size >= this.maxSize) {
      // A Map iterates in insertion order, so the first key is the oldest.
      const oldest = this.cache.keys().next();
      if (!oldest.done) {
        this.cache.delete(oldest.value);
      }
    }

    this.cache.set(key, value);
  }
}
/**
 * Debounced token counter for text selections.
 *
 * Each call to {@link handleSelectionUpdate} (re)starts a debounce timer; when
 * the timer fires, the selection's text is counted (results are memoized in a
 * {@link TokenCache} keyed by the full text) and passed to `onUpdate`.
 *
 * Note: despite the title of this example, no chunking of large texts is
 * performed here; the whole text is counted in one pass.
 */
class OptimizedTokenCounter {
  private lastSelection: SelectionState | null = null;
  private readonly tokenCache: TokenCache;
  // `ReturnType<typeof setTimeout>` works in both browser and Node typings.
  private debounceTimeout: ReturnType<typeof setTimeout> | null = null;
  private readonly debounceMs: number;
  private readonly onUpdate: (count: TokenCount) => void;
  // Incremented for every scheduled calculation so stale results can be dropped.
  private latestRequestId = 0;

  /**
   * @param onUpdate Callback invoked with the counts once a debounced
   *   calculation finishes.
   * @param debounceMs Quiet period, in milliseconds, before counting starts.
   * @param maxCacheSize Capacity passed to the internal {@link TokenCache}.
   */
  constructor(
    onUpdate: (count: TokenCount) => void,
    debounceMs: number = 250,
    maxCacheSize: number = 1000
  ) {
    this.onUpdate = onUpdate;
    this.debounceMs = debounceMs;
    this.tokenCache = new TokenCache(maxCacheSize);
  }

  /**
   * Computes both counts for `text`, consulting the cache first.
   *
   * @returns The cached or freshly computed counts.
   */
  private async calculateTokens(text: string): Promise<TokenCount> {
    const cached = this.tokenCache.get(text);
    if (cached) {
      return cached;
    }

    // Splitting an empty string yields [''], which would report one token
    // for empty or whitespace-only text, so handle that case explicitly.
    const trimmed = text.trim();
    const whitespaceCount = trimmed === '' ? 0 : trimmed.split(/\s+/).length;

    // Tiktoken count (assuming async tiktoken implementation)
    const tiktokenCount = await this.calculateTiktokenCount(text);

    const result: TokenCount = { whitespaceCount, tiktokenCount };
    this.tokenCache.set(text, result);
    return result;
  }

  /**
   * Registers a new selection and schedules a debounced token calculation.
   *
   * A selection identical to the previous one (same offsets and same text) is
   * ignored, and any calculation already pending for it is left to run.
   */
  public handleSelectionUpdate(selection: SelectionState): void {
    const previous = this.lastSelection;
    // Compare the text as well: the same offsets can describe different
    // content (for example, offsets taken relative to different nodes).
    if (
      previous &&
      previous.start === selection.start &&
      previous.end === selection.end &&
      previous.text === selection.text
    ) {
      // Must return before touching the timer; cancelling here would drop
      // the pending calculation without ever rescheduling it.
      return;
    }

    if (this.debounceTimeout !== null) {
      clearTimeout(this.debounceTimeout);
    }
    this.lastSelection = selection;

    const requestId = ++this.latestRequestId;
    this.debounceTimeout = setTimeout(async () => {
      this.debounceTimeout = null;
      try {
        const count = await this.calculateTokens(selection.text);
        // A newer selection arrived while this one was being counted;
        // reporting this result would show counts for outdated text.
        if (requestId !== this.latestRequestId) {
          return;
        }
        this.onUpdate(count);
      } catch (error) {
        console.error('Error calculating tokens:', error);
      }
    }, this.debounceMs);
  }

  /**
   * Cancels any pending or in-flight calculation so `onUpdate` is not called
   * for it. The counter remains usable afterwards.
   */
  public dispose(): void {
    if (this.debounceTimeout !== null) {
      clearTimeout(this.debounceTimeout);
      this.debounceTimeout = null;
    }
    // Invalidate any calculation that is already awaiting its result.
    this.latestRequestId++;
    this.lastSelection = null;
  }

  /**
   * Example tiktoken calculation (replace with actual implementation).
   *
   * This placeholder estimates the count as 60% of the text length, rounded
   * down.
   */
  private async calculateTiktokenCount(text: string): Promise<number> {
    // Implement your tiktoken counting logic here
    return Math.floor(text.length * 0.6); // Dummy implementation
  }
}
// Usage example

/**
 * Wires an {@link OptimizedTokenCounter} to selection changes inside
 * `editorElement` and returns the counter.
 *
 * @param editorElement Element whose selected text should be counted.
 * @param signal Optional abort signal; aborting it removes the
 *   `selectionchange` listener registered here.
 * @returns The token counter that receives the selection updates.
 */
const setupTokenCounter = (editorElement: HTMLElement, signal?: AbortSignal) => {
  const updateUI = (count: TokenCount) => {
    console.log(`Whitespace tokens: ${count.whitespaceCount}`);
    console.log(`Tiktoken tokens: ${count.tiktokenCount}`);
    // Update your UI here
  };
  const tokenCounter = new OptimizedTokenCounter(updateUI);

  const editorDocument = editorElement.ownerDocument;

  const handleSelectionChange = (): void => {
    const selection = editorDocument.getSelection();
    // getRangeAt(0) throws when the selection holds no range.
    if (!selection || selection.rangeCount === 0) return;

    const range = selection.getRangeAt(0);
    // The listener is document-wide, so skip selections made elsewhere.
    if (!editorElement.contains(range.commonAncestorContainer)) return;

    tokenCounter.handleSelectionUpdate({
      start: range.startOffset,
      end: range.endOffset,
      text: range.toString(),
    });
  };

  // `selectionchange` is dispatched on the document, not on a contentEditable
  // element, so a listener attached to the element itself would never run.
  editorDocument.addEventListener(
    'selectionchange',
    handleSelectionChange,
    signal ? { signal } : undefined
  );

  return tokenCounter;
};
| // Example React component usage | |
| import React, { useEffect, useRef } from 'react'; | |
/**
 * Example React component: a contentEditable box whose text selections are
 * fed to the token counter once the component has mounted.
 */
const CodeEditor: React.FC = () => {
  const editorRef = useRef<HTMLDivElement>(null);

  useEffect(() => {
    const editorElement = editorRef.current;
    if (!editorElement) {
      // Nothing was mounted, so there is nothing to set up or clean up.
      return;
    }

    setupTokenCounter(editorElement);

    // Cleanup
    return () => {
      // Add any necessary cleanup
    };
  }, []);

  return (
    <div
      ref={editorRef}
      className="w-full h-96 overflow-auto border border-gray-300 p-4"
      contentEditable
    >
      {/* Your editor content here */}
    </div>
  );
};

export default CodeEditor;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment