Plugin Directory

Changeset 3457500


Ignore:
Timestamp:
02/09/2026 09:50:18 PM (7 weeks ago)
Author:
visiblefirst
Message:

Release 3.2.25 - Fix llms.txt token limit for large sites

Location:
visiblefirst
Files:
6 edited
1 copied

Legend:

Unmodified
Added
Removed
  • visiblefirst/tags/3.2.25/includes/class-visibl-llms-generator.php

    r3457294 r3457500  
    623623
    /**
     * Truncate crawl data so its JSON form fits within Claude's token limits.
     *
     * Reductions are applied in this order:
     *   1. Keep at most 100 pages, preferring navigation pages, then
     *      `page` before `post` before any other post type. Pages of equal
     *      priority keep their original relative order.
     *   2. Cap industry signals (20), categories (30) and products/services (50).
     *   3. If the payload is still too large, replace `schema_data` with a
     *      short placeholder.
     *   4. If the payload is still too large, repeatedly keep the leading
     *      ~70% of pages, never going below 20 pages.
     *
     * Size is measured on the pretty-printed encoding because the prompt
     * builder embeds the data with JSON_PRETTY_PRINT, which is considerably
     * longer than the compact form.
     *
     * @param array $crawl_data Full crawl data
     * @return array Truncated crawl data. When pages were dropped in step 1 the
     *               result also carries `_truncated_pages` (true) and
     *               `_original_page_count`.
     */
    private static function truncate_crawl_data($crawl_data) {
        $max_pages = 100;          // Limit to 100 most important pages
        $min_pages = 20;           // Never shrink the page list below this
        $max_json_chars = 150000;  // ~40k tokens for content, leaving room for system prompt
        $list_limits = [
            'industry_signals'     => 20,
            'categories'           => 30,
            'products_or_services' => 50,
        ];

        // Byte length of the payload as it will be embedded in the prompt.
        // JSON_PARTIAL_OUTPUT_ON_ERROR keeps the estimate usable when a value
        // cannot be encoded (e.g. invalid UTF-8) instead of yielding false.
        $encoded_size = static function ($data) {
            $json = json_encode(
                $data,
                JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_PARTIAL_OUTPUT_ON_ERROR
            );
            return $json === false ? 0 : strlen($json);
        };

        $truncated = $crawl_data;

        // count() throws a TypeError on PHP 8 for non-countable values, so
        // every list is verified to be an array before it is measured.
        $has_pages = isset($truncated['pages']) && is_array($truncated['pages']);

        // 1. Prioritize and limit pages
        if ($has_pages && count($truncated['pages']) > $max_pages) {
            $type_order = ['page' => 0, 'post' => 1];

            // Decorate each page with [nav rank, type rank, original position].
            // The position acts as a tie-breaker so the ordering is stable even
            // on PHP versions where usort() is not.
            $decorated = [];
            $position = 0;
            foreach ($truncated['pages'] as $page) {
                $type = $page['post_type'] ?? 'post';
                $type_rank = (is_string($type) || is_int($type)) ? ($type_order[$type] ?? 2) : 2;
                // Truthiness (not strict identity) decides nav membership, so
                // 1 and true are ranked the same.
                $nav_rank = empty($page['in_nav']) ? 1 : 0;
                $decorated[] = [$nav_rank, $type_rank, $position++, $page];
            }

            usort($decorated, static function ($a, $b) {
                return [$a[0], $a[1], $a[2]] <=> [$b[0], $b[1], $b[2]];
            });

            $truncated['pages'] = array_map(
                static function ($entry) {
                    return $entry[3];
                },
                array_slice($decorated, 0, $max_pages)
            );
            $truncated['_truncated_pages'] = true;
            $truncated['_original_page_count'] = count($crawl_data['pages']);
        }

        // 2-4. Cap the secondary lists
        foreach ($list_limits as $key => $limit) {
            if (isset($truncated[$key]) && is_array($truncated[$key]) && count($truncated[$key]) > $limit) {
                $truncated[$key] = array_slice($truncated[$key], 0, $limit);
            }
        }

        // 5. Remove verbose schema data if still too large
        if (!empty($truncated['schema_data']) && $encoded_size($truncated) > $max_json_chars) {
            $truncated['schema_data'] = ['_truncated' => 'Schema data removed due to size limits'];
        }

        // 6. Final check - if still too large, reduce pages further
        while ($has_pages
            && count($truncated['pages']) > $min_pages
            && $encoded_size($truncated) > $max_json_chars
        ) {
            // Clamp so a 70% cut can never undershoot the minimum page count.
            $keep = max($min_pages, (int) (count($truncated['pages']) * 0.7));
            $truncated['pages'] = array_slice($truncated['pages'], 0, $keep);
        }

        return $truncated;
    }
     686
     687    /**
    625688     * Phase 2: AI Enhancement
    626689     *
     
    634697        $system_prompt = self::get_system_prompt();
    635698
     699        // Truncate crawl data to fit within token limits
     700        $truncated_data = self::truncate_crawl_data($crawl_data);
     701
    636702        // Build the user message with crawl data
    637         $user_message = "Here is the crawl data for {$crawl_data['site_url']}:\n\n" .
    638                         json_encode($crawl_data, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
     703        $user_message = "Here is the crawl data for {$truncated_data['site_url']}:\n\n" .
     704                        json_encode($truncated_data, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
    639705
    640706        // Call Claude API via VF AI Gateway
  • visiblefirst/tags/3.2.25/readme.txt

    r3457315 r3457500  
    55Tested up to: 6.9
    66Requires PHP: 7.4
    7 Stable tag: 3.2.24
     7Stable tag: 3.2.25
    88License: GPLv2 or later
    99License URI: https://www.gnu.org/licenses/gpl-2.0.html
     
    394394== Changelog ==
    395395
     396= 3.2.25 =
     397* FIX: llms.txt generation now handles large sites by truncating crawl data to fit API token limits
     398* Prevents "prompt too long" errors on sites with extensive content
     399
    396400= 3.2.24 =
    397401* WordPress.org Plugin Directory launch
     
    427431== Upgrade Notice ==
    428432
     433= 3.2.25 =
     434Fixes llms.txt generation errors on large sites.
     435
    429436= 3.2.24 =
    430437Now available on WordPress.org! Install directly from your WordPress dashboard. All features included free.
  • visiblefirst/tags/3.2.25/visiblefirst.php

    r3457294 r3457500  
    33 * Plugin Name: VisibleFirst
    44 * Description: AI + SEO + Social visibility in one plugin. Complete visibility optimization for WordPress.
    5  * Version: 3.2.24
     5 * Version: 3.2.25
    66 * Author: VisibleFirst
    77 * Author URI: https://visiblefirst.com
     
    1616
    1717// Plugin constants
    18 define('VISIBL_VERSION', '3.2.24');
     18define('VISIBL_VERSION', '3.2.25');
    1919define('VISIBL_PLUGIN_DIR', plugin_dir_path(__FILE__));
    2020define('VISIBL_PLUGIN_URL', plugin_dir_url(__FILE__));
  • visiblefirst/trunk/includes/class-visibl-llms-generator.php

    r3457294 r3457500  
    623623
    /**
     * Truncate crawl data so its JSON form fits within Claude's token limits.
     *
     * Reductions are applied in this order:
     *   1. Keep at most 100 pages, preferring navigation pages, then
     *      `page` before `post` before any other post type. Pages of equal
     *      priority keep their original relative order.
     *   2. Cap industry signals (20), categories (30) and products/services (50).
     *   3. If the payload is still too large, replace `schema_data` with a
     *      short placeholder.
     *   4. If the payload is still too large, repeatedly keep the leading
     *      ~70% of pages, never going below 20 pages.
     *
     * Size is measured on the pretty-printed encoding because the prompt
     * builder embeds the data with JSON_PRETTY_PRINT, which is considerably
     * longer than the compact form.
     *
     * @param array $crawl_data Full crawl data
     * @return array Truncated crawl data. When pages were dropped in step 1 the
     *               result also carries `_truncated_pages` (true) and
     *               `_original_page_count`.
     */
    private static function truncate_crawl_data($crawl_data) {
        $max_pages = 100;          // Limit to 100 most important pages
        $min_pages = 20;           // Never shrink the page list below this
        $max_json_chars = 150000;  // ~40k tokens for content, leaving room for system prompt
        $list_limits = [
            'industry_signals'     => 20,
            'categories'           => 30,
            'products_or_services' => 50,
        ];

        // Byte length of the payload as it will be embedded in the prompt.
        // JSON_PARTIAL_OUTPUT_ON_ERROR keeps the estimate usable when a value
        // cannot be encoded (e.g. invalid UTF-8) instead of yielding false.
        $encoded_size = static function ($data) {
            $json = json_encode(
                $data,
                JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_PARTIAL_OUTPUT_ON_ERROR
            );
            return $json === false ? 0 : strlen($json);
        };

        $truncated = $crawl_data;

        // count() throws a TypeError on PHP 8 for non-countable values, so
        // every list is verified to be an array before it is measured.
        $has_pages = isset($truncated['pages']) && is_array($truncated['pages']);

        // 1. Prioritize and limit pages
        if ($has_pages && count($truncated['pages']) > $max_pages) {
            $type_order = ['page' => 0, 'post' => 1];

            // Decorate each page with [nav rank, type rank, original position].
            // The position acts as a tie-breaker so the ordering is stable even
            // on PHP versions where usort() is not.
            $decorated = [];
            $position = 0;
            foreach ($truncated['pages'] as $page) {
                $type = $page['post_type'] ?? 'post';
                $type_rank = (is_string($type) || is_int($type)) ? ($type_order[$type] ?? 2) : 2;
                // Truthiness (not strict identity) decides nav membership, so
                // 1 and true are ranked the same.
                $nav_rank = empty($page['in_nav']) ? 1 : 0;
                $decorated[] = [$nav_rank, $type_rank, $position++, $page];
            }

            usort($decorated, static function ($a, $b) {
                return [$a[0], $a[1], $a[2]] <=> [$b[0], $b[1], $b[2]];
            });

            $truncated['pages'] = array_map(
                static function ($entry) {
                    return $entry[3];
                },
                array_slice($decorated, 0, $max_pages)
            );
            $truncated['_truncated_pages'] = true;
            $truncated['_original_page_count'] = count($crawl_data['pages']);
        }

        // 2-4. Cap the secondary lists
        foreach ($list_limits as $key => $limit) {
            if (isset($truncated[$key]) && is_array($truncated[$key]) && count($truncated[$key]) > $limit) {
                $truncated[$key] = array_slice($truncated[$key], 0, $limit);
            }
        }

        // 5. Remove verbose schema data if still too large
        if (!empty($truncated['schema_data']) && $encoded_size($truncated) > $max_json_chars) {
            $truncated['schema_data'] = ['_truncated' => 'Schema data removed due to size limits'];
        }

        // 6. Final check - if still too large, reduce pages further
        while ($has_pages
            && count($truncated['pages']) > $min_pages
            && $encoded_size($truncated) > $max_json_chars
        ) {
            // Clamp so a 70% cut can never undershoot the minimum page count.
            $keep = max($min_pages, (int) (count($truncated['pages']) * 0.7));
            $truncated['pages'] = array_slice($truncated['pages'], 0, $keep);
        }

        return $truncated;
    }
     686
     687    /**
    625688     * Phase 2: AI Enhancement
    626689     *
     
    634697        $system_prompt = self::get_system_prompt();
    635698
     699        // Truncate crawl data to fit within token limits
     700        $truncated_data = self::truncate_crawl_data($crawl_data);
     701
    636702        // Build the user message with crawl data
    637         $user_message = "Here is the crawl data for {$crawl_data['site_url']}:\n\n" .
    638                         json_encode($crawl_data, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
     703        $user_message = "Here is the crawl data for {$truncated_data['site_url']}:\n\n" .
     704                        json_encode($truncated_data, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
    639705
    640706        // Call Claude API via VF AI Gateway
  • visiblefirst/trunk/readme.txt

    r3457315 r3457500  
    55Tested up to: 6.9
    66Requires PHP: 7.4
    7 Stable tag: 3.2.24
     7Stable tag: 3.2.25
    88License: GPLv2 or later
    99License URI: https://www.gnu.org/licenses/gpl-2.0.html
     
    394394== Changelog ==
    395395
     396= 3.2.25 =
     397* FIX: llms.txt generation now handles large sites by truncating crawl data to fit API token limits
     398* Prevents "prompt too long" errors on sites with extensive content
     399
    396400= 3.2.24 =
    397401* WordPress.org Plugin Directory launch
     
    427431== Upgrade Notice ==
    428432
     433= 3.2.25 =
     434Fixes llms.txt generation errors on large sites.
     435
    429436= 3.2.24 =
    430437Now available on WordPress.org! Install directly from your WordPress dashboard. All features included free.
  • visiblefirst/trunk/visiblefirst.php

    r3457294 r3457500  
    33 * Plugin Name: VisibleFirst
    44 * Description: AI + SEO + Social visibility in one plugin. Complete visibility optimization for WordPress.
    5  * Version: 3.2.24
     5 * Version: 3.2.25
    66 * Author: VisibleFirst
    77 * Author URI: https://visiblefirst.com
     
    1616
    1717// Plugin constants
    18 define('VISIBL_VERSION', '3.2.24');
     18define('VISIBL_VERSION', '3.2.25');
    1919define('VISIBL_PLUGIN_DIR', plugin_dir_path(__FILE__));
    2020define('VISIBL_PLUGIN_URL', plugin_dir_url(__FILE__));
Note: See TracChangeset for help on using the changeset viewer.