Changeset 3480216
- Timestamp:
- 03/11/2026 01:47:37 PM (4 weeks ago)
- Location:
- mescio-for-agents/trunk
- Files:
-
- 4 edited
-
includes/class-llms-endpoints.php (modified) (6 diffs)
-
includes/class-markdown-generator.php (modified) (1 diff)
-
mescio-for-agents.php (modified) (2 diffs)
-
readme.txt (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
mescio-for-agents/trunk/includes/class-llms-endpoints.php
r3480063 r3480216 238 238 $lines[] = ''; 239 239 foreach ( $pages as $page ) { 240 $page = self::resolve_translated_post( $page ); 240 241 $desc = Mescio_For_Agents_Markdown::get_excerpt( $page ); 241 242 $lines[] = '- [' . get_the_title( $page ) . '](' . get_permalink( $page->ID ) . ')' … … 259 260 $lines[] = ''; 260 261 foreach ( $posts as $post ) { 262 $post = self::resolve_translated_post( $post ); 261 263 $desc = Mescio_For_Agents_Markdown::get_excerpt( $post ); 262 264 $date = get_the_date( 'Y-m-d', $post ); … … 281 283 $by_cat = []; 282 284 foreach ( $products as $product ) { 283 $cats = get_the_terms( $product->ID, 'product_cat' ); 284 $cat = ( $cats && ! is_wp_error( $cats ) ) ? $cats[0]->name : 'Products'; 285 $product = self::resolve_translated_post( $product ); 286 $cats = get_the_terms( $product->ID, 'product_cat' ); 287 $cat = ( $cats && ! is_wp_error( $cats ) ) ? $cats[0]->name : 'Products'; 285 288 $by_cat[ $cat ][] = $product; 286 289 } … … 324 327 $lines[] = ''; 325 328 foreach ( $cpt_posts as $cpt_post ) { 326 $desc = Mescio_For_Agents_Markdown::get_excerpt( $cpt_post ); 327 $lines[] = '- [' . get_the_title( $cpt_post ) . '](' . get_permalink( $cpt_post->ID ) . ')' 329 $cpt_post = self::resolve_translated_post( $cpt_post ); 330 $desc = Mescio_For_Agents_Markdown::get_excerpt( $cpt_post ); 331 $lines[] = '- [' . get_the_title( $cpt_post ) . '](' . get_permalink( $cpt_post->ID ) . ')' 328 332 . ( $desc ? ': ' . $desc : '' ); 329 333 } … … 332 336 333 337 return implode( "\n", $lines ); 338 } 339 340 /** 341 * Given a post in the default language, return its translated version 342 * matching the current language context (WPML or Polylang). 343 * Falls back to the original post if no translation exists. 
344 */ 345 private static function resolve_translated_post( WP_Post $post ): WP_Post { 346 // WPML 347 if ( function_exists( 'icl_object_id' ) ) { 348 $lang = apply_filters( 'wpml_current_language', null ); // phpcs:ignore WordPress.NamingConventions.PrefixAllGlobals.NonPrefixedHooknameFound 349 if ( $lang ) { 350 $translated_id = icl_object_id( $post->ID, $post->post_type, false, $lang ); 351 if ( $translated_id && $translated_id !== $post->ID ) { 352 $translated = get_post( $translated_id ); 353 if ( $translated instanceof WP_Post ) return $translated; 354 } 355 } 356 } 357 // Polylang 358 if ( function_exists( 'pll_current_language' ) && function_exists( 'pll_get_post' ) ) { 359 $lang = pll_current_language( 'slug' ); 360 if ( $lang ) { 361 $translated_id = pll_get_post( $post->ID, $lang ); 362 if ( $translated_id && $translated_id !== $post->ID ) { 363 $translated = get_post( $translated_id ); 364 if ( $translated instanceof WP_Post ) return $translated; 365 } 366 } 367 } 368 return $post; 334 369 } 335 370 … … 367 402 368 403 foreach ( $all_posts as $post ) { 404 $post = self::resolve_translated_post( $post ); 369 405 $lines[] = Mescio_For_Agents_Markdown::post_to_markdown( $post ); 370 406 $lines[] = ''; -
mescio-for-agents/trunk/includes/class-markdown-generator.php
r3480063 r3480216 316 316 /** 317 317 * Return the AI description or fall back to WP excerpt. 318 * 319 * On multilingual sites (WPML, Polylang) the $post object may be the 320 * default-language post even when the index is being built for a translated 321 * URL. We resolve the correct translated post ID so the excerpt is always 322 * in the right language. 323 * 324 * Shortcodes (WPBakery, WoodMart, etc.) are stripped before returning so 325 * that builder markup never leaks into the llms.txt index. 318 326 */ 319 327 public static function get_excerpt( WP_Post $post ): string { 328 // Resolve the translated post if a language plugin is active. 329 $post = self::maybe_get_translated_post( $post ); 330 320 331 $ai = get_post_meta( $post->ID, self::META_DESCRIPTION, true ); 321 if ( $ai ) return wp_strip_all_tags( $ai ); 322 if ( $post->post_excerpt ) return wp_strip_all_tags( $post->post_excerpt ); 323 // Auto-trim from content 324 $plain = wp_strip_all_tags( $post->post_content ); 332 if ( $ai ) return self::strip_shortcode_noise( wp_strip_all_tags( $ai ) ); 333 334 if ( $post->post_excerpt ) { 335 return self::strip_shortcode_noise( wp_strip_all_tags( $post->post_excerpt ) ); 336 } 337 338 // Auto-trim from content — strip shortcode tags first, then HTML. 339 $plain = self::strip_shortcode_noise( $post->post_content ); 340 $plain = wp_strip_all_tags( $plain ); 325 341 $plain = preg_replace( '/\s+/', ' ', $plain ); 326 342 $words = explode( ' ', trim( $plain ) ); 327 343 return implode( ' ', array_slice( $words, 0, 40 ) ); 344 } 345 346 /** 347 * Strip page-builder shortcode tags (and their content) from a raw string. 348 * Used by get_excerpt() so that llms.txt index lines are clean. 349 * 350 * This mirrors the logic in html_to_markdown() but operates on raw 351 * post_content / post_excerpt before do_shortcode() is ever called. 
352 */ 353 private static function strip_shortcode_noise( string $text ): string { 354 $builders = [ 'vc_', 'et_pb_', 'fl_', 'fusion_', 'cs_', 'x_', 'woodmart_' ]; 355 foreach ( $builders as $prefix ) { 356 for ( $i = 0; $i < 10; $i++ ) { 357 $before = $text; 358 // Self-closing 359 $text = preg_replace( '/\[' . $prefix . '[^\]]*\/\]/s', '', $text ); 360 // Open+content+close pairs 361 $text = preg_replace( '/\[' . $prefix . '[^\]]*\].*?\[\/' . $prefix . '[^\]]*\]/s', '', $text ); 362 // Orphaned open/close tags 363 $text = preg_replace( '/\[\/?(' . $prefix . ')[^\]]*\]/', '', $text ); 364 if ( $text === $before ) break; 365 } 366 } 367 // SiteOrigin 368 $text = preg_replace( '/\[\/?so[w]?_[^\]]*\]/s', '', $text ); 369 // Generic: any shortcode tag longer than 200 chars (base64 blobs) 370 $text = preg_replace( '/\[[^\]]{200,}\]/', '', $text ); 371 // Any remaining shortcode bracket pairs 372 $text = preg_replace( '/\[[^\]]+\]/', '', $text ); 373 374 return trim( $text ); 375 } 376 377 /** 378 * If WPML or Polylang is active, return the translated version of $post 379 * matching the current language context. Falls back to $post unchanged. 380 */ 381 private static function maybe_get_translated_post( WP_Post $post ): WP_Post { 382 // WPML 383 if ( function_exists( 'icl_object_id' ) ) { 384 $lang = apply_filters( 'wpml_current_language', null ); // phpcs:ignore WordPress.NamingConventions.PrefixAllGlobals.NonPrefixedHooknameFound 385 $translated = $lang ? icl_object_id( $post->ID, $post->post_type, false, $lang ) : null; 386 if ( $translated && $translated !== $post->ID ) { 387 $p = get_post( $translated ); 388 if ( $p instanceof WP_Post ) return $p; 389 } 390 } 391 // Polylang 392 if ( function_exists( 'pll_current_language' ) && function_exists( 'pll_get_post' ) ) { 393 $lang = pll_current_language( 'slug' ); 394 $translated = $lang ? 
pll_get_post( $post->ID, $lang ) : null; 395 if ( $translated && $translated !== $post->ID ) { 396 $p = get_post( $translated ); 397 if ( $p instanceof WP_Post ) return $p; 398 } 399 } 400 return $post; 328 401 } 329 402 -
mescio-for-agents/trunk/mescio-for-agents.php
r3480063 r3480216 4 4 * Plugin URI: https://wordpress.org/plugins/mescio-for-agents/ 5 5 * Description: Mescio for Agents serves your posts, pages and WooCommerce products as clean Markdown to AI agents and GPT crawlers — using HTTP content negotiation (Accept: text/markdown). Human visitors never notice a thing. 6 * Version: 1.6.2 6 * Version: 1.6.3 7 7 * Requires at least: 6.0 8 8 * Requires PHP: 8.0 … … 52 52 53 53 /** Plugin version — must match the Version header above. */ 54 const VERSION = '1.6.2'; 54 const VERSION = '1.6.3'; 55 55 56 56 /** Post types served by default (filterable via mescio_enabled_post_types). */ -
mescio-for-agents/trunk/readme.txt
r3480063 r3480216 5 5 Tested up to: 6.9 6 6 Requires PHP: 8.0 7 Stable tag: 1.6.2 7 Stable tag: 1.6.3 8 8 License: GPLv2 or later 9 9 License URI: https://www.gnu.org/licenses/gpl-2.0.html … … 133 133 == Changelog == 134 134 135 = 1.6.2 = 136 * Fix: page builder shortcodes (WPBakery, WoodMart, Divi, ecc.) ora rimossi correttamente prima dell'esecuzione — elimina CSS/attributi blob dall'output Markdown 137 * Fix: content negotiation (Accept: text/markdown) ora funziona correttamente su siti multilingua con WPML/Polylang 135 = 1.6.3 = 136 * Fix: shortcode builder tags (WPBakery, WoodMart, ecc.) rimossi anche dagli excerpt nel llms.txt index 137 * Fix: llms.txt ora serve titoli, excerpt e permalink nella lingua corretta su siti multilingua (WPML/Polylang) 138 * Fix: llms-full.txt ora serve il contenuto completo dei post nella lingua corretta (WPML/Polylang) 139 138 140 139 141 = 1.6.1 =
Note: See TracChangeset
for help on using the changeset viewer.