Changeset 3484551
- Timestamp: 03/17/2026 08:19:04 AM (2 weeks ago)
- Location: intufind
- Files:
-
- 36 added
- 3 edited
-
tags/1.4.0 (added)
-
tags/1.4.0/admin (added)
-
tags/1.4.0/admin/class-intufind-admin.php (added)
-
tags/1.4.0/admin/class-intufind-components.php (added)
-
tags/1.4.0/admin/css (added)
-
tags/1.4.0/admin/css/intufind-admin.css (added)
-
tags/1.4.0/admin/js (added)
-
tags/1.4.0/admin/js/intufind-admin.js (added)
-
tags/1.4.0/admin/partials (added)
-
tags/1.4.0/admin/partials/chat-display.php (added)
-
tags/1.4.0/admin/partials/recommendations-display.php (added)
-
tags/1.4.0/admin/partials/search-display.php (added)
-
tags/1.4.0/admin/partials/settings-display.php (added)
-
tags/1.4.0/admin/partials/status-display.php (added)
-
tags/1.4.0/admin/partials/sync-display.php (added)
-
tags/1.4.0/includes (added)
-
tags/1.4.0/includes/class-intufind-api.php (added)
-
tags/1.4.0/includes/class-intufind-chat-widget.php (added)
-
tags/1.4.0/includes/class-intufind-content-extractor.php (added)
-
tags/1.4.0/includes/class-intufind-exclusions.php (added)
-
tags/1.4.0/includes/class-intufind-list-columns.php (added)
-
tags/1.4.0/includes/class-intufind-mcp.php (added)
-
tags/1.4.0/includes/class-intufind-plugin.php (added)
-
tags/1.4.0/includes/class-intufind-recommendations-override.php (added)
-
tags/1.4.0/includes/class-intufind-search-override.php (added)
-
tags/1.4.0/includes/class-intufind-search-widget.php (added)
-
tags/1.4.0/includes/class-intufind-shortcodes.php (added)
-
tags/1.4.0/includes/class-intufind-sync-status.php (added)
-
tags/1.4.0/includes/class-intufind-sync.php (added)
-
tags/1.4.0/includes/integrations (added)
-
tags/1.4.0/includes/integrations/class-intufind-facetwp.php (added)
-
tags/1.4.0/intufind.php (added)
-
tags/1.4.0/languages (added)
-
tags/1.4.0/languages/intufind.pot (added)
-
tags/1.4.0/readme.txt (added)
-
tags/1.4.0/uninstall.php (added)
-
trunk/includes/class-intufind-content-extractor.php (modified) (7 diffs)
-
trunk/intufind.php (modified) (2 diffs)
-
trunk/readme.txt (modified) (3 diffs)
Legend:
- Unmodified
- Added
- Removed
-
intufind/trunk/includes/class-intufind-content-extractor.php
r3463908 r3484551 65 65 66 66 // Primary content. 67 $primary_content = $this-> clean_content( $post->post_content );67 $primary_content = $this->html_to_markdown( $post->post_content ); 68 68 if ( ! empty( $primary_content ) ) { 69 69 $content_parts[] = $primary_content; … … 138 138 139 139 // Product description. 140 $description = $this-> clean_content( $product->get_description() );140 $description = $this->html_to_markdown( $product->get_description() ); 141 141 if ( ! empty( $description ) ) { 142 142 $content_parts[] = $description; … … 349 349 350 350 case 'wysiwyg': 351 return $this-> clean_content( $field_value );351 return $this->html_to_markdown( $field_value ); 352 352 353 353 case 'select': … … 631 631 */ 632 632 private function extract_divi_content( $post ) { 633 // Divi content is in shortcodes - clean the content to get text.634 633 $content = $post->post_content; 635 634 … … 638 637 $content = preg_replace( '/\[\/et_pb_[^\]]*\]/', '', $content ); 639 638 640 return $this-> clean_content( $content );639 return $this->html_to_markdown( $content ); 641 640 } 642 641 … … 758 757 759 758 /** 760 * Clean content by stripping HTML, shortcodes, and extra whitespace. 759 * Convert HTML content to markdown, preserving document structure 760 * (headings, lists, bold, italic, links) for AI comprehension. 761 * 762 * @param string $content Raw HTML content. 763 * @return string Markdown-formatted content. 764 */ 765 private function html_to_markdown( $content ) { 766 if ( empty( $content ) ) { 767 return ''; 768 } 769 770 $content = strip_shortcodes( $content ); 771 772 // Block-level elements: convert before stripping tags. 773 // Headings. 774 $content = preg_replace_callback( 775 '/<h([1-6])[^>]*>(.*?)<\/h\1>/si', 776 function ( $m ) { 777 return "\n\n" . str_repeat( '#', (int) $m[1] ) . ' ' . trim( wp_strip_all_tags( $m[2] ) ) . "\n\n"; 778 }, 779 $content 780 ); 781 782 // Blockquotes. 
783 $content = preg_replace_callback( 784 '/<blockquote[^>]*>(.*?)<\/blockquote>/si', 785 function ( $m ) { 786 $text = trim( wp_strip_all_tags( $m[1] ) ); 787 $lines = explode( "\n", $text ); 788 return "\n\n" . implode( "\n", array_map( fn( $l ) => '> ' . trim( $l ), $lines ) ) . "\n\n"; 789 }, 790 $content 791 ); 792 793 // List items — unordered. 794 $content = preg_replace_callback( 795 '/<ul[^>]*>(.*?)<\/ul>/si', 796 function ( $m ) { 797 $items = array(); 798 preg_match_all( '/<li[^>]*>(.*?)<\/li>/si', $m[1], $li ); 799 foreach ( $li[1] as $item ) { 800 $items[] = '- ' . trim( wp_strip_all_tags( $item ) ); 801 } 802 return "\n\n" . implode( "\n", $items ) . "\n\n"; 803 }, 804 $content 805 ); 806 807 // List items — ordered. 808 $content = preg_replace_callback( 809 '/<ol[^>]*>(.*?)<\/ol>/si', 810 function ( $m ) { 811 $items = array(); 812 $n = 1; 813 preg_match_all( '/<li[^>]*>(.*?)<\/li>/si', $m[1], $li ); 814 foreach ( $li[1] as $item ) { 815 $items[] = $n . '. ' . trim( wp_strip_all_tags( $item ) ); 816 ++$n; 817 } 818 return "\n\n" . implode( "\n", $items ) . "\n\n"; 819 }, 820 $content 821 ); 822 823 // Paragraphs and line breaks. 824 $content = preg_replace( '/<\/p>\s*<p[^>]*>/si', "\n\n", $content ); 825 $content = preg_replace( '/<p[^>]*>/si', "\n\n", $content ); 826 $content = preg_replace( '/<\/p>/si', "\n\n", $content ); 827 $content = preg_replace( '/<br\s*\/?>/si', "\n", $content ); 828 $content = preg_replace( '/<hr\s*\/?>/si', "\n\n---\n\n", $content ); 829 830 // Inline elements. 831 $content = preg_replace( '/<(strong|b)[^>]*>(.*?)<\/\1>/si', '**$2**', $content ); 832 $content = preg_replace( '/<(em|i)[^>]*>(.*?)<\/\1>/si', '*$2*', $content ); 833 $content = preg_replace( '/<code[^>]*>(.*?)<\/code>/si', '`$1`', $content ); 834 835 // Links. 836 $content = preg_replace_callback( 837 '/<a[^>]+href=["\']([^"\']+)["\'][^>]*>(.*?)<\/a>/si', 838 function ( $m ) { 839 $text = trim( wp_strip_all_tags( $m[2] ) ); 840 return $text ? 
"[$text]($m[1])" : ''; 841 }, 842 $content 843 ); 844 845 // Images — extract alt text. 846 $content = preg_replace_callback( 847 '/<img[^>]+alt=["\']([^"\']*)["\'][^>]*>/si', 848 function ( $m ) { 849 return ! empty( $m[1] ) ? $m[1] : ''; 850 }, 851 $content 852 ); 853 854 // Strip remaining HTML tags. 855 $content = wp_strip_all_tags( $content ); 856 857 // Decode HTML entities. 858 $content = html_entity_decode( $content, ENT_QUOTES | ENT_HTML5, 'UTF-8' ); 859 860 // Normalize whitespace: collapse 3+ newlines to 2, trim lines. 861 $content = preg_replace( '/\n{3,}/', "\n\n", $content ); 862 $content = preg_replace( '/[ \t]+/', ' ', $content ); 863 $content = preg_replace( '/^ +| +$/m', '', $content ); 864 865 return trim( $content ); 866 } 867 868 /** 869 * Strip HTML to plain text for excerpts and short fields. 761 870 * 762 871 * @param string $content Raw content. 763 * @return string Cleaned content.872 * @return string Plain text. 764 873 */ 765 874 private function clean_content( $content ) { … … 768 877 } 769 878 770 // Strip shortcodes.771 879 $content = strip_shortcodes( $content ); 772 773 // Strip HTML tags.774 880 $content = wp_strip_all_tags( $content ); 775 776 // Decode HTML entities.777 881 $content = html_entity_decode( $content, ENT_QUOTES | ENT_HTML5, 'UTF-8' ); 778 779 // Normalize whitespace.780 882 $content = preg_replace( '/\s+/', ' ', $content ); 781 883 -
intufind/trunk/intufind.php
r3476603 r3484551 4 4 * Plugin URI: https://intufind.com/integrations/wordpress 5 5 * Description: AI-powered search and chat for WordPress. Syncs your content to the cloud for semantic search, intelligent recommendations, and conversational AI. 6 * Version: 1. 3.06 * Version: 1.4.0 7 7 * Requires at least: 6.0 8 8 * Requires PHP: 8.0 … … 26 26 * Plugin constants. 27 27 */ 28 define( 'INTUFIND_VERSION', '1. 3.0' );28 define( 'INTUFIND_VERSION', '1.4.0' ); 29 29 define( 'INTUFIND_PLUGIN_FILE', __FILE__ ); 30 30 define( 'INTUFIND_PLUGIN_DIR', plugin_dir_path( __FILE__ ) ); -
intufind/trunk/readme.txt
r3476603 r3484551 5 5 Tested up to: 6.9 6 6 Requires PHP: 8.0 7 Stable tag: 1. 3.07 Stable tag: 1.4.0 8 8 WC tested up to: 9.6 9 9 License: GPLv2 or later … … 215 215 == Changelog == 216 216 217 = 1.4.0 = 218 * Content sync now preserves document structure (headings, lists, bold, links) as markdown for better AI search and chat quality 219 * Added html_to_markdown() converter for rich content fields (post content, product descriptions, ACF WYSIWYG, Divi builder) 220 * Retained plain-text extraction for short fields like excerpts and text-only ACF fields 221 217 222 = 1.3.0 = 218 223 * Added FacetWP integration — FacetWP Search facets now use Intufind semantic search automatically … … 299 304 == Upgrade Notice == 300 305 306 = 1.4.0 = 307 Content sync now converts HTML to markdown, preserving headings, lists, and formatting for significantly better AI search and chat responses. Re-sync recommended after updating. 308 301 309 = 1.3.0 = 302 310 Adds native FacetWP support. FacetWP Search facets automatically use Intufind semantic search when the Search Override is enabled — no configuration needed.
Note: See TracChangeset for help on using the changeset viewer.