11import functools
2- import re
32import string
43import sys
54import typing as t
@@ -16,9 +15,6 @@ def __html__(self) -> str:
1615
1716__version__ = "2.1.4.dev"
1817
19- _strip_comments_re = re .compile (r"<!--.*?-->" , re .DOTALL )
20- _strip_tags_re = re .compile (r"<.*?>" , re .DOTALL )
21-
2218
2319def _simple_escaping_wrapper (func : "t.Callable[_P, str]" ) -> "t.Callable[_P, Markup]" :
2420 @functools .wraps (func )
@@ -162,10 +158,41 @@ def striptags(self) -> str:
162158 >>> Markup("Main »\t <em>About</em>").striptags()
163159 'Main » About'
164160 """
165- # Use two regexes to avoid ambiguous matches.
166- value = _strip_comments_re .sub ("" , self )
167- value = _strip_tags_re .sub ("" , value )
168- value = " " .join (value .split ())
161+ # collapse spaces
162+ value = " " .join (self .split ())
163+
164+ # Look for comments then tags separately. Otherwise, a comment that
165+ # contains a tag would end early, leaving some of the comment behind.
166+
167+ while True :
168+ # keep finding comment start marks
169+ start = value .find ("<!--" )
170+
171+ if start == - 1 :
172+ break
173+
174+ # find a comment end mark beyond the start, otherwise stop
175+ end = value .find ("-->" , start )
176+
177+ if end == - 1 :
178+ break
179+
180+ value = f"{ value [:start ]} { value [end + 3 :]} "
181+
182+ # remove tags using the same method
183+ while True :
184+ start = value .find ("<" )
185+
186+ if start == - 1 :
187+ break
188+
189+ end = value .find (">" , start )
190+
191+ if end == - 1 :
192+ break
193+
194+ value = f"{ value [:start ]} { value [end + 1 :]} "
195+
169196 return self .__class__ (value ).unescape ()
170197
171198 @classmethod
0 commit comments