Skip to content

Commit 5b84792

Browse files
committed
fix(TAG_FULLMATCH): handle invalid /
Also update changelog for the previous commit. closes #108
1 parent 45b0492 commit 5b84792

4 files changed

Lines changed: 22 additions & 9 deletions

File tree

CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
Unreleased
2+
----------
3+
- Fixed an issue in the handling of ``/`` in tags. (#108)
4+
- Fixed a false-positive detection of invalid external links. (#109)
5+
16
v0.49.2
27
-------
38
- Fixed an issue in ``Template.normal_name()`` causing IndexError on empty/invalid template names, e.g. ``{{Template:}}``. (#105)

tests/test_tag.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,14 +76,16 @@ def test_contents():
7676
assert t.contents == '\nc\n'
7777
t.contents = 'n'
7878
assert t.string == '<t>n</t>'
79+
7980
t = Tag('<t></t>')
8081
assert t.contents == ''
8182
t.contents = 'n'
8283
assert t.string == '<t>n</t>'
84+
8385
t = Tag('<t/>')
8486
assert t.contents == ''
8587
t.contents = 'n'
86-
assert t.string == '<t>n</t>'
88+
assert t.string == '<t/>n</t>'
8789

8890

8991
def test_get_attr_value():
@@ -165,3 +167,11 @@ def test_ref_with_invalid_attr(): # 47,48
165167
assert Tag('<ref name="a"3></ref>').attrs == {'name': 'a'}
166168
assert Tag('<ref name=""></ref>').attrs == {'name': ''}
167169
assert Tag('<ref "16/32"></ref>').attrs == {}
170+
171+
172+
def test_ref_tag_name(): # 108
173+
# This is actually invalid syntax in MediaWiki, but the same syntax
174+
# is valid for `pre`, `noinclude`, and `includeonly`.
175+
# I think it should be valid and the / should be treated as an invalid
176+
# attribute and be ignored like in normal HTML tags.
177+
assert Tag('<ref/ ></ref>').name == 'ref'

wikitextparser/_spans.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -149,9 +149,9 @@
149149
ATTRS_PATTERN = ( # noqa
150150
rb'(?<attr>'
151151
rb'[' + SPACE_CHARS + rb']++(?>' + ATTR_NAME + ATTR_VAL + rb')'
152-
# Invalid attribute. Todo: could the / be removed? see
153-
# https://stackoverflow.com/a/3558200/2705757
154-
+ rb'|(?>[^>/]++|/(?!\s*+>))++'
152+
# See https://stackoverflow.com/a/3558200/2705757 for how HTML5
153+
# treats self-closing marks.
154+
+ rb'|[^>]++'
155155
rb')*+(?<attr_insert>)')
156156
ATTRS_MATCH = regex_compile(
157157
# Leading space is not required at the start of the attribute string.

wikitextparser/_tag.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,9 @@
3232
<(?<name>[A-Za-z0-9]++)''' + ATTRS_PATTERN + rb'''
3333
[''' + SPACE_CHARS + rb''']*+
3434
(?>
35-
(?<self_closing>/\s*>)
36-
|>(?<contents>.*)''' + END_TAG_PATTERN.replace(
35+
>(?<contents>.*)''' + END_TAG_PATTERN.replace(
3736
rb'{name}', rb'(?<end_name>[A-Za-z0-9]++)') + # noqa
38-
rb'''|> # only start; no end tag
37+
rb'''|> # only start; no end tag; could be self-closing
3938
)''', DOTALL | VERBOSE).fullmatch
4039

4140

@@ -182,8 +181,7 @@ def contents(self, contents: str) -> None:
182181
self[start:end] = contents
183182
else:
184183
# This is a self-closing tag.
185-
s, e = match.span('self_closing')
186-
self[s:e] = '>{0}</{1}>'.format(contents, match['name'].decode())
184+
self[-1:] = f'>{contents}</{match["name"].decode()}>'
187185

188186
@property
189187
def parsed_contents(self) -> SubWikiText:

0 commit comments

Comments
 (0)