Skip to content

Commit 2abff64

Browse files
authored
Add pretty print debug (#227)
1 parent 30b6089 commit 2abff64

4 files changed

Lines changed: 201 additions & 1 deletion

File tree

docs/src/markdown/about/changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
ignore empty ones. As the scraping environment is different than a browser environment, it was chosen not to
99
aggressively forgive bad syntax and invalid features to ensure the user is alerted that their program may not perform
1010
as expected.
11+
- **NEW**: Add support to output a pretty print format of a compiled `SelectorList` for debug purposes.
1112

1213
## 2.2.1
1314

docs/src/markdown/about/development.md

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,63 @@ object that may chain other `SelectorLists` objects depending on the complexity
193193
a selector list, then you will get multiple `Selector` objects (one for each compound selector in the list) which in
194194
turn may chain other `Selector` objects.
195195

196+
To view the selector list in a compiled object for debugging purposes, one can access it via `SoupSieve.selectors`,
197+
though it is recommended to pretty print them:
198+
199+
```pycon3
200+
>>> import soupsieve as sv
201+
>>> sv.compile('this > that.class[name=value]').selectors.pretty()
202+
SelectorList(
203+
selectors=(
204+
Selector(
205+
tag=SelectorTag(
206+
name='that',
207+
prefix=None),
208+
ids=(),
209+
classes=(
210+
'class',
211+
),
212+
attributes=(
213+
SelectorAttribute(
214+
attribute='name',
215+
prefix='',
216+
pattern=re.compile(
217+
'^value$'),
218+
xml_type_pattern=None),
219+
),
220+
nth=(),
221+
selectors=(),
222+
relation=SelectorList(
223+
selectors=(
224+
Selector(
225+
tag=SelectorTag(
226+
name='this',
227+
prefix=None),
228+
ids=(),
229+
classes=(),
230+
attributes=(),
231+
nth=(),
232+
selectors=(),
233+
relation=SelectorList(
234+
selectors=(),
235+
is_not=False,
236+
is_html=False),
237+
rel_type='>',
238+
contains=(),
239+
lang=(),
240+
flags=0),
241+
),
242+
is_not=False,
243+
is_html=False),
244+
rel_type=None,
245+
contains=(),
246+
lang=(),
247+
flags=0),
248+
),
249+
is_not=False,
250+
is_html=False)
251+
```
252+
196253
### `SelectorList`
197254

198255
```py3

soupsieve/css_types.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""CSS selector structure items."""
22
import copyreg
33
from collections.abc import Hashable, Mapping
4+
from .pretty import pretty
45

56
__all__ = (
67
'Selector',
@@ -80,11 +81,16 @@ def __repr__(self): # pragma: no cover
8081
"""Representation."""
8182

8283
return "{}({})".format(
83-
self.__base__(), ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]])
84+
self.__class__.__name__, ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]])
8485
)
8586

8687
__str__ = __repr__
8788

89+
def pretty(self):  # pragma: no cover
    """
    Print an indented, multi-line rendering of this object.

    The text is produced by the module-level `pretty` function imported
    from `.pretty` (inside this method body the bare name `pretty` resolves
    to that function, not to this method) and is written with `print`.
    Nothing is returned.
    """

    formatted = pretty(self)
    print(formatted)
93+
8894

8995
class ImmutableDict(Mapping):
9096
"""Hashable, immutable dictionary."""

soupsieve/pretty.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
"""
2+
Format a pretty string of a `SoupSieve` object for easy debugging.
3+
4+
This won't necessarily support all types and such, and it definitely
5+
does not support custom outputs.
6+
7+
It is mainly geared towards our types as the `SelectorList`
8+
object is a beast to look at without some indentation and newlines.
9+
The format and the various output types are fairly well known (though it
10+
hasn't been tested extensively to make sure we aren't missing corner cases).
11+
12+
Example:
13+
14+
```
15+
>>> import soupsieve as sv
16+
>>> sv.compile('this > that.class[name=value]').selectors.pretty()
17+
SelectorList(
18+
selectors=(
19+
Selector(
20+
tag=SelectorTag(
21+
name='that',
22+
prefix=None),
23+
ids=(),
24+
classes=(
25+
'class',
26+
),
27+
attributes=(
28+
SelectorAttribute(
29+
attribute='name',
30+
prefix='',
31+
pattern=re.compile(
32+
'^value$'),
33+
xml_type_pattern=None),
34+
),
35+
nth=(),
36+
selectors=(),
37+
relation=SelectorList(
38+
selectors=(
39+
Selector(
40+
tag=SelectorTag(
41+
name='this',
42+
prefix=None),
43+
ids=(),
44+
classes=(),
45+
attributes=(),
46+
nth=(),
47+
selectors=(),
48+
relation=SelectorList(
49+
selectors=(),
50+
is_not=False,
51+
is_html=False),
52+
rel_type='>',
53+
contains=(),
54+
lang=(),
55+
flags=0),
56+
),
57+
is_not=False,
58+
is_html=False),
59+
rel_type=None,
60+
contains=(),
61+
lang=(),
62+
flags=0),
63+
),
64+
is_not=False,
65+
is_html=False)
66+
```
67+
"""
68+
import re
69+
70+
# Patterns used to split the `str()` form of an object into tokens.
#
# Identifier-like tokens must start with a letter or underscore; any further
# characters are optional, so single-character names (`x=`, `a(`, `n`) are
# recognized as well as longer ones.
RE_CLASS = re.compile(r'(?i)[a-z_][_a-z\d\.]*\(')  # `Name(` or `dotted.name(`
RE_PARAM = re.compile(r'(?i)[_a-z][_a-z\d]*=')  # `name=`
RE_EMPTY = re.compile(r'\(\)|\[\]|\{\}')  # `()`, `[]`, or `{}` with nothing inside
RE_LSTRT = re.compile(r'\[')
RE_DSTRT = re.compile(r'\{')
RE_TSTRT = re.compile(r'\(')
RE_LEND = re.compile(r'\]')
RE_DEND = re.compile(r'\}')
RE_TEND = re.compile(r'\)')
RE_INT = re.compile(r'\d+')
RE_KWORD = re.compile(r'(?i)[_a-z][_a-z\d]*')  # bare word such as `None` or `True`
RE_DQSTR = re.compile(r'"(?:\\.|[^"\\])*"')  # double-quoted string, escapes allowed
RE_SQSTR = re.compile(r"'(?:\\.|[^'\\])*'")  # single-quoted string, escapes allowed
RE_SEP = re.compile(r'\s*(,)\s*')  # item separator; surrounding whitespace is absorbed
RE_DSEP = re.compile(r'\s*(:)\s*')  # key/value separator; surrounding whitespace is absorbed

# Token name -> pattern. The mapping is iterated in insertion order and the
# first pattern that matches at the current position wins, so the more
# specific tokens must stay ahead of the more general ones: 'class' and
# 'param' before 'kword', and 'empty' before the opening brackets.
TOKENS = {
    'class': RE_CLASS,
    'param': RE_PARAM,
    'empty': RE_EMPTY,
    'lstrt': RE_LSTRT,
    'dstrt': RE_DSTRT,
    'tstrt': RE_TSTRT,
    'lend': RE_LEND,
    'dend': RE_DEND,
    'tend': RE_TEND,
    'sqstr': RE_SQSTR,
    'sep': RE_SEP,
    'dsep': RE_DSEP,
    'int': RE_INT,
    'kword': RE_KWORD,
    'dqstr': RE_DQSTR,
}
103+
104+
105+
def pretty(obj):  # pragma: no cover
    """
    Make the object output string pretty.

    `obj` is converted with `str()` and the resulting text is scanned from
    left to right. At each position the patterns in `TOKENS` are tried in
    the mapping's iteration order and the first one that matches is used:

    - 'class', 'lstrt', 'dstrt', 'tstrt': the text is emitted, the indent
      grows by four spaces, and a new indented line is started.
    - 'lend', 'dend', 'tend': the indent shrinks by four spaces and the
      text is emitted on the current line.
    - 'sep': the comma is emitted and a new indented line is started.
    - 'dsep': the colon is emitted followed by a single space.
    - 'param', 'int', 'kword', 'sqstr', 'dqstr', 'empty': emitted as is.

    A character that no pattern recognizes is copied to the output
    unchanged so that scanning always moves forward.

    Returns the formatted text as a single string.
    """

    sel = str(obj)
    length = len(sel)
    index = 0
    indent = 0
    output = []

    while index < length:
        for name, pattern in TOKENS.items():
            m = pattern.match(sel, index)
            if m is None:
                continue

            index = m.end(0)
            if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
                indent += 4
                output.append('{}\n{}'.format(m.group(0), " " * indent))
            elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
                output.append(m.group(0))
            elif name in ('lend', 'dend', 'tend'):
                indent -= 4
                output.append(m.group(0))
            elif name in ('sep',):
                output.append('{}\n{}'.format(m.group(1), " " * indent))
            elif name in ('dsep',):
                output.append('{} '.format(m.group(1)))
            break
        else:
            # No pattern matched here (for example the `.` and `|` in the
            # flags shown by a compiled pattern's repr, a minus sign, or
            # stray whitespace). Without advancing, this loop would spin
            # forever on the same position, so pass the character through.
            output.append(sel[index])
            index += 1

    return ''.join(output)

0 commit comments

Comments
 (0)