|
| 1 | +import base64 |
| 2 | +import gzip |
1 | 3 | import unittest |
2 | 4 | from lxml.tests.common_imports import make_doctest |
3 | 5 |
|
@@ -123,6 +125,132 @@ def test_sneaky_js_in_math_style(self): |
123 | 125 | b'<math><style>/* deleted */</style></math>', |
124 | 126 | lxml.html.tostring(clean_html(s))) |
125 | 127 |
|
def test_sneaky_import_in_style(self):
    # Nested, spaced-out or comment-split "@import" rules must not collapse
    # into a working "@import" once cleaned (e.g. "@@importimport").
    expected = b'<style>/* deleted */</style>'
    sneaky_imports = (
        "@@importimport(extstyle.css)",
        "@ @  import import(extstyle.css)",
        "@ @ importimport(extstyle.css)",
        "@@  import import(extstyle.css)",
        "@ @import import(extstyle.css)",
        "@@importimport()",
        "@@importimport()  ()",
        "@/* ... */import()",
        "@im/* ... */port()",
        "@ @import/* ... */import()",
        "@    /* ... */      import()",
    )
    for style_code in sneaky_imports:
        fragment = lxml.html.fragment_fromstring(
            '<style>%s</style>' % style_code)
        cleaned = lxml.html.tostring(clean_html(fragment))
        # The message records which input produced the unexpected output.
        self.assertEqual(
            expected, cleaned, "%s -> %s" % (style_code, cleaned))
| 152 | + |
def test_sneaky_schemes_in_style(self):
    # Scheme names nested inside each other are expected to have the
    # whole style content replaced by the "deleted" marker.
    expected = b'<style>/* deleted */</style>'
    nested_schemes = (
        "javasjavascript:cript:",
        "javascriptjavascript::",
        "javascriptjavascript:: :",
        "vbjavascript:cript:",
    )
    for style_code in nested_schemes:
        fragment = lxml.html.fragment_fromstring(
            '<style>%s</style>' % style_code)
        cleaned = lxml.html.tostring(clean_html(fragment))
        # The message records which input produced the unexpected output.
        self.assertEqual(
            expected, cleaned, "%s -> %s" % (style_code, cleaned))
| 169 | + |
def test_sneaky_urls_in_style(self):
    # Each url(...) below is expected to come back from the cleaner with
    # its argument emptied out.
    expected = b'<style>url()</style>'
    sneaky_urls = (
        "url(data:image/svg+xml;base64,...)",
        "url(javasjavascript:cript:)",
        "url(javasjavascript:cript: ::)",
        "url(vbjavascript:cript:)",
        "url(vbjavascript:cript: :)",
    )
    for style_code in sneaky_urls:
        fragment = lxml.html.fragment_fromstring(
            '<style>%s</style>' % style_code)
        cleaned = lxml.html.tostring(clean_html(fragment))
        # The message records which input produced the unexpected output.
        self.assertEqual(
            expected, cleaned, "%s -> %s" % (style_code, cleaned))
| 187 | + |
def test_svg_data_links(self):
    # SVG images may carry insecure content, so data: links to them are
    # expected to be blanked out, both plain and gzip-compressed.
    svg = b'<svg onload="alert(123)" />'
    payloads = (
        ("data:image/svg+xml;base64,", svg),
        ("data:image/svg+xml-compressed;base64,", gzip.compress(svg)),
    )
    expected = b'<img src="">'
    for prefix, raw in payloads:
        url = prefix + base64.b64encode(raw).decode('ASCII')
        fragment = lxml.html.fragment_fromstring('<img src="%s">' % url)
        cleaned = lxml.html.tostring(clean_html(fragment))
        # The message records which URL produced the unexpected output.
        self.assertEqual(expected, cleaned, "%s -> %s" % (url, cleaned))
| 207 | + |
def test_image_data_links(self):
    # data: links for the raster image types below are expected to pass
    # through the cleaner unchanged.
    data_b64 = base64.b64encode(b'123').decode('ASCII')
    urls = [
        prefix + data_b64
        for prefix in (
            "data:image/jpeg;base64,",
            "data:image/apng;base64,",
            "data:image/png;base64,",
            "data:image/gif;base64,",
            "data:image/webp;base64,",
            "data:image/bmp;base64,",
            "data:image/tiff;base64,",
            "data:image/x-icon;base64,",
        )
    ]
    for url in urls:
        html = '<img src="%s">' % url
        fragment = lxml.html.fragment_fromstring(html)
        cleaned = lxml.html.tostring(clean_html(fragment))
        # The serialised result must equal the input markup byte for byte.
        self.assertEqual(
            html.encode("UTF-8"), cleaned, "%s -> %s" % (url, cleaned))
| 230 | + |
def test_image_data_links_in_style(self):
    # The same raster image data: links, wrapped in url(...) inside a
    # <style> element, are expected to pass through the cleaner unchanged.
    data_b64 = base64.b64encode(b'123').decode('ASCII')
    urls = [
        prefix + data_b64
        for prefix in (
            "data:image/jpeg;base64,",
            "data:image/apng;base64,",
            "data:image/png;base64,",
            "data:image/gif;base64,",
            "data:image/webp;base64,",
            "data:image/bmp;base64,",
            "data:image/tiff;base64,",
            "data:image/x-icon;base64,",
        )
    ]
    for url in urls:
        html = '<style> url(%s) </style>' % url
        fragment = lxml.html.fragment_fromstring(html)
        cleaned = lxml.html.tostring(clean_html(fragment))
        # The serialised result must equal the input markup byte for byte.
        self.assertEqual(
            html.encode("UTF-8"), cleaned, "%s -> %s" % (url, cleaned))
| 253 | + |
126 | 254 | def test_formaction_attribute_in_button_input(self): |
127 | 255 | # The formaction attribute overrides the form's action and should be |
128 | 256 | # treated as a malicious link attribute |
|
0 commit comments