| Home | Trees | Indices | Help |
|
|---|
|
|
1 # -*- coding: utf-8 -*-
2
3 """
4 Tests specific to the extended etree API
5
6 Tests that apply to the general ElementTree API should go into
7 test_elementtree
8 """
9
10 from __future__ import absolute_import
11
12 from collections import OrderedDict
13 import os.path
14 import unittest
15 import copy
16 import sys
17 import re
18 import gc
19 import operator
20 import textwrap
21 import zlib
22 import gzip
23
24 from .common_imports import etree, StringIO, BytesIO, HelperTestCase
25 from .common_imports import fileInTestDir, fileUrlInTestDir, read_file, path2url, tmpfile
26 from .common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest
27 from .common_imports import canonicalize, _str, _bytes
28
# Version banner: a top-level statement, so it runs when this module is
# imported. It reports the lxml version under test, the Python version, the
# used/compiled libxml and libxslt versions, and the interpreter's
# filesystem encoding, default encoding and sys.maxunicode.
29 print("""
30 TESTED VERSION: %s""" % etree.__version__ + """
31 Python: %r""" % (sys.version_info,) + """
32 lxml.etree: %r""" % (etree.LXML_VERSION,) + """
33 libxml used: %r""" % (etree.LIBXML_VERSION,) + """
34 libxml compiled: %r""" % (etree.LIBXML_COMPILED_VERSION,) + """
35 libxslt used: %r""" % (etree.LIBXSLT_VERSION,) + """
36 libxslt compiled: %r""" % (etree.LIBXSLT_COMPILED_VERSION,) + """
37 FS encoding: %s""" % (sys.getfilesystemencoding(),) + """
38 Default encoding: %s""" % (sys.getdefaultencoding(),) + """
39 Max Unicode: %s""" % (sys.maxunicode,) + """
40 """)
41
# Text type used for isinstance() checks: the ``unicode`` builtin where it
# exists, otherwise ``str``.
try:
    unicode
except NameError:
    # Python 3: there is no separate ``unicode`` builtin.
    _unicode = str
else:
    _unicode = unicode
47
48
50 """Tests only for etree, not ElementTree"""
51 etree = etree
52
54 self.assertTrue(isinstance(etree.__version__, _unicode))
55 self.assertTrue(isinstance(etree.LXML_VERSION, tuple))
56 self.assertEqual(len(etree.LXML_VERSION), 4)
57 self.assertTrue(isinstance(etree.LXML_VERSION[0], int))
58 self.assertTrue(isinstance(etree.LXML_VERSION[1], int))
59 self.assertTrue(isinstance(etree.LXML_VERSION[2], int))
60 self.assertTrue(isinstance(etree.LXML_VERSION[3], int))
61 self.assertTrue(etree.__version__.startswith(
62 str(etree.LXML_VERSION[0])))
63
65 if hasattr(self.etree, '__pyx_capi__'):
66 # newer Pyrex compatible C-API
67 self.assertTrue(isinstance(self.etree.__pyx_capi__, dict))
68 self.assertTrue(len(self.etree.__pyx_capi__) > 0)
69 else:
70 # older C-API mechanism
71 self.assertTrue(hasattr(self.etree, '_import_c_api'))
72
74 import lxml
75 includes = lxml.get_include()
76 self.assertTrue(includes)
77 self.assertTrue(len(includes) >= 2)
78 self.assertTrue(os.path.join(os.path.dirname(lxml.__file__), 'includes') in includes,
79 includes)
80
82 Element = self.etree.Element
83 el = Element('name')
84 self.assertEqual(el.tag, 'name')
85 el = Element('{}name')
86 self.assertEqual(el.tag, 'name')
87
89 Element = self.etree.Element
90 el = Element('name')
91 self.assertRaises(ValueError, Element, '{}')
92 self.assertRaises(ValueError, setattr, el, 'tag', '{}')
93
94 self.assertRaises(ValueError, Element, '{test}')
95 self.assertRaises(ValueError, setattr, el, 'tag', '{test}')
96
98 Element = self.etree.Element
99 self.assertRaises(ValueError, Element, 'p:name')
100 self.assertRaises(ValueError, Element, '{test}p:name')
101
102 el = Element('name')
103 self.assertRaises(ValueError, setattr, el, 'tag', 'p:name')
104
106 Element = self.etree.Element
107 self.assertRaises(ValueError, Element, "p'name")
108 self.assertRaises(ValueError, Element, 'p"name')
109
110 self.assertRaises(ValueError, Element, "{test}p'name")
111 self.assertRaises(ValueError, Element, '{test}p"name')
112
113 el = Element('name')
114 self.assertRaises(ValueError, setattr, el, 'tag', "p'name")
115 self.assertRaises(ValueError, setattr, el, 'tag', 'p"name')
116
118 Element = self.etree.Element
119 self.assertRaises(ValueError, Element, ' name ')
120 self.assertRaises(ValueError, Element, 'na me')
121 self.assertRaises(ValueError, Element, '{test} name')
122
123 el = Element('name')
124 self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
125
127 Element = self.etree.Element
128 SubElement = self.etree.SubElement
129
130 el = Element('name')
131 self.assertRaises(ValueError, SubElement, el, '{}')
132 self.assertRaises(ValueError, SubElement, el, '{test}')
133
135 Element = self.etree.Element
136 SubElement = self.etree.SubElement
137
138 el = Element('name')
139 self.assertRaises(ValueError, SubElement, el, 'p:name')
140 self.assertRaises(ValueError, SubElement, el, '{test}p:name')
141
143 Element = self.etree.Element
144 SubElement = self.etree.SubElement
145
146 el = Element('name')
147 self.assertRaises(ValueError, SubElement, el, "p'name")
148 self.assertRaises(ValueError, SubElement, el, "{test}p'name")
149
150 self.assertRaises(ValueError, SubElement, el, 'p"name')
151 self.assertRaises(ValueError, SubElement, el, '{test}p"name')
152
154 Element = self.etree.Element
155 SubElement = self.etree.SubElement
156
157 el = Element('name')
158 self.assertRaises(ValueError, SubElement, el, ' name ')
159 self.assertRaises(ValueError, SubElement, el, 'na me')
160 self.assertRaises(ValueError, SubElement, el, '{test} name')
161
163 Element = self.etree.Element
164 SubElement = self.etree.SubElement
165
166 el = Element('name')
167 self.assertRaises(ValueError, SubElement, el, 'name', {'a b c' : 'abc'})
168 self.assertRaises(ValueError, SubElement, el, 'name', {'a' : 'a\0\n'})
169 self.assertEqual(0, len(el))
170
172 QName = self.etree.QName
173 self.assertRaises(ValueError, QName, '')
174 self.assertRaises(ValueError, QName, None)
175 self.assertRaises(ValueError, QName, None, None)
176 self.assertRaises(ValueError, QName, 'test', '')
177
179 QName = self.etree.QName
180 q = QName(None, 'TAG')
181 self.assertEqual('TAG', q)
182 self.assertEqual('TAG', q.localname)
183 self.assertEqual(None, q.namespace)
184
186 QName = self.etree.QName
187 self.assertRaises(ValueError, QName, 'p:name')
188 self.assertRaises(ValueError, QName, 'test', 'p:name')
189
191 QName = self.etree.QName
192 self.assertRaises(ValueError, QName, ' name ')
193 self.assertRaises(ValueError, QName, 'na me')
194 self.assertRaises(ValueError, QName, 'test', ' name')
195
197 # ET doesn't have namespace/localname properties on QNames
198 QName = self.etree.QName
199 namespace, localname = 'http://myns', 'a'
200 qname = QName(namespace, localname)
201 self.assertEqual(namespace, qname.namespace)
202 self.assertEqual(localname, qname.localname)
203
205 # ET doesn't have namespace/localname properties on QNames
206 QName = self.etree.QName
207 qname1 = QName('http://myns', 'a')
208 a = self.etree.Element(qname1, nsmap={'p' : 'http://myns'})
209
210 qname2 = QName(a)
211 self.assertEqual(a.tag, qname1.text)
212 self.assertEqual(a.tag, qname1)
213 self.assertEqual(qname1.text, qname2.text)
214 self.assertEqual(qname1, qname2.text)
215 self.assertEqual(qname1.text, qname2)
216 self.assertEqual(qname1, qname2)
217
219 # ET doesn't resolve QNames as text values
220 etree = self.etree
221 qname = etree.QName('http://myns', 'a')
222 a = etree.Element(qname, nsmap={'p' : 'http://myns'})
223 a.text = qname
224
225 self.assertEqual("p:a", a.text)
226
228 etree = self.etree
229 self.assertRaises(ValueError,
230 etree.Element, "root", nsmap={'"' : 'testns'})
231 self.assertRaises(ValueError,
232 etree.Element, "root", nsmap={'&' : 'testns'})
233 self.assertRaises(ValueError,
234 etree.Element, "root", nsmap={'a:b' : 'testns'})
235
237 XML = self.etree.XML
238 tostring = self.etree.tostring
239 a = XML('<a aa="A"><b ba="B">B1</b>B2<c ca="C">C1</c>C2</a>')
240 a[0].clear(keep_tail=True)
241 self.assertEqual(_bytes('<a aa="A"><b/>B2<c ca="C">C1</c>C2</a>'), tostring(a))
242
244 # ET in Py 3.x has no "attrib.has_key()" method
245 XML = self.etree.XML
246
247 root = XML(_bytes('<foo bar="Bar" xmlns:ns="http://ns.codespeak.net/test" ns:baz="Baz" />'))
248 self.assertEqual(
249 True, root.attrib.has_key('bar'))
250 self.assertEqual(
251 False, root.attrib.has_key('baz'))
252 self.assertEqual(
253 False, root.attrib.has_key('hah'))
254 self.assertEqual(
255 True,
256 root.attrib.has_key('{http://ns.codespeak.net/test}baz'))
257
259 Element = self.etree.Element
260 root = Element("root")
261 root.set("attr", "TEST")
262 self.assertEqual("TEST", root.get("attr"))
263
265 # ElementTree accepts arbitrary attribute values
266 # lxml.etree allows only strings
267 Element = self.etree.Element
268
269 root = Element("root")
270 root.set("attr", "TEST")
271 self.assertEqual("TEST", root.get("attr"))
272 self.assertRaises(TypeError, root.set, "newattr", 5)
273
275 Element = self.etree.Element
276
277 root = Element("root")
278 root.set("attr", "TEST")
279 self.assertEqual("TEST", root.attrib["attr"])
280
281 root2 = Element("root2", root.attrib, attr2='TOAST')
282 self.assertEqual("TEST", root2.attrib["attr"])
283 self.assertEqual("TOAST", root2.attrib["attr2"])
284 self.assertEqual(None, root.attrib.get("attr2"))
285
287 Element = self.etree.Element
288
289 keys = ["attr%d" % i for i in range(12, 4, -1)]
290 values = ["TEST-%d" % i for i in range(12, 4, -1)]
291 items = list(zip(keys, values))
292
293 root = Element("root")
294 for key, value in items:
295 root.set(key, value)
296 self.assertEqual(keys, root.attrib.keys())
297 self.assertEqual(values, root.attrib.values())
298
299 attr_order = [
300 ('attr_99', 'TOAST-1'),
301 ('attr_98', 'TOAST-2'),
302 ]
303 ordered_dict_types = [OrderedDict, lambda x:x]
304 if sys.version_info >= (3, 6):
305 ordered_dict_types.append(dict)
306 else:
307 # Keyword arguments are not ordered in Py<3.6, and thus get sorted.
308 attr_order.sort()
309 attr_order += items
310 expected_keys = [attr[0] for attr in attr_order]
311 expected_values = [attr[1] for attr in attr_order]
312 expected_items = list(zip(expected_keys, expected_values))
313
314 for dict_type in ordered_dict_types:
315 root2 = Element("root2", dict_type(root.attrib),
316 attr_99='TOAST-1', attr_98='TOAST-2')
317
318 try:
319 self.assertSequenceEqual(expected_keys, root2.attrib.keys())
320 self.assertSequenceEqual(expected_values, root2.attrib.values())
321 self.assertSequenceEqual(expected_items, root2.attrib.items())
322 except AssertionError as exc:
323 exc.args = ("Order of '%s': %s" % (dict_type.__name__, exc.args[0]),) + exc.args[1:]
324 raise
325
326 self.assertEqual(keys, root.attrib.keys())
327 self.assertEqual(values, root.attrib.values())
328
330 # ElementTree accepts arbitrary attribute values
331 # lxml.etree allows only strings, or None for (html5) boolean attributes
332 Element = self.etree.Element
333 root = Element("root")
334 self.assertRaises(TypeError, root.set, "newattr", 5)
335 self.assertRaises(TypeError, root.set, "newattr", object)
336 self.assertRaises(TypeError, root.set, "newattr", None)
337 self.assertRaises(TypeError, root.set, "newattr")
338
340 XML = self.etree.XML
341 xml = _bytes('<test a="5" b="10" c="20"><x a="4" b="2"/></test>')
342
343 root = XML(xml)
344 self.etree.strip_attributes(root, 'a')
345 self.assertEqual(_bytes('<test b="10" c="20"><x b="2"></x></test>'),
346 self._writeElement(root))
347
348 root = XML(xml)
349 self.etree.strip_attributes(root, 'b', 'c')
350 self.assertEqual(_bytes('<test a="5"><x a="4"></x></test>'),
351 self._writeElement(root))
352
354 XML = self.etree.XML
355 xml = _bytes('<test xmlns:n="http://test/ns" a="6" b="10" c="20" n:a="5"><x a="4" n:b="2"/></test>')
356
357 root = XML(xml)
358 self.etree.strip_attributes(root, 'a')
359 self.assertEqual(
360 _bytes('<test xmlns:n="http://test/ns" b="10" c="20" n:a="5"><x n:b="2"></x></test>'),
361 self._writeElement(root))
362
363 root = XML(xml)
364 self.etree.strip_attributes(root, '{http://test/ns}a', 'c')
365 self.assertEqual(
366 _bytes('<test xmlns:n="http://test/ns" a="6" b="10"><x a="4" n:b="2"></x></test>'),
367 self._writeElement(root))
368
369 root = XML(xml)
370 self.etree.strip_attributes(root, '{http://test/ns}*')
371 self.assertEqual(
372 _bytes('<test xmlns:n="http://test/ns" a="6" b="10" c="20"><x a="4"></x></test>'),
373 self._writeElement(root))
374
376 XML = self.etree.XML
377 xml = _bytes('<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>')
378
379 root = XML(xml)
380 self.etree.strip_elements(root, 'a')
381 self.assertEqual(_bytes('<test><x></x></test>'),
382 self._writeElement(root))
383
384 root = XML(xml)
385 self.etree.strip_elements(root, 'b', 'c', 'X', 'Y', 'Z')
386 self.assertEqual(_bytes('<test><a></a><x><a></a></x></test>'),
387 self._writeElement(root))
388
389 root = XML(xml)
390 self.etree.strip_elements(root, 'c')
391 self.assertEqual(_bytes('<test><a><b></b></a><x><a><b></b></a></x></test>'),
392 self._writeElement(root))
393
395 XML = self.etree.XML
396 xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
397
398 root = XML(xml)
399 self.etree.strip_elements(root, 'a')
400 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>'),
401 self._writeElement(root))
402
403 root = XML(xml)
404 self.etree.strip_elements(root, '{urn:a}b', 'c')
405 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
406 self._writeElement(root))
407
408 root = XML(xml)
409 self.etree.strip_elements(root, '{urn:a}*', 'c')
410 self.assertEqual(_bytes('<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
411 self._writeElement(root))
412
413 root = XML(xml)
414 self.etree.strip_elements(root, '{urn:a}*', 'c', with_tail=False)
415 self.assertEqual(_bytes('<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
416 self._writeElement(root))
417
436
462
489
516
535
548
550 # lxml.etree separates target and text
551 Element = self.etree.Element
552 SubElement = self.etree.SubElement
553 ProcessingInstruction = self.etree.ProcessingInstruction
554
555 a = Element('a')
556 a.append(ProcessingInstruction('foo', 'some more text'))
557 self.assertEqual(a[0].target, 'foo')
558 self.assertEqual(a[0].text, 'some more text')
559
561 XML = self.etree.XML
562 root = XML(_bytes("<test><?mypi my test ?></test>"))
563 self.assertEqual(root[0].target, "mypi")
564 self.assertEqual(root[0].text, "my test ")
565
567 XML = self.etree.XML
568 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
569 self.assertEqual(root[0].target, "mypi")
570 self.assertEqual(root[0].get('my'), "1")
571 self.assertEqual(root[0].get('test'), " abc ")
572 self.assertEqual(root[0].get('quotes'), "' '")
573 self.assertEqual(root[0].get('only'), None)
574 self.assertEqual(root[0].get('names'), None)
575 self.assertEqual(root[0].get('nope'), None)
576
578 XML = self.etree.XML
579 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
580 self.assertEqual(root[0].target, "mypi")
581 self.assertEqual(root[0].attrib['my'], "1")
582 self.assertEqual(root[0].attrib['test'], " abc ")
583 self.assertEqual(root[0].attrib['quotes'], "' '")
584 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only')
585 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names')
586 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope')
587
589 # previously caused a crash
590 ProcessingInstruction = self.etree.ProcessingInstruction
591
592 a = ProcessingInstruction("PI", "ONE")
593 b = copy.deepcopy(a)
594 b.text = "ANOTHER"
595
596 self.assertEqual('ONE', a.text)
597 self.assertEqual('ANOTHER', b.text)
598
600 XML = self.etree.XML
601 tostring = self.etree.tostring
602 root = XML(_bytes("<?mypi my test ?><test/><!--comment -->"))
603 tree1 = self.etree.ElementTree(root)
604 self.assertEqual(_bytes("<?mypi my test ?><test/><!--comment -->"),
605 tostring(tree1))
606
607 tree2 = copy.deepcopy(tree1)
608 self.assertEqual(_bytes("<?mypi my test ?><test/><!--comment -->"),
609 tostring(tree2))
610
611 root2 = copy.deepcopy(tree1.getroot())
612 self.assertEqual(_bytes("<test/>"),
613 tostring(root2))
614
616 XML = self.etree.XML
617 tostring = self.etree.tostring
618 xml = _bytes('<!DOCTYPE test [\n<!ENTITY entity "tasty">\n]>\n<test/>')
619 root = XML(xml)
620 tree1 = self.etree.ElementTree(root)
621 self.assertEqual(xml, tostring(tree1))
622
623 tree2 = copy.deepcopy(tree1)
624 self.assertEqual(xml, tostring(tree2))
625
626 root2 = copy.deepcopy(tree1.getroot())
627 self.assertEqual(_bytes("<test/>"),
628 tostring(root2))
629
631 XML = self.etree.XML
632 tostring = self.etree.tostring
633 xml = _bytes('<!-- comment --><!DOCTYPE test [\n<!ENTITY entity "tasty">\n]>\n<test/>')
634 root = XML(xml)
635 tree1 = self.etree.ElementTree(root)
636 self.assertEqual(xml, tostring(tree1))
637
638 tree2 = copy.deepcopy(tree1)
639 self.assertEqual(xml, tostring(tree2))
640
642 fromstring = self.etree.fromstring
643 tostring = self.etree.tostring
644 XMLParser = self.etree.XMLParser
645
646 xml = _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
647 parser = XMLParser(remove_comments=True)
648 root = fromstring(xml, parser)
649 self.assertEqual(
650 _bytes('<a><b><c/></b></a>'),
651 tostring(root))
652
654 parse = self.etree.parse
655 tostring = self.etree.tostring
656 XMLParser = self.etree.XMLParser
657
658 xml = _bytes('<?test?><a><?A?><b><?B?><c/></b><?C?></a><?tail?>')
659
660 f = BytesIO(xml)
661 tree = parse(f)
662 self.assertEqual(
663 xml,
664 tostring(tree))
665
666 parser = XMLParser(remove_pis=True)
667 tree = parse(f, parser)
668 self.assertEqual(
669 _bytes('<a><b><c/></b></a>'),
670 tostring(tree))
671
673 # ET raises IOError only
674 parse = self.etree.parse
675 self.assertRaises(TypeError, parse, 'notthere.xml', object())
676
678 # ET removes comments
679 iterparse = self.etree.iterparse
680 tostring = self.etree.tostring
681
682 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
683 events = list(iterparse(f))
684 root = events[-1][1]
685 self.assertEqual(3, len(events))
686 self.assertEqual(
687 _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
688 tostring(root))
689
691 # ET removes comments
692 iterparse = self.etree.iterparse
693 tostring = self.etree.tostring
694
def name(event, el):
    """Return a comparable label for one ``(event, el)`` pair.

    'comment' events are labelled by the comment's text, every other
    event by the element's tag.
    """
    return el.text if event == 'comment' else el.tag
700
701 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
702 events = list(iterparse(f, events=('end', 'comment')))
703 root = events[-1][1]
704 self.assertEqual(6, len(events))
705 self.assertEqual(['A', ' B ', 'c', 'b', 'C', 'a'],
706 [ name(*item) for item in events ])
707 self.assertEqual(
708 _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
709 tostring(root))
710
712 # ET removes pis
713 iterparse = self.etree.iterparse
714 tostring = self.etree.tostring
715 ElementTree = self.etree.ElementTree
716
def name(event, el):
    """Return a comparable label for one ``(event, el)`` pair.

    'pi' events are labelled by a ``(target, text)`` tuple, every other
    event by the element's tag.
    """
    if event != 'pi':
        return el.tag
    return el.target, el.text
722
723 f = BytesIO('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>')
724 events = list(iterparse(f, events=('end', 'pi')))
725 root = events[-2][1]
726 self.assertEqual(8, len(events))
727 self.assertEqual([('pia','a'), ('pib','b'), ('pic','c'), 'c', 'b',
728 ('pid','d'), 'a', ('pie','e')],
729 [ name(*item) for item in events ])
730 self.assertEqual(
731 _bytes('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>'),
732 tostring(ElementTree(root)))
733
735 iterparse = self.etree.iterparse
736 tostring = self.etree.tostring
737
738 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
739 events = list(iterparse(f, remove_comments=True,
740 events=('end', 'comment')))
741 root = events[-1][1]
742 self.assertEqual(3, len(events))
743 self.assertEqual(['c', 'b', 'a'],
744 [ el.tag for (event, el) in events ])
745 self.assertEqual(
746 _bytes('<a><b><c/></b></a>'),
747 tostring(root))
748
750 iterparse = self.etree.iterparse
751 f = BytesIO('<a><b><c/></a>')
752 # ET raises ExpatError, lxml raises XMLSyntaxError
753 self.assertRaises(self.etree.XMLSyntaxError, list, iterparse(f))
754
756 iterparse = self.etree.iterparse
757 f = BytesIO('<a><b><c/></a>')
758 it = iterparse(f, events=('start', 'end'), recover=True)
759 events = [(ev, el.tag) for ev, el in it]
760 root = it.root
761 self.assertTrue(root is not None)
762
763 self.assertEqual(1, events.count(('start', 'a')))
764 self.assertEqual(1, events.count(('end', 'a')))
765
766 self.assertEqual(1, events.count(('start', 'b')))
767 self.assertEqual(1, events.count(('end', 'b')))
768
769 self.assertEqual(1, events.count(('start', 'c')))
770 self.assertEqual(1, events.count(('end', 'c')))
771
773 iterparse = self.etree.iterparse
774 f = BytesIO('<a><b><c/></d><b><c/></a></b>')
775 it = iterparse(f, events=('start', 'end'), recover=True)
776 events = [(ev, el.tag) for ev, el in it]
777 root = it.root
778 self.assertTrue(root is not None)
779
780 self.assertEqual(1, events.count(('start', 'a')))
781 self.assertEqual(1, events.count(('end', 'a')))
782
783 self.assertEqual(2, events.count(('start', 'b')))
784 self.assertEqual(2, events.count(('end', 'b')))
785
786 self.assertEqual(2, events.count(('start', 'c')))
787 self.assertEqual(2, events.count(('end', 'c')))
788
790 iterparse = self.etree.iterparse
791 f = BytesIO("""
792 <a> \n \n <b> b test </b> \n
793
794 \n\t <c> \n </c> </a> \n """)
795 iterator = iterparse(f, remove_blank_text=True)
796 text = [ (element.text, element.tail)
797 for event, element in iterator ]
798 self.assertEqual(
799 [(" b test ", None), (" \n ", None), (None, None)],
800 text)
801
803 iterparse = self.etree.iterparse
804 f = BytesIO('<a><b><d/></b><c/></a>')
805
806 iterator = iterparse(f, tag="b", events=('start', 'end'))
807 events = list(iterator)
808 root = iterator.root
809 self.assertEqual(
810 [('start', root[0]), ('end', root[0])],
811 events)
812
814 iterparse = self.etree.iterparse
815 f = BytesIO('<a><b><d/></b><c/></a>')
816
817 iterator = iterparse(f, tag="*", events=('start', 'end'))
818 events = list(iterator)
819 self.assertEqual(
820 8,
821 len(events))
822
824 iterparse = self.etree.iterparse
825 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
826
827 iterator = iterparse(f, tag="{urn:test:1}b", events=('start', 'end'))
828 events = list(iterator)
829 root = iterator.root
830 self.assertEqual(
831 [('start', root[0]), ('end', root[0])],
832 events)
833
835 iterparse = self.etree.iterparse
836 f = BytesIO('<a><b><d/></b><c/></a>')
837 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
838 events = list(iterator)
839 root = iterator.root
840 self.assertEqual(
841 [('start', root[0]), ('end', root[0])],
842 events)
843
844 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
845 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
846 events = list(iterator)
847 root = iterator.root
848 self.assertEqual([], events)
849
851 iterparse = self.etree.iterparse
852 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
853 iterator = iterparse(f, tag="{urn:test:1}*", events=('start', 'end'))
854 events = list(iterator)
855 self.assertEqual(8, len(events))
856
858 iterparse = self.etree.iterparse
859 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
860 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
861 events = list(iterator)
862 self.assertEqual([], events)
863
864 f = BytesIO('<a><b><d/></b><c/></a>')
865 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
866 events = list(iterator)
867 self.assertEqual(8, len(events))
868
870 text = _str('Søk på nettet')
871 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
872 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
873 ).encode('iso-8859-1')
874
875 self.assertRaises(self.etree.ParseError,
876 list, self.etree.iterparse(BytesIO(xml_latin1)))
877
879 text = _str('Søk på nettet', encoding="UTF-8")
880 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
881 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
882 ).encode('iso-8859-1')
883
884 iterator = self.etree.iterparse(BytesIO(xml_latin1),
885 encoding="iso-8859-1")
886 self.assertEqual(1, len(list(iterator)))
887
888 a = iterator.root
889 self.assertEqual(a.text, text)
890
892 tostring = self.etree.tostring
893 f = BytesIO('<root><![CDATA[test]]></root>')
894 context = self.etree.iterparse(f, strip_cdata=False)
895 content = [ el.text for event,el in context ]
896
897 self.assertEqual(['test'], content)
898 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
899 tostring(context.root))
900
904
906 self.etree.XMLParser(encoding="ascii")
907 self.etree.XMLParser(encoding="utf-8")
908 self.etree.XMLParser(encoding="iso-8859-1")
909
911 parser = self.etree.XMLParser(recover=True)
912
913 parser.feed('<?xml version=')
914 parser.feed('"1.0"?><ro')
915 parser.feed('ot><')
916 parser.feed('a test="works"')
917 parser.feed('><othertag/></root') # <a> not closed!
918 parser.feed('>')
919
920 root = parser.close()
921
922 self.assertEqual(root.tag, "root")
923 self.assertEqual(len(root), 1)
924 self.assertEqual(root[0].tag, "a")
925 self.assertEqual(root[0].get("test"), "works")
926 self.assertEqual(len(root[0]), 1)
927 self.assertEqual(root[0][0].tag, "othertag")
928 # FIXME: would be nice to get some errors logged ...
929 #self.assertTrue(len(parser.error_log) > 0, "error log is empty")
930
932 # test that recover mode plays nicely with the no-id-dict setup
933 parser = self.etree.XMLParser(recover=True, collect_ids=False)
934
935 parser.feed('<?xml version=')
936 parser.feed('"1.0"?><ro')
937 parser.feed('ot xml:id="123"><')
938 parser.feed('a test="works" xml:id=')
939 parser.feed('"321"><othertag/></root') # <a> not closed!
940 parser.feed('>')
941
942 root = parser.close()
943
944 self.assertEqual(root.tag, "root")
945 self.assertEqual(len(root), 1)
946 self.assertEqual(root[0].tag, "a")
947 self.assertEqual(root[0].get("test"), "works")
948 self.assertEqual(root[0].attrib, {
949 'test': 'works',
950 '{http://www.w3.org/XML/1998/namespace}id': '321'})
951 self.assertEqual(len(root[0]), 1)
952 self.assertEqual(root[0][0].tag, "othertag")
953 # FIXME: would be nice to get some errors logged ...
954 #self.assertTrue(len(parser.error_log) > 0, "error log is empty")
955
957 assertEqual = self.assertEqual
958 assertFalse = self.assertFalse
959
960 events = []
class Target(object):
    """Parser target that logs start/end events and checks every tag.

    Each callback appends to the enclosing ``events`` list first and then
    asserts; ``close()`` returns a plain string instead of an Element.
    """

    def start(self, tag, attrib):
        """Log the start event; expect no attributes and the tag "TAG"."""
        events.append("start")
        assertFalse(attrib)
        assertEqual("TAG", tag)

    def end(self, tag):
        """Log the end event; expect the tag "TAG"."""
        events.append("end")
        assertEqual("TAG", tag)

    def close(self):
        """Finish parsing with a non-Element result."""
        # no Element!
        return "DONE"
971
972 parser = self.etree.XMLParser(target=Target())
973 tree = self.etree.ElementTree()
974
975 self.assertRaises(TypeError,
976 tree.parse, BytesIO("<TAG/>"), parser=parser)
977 self.assertEqual(["start", "end"], events)
978
980 # ET doesn't call .close() on errors
981 events = []
class Target(object):
    """Event-logging parser target whose ``end()`` fails on tag 'a'."""

    def _log(self, kind, payload):
        """Append ``kind + "-" + payload`` to the enclosing ``events`` list."""
        events.append(kind + "-" + payload)

    def start(self, tag, attrib):
        self._log("start", tag)

    def end(self, tag):
        # The event is logged before the error is raised.
        self._log("end", tag)
        if tag == 'a':
            raise ValueError("dead and gone")

    def data(self, data):
        self._log("data", data)

    def close(self):
        """Log the close call and return the parse result marker."""
        events.append("close")
        return "DONE"
994
995 parser = self.etree.XMLParser(target=Target())
996
997 try:
998 parser.feed(_bytes('<root>A<a>ca</a>B</root>'))
999 done = parser.close()
1000 self.fail("error expected, but parsing succeeded")
1001 except ValueError:
1002 done = 'value error received as expected'
1003
1004 self.assertEqual(["start-root", "data-A", "start-a",
1005 "data-ca", "end-a", "close"],
1006 events)
1007
1009 # ET doesn't call .close() on errors
1010 events = []
class Target(object):
    """Event-logging parser target whose ``end()`` fails on tag 'a'."""

    def _log(self, kind, payload):
        """Append ``kind + "-" + payload`` to the enclosing ``events`` list."""
        events.append(kind + "-" + payload)

    def start(self, tag, attrib):
        self._log("start", tag)

    def end(self, tag):
        # The event is logged before the error is raised.
        self._log("end", tag)
        if tag == 'a':
            raise ValueError("dead and gone")

    def data(self, data):
        self._log("data", data)

    def close(self):
        """Log the close call and return the parse result marker."""
        events.append("close")
        return "DONE"
1023
1024 parser = self.etree.XMLParser(target=Target())
1025
1026 try:
1027 done = self.etree.fromstring(_bytes('<root>A<a>ca</a>B</root>'),
1028 parser=parser)
1029 self.fail("error expected, but parsing succeeded")
1030 except ValueError:
1031 done = 'value error received as expected'
1032
1033 self.assertEqual(["start-root", "data-A", "start-a",
1034 "data-ca", "end-a", "close"],
1035 events)
1036
1038 # test that target parsing works nicely with the no-id-hash setup
1039 events = []
class Target(object):
    """Parser target logging start, end, data and comment events."""

    def _log(self, kind, payload):
        """Append ``kind + "-" + payload`` to the enclosing ``events`` list."""
        events.append(kind + "-" + payload)

    def start(self, tag, attrib):
        self._log("start", tag)

    def end(self, tag):
        self._log("end", tag)

    def data(self, data):
        self._log("data", data)

    def comment(self, text):
        self._log("comment", text)

    def close(self):
        """Return the parse result marker; the close call is not logged."""
        return "DONE"
1051
1052 parser = self.etree.XMLParser(target=Target(), collect_ids=False)
1053
1054 parser.feed(_bytes('<!--a--><root xml:id="123">A<!--b-->'))
1055 parser.feed(_bytes('<sub xml:id="321"/>B</root>'))
1056 done = parser.close()
1057
1058 self.assertEqual("DONE", done)
1059 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1060 "start-sub", "end-sub", "data-B", "end-root"],
1061 events)
1062
1064 events = []
class Target(object):
    """Parser target logging start, end, data and comment events."""

    def _log(self, kind, payload):
        """Append ``kind + "-" + payload`` to the enclosing ``events`` list."""
        events.append(kind + "-" + payload)

    def start(self, tag, attrib):
        self._log("start", tag)

    def end(self, tag):
        self._log("end", tag)

    def data(self, data):
        self._log("data", data)

    def comment(self, text):
        self._log("comment", text)

    def close(self):
        """Return the parse result marker; the close call is not logged."""
        return "DONE"
1076
1077 parser = self.etree.XMLParser(target=Target())
1078
1079 parser.feed(_bytes('<!--a--><root>A<!--b--><sub/><!--c-->B</root><!--d-->'))
1080 done = parser.close()
1081
1082 self.assertEqual("DONE", done)
1083 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1084 "start-sub", "end-sub", "comment-c", "data-B",
1085 "end-root", "comment-d"],
1086 events)
1087
1089 events = []
class Target(object):
    """Parser target logging start, end, data and processing instructions."""

    def _log(self, kind, payload):
        """Append ``kind + "-" + payload`` to the enclosing ``events`` list."""
        events.append(kind + "-" + payload)

    def start(self, tag, attrib):
        self._log("start", tag)

    def end(self, tag):
        self._log("end", tag)

    def data(self, data):
        self._log("data", data)

    def pi(self, target, data):
        # Logged as "pi-<target>-<data>".
        events.append("pi-" + target + "-" + data)

    def close(self):
        """Return the parse result marker; the close call is not logged."""
        return "DONE"
1101
1102 parser = self.etree.XMLParser(target=Target())
1103
1104 parser.feed(_bytes('<?test a?><root>A<?test b?>B</root><?test c?>'))
1105 done = parser.close()
1106
1107 self.assertEqual("DONE", done)
1108 self.assertEqual(["pi-test-a", "start-root", "data-A", "pi-test-b",
1109 "data-B", "end-root", "pi-test-c"],
1110 events)
1111
1113 events = []
class Target(object):
    """Parser target logging start, end and data events."""

    def _log(self, kind, payload):
        """Append ``kind + "-" + payload`` to the enclosing ``events`` list."""
        events.append(kind + "-" + payload)

    def start(self, tag, attrib):
        self._log("start", tag)

    def end(self, tag):
        self._log("end", tag)

    def data(self, data):
        self._log("data", data)

    def close(self):
        """Return the parse result marker; the close call is not logged."""
        return "DONE"
1123
1124 parser = self.etree.XMLParser(target=Target(),
1125 strip_cdata=False)
1126
1127 parser.feed(_bytes('<root>A<a><![CDATA[ca]]></a>B</root>'))
1128 done = parser.close()
1129
1130 self.assertEqual("DONE", done)
1131 self.assertEqual(["start-root", "data-A", "start-a",
1132 "data-ca", "end-a", "data-B", "end-root"],
1133 events)
1134
1136 events = []
class Target(object):
    """Parser target logging start, end, data and close events."""

    def _log(self, kind, payload):
        """Append ``kind + "-" + payload`` to the enclosing ``events`` list."""
        events.append(kind + "-" + payload)

    def start(self, tag, attrib):
        self._log("start", tag)

    def end(self, tag):
        self._log("end", tag)

    def data(self, data):
        self._log("data", data)

    def close(self):
        """Log the close call and return the parse result marker."""
        events.append("close")
        return "DONE"
1147
1148 parser = self.etree.XMLParser(target=Target(),
1149 recover=True)
1150
1151 parser.feed(_bytes('<root>A<a>ca</a>B</not-root>'))
1152 done = parser.close()
1153
1154 self.assertEqual("DONE", done)
1155 self.assertEqual(["start-root", "data-A", "start-a",
1156 "data-ca", "end-a", "data-B",
1157 "end-root", "close"],
1158 events)
1159
1161 iterwalk = self.etree.iterwalk
1162 root = self.etree.XML(_bytes('<a><b><d/></b><c/></a>'))
1163
1164 iterator = iterwalk(root, tag="b", events=('start', 'end'))
1165 events = list(iterator)
1166 self.assertEqual(
1167 [('start', root[0]), ('end', root[0])],
1168 events)
1169
1171 iterwalk = self.etree.iterwalk
1172 root = self.etree.XML(_bytes('<a><b><d/></b><c/></a>'))
1173
1174 iterator = iterwalk(root, tag="*", events=('start', 'end'))
1175 events = list(iterator)
1176 self.assertEqual(
1177 8,
1178 len(events))
1179
1181 iterwalk = self.etree.iterwalk
1182 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1183
1184 events = list(iterwalk(root))
1185 self.assertEqual(
1186 [('end', root[0]), ('end', root[1]), ('end', root)],
1187 events)
1188
1190 iterwalk = self.etree.iterwalk
1191 root = self.etree.XML(
1192 b'<!--C0--><a><!--Ca--><b><!--Cb--></b><!--Cc--><c/></a><!--C99-->')
1193
1194 iterator = iterwalk(root, events=('start', 'end', 'comment'))
1195 events = list(iterator)
1196 self.assertEqual(
1197 [('start', root), ('comment', root[0]),
1198 ('start', root[1]), ('comment', root[1][0]), ('end', root[1]),
1199 ('comment', root[2]), ('start', root[3]), ('end', root[3]),
1200 ('end', root),
1201 ],
1202 events)
1203
1205 iterwalk = self.etree.iterwalk
1206 root = self.etree.XML(
1207 b'<!--C0--><a><!--Ca--><b><!--Cb--></b><!--Cc--><c/></a><!--C99-->')
1208
1209 iterator = iterwalk(self.etree.ElementTree(root), events=('start', 'end', 'comment'))
1210 events = list(iterator)
1211 self.assertEqual(
1212 [('comment', root.getprevious()),
1213 ('start', root), ('comment', root[0]), # <a>
1214 ('start', root[1]), ('comment', root[1][0]), ('end', root[1]), # <b>
1215 ('comment', root[2]), ('start', root[3]), ('end', root[3]), # <c>
1216 ('end', root), ('comment', root.getnext()),
1217 ],
1218 events)
1219
1221 iterwalk = self.etree.iterwalk
1222 root = self.etree.XML(
1223 b'<?C0?><a><?Ca?><b><?Cb?></b><?Cc?><c/></a><?C99?>')
1224
1225 iterator = iterwalk(root, events=('start', 'end', 'pi'))
1226 events = list(iterator)
1227 self.assertEqual(
1228 [('start', root), ('pi', root[0]),
1229 ('start', root[1]), ('pi', root[1][0]), ('end', root[1]),
1230 ('pi', root[2]), ('start', root[3]), ('end', root[3]),
1231 ('end', root),
1232 ],
1233 events)
1234
1236 iterwalk = self.etree.iterwalk
1237 root = self.etree.XML(
1238 b'<?C0?><a><?Ca?><b><?Cb?></b><?Cc?><c/></a><?C99?>')
1239
1240 iterator = iterwalk(self.etree.ElementTree(root), events=('start', 'end', 'pi'))
1241 events = list(iterator)
1242 self.assertEqual(
1243 [('pi', root.getprevious()),
1244 ('start', root), ('pi', root[0]), # <a>
1245 ('start', root[1]), ('pi', root[1][0]), ('end', root[1]), # <b>
1246 ('pi', root[2]), ('start', root[3]), ('end', root[3]), # <c>
1247 ('end', root), ('pi', root.getnext()),
1248 ],
1249 events)
1250
1252 iterwalk = self.etree.iterwalk
1253 root = self.etree.XML(
1254 b'<!--C0--><?C0?><!--C1--><a><?Ca?><b><!--Cb--></b><?Cc?><c/></a><!--C99--><?C99?>')
1255
1256 iterator = iterwalk(self.etree.ElementTree(root), events=('start', 'end', 'pi', 'comment'))
1257 events = list(iterator)
1258 self.assertEqual(
1259 [('comment', root.getprevious().getprevious().getprevious()),
1260 ('pi', root.getprevious().getprevious()),
1261 ('comment', root.getprevious()),
1262 ('start', root), ('pi', root[0]), # <a>
1263 ('start', root[1]), ('comment', root[1][0]), ('end', root[1]), # <b>
1264 ('pi', root[2]), ('start', root[3]), ('end', root[3]), # <c>
1265 ('end', root), ('comment', root.getnext()), ('pi', root.getnext().getnext()),
1266 ],
1267 events)
1268
1270 iterwalk = self.etree.iterwalk
1271 root = self.etree.XML(
1272 b'<!--C0--><?C0?><!--C1--><a><?Ca?><b><!--Cb--></b><?Cc?><c/></a><!--C99--><?C99?>')
1273
1274 iterator = iterwalk(self.etree.ElementTree(root), events=('start', 'end'))
1275 events = list(iterator)
1276 self.assertEqual(
1277 [('start', root), # <a>
1278 ('start', root[1]), ('end', root[1]), # <b>
1279 ('start', root[3]), ('end', root[3]), # <c>
1280 ('end', root),
1281 ],
1282 events)
1283
1285 iterwalk = self.etree.iterwalk
1286 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1287
1288 iterator = iterwalk(root, events=('start',))
1289 events = list(iterator)
1290 self.assertEqual(
1291 [('start', root), ('start', root[0]), ('start', root[1])],
1292 events)
1293
1295 iterwalk = self.etree.iterwalk
1296 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1297
1298 iterator = iterwalk(root, events=('start','end'))
1299 events = list(iterator)
1300 self.assertEqual(
1301 [('start', root), ('start', root[0]), ('end', root[0]),
1302 ('start', root[1]), ('end', root[1]), ('end', root)],
1303 events)
1304
1314
1324
1338
1340 iterwalk = self.etree.iterwalk
1341 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1342
1343 iterator = iterwalk(root)
1344 for event, elem in iterator:
1345 elem.clear()
1346
1347 self.assertEqual(0,
1348 len(root))
1349
1351 iterwalk = self.etree.iterwalk
1352 root = self.etree.XML(_bytes('<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>'))
1353
1354 attr_name = '{testns}bla'
1355 events = []
1356 iterator = iterwalk(root, events=('start','end','start-ns','end-ns'))
1357 for event, elem in iterator:
1358 events.append(event)
1359 if event == 'start':
1360 if elem.tag != '{ns1}a':
1361 elem.set(attr_name, 'value')
1362
1363 self.assertEqual(
1364 ['start-ns', 'start', 'start', 'start-ns', 'start',
1365 'end', 'end-ns', 'end', 'end', 'end-ns'],
1366 events)
1367
1368 self.assertEqual(
1369 None,
1370 root.get(attr_name))
1371 self.assertEqual(
1372 'value',
1373 root[0].get(attr_name))
1374
1376 iterwalk = self.etree.iterwalk
1377 root = self.etree.XML(_bytes('<a><b><c/></b><d><e/></d></a>'))
1378
1379 iterator = iterwalk(root)
1380 tags = []
1381 for event, elem in iterator:
1382 tags.append(elem.tag)
1383 # requesting a skip after an 'end' event should never have an effect
1384 iterator.skip_subtree()
1385
1386 self.assertEqual(['c', 'b', 'e', 'd', 'a'], tags)
1387
1389 iterwalk = self.etree.iterwalk
1390 root = self.etree.XML(_bytes('<a><b><c/></b><d><e/></d></a>'))
1391
1392 iterator = iterwalk(root, events=('start', 'end'))
1393 tags = []
1394 for event, elem in iterator:
1395 tags.append((event, elem.tag))
1396 if elem.tag in ('b', 'e'):
1397 # skipping should only have an effect on 'start', not on 'end'
1398 iterator.skip_subtree()
1399
1400 self.assertEqual(
1401 [('start', 'a'),
1402 ('start', 'b'), ('end', 'b'), # ignored child 'c'
1403 ('start', 'd'),
1404 ('start', 'e'), ('end', 'e'),
1405 ('end', 'd'),
1406 ('end', 'a')],
1407 tags)
1408
1410 iterwalk = self.etree.iterwalk
1411 root = self.etree.XML(_bytes(
1412 '<a xmlns="ns1"><b xmlns="nsb"><c xmlns="ns2"/></b><d xmlns="ns2"><e/></d></a>'))
1413
1414 events = []
1415 iterator = iterwalk(root, events=('start','start-ns','end-ns'))
1416 for event, elem in iterator:
1417 if event in ('start-ns', 'end-ns'):
1418 events.append((event, elem))
1419 if event == 'start-ns' and elem == ('', 'nsb'):
1420 events.append('skip')
1421 iterator.skip_subtree()
1422 else:
1423 events.append((event, elem.tag))
1424
1425 self.assertEqual(
1426 [('start-ns', ('', 'ns1')),
1427 ('start', '{ns1}a'),
1428 ('start-ns', ('', 'nsb')),
1429 'skip',
1430 ('start', '{nsb}b'),
1431 ('end-ns', None),
1432 ('start-ns', ('', 'ns2')),
1433 ('start', '{ns2}d'),
1434 ('start', '{ns2}e'),
1435 ('end-ns', None),
1436 ('end-ns', None)
1437 ],
1438 events)
1439
1441 iterwalk = self.etree.iterwalk
1442 root = self.etree.XML(_bytes('<a><b><d/></b><c/></a>'))
1443
1444 counts = []
1445 for event, elem in iterwalk(root):
1446 counts.append(len(list(elem.getiterator())))
1447 self.assertEqual(
1448 [1,2,1,4],
1449 counts)
1450
1452 # https://bugs.launchpad.net/lxml/+bug/1844674
1453 XML = self.etree.XML
1454 root = XML(_bytes(
1455 "<root>RTEXT<a></a>ATAIL<b/><!-- COMMENT -->CTAIL<?PI PITEXT?> PITAIL </root>"
1456 ))
1457
1458 text = list(root.itertext())
1459 self.assertEqual(["RTEXT", "ATAIL", "CTAIL", " PITAIL "],
1460 text)
1461
1463 parse = self.etree.parse
1464 parser = self.etree.XMLParser(dtd_validation=True)
1465 assertEqual = self.assertEqual
1466 test_url = _str("__nosuch.dtd")
1467
1468 class MyResolver(self.etree.Resolver):
1469 def resolve(self, url, id, context):
1470 assertEqual(url, test_url)
1471 return self.resolve_string(
1472 _str('''<!ENTITY myentity "%s">
1473 <!ELEMENT doc ANY>''') % url, context)
1474
1475 parser.resolvers.add(MyResolver())
1476
1477 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1478 tree = parse(StringIO(xml), parser)
1479 root = tree.getroot()
1480 self.assertEqual(root.text, test_url)
1481
1483 parse = self.etree.parse
1484 parser = self.etree.XMLParser(dtd_validation=True)
1485 assertEqual = self.assertEqual
1486 test_url = _str("__nosuch.dtd")
1487
1488 class MyResolver(self.etree.Resolver):
1489 def resolve(self, url, id, context):
1490 assertEqual(url, test_url)
1491 return self.resolve_string(
1492 (_str('''<!ENTITY myentity "%s">
1493 <!ELEMENT doc ANY>''') % url).encode('utf-8'),
1494 context)
1495
1496 parser.resolvers.add(MyResolver())
1497
1498 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1499 tree = parse(StringIO(xml), parser)
1500 root = tree.getroot()
1501 self.assertEqual(root.text, test_url)
1502
1504 parse = self.etree.parse
1505 parser = self.etree.XMLParser(dtd_validation=True)
1506 assertEqual = self.assertEqual
1507 test_url = _str("__nosuch.dtd")
1508
1509 class MyResolver(self.etree.Resolver):
1510 def resolve(self, url, id, context):
1511 assertEqual(url, test_url)
1512 return self.resolve_file(
1513 SillyFileLike(
1514 _str('''<!ENTITY myentity "%s">
1515 <!ELEMENT doc ANY>''') % url), context)
1516
1517 parser.resolvers.add(MyResolver())
1518
1519 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1520 tree = parse(StringIO(xml), parser)
1521 root = tree.getroot()
1522 self.assertEqual(root.text, test_url)
1523
1525 parse = self.etree.parse
1526 parser = self.etree.XMLParser(attribute_defaults=True)
1527 assertEqual = self.assertEqual
1528 test_url = _str("__nosuch.dtd")
1529
1530 class MyResolver(self.etree.Resolver):
1531 def resolve(self, url, id, context):
1532 assertEqual(url, test_url)
1533 return self.resolve_filename(
1534 fileInTestDir('test.dtd'), context)
1535
1536 parser.resolvers.add(MyResolver())
1537
1538 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1539 tree = parse(StringIO(xml), parser)
1540 root = tree.getroot()
1541 self.assertEqual(
1542 root.attrib, {'default': 'valueA'})
1543 self.assertEqual(
1544 root[0].attrib, {'default': 'valueB'})
1545
1547 parse = self.etree.parse
1548 parser = self.etree.XMLParser(attribute_defaults=True)
1549 assertEqual = self.assertEqual
1550 test_url = _str("__nosuch.dtd")
1551
1552 class MyResolver(self.etree.Resolver):
1553 def resolve(self, url, id, context):
1554 expected = fileUrlInTestDir(test_url)
1555 url = url.replace('file://', 'file:') # depends on libxml2 version
1556 expected = expected.replace('file://', 'file:')
1557 assertEqual(url, expected)
1558 return self.resolve_filename(
1559 fileUrlInTestDir('test.dtd'), context)
1560
1561 parser.resolvers.add(MyResolver())
1562
1563 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1564 tree = parse(StringIO(xml), parser,
1565 base_url=fileUrlInTestDir('__test.xml'))
1566 root = tree.getroot()
1567 self.assertEqual(
1568 root.attrib, {'default': 'valueA'})
1569 self.assertEqual(
1570 root[0].attrib, {'default': 'valueB'})
1571
1573 parse = self.etree.parse
1574 parser = self.etree.XMLParser(attribute_defaults=True)
1575 assertEqual = self.assertEqual
1576 test_url = _str("__nosuch.dtd")
1577
1578 class MyResolver(self.etree.Resolver):
1579 def resolve(self, url, id, context):
1580 assertEqual(url, test_url)
1581 return self.resolve_file(
1582 open(fileInTestDir('test.dtd'), 'rb'), context)
1583
1584 parser.resolvers.add(MyResolver())
1585
1586 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1587 tree = parse(StringIO(xml), parser)
1588 root = tree.getroot()
1589 self.assertEqual(
1590 root.attrib, {'default': 'valueA'})
1591 self.assertEqual(
1592 root[0].attrib, {'default': 'valueB'})
1593
1595 parse = self.etree.parse
1596 parser = self.etree.XMLParser(load_dtd=True)
1597 assertEqual = self.assertEqual
1598 test_url = _str("__nosuch.dtd")
1599
1600 class check(object):
1601 resolved = False
1602
1603 class MyResolver(self.etree.Resolver):
1604 def resolve(self, url, id, context):
1605 assertEqual(url, test_url)
1606 check.resolved = True
1607 return self.resolve_empty(context)
1608
1609 parser.resolvers.add(MyResolver())
1610
1611 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1612 self.assertRaises(etree.XMLSyntaxError, parse, StringIO(xml), parser)
1613 self.assertTrue(check.resolved)
1614
1616 parse = self.etree.parse
1617 parser = self.etree.XMLParser(dtd_validation=True)
1618
1619 class _LocalException(Exception):
1620 pass
1621
1622 class MyResolver(self.etree.Resolver):
1623 def resolve(self, url, id, context):
1624 raise _LocalException
1625
1626 parser.resolvers.add(MyResolver())
1627
1628 xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
1629 self.assertRaises(_LocalException, parse, BytesIO(xml), parser)
1630
1632 parse = self.etree.parse
1633 tostring = self.etree.tostring
1634 parser = self.etree.XMLParser(resolve_entities=False)
1635 Entity = self.etree.Entity
1636
1637 xml = _bytes('<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>')
1638 tree = parse(BytesIO(xml), parser)
1639 root = tree.getroot()
1640 self.assertEqual(root[0].tag, Entity)
1641 self.assertEqual(root[0].text, "&myentity;")
1642 self.assertEqual(root[0].tail, None)
1643 self.assertEqual(root[0].name, "myentity")
1644
1645 self.assertEqual(_bytes('<doc>&myentity;</doc>'),
1646 tostring(root))
1647
1649 xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp "&#160;"> ]>
1650 <root>
1651 <child1/>
1652 <child2/>
1653 <child3>&nbsp;</child3>
1654 </root>''')
1655
1656 parser = self.etree.XMLParser(resolve_entities=False)
1657 root = etree.fromstring(xml, parser)
1658 self.assertEqual([ el.tag for el in root ],
1659 ['child1', 'child2', 'child3'])
1660
1661 root[0] = root[-1]
1662 self.assertEqual([ el.tag for el in root ],
1663 ['child3', 'child2'])
1664 self.assertEqual(root[0][0].text, '&nbsp;')
1665 self.assertEqual(root[0][0].name, 'nbsp')
1666
1668 Entity = self.etree.Entity
1669 Element = self.etree.Element
1670 tostring = self.etree.tostring
1671
1672 root = Element("root")
1673 root.append( Entity("test") )
1674
1675 self.assertEqual(root[0].tag, Entity)
1676 self.assertEqual(root[0].text, "&test;")
1677 self.assertEqual(root[0].tail, None)
1678 self.assertEqual(root[0].name, "test")
1679
1680 self.assertEqual(_bytes('<root>&test;</root>'),
1681 tostring(root))
1682
1684 Entity = self.etree.Entity
1685 Element = self.etree.Element
1686 parser = self.etree.XMLParser(resolve_entities=False)
1687 entity = self.etree.XML('''<!DOCTYPE data [
1688 <!ENTITY a "a">
1689 <!ENTITY b "&a;">
1690 ]>
1691 <data>&b;</data>
1692 ''', parser)
1693
1694 el = Element('test')
1695 el.append(entity)
1696 self.assertEqual(el.tag, 'test')
1697 self.assertEqual(el[0].tag, 'data')
1698 self.assertEqual(el[0][0].tag, Entity)
1699 self.assertEqual(el[0][0].name, 'b')
1700
1702 Entity = self.etree.Entity
1703 self.assertEqual(Entity("test").text, '&test;')
1704 self.assertEqual(Entity("#17683").text, '&#17683;')
1705 self.assertEqual(Entity("#x1768").text, '&#x1768;')
1706 self.assertEqual(Entity("#x98AF").text, '&#x98AF;')
1707
1709 Entity = self.etree.Entity
1710 self.assertRaises(ValueError, Entity, 'a b c')
1711 self.assertRaises(ValueError, Entity, 'a,b')
1712 self.assertRaises(ValueError, Entity, 'a\0b')
1713 self.assertRaises(ValueError, Entity, '#abc')
1714 self.assertRaises(ValueError, Entity, '#xxyz')
1715
1717 CDATA = self.etree.CDATA
1718 Element = self.etree.Element
1719 tostring = self.etree.tostring
1720
1721 root = Element("root")
1722 root.text = CDATA('test')
1723
1724 self.assertEqual('test',
1725 root.text)
1726 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
1727 tostring(root))
1728
1730 CDATA = self.etree.CDATA
1731 Element = self.etree.Element
1732 SubElement = self.etree.SubElement
1733 tostring = self.etree.tostring
1734
1735 root = Element("root")
1736 child = SubElement(root, 'child')
1737 child.tail = CDATA('test')
1738
1739 self.assertEqual('test', child.tail)
1740 self.assertEqual(_bytes('<root><child/><![CDATA[test]]></root>'),
1741 tostring(root))
1742
1743 root = Element("root")
1744 root.tail = CDATA('test')
1745
1746 self.assertEqual('test', root.tail)
1747 self.assertEqual(_bytes('<root/><![CDATA[test]]>'),
1748 tostring(root))
1749
1751 CDATA = self.etree.CDATA
1752 Element = self.etree.Element
1753 root = Element("root")
1754
1755 root.text = CDATA("test")
1756 self.assertEqual('test', root.text)
1757
1758 root.text = CDATA(_str("test"))
1759 self.assertEqual('test', root.text)
1760
1761 self.assertRaises(TypeError, CDATA, 1)
1762
1764 CDATA = self.etree.CDATA
1765 Element = self.etree.Element
1766
1767 root = Element("root")
1768 cdata = CDATA('test')
1769
1770 self.assertRaises(TypeError,
1771 root.set, 'attr', cdata)
1772 self.assertRaises(TypeError,
1773 operator.setitem, root.attrib, 'attr', cdata)
1774
1776 tostring = self.etree.tostring
1777 parser = self.etree.XMLParser(strip_cdata=False)
1778 root = self.etree.XML(_bytes('<root><![CDATA[test]]></root>'), parser)
1779
1780 self.assertEqual('test', root.text)
1781 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
1782 tostring(root))
1783
1785 tostring = self.etree.tostring
1786 parser = self.etree.XMLParser(strip_cdata=False)
1787 root = self.etree.XML(_bytes('<root><![CDATA[test]]></root>'), parser)
1788 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
1789 tostring(root))
1790
1791 self.assertEqual(['test'], root.xpath('//text()'))
1792
1793 # TypeError in etree, AssertionError in ElementTree;
1795 Element = self.etree.Element
1796 SubElement = self.etree.SubElement
1797
1798 a = Element('a')
1799 b = SubElement(a, 'b')
1800
1801 self.assertRaises(TypeError,
1802 a.__setitem__, 0, 'foo')
1803
1805 Element = self.etree.Element
1806 root = Element('root')
1807 # raises AssertionError in ElementTree
1808 self.assertRaises(TypeError, root.append, None)
1809 self.assertRaises(TypeError, root.extend, [None])
1810 self.assertRaises(TypeError, root.extend, [Element('one'), None])
1811 self.assertEqual('one', root[0].tag)
1812
1814 Element = self.etree.Element
1815 SubElement = self.etree.SubElement
1816 root = Element('root')
1817 self.assertRaises(ValueError, root.append, root)
1818 child = SubElement(root, 'child')
1819 self.assertRaises(ValueError, child.append, root)
1820 child2 = SubElement(child, 'child2')
1821 self.assertRaises(ValueError, child2.append, root)
1822 self.assertRaises(ValueError, child2.append, child)
1823 self.assertEqual('child2', root[0][0].tag)
1824
1826 Element = self.etree.Element
1827 SubElement = self.etree.SubElement
1828 root = Element('root')
1829 SubElement(root, 'a')
1830 SubElement(root, 'b')
1831
1832 self.assertEqual(['a', 'b'],
1833 [c.tag for c in root])
1834 root[1].addnext(root[0])
1835 self.assertEqual(['b', 'a'],
1836 [c.tag for c in root])
1837
1839 Element = self.etree.Element
1840 SubElement = self.etree.SubElement
1841 root = Element('root')
1842 SubElement(root, 'a')
1843 SubElement(root, 'b')
1844
1845 self.assertEqual(['a', 'b'],
1846 [c.tag for c in root])
1847 root[0].addprevious(root[1])
1848 self.assertEqual(['b', 'a'],
1849 [c.tag for c in root])
1850
1852 Element = self.etree.Element
1853 SubElement = self.etree.SubElement
1854 root = Element('root')
1855 a = SubElement(root, 'a')
1856 b = SubElement(a, 'b')
1857 # appending parent as sibling is forbidden
1858 self.assertRaises(ValueError, b.addnext, a)
1859 self.assertEqual(['a'], [c.tag for c in root])
1860 self.assertEqual(['b'], [c.tag for c in a])
1861
1863 Element = self.etree.Element
1864 SubElement = self.etree.SubElement
1865 root = Element('root')
1866 a = SubElement(root, 'a')
1867 b = SubElement(a, 'b')
1868 # appending parent as sibling is forbidden
1869 self.assertRaises(ValueError, b.addprevious, a)
1870 self.assertEqual(['a'], [c.tag for c in root])
1871 self.assertEqual(['b'], [c.tag for c in a])
1872
1874 Element = self.etree.Element
1875 SubElement = self.etree.SubElement
1876 root = Element('root')
1877 a = SubElement(root, 'a')
1878 b = SubElement(a, 'b')
1879 c = SubElement(b, 'c')
1880 # appending parent as sibling is forbidden
1881 self.assertRaises(ValueError, c.addnext, a)
1882
1884 Element = self.etree.Element
1885 SubElement = self.etree.SubElement
1886 root = Element('root')
1887 a = SubElement(root, 'a')
1888 b = SubElement(a, 'b')
1889 c = SubElement(b, 'c')
1890 # appending parent as sibling is forbidden
1891 self.assertRaises(ValueError, c.addprevious, a)
1892
1894 Element = self.etree.Element
1895 SubElement = self.etree.SubElement
1896 root = Element('root')
1897 a = SubElement(root, 'a')
1898 b = SubElement(root, 'b')
1899 a.addprevious(a)
1900 self.assertEqual('a', root[0].tag)
1901 self.assertEqual('b', root[1].tag)
1902 b.addprevious(b)
1903 self.assertEqual('a', root[0].tag)
1904 self.assertEqual('b', root[1].tag)
1905 b.addprevious(a)
1906 self.assertEqual('a', root[0].tag)
1907 self.assertEqual('b', root[1].tag)
1908
1910 Element = self.etree.Element
1911 SubElement = self.etree.SubElement
1912 root = Element('root')
1913 a = SubElement(root, 'a')
1914 b = SubElement(root, 'b')
1915 a.addnext(a)
1916 self.assertEqual('a', root[0].tag)
1917 self.assertEqual('b', root[1].tag)
1918 b.addnext(b)
1919 self.assertEqual('a', root[0].tag)
1920 self.assertEqual('b', root[1].tag)
1921 a.addnext(b)
1922 self.assertEqual('a', root[0].tag)
1923 self.assertEqual('b', root[1].tag)
1924
1926 Element = self.etree.Element
1927 a = Element('a')
1928 b = Element('b')
1929 self.assertRaises(TypeError, a.addnext, b)
1930
1932 Element = self.etree.Element
1933 SubElement = self.etree.SubElement
1934 PI = self.etree.PI
1935 root = Element('root')
1936 SubElement(root, 'a')
1937 pi = PI('TARGET', 'TEXT')
1938 pi.tail = "TAIL"
1939
1940 self.assertEqual(_bytes('<root><a></a></root>'),
1941 self._writeElement(root))
1942 root[0].addprevious(pi)
1943 self.assertEqual(_bytes('<root><?TARGET TEXT?>TAIL<a></a></root>'),
1944 self._writeElement(root))
1945
1947 Element = self.etree.Element
1948 PI = self.etree.PI
1949 root = Element('root')
1950 pi = PI('TARGET', 'TEXT')
1951 pi.tail = "TAIL"
1952
1953 self.assertEqual(_bytes('<root></root>'),
1954 self._writeElement(root))
1955 root.addprevious(pi)
1956 self.assertEqual(_bytes('<?TARGET TEXT?>\n<root></root>'),
1957 self._writeElement(root))
1958
1960 Element = self.etree.Element
1961 SubElement = self.etree.SubElement
1962 PI = self.etree.PI
1963 root = Element('root')
1964 SubElement(root, 'a')
1965 pi = PI('TARGET', 'TEXT')
1966 pi.tail = "TAIL"
1967
1968 self.assertEqual(_bytes('<root><a></a></root>'),
1969 self._writeElement(root))
1970 root[0].addnext(pi)
1971 self.assertEqual(_bytes('<root><a></a><?TARGET TEXT?>TAIL</root>'),
1972 self._writeElement(root))
1973
1975 Element = self.etree.Element
1976 PI = self.etree.PI
1977 root = Element('root')
1978 pi = PI('TARGET', 'TEXT')
1979 pi.tail = "TAIL"
1980
1981 self.assertEqual(_bytes('<root></root>'),
1982 self._writeElement(root))
1983 root.addnext(pi)
1984 self.assertEqual(_bytes('<root></root>\n<?TARGET TEXT?>'),
1985 self._writeElement(root))
1986
1988 Element = self.etree.Element
1989 SubElement = self.etree.SubElement
1990 Comment = self.etree.Comment
1991 root = Element('root')
1992 SubElement(root, 'a')
1993 comment = Comment('TEXT ')
1994 comment.tail = "TAIL"
1995
1996 self.assertEqual(_bytes('<root><a></a></root>'),
1997 self._writeElement(root))
1998 root[0].addnext(comment)
1999 self.assertEqual(_bytes('<root><a></a><!--TEXT -->TAIL</root>'),
2000 self._writeElement(root))
2001
2003 Element = self.etree.Element
2004 Comment = self.etree.Comment
2005 root = Element('root')
2006 comment = Comment('TEXT ')
2007 comment.tail = "TAIL"
2008
2009 self.assertEqual(_bytes('<root></root>'),
2010 self._writeElement(root))
2011 root.addnext(comment)
2012 self.assertEqual(_bytes('<root></root>\n<!--TEXT -->'),
2013 self._writeElement(root))
2014
2016 Element = self.etree.Element
2017 SubElement = self.etree.SubElement
2018 Comment = self.etree.Comment
2019 root = Element('root')
2020 SubElement(root, 'a')
2021 comment = Comment('TEXT ')
2022 comment.tail = "TAIL"
2023
2024 self.assertEqual(_bytes('<root><a></a></root>'),
2025 self._writeElement(root))
2026 root[0].addprevious(comment)
2027 self.assertEqual(_bytes('<root><!--TEXT -->TAIL<a></a></root>'),
2028 self._writeElement(root))
2029
2031 Element = self.etree.Element
2032 Comment = self.etree.Comment
2033 root = Element('root')
2034 comment = Comment('TEXT ')
2035 comment.tail = "TAIL"
2036
2037 self.assertEqual(_bytes('<root></root>'),
2038 self._writeElement(root))
2039 root.addprevious(comment)
2040 self.assertEqual(_bytes('<!--TEXT -->\n<root></root>'),
2041 self._writeElement(root))
2042
2043 # ET's Elements have items() and keys(), but not values()
2045 XML = self.etree.XML
2046
2047 root = XML(_bytes('<doc alpha="Alpha" beta="Beta" gamma="Gamma"/>'))
2048 values = root.values()
2049 values.sort()
2050 self.assertEqual(['Alpha', 'Beta', 'Gamma'], values)
2051
2052 # gives error in ElementTree
2054 Element = self.etree.Element
2055 Comment = self.etree.Comment
2056
2057 a = Element('a')
2058 a.append(Comment())
2059 self.assertEqual(
2060 _bytes('<a><!----></a>'),
2061 self._writeElement(a))
2062
2063 # ElementTree ignores comments
2065 ElementTree = self.etree.ElementTree
2066 tostring = self.etree.tostring
2067
2068 xml = _bytes('<a><b/><!----><c/></a>')
2069 f = BytesIO(xml)
2070 doc = ElementTree(file=f)
2071 a = doc.getroot()
2072 self.assertEqual(
2073 '',
2074 a[1].text)
2075 self.assertEqual(
2076 xml,
2077 tostring(a))
2078
2079 # ElementTree ignores comments
2081 ElementTree = self.etree.ElementTree
2082
2083 f = BytesIO('<a><b></b><!-- hoi --><c></c></a>')
2084 doc = ElementTree(file=f)
2085 a = doc.getroot()
2086 self.assertEqual(
2087 ' hoi ',
2088 a[1].text)
2089
2090 # does not raise an exception in ElementTree
2092 Element = self.etree.Element
2093 Comment = self.etree.Comment
2094
2095 c = Comment()
2096 el = Element('myel')
2097
2098 self.assertRaises(TypeError, c.append, el)
2099 self.assertRaises(TypeError, c.insert, 0, el)
2100 self.assertRaises(TypeError, c.set, "myattr", "test")
2101
2103 c = self.etree.Comment()
2104 self.assertEqual(0, len(c.attrib))
2105
2106 self.assertFalse(c.attrib.__contains__('nope'))
2107 self.assertFalse('nope' in c.attrib)
2108 self.assertFalse('nope' in c.attrib.keys())
2109 self.assertFalse('nope' in c.attrib.values())
2110 self.assertFalse(('nope', 'huhu') in c.attrib.items())
2111
2112 self.assertEqual([], list(c.attrib))
2113 self.assertEqual([], list(c.attrib.keys()))
2114 self.assertEqual([], list(c.attrib.items()))
2115 self.assertEqual([], list(c.attrib.values()))
2116 self.assertEqual([], list(c.attrib.iterkeys()))
2117 self.assertEqual([], list(c.attrib.iteritems()))
2118 self.assertEqual([], list(c.attrib.itervalues()))
2119
2120 self.assertEqual('HUHU', c.attrib.pop('nope', 'HUHU'))
2121 self.assertRaises(KeyError, c.attrib.pop, 'nope')
2122
2123 self.assertRaises(KeyError, c.attrib.__getitem__, 'only')
2124 self.assertRaises(KeyError, c.attrib.__getitem__, 'names')
2125 self.assertRaises(KeyError, c.attrib.__getitem__, 'nope')
2126 self.assertRaises(KeyError, c.attrib.__setitem__, 'nope', 'yep')
2127 self.assertRaises(KeyError, c.attrib.__delitem__, 'nope')
2128
2129 # test passing 'None' to dump()
2132
2134 ElementTree = self.etree.ElementTree
2135
2136 f = BytesIO('<a xmlns:foo="http://www.infrae.com/ns/1"><foo:b/></a>')
2137 doc = ElementTree(file=f)
2138 a = doc.getroot()
2139 self.assertEqual(
2140 None,
2141 a.prefix)
2142 self.assertEqual(
2143 'foo',
2144 a[0].prefix)
2145
2147 ElementTree = self.etree.ElementTree
2148
2149 f = BytesIO('<a xmlns="http://www.infrae.com/ns/1"><b/></a>')
2150 doc = ElementTree(file=f)
2151 a = doc.getroot()
2152 self.assertEqual(
2153 None,
2154 a.prefix)
2155 self.assertEqual(
2156 None,
2157 a[0].prefix)
2158
2160 Element = self.etree.Element
2161 SubElement = self.etree.SubElement
2162
2163 a = Element('a')
2164 b = SubElement(a, 'b')
2165 c = SubElement(a, 'c')
2166 d = SubElement(b, 'd')
2167 self.assertEqual(
2168 None,
2169 a.getparent())
2170 self.assertEqual(
2171 a,
2172 b.getparent())
2173 self.assertEqual(
2174 b.getparent(),
2175 c.getparent())
2176 self.assertEqual(
2177 b,
2178 d.getparent())
2179
2181 XML = self.etree.XML
2182
2183 root = XML(_bytes('<doc><one/><two>Two</two>Hm<three/></doc>'))
2184 result = []
2185 for el in root.iterchildren():
2186 result.append(el.tag)
2187 self.assertEqual(['one', 'two', 'three'], result)
2188
2190 XML = self.etree.XML
2191
2192 root = XML(_bytes('<doc><one/><two>Two</two>Hm<three/></doc>'))
2193 result = []
2194 for el in root.iterchildren(reversed=True):
2195 result.append(el.tag)
2196 self.assertEqual(['three', 'two', 'one'], result)
2197
2199 XML = self.etree.XML
2200
2201 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two></doc>'))
2202 result = []
2203 for el in root.iterchildren(tag='two'):
2204 result.append(el.text)
2205 self.assertEqual(['Two', 'Bla'], result)
2206
2208 XML = self.etree.XML
2209
2210 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two></doc>'))
2211 result = []
2212 for el in root.iterchildren('two'):
2213 result.append(el.text)
2214 self.assertEqual(['Two', 'Bla'], result)
2215
2217 XML = self.etree.XML
2218
2219 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two></doc>'))
2220 result = []
2221 for el in root.iterchildren(reversed=True, tag='two'):
2222 result.append(el.text)
2223 self.assertEqual(['Bla', 'Two'], result)
2224
2226 XML = self.etree.XML
2227
2228 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2229 result = []
2230 for el in root.iterchildren(tag=['two', 'three']):
2231 result.append(el.text)
2232 self.assertEqual(['Two', 'Bla', None], result)
2233
2235 XML = self.etree.XML
2236
2237 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2238 result = []
2239 for el in root.iterchildren('two', 'three'):
2240 result.append(el.text)
2241 self.assertEqual(['Two', 'Bla', None], result)
2242
2244 XML = self.etree.XML
2245
2246 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2247 result = []
2248 for el in root.iterchildren(reversed=True, tag=['two', 'three']):
2249 result.append(el.text)
2250 self.assertEqual([None, 'Bla', 'Two'], result)
2251
2253 Element = self.etree.Element
2254 SubElement = self.etree.SubElement
2255
2256 a = Element('a')
2257 b = SubElement(a, 'b')
2258 c = SubElement(a, 'c')
2259 d = SubElement(b, 'd')
2260 self.assertEqual(
2261 [],
2262 list(a.iterancestors()))
2263 self.assertEqual(
2264 [a],
2265 list(b.iterancestors()))
2266 self.assertEqual(
2267 [a],
2268 list(c.iterancestors()))
2269 self.assertEqual(
2270 [b, a],
2271 list(d.iterancestors()))
2272
2274 Element = self.etree.Element
2275 SubElement = self.etree.SubElement
2276
2277 a = Element('a')
2278 b = SubElement(a, 'b')
2279 c = SubElement(a, 'c')
2280 d = SubElement(b, 'd')
2281 self.assertEqual(
2282 [a],
2283 list(d.iterancestors('a')))
2284 self.assertEqual(
2285 [a],
2286 list(d.iterancestors(tag='a')))
2287
2288 self.assertEqual(
2289 [b, a],
2290 list(d.iterancestors('*')))
2291 self.assertEqual(
2292 [b, a],
2293 list(d.iterancestors(tag='*')))
2294
2296 Element = self.etree.Element
2297 SubElement = self.etree.SubElement
2298
2299 a = Element('a')
2300 b = SubElement(a, 'b')
2301 c = SubElement(a, 'c')
2302 d = SubElement(b, 'd')
2303 self.assertEqual(
2304 [b, a],
2305 list(d.iterancestors(tag=('a', 'b'))))
2306 self.assertEqual(
2307 [b, a],
2308 list(d.iterancestors('a', 'b')))
2309
2310 self.assertEqual(
2311 [],
2312 list(d.iterancestors(tag=('w', 'x', 'y', 'z'))))
2313 self.assertEqual(
2314 [],
2315 list(d.iterancestors('w', 'x', 'y', 'z')))
2316
2317 self.assertEqual(
2318 [],
2319 list(d.iterancestors(tag=('d', 'x'))))
2320 self.assertEqual(
2321 [],
2322 list(d.iterancestors('d', 'x')))
2323
2324 self.assertEqual(
2325 [b, a],
2326 list(d.iterancestors(tag=('b', '*'))))
2327 self.assertEqual(
2328 [b, a],
2329 list(d.iterancestors('b', '*')))
2330
2331 self.assertEqual(
2332 [b],
2333 list(d.iterancestors(tag=('b', 'c'))))
2334 self.assertEqual(
2335 [b],
2336 list(d.iterancestors('b', 'c')))
2337
2339 Element = self.etree.Element
2340 SubElement = self.etree.SubElement
2341
2342 a = Element('a')
2343 b = SubElement(a, 'b')
2344 c = SubElement(a, 'c')
2345 d = SubElement(b, 'd')
2346 e = SubElement(c, 'e')
2347
2348 self.assertEqual(
2349 [b, d, c, e],
2350 list(a.iterdescendants()))
2351 self.assertEqual(
2352 [],
2353 list(d.iterdescendants()))
2354
2356 Element = self.etree.Element
2357 SubElement = self.etree.SubElement
2358
2359 a = Element('a')
2360 b = SubElement(a, 'b')
2361 c = SubElement(a, 'c')
2362 d = SubElement(b, 'd')
2363 e = SubElement(c, 'e')
2364
2365 self.assertEqual(
2366 [],
2367 list(a.iterdescendants('a')))
2368 self.assertEqual(
2369 [],
2370 list(a.iterdescendants(tag='a')))
2371
2372 a2 = SubElement(e, 'a')
2373 self.assertEqual(
2374 [a2],
2375 list(a.iterdescendants('a')))
2376
2377 self.assertEqual(
2378 [a2],
2379 list(c.iterdescendants('a')))
2380 self.assertEqual(
2381 [a2],
2382 list(c.iterdescendants(tag='a')))
2383
2385 Element = self.etree.Element
2386 SubElement = self.etree.SubElement
2387
2388 a = Element('a')
2389 b = SubElement(a, 'b')
2390 c = SubElement(a, 'c')
2391 d = SubElement(b, 'd')
2392 e = SubElement(c, 'e')
2393
2394 self.assertEqual(
2395 [b, e],
2396 list(a.iterdescendants(tag=('a', 'b', 'e'))))
2397 self.assertEqual(
2398 [b, e],
2399 list(a.iterdescendants('a', 'b', 'e')))
2400
2401 a2 = SubElement(e, 'a')
2402 self.assertEqual(
2403 [b, a2],
2404 list(a.iterdescendants(tag=('a', 'b'))))
2405 self.assertEqual(
2406 [b, a2],
2407 list(a.iterdescendants('a', 'b')))
2408
2409 self.assertEqual(
2410 [],
2411 list(c.iterdescendants(tag=('x', 'y', 'z'))))
2412 self.assertEqual(
2413 [],
2414 list(c.iterdescendants('x', 'y', 'z')))
2415
2416 self.assertEqual(
2417 [b, d, c, e, a2],
2418 list(a.iterdescendants(tag=('x', 'y', 'z', '*'))))
2419 self.assertEqual(
2420 [b, d, c, e, a2],
2421 list(a.iterdescendants('x', 'y', 'z', '*')))
2422
2424 Element = self.etree.Element
2425 SubElement = self.etree.SubElement
2426
2427 a = Element('a')
2428 b = SubElement(a, 'b')
2429 c = SubElement(a, 'c')
2430 d = SubElement(b, 'd')
2431 self.assertEqual(
2432 a,
2433 a.getroottree().getroot())
2434 self.assertEqual(
2435 a,
2436 b.getroottree().getroot())
2437 self.assertEqual(
2438 a,
2439 d.getroottree().getroot())
2440
2442 Element = self.etree.Element
2443 SubElement = self.etree.SubElement
2444
2445 a = Element('a')
2446 b = SubElement(a, 'b')
2447 c = SubElement(a, 'c')
2448 self.assertEqual(
2449 None,
2450 a.getnext())
2451 self.assertEqual(
2452 c,
2453 b.getnext())
2454 self.assertEqual(
2455 None,
2456 c.getnext())
2457
2459 Element = self.etree.Element
2460 SubElement = self.etree.SubElement
2461
2462 a = Element('a')
2463 b = SubElement(a, 'b')
2464 c = SubElement(a, 'c')
2465 d = SubElement(b, 'd')
2466 self.assertEqual(
2467 None,
2468 a.getprevious())
2469 self.assertEqual(
2470 b,
2471 c.getprevious())
2472 self.assertEqual(
2473 None,
2474 b.getprevious())
2475
2477 Element = self.etree.Element
2478 SubElement = self.etree.SubElement
2479
2480 a = Element('a')
2481 b = SubElement(a, 'b')
2482 c = SubElement(a, 'c')
2483 d = SubElement(b, 'd')
2484 self.assertEqual(
2485 [],
2486 list(a.itersiblings()))
2487 self.assertEqual(
2488 [c],
2489 list(b.itersiblings()))
2490 self.assertEqual(
2491 [],
2492 list(c.itersiblings()))
2493 self.assertEqual(
2494 [b],
2495 list(c.itersiblings(preceding=True)))
2496 self.assertEqual(
2497 [],
2498 list(b.itersiblings(preceding=True)))
2499
2501 Element = self.etree.Element
2502 SubElement = self.etree.SubElement
2503
2504 a = Element('a')
2505 b = SubElement(a, 'b')
2506 c = SubElement(a, 'c')
2507 d = SubElement(b, 'd')
2508 self.assertEqual(
2509 [],
2510 list(a.itersiblings(tag='XXX')))
2511 self.assertEqual(
2512 [c],
2513 list(b.itersiblings(tag='c')))
2514 self.assertEqual(
2515 [c],
2516 list(b.itersiblings(tag='*')))
2517 self.assertEqual(
2518 [b],
2519 list(c.itersiblings(preceding=True, tag='b')))
2520 self.assertEqual(
2521 [],
2522 list(c.itersiblings(preceding=True, tag='c')))
2523
2525 Element = self.etree.Element
2526 SubElement = self.etree.SubElement
2527
2528 a = Element('a')
2529 b = SubElement(a, 'b')
2530 c = SubElement(a, 'c')
2531 d = SubElement(b, 'd')
2532 e = SubElement(a, 'e')
2533 self.assertEqual(
2534 [],
2535 list(a.itersiblings(tag=('XXX', 'YYY'))))
2536 self.assertEqual(
2537 [c, e],
2538 list(b.itersiblings(tag=('c', 'd', 'e'))))
2539 self.assertEqual(
2540 [b],
2541 list(c.itersiblings(preceding=True, tag=('b', 'b', 'c', 'd'))))
2542 self.assertEqual(
2543 [c, b],
2544 list(e.itersiblings(preceding=True, tag=('c', '*'))))
2545
2547 parseid = self.etree.parseid
2548 XML = self.etree.XML
2549 xml_text = _bytes('''
2550 <!DOCTYPE document [
2551 <!ELEMENT document (h1,p)*>
2552 <!ELEMENT h1 (#PCDATA)>
2553 <!ATTLIST h1 myid ID #REQUIRED>
2554 <!ELEMENT p (#PCDATA)>
2555 <!ATTLIST p someid ID #REQUIRED>
2556 ]>
2557 <document>
2558 <h1 myid="chapter1">...</h1>
2559 <p id="note1" class="note">...</p>
2560 <p>Regular paragraph.</p>
2561 <p xml:id="xmlid">XML:ID paragraph.</p>
2562 <p someid="warn1" class="warning">...</p>
2563 </document>
2564 ''')
2565
2566 tree, dic = parseid(BytesIO(xml_text))
2567 root = tree.getroot()
2568 root2 = XML(xml_text)
2569 self.assertEqual(self._writeElement(root),
2570 self._writeElement(root2))
2571 expected = {
2572 "chapter1" : root[0],
2573 "xmlid" : root[3],
2574 "warn1" : root[4]
2575 }
2576 self.assertTrue("chapter1" in dic)
2577 self.assertTrue("warn1" in dic)
2578 self.assertTrue("xmlid" in dic)
2579 self._checkIDDict(dic, expected)
2580
2582 XMLDTDID = self.etree.XMLDTDID
2583 XML = self.etree.XML
2584 xml_text = _bytes('''
2585 <!DOCTYPE document [
2586 <!ELEMENT document (h1,p)*>
2587 <!ELEMENT h1 (#PCDATA)>
2588 <!ATTLIST h1 myid ID #REQUIRED>
2589 <!ELEMENT p (#PCDATA)>
2590 <!ATTLIST p someid ID #REQUIRED>
2591 ]>
2592 <document>
2593 <h1 myid="chapter1">...</h1>
2594 <p id="note1" class="note">...</p>
2595 <p>Regular paragraph.</p>
2596 <p xml:id="xmlid">XML:ID paragraph.</p>
2597 <p someid="warn1" class="warning">...</p>
2598 </document>
2599 ''')
2600
2601 root, dic = XMLDTDID(xml_text)
2602 root2 = XML(xml_text)
2603 self.assertEqual(self._writeElement(root),
2604 self._writeElement(root2))
2605 expected = {
2606 "chapter1" : root[0],
2607 "xmlid" : root[3],
2608 "warn1" : root[4]
2609 }
2610 self.assertTrue("chapter1" in dic)
2611 self.assertTrue("warn1" in dic)
2612 self.assertTrue("xmlid" in dic)
2613 self._checkIDDict(dic, expected)
2614
2616 XMLDTDID = self.etree.XMLDTDID
2617 XML = self.etree.XML
2618 xml_text = _bytes('''
2619 <document>
2620 <h1 myid="chapter1">...</h1>
2621 <p id="note1" class="note">...</p>
2622 <p>Regular paragraph.</p>
2623 <p someid="warn1" class="warning">...</p>
2624 </document>
2625 ''')
2626
2627 root, dic = XMLDTDID(xml_text)
2628 root2 = XML(xml_text)
2629 self.assertEqual(self._writeElement(root),
2630 self._writeElement(root2))
2631 expected = {}
2632 self._checkIDDict(dic, expected)
2633
2635 XMLDTDID = self.etree.XMLDTDID
2636 XML = self.etree.XML
2637 xml_text = _bytes('''
2638 <!DOCTYPE document [
2639 <!ELEMENT document (h1,p)*>
2640 <!ELEMENT h1 (#PCDATA)>
2641 <!ATTLIST h1 myid ID #REQUIRED>
2642 <!ELEMENT p (#PCDATA)>
2643 <!ATTLIST p someid ID #REQUIRED>
2644 ]>
2645 <document>
2646 <h1 myid="chapter1">...</h1>
2647 <p id="note1" class="note">...</p>
2648 <p>Regular paragraph.</p>
2649 <p xml:id="xmlid">XML:ID paragraph.</p>
2650 <p someid="warn1" class="warning">...</p>
2651 </document>
2652 ''')
2653
2654 parser = etree.XMLParser(collect_ids=False)
2655 root, dic = XMLDTDID(xml_text, parser=parser)
2656 root2 = XML(xml_text)
2657 self.assertEqual(self._writeElement(root),
2658 self._writeElement(root2))
2659 self.assertFalse(dic)
2660 self._checkIDDict(dic, {})
2661
2663 self.assertEqual(len(dic),
2664 len(expected))
2665 self.assertEqual(sorted(dic.items()),
2666 sorted(expected.items()))
2667 if sys.version_info < (3,):
2668 self.assertEqual(sorted(dic.iteritems()),
2669 sorted(expected.iteritems()))
2670 self.assertEqual(sorted(dic.keys()),
2671 sorted(expected.keys()))
2672 if sys.version_info < (3,):
2673 self.assertEqual(sorted(dic.iterkeys()),
2674 sorted(expected.iterkeys()))
2675 if sys.version_info < (3,):
2676 self.assertEqual(sorted(dic.values()),
2677 sorted(expected.values()))
2678 self.assertEqual(sorted(dic.itervalues()),
2679 sorted(expected.itervalues()))
2680
2682 self.assertRaises(ValueError, self.etree.register_namespace,
2683 "XML", "http://www.w3.org/XML/1998/namespace")
2684 self.assertRaises(ValueError, self.etree.register_namespace,
2685 "xml", "http://www.w3.org/XML/2345")
2686 self.etree.register_namespace("xml", "http://www.w3.org/XML/1998/namespace") # ok
2687
2689 etree = self.etree
2690
2691 r = {'foo': 'http://ns.infrae.com/foo'}
2692 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2693 self.assertEqual(
2694 'foo',
2695 e.prefix)
2696 self.assertEqual(
2697 _bytes('<foo:bar xmlns:foo="http://ns.infrae.com/foo"></foo:bar>'),
2698 self._writeElement(e))
2699
2701 etree = self.etree
2702
2703 r = {None: 'http://ns.infrae.com/foo'}
2704 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2705 self.assertEqual(
2706 None,
2707 e.prefix)
2708 self.assertEqual(
2709 '{http://ns.infrae.com/foo}bar',
2710 e.tag)
2711 self.assertEqual(
2712 _bytes('<bar xmlns="http://ns.infrae.com/foo"></bar>'),
2713 self._writeElement(e))
2714
2716 etree = self.etree
2717
2718 r = {None: 'http://ns.infrae.com/foo', 'p': 'http://test/'}
2719 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2720 self.assertEqual(None, e.prefix)
2721 self.assertEqual('{http://ns.infrae.com/foo}bar', e.tag)
2722 self.assertEqual(
2723 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:p="http://test/"></bar>'),
2724 self._writeElement(e))
2725
2727 etree = self.etree
2728
2729 r = {None: 'http://ns.infrae.com/foo',
2730 'hoi': 'http://ns.infrae.com/hoi'}
2731 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2732 e.set('{http://ns.infrae.com/hoi}test', 'value')
2733 self.assertEqual(
2734 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi" hoi:test="value"></bar>'),
2735 self._writeElement(e))
2736
2738 etree = self.etree
2739
2740 root = etree.Element('{http://test/ns}root',
2741 nsmap={None: 'http://test/ns'})
2742 sub = etree.Element('{http://test/ns}sub',
2743 nsmap={'test': 'http://test/ns'})
2744
2745 sub.attrib['{http://test/ns}attr'] = 'value'
2746 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2747 self.assertEqual(
2748 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2749 etree.tostring(sub))
2750
2751 root.append(sub)
2752 self.assertEqual(
2753 _bytes('<root xmlns="http://test/ns">'
2754 '<sub xmlns:test="http://test/ns" test:attr="value"/>'
2755 '</root>'),
2756 etree.tostring(root))
2757
2759 etree = self.etree
2760
2761 root = etree.Element('root')
2762 sub = etree.Element('{http://test/ns}sub',
2763 nsmap={'test': 'http://test/ns'})
2764
2765 sub.attrib['{http://test/ns}attr'] = 'value'
2766 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2767 self.assertEqual(
2768 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2769 etree.tostring(sub))
2770
2771 root.append(sub)
2772 self.assertEqual(
2773 _bytes('<root>'
2774 '<test:sub xmlns:test="http://test/ns" test:attr="value"/>'
2775 '</root>'),
2776 etree.tostring(root))
2777
2779 etree = self.etree
2780
2781 root = etree.Element('root')
2782 sub = etree.Element('{http://test/ns}sub',
2783 nsmap={None: 'http://test/ns'})
2784
2785 sub.attrib['{http://test/ns}attr'] = 'value'
2786 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2787 self.assertEqual(
2788 _bytes('<sub xmlns="http://test/ns" '
2789 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2790 etree.tostring(sub))
2791
2792 root.append(sub)
2793 self.assertEqual(
2794 _bytes('<root>'
2795 '<sub xmlns="http://test/ns"'
2796 ' xmlns:ns0="http://test/ns" ns0:attr="value"/>'
2797 '</root>'),
2798 etree.tostring(root))
2799
2801 etree = self.etree
2802
2803 root = etree.Element('{http://test/ns}root',
2804 nsmap={'test': 'http://test/ns',
2805 None: 'http://test/ns'})
2806 sub = etree.Element('{http://test/ns}sub',
2807 nsmap={None: 'http://test/ns'})
2808
2809 sub.attrib['{http://test/ns}attr'] = 'value'
2810 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2811 self.assertEqual(
2812 _bytes('<sub xmlns="http://test/ns" '
2813 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2814 etree.tostring(sub))
2815
2816 root.append(sub)
2817 self.assertEqual(
2818 _bytes('<test:root xmlns:test="http://test/ns" xmlns="http://test/ns">'
2819 '<test:sub test:attr="value"/>'
2820 '</test:root>'),
2821 etree.tostring(root))
2822
2824 etree = self.etree
2825 r = {None: 'http://ns.infrae.com/foo',
2826 'hoi': 'http://ns.infrae.com/hoi'}
2827 e = etree.Element('{http://ns.infrae.com/foo}z', nsmap=r)
2828 tree = etree.ElementTree(element=e)
2829 etree.SubElement(e, '{http://ns.infrae.com/hoi}x')
2830 self.assertEqual(
2831 _bytes('<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>'),
2832 self._writeElement(e))
2833
2835 etree = self.etree
2836
2837 r = {None: 'http://ns.infrae.com/foo'}
2838 e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2839 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2840
2841 e1.append(e2)
2842
2843 self.assertEqual(
2844 None,
2845 e1.prefix)
2846 self.assertEqual(
2847 None,
2848 e1[0].prefix)
2849 self.assertEqual(
2850 '{http://ns.infrae.com/foo}bar',
2851 e1.tag)
2852 self.assertEqual(
2853 '{http://ns.infrae.com/foo}bar',
2854 e1[0].tag)
2855
2857 etree = self.etree
2858
2859 r = {None: 'http://ns.infrae.com/BAR'}
2860 e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r)
2861 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2862
2863 e1.append(e2)
2864
2865 self.assertEqual(
2866 None,
2867 e1.prefix)
2868 self.assertNotEqual(
2869 None,
2870 e2.prefix)
2871 self.assertEqual(
2872 '{http://ns.infrae.com/BAR}bar',
2873 e1.tag)
2874 self.assertEqual(
2875 '{http://ns.infrae.com/foo}bar',
2876 e2.tag)
2877
2879 ns_href = "http://a.b.c"
2880 one = self.etree.fromstring(
2881 _bytes('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
2882 baz = one[0][0]
2883
2884 two = self.etree.fromstring(
2885 _bytes('<root xmlns:ns="%s"/>' % ns_href))
2886 two.append(baz)
2887 del one # make sure the source document is deallocated
2888
2889 self.assertEqual('{%s}baz' % ns_href, baz.tag)
2890 self.assertEqual(
2891 _bytes('<root xmlns:ns="%s"><ns:baz/></root>' % ns_href),
2892 self.etree.tostring(two))
2893
2895 xml = _bytes(
2896 '<foo xmlns="F" xmlns:x="x">'
2897 '<bar xmlns:ns="NS" xmlns:b="b" xmlns="B">'
2898 '<ns:baz/>'
2899 '</bar></foo>'
2900 )
2901 root = self.etree.fromstring(xml)
2902 self.assertEqual(xml, self.etree.tostring(root))
2903 self.etree.cleanup_namespaces(root)
2904 self.assertEqual(
2905 _bytes('<foo xmlns="F"><bar xmlns:ns="NS" xmlns="B"><ns:baz/></bar></foo>'),
2906 self.etree.tostring(root))
2907
2909 xml = _bytes(
2910 '<foo xmlns="F" xmlns:x="X" xmlns:a="A">'
2911 '<bar xmlns:ns="NS" xmlns:b="b" xmlns="B">'
2912 '<ns:baz a:test="attr"/>'
2913 '</bar></foo>'
2914 )
2915 root = self.etree.fromstring(xml)
2916 self.assertEqual(xml, self.etree.tostring(root))
2917 self.etree.cleanup_namespaces(root)
2918 self.assertEqual(
2919 _bytes('<foo xmlns="F" xmlns:a="A">'
2920 '<bar xmlns:ns="NS" xmlns="B">'
2921 '<ns:baz a:test="attr"/>'
2922 '</bar></foo>'),
2923 self.etree.tostring(root))
2924
2926 xml = ('<n12:foo ' +
2927 ' '.join('xmlns:n{n}="NS{n}"'.format(n=i) for i in range(100)) +
2928 '><n68:a/></n12:foo>').encode('utf8')
2929 root = self.etree.fromstring(xml)
2930 self.assertEqual(xml, self.etree.tostring(root))
2931 self.etree.cleanup_namespaces(root)
2932 self.assertEqual(
2933 b'<n12:foo xmlns:n12="NS12" xmlns:n68="NS68"><n68:a/></n12:foo>',
2934 self.etree.tostring(root))
2935
2937 xml = ('<root>' +
2938 ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2939 '<n64:x/>' + '</a>'*100 + '</root>').encode('utf8')
2940 root = self.etree.fromstring(xml)
2941 self.assertEqual(xml, self.etree.tostring(root))
2942 self.etree.cleanup_namespaces(root)
2943 self.assertEqual(
2944 b'<root>' + b'<a>'*64 + b'<a xmlns:n64="NS64">' + b'<a>'*35 +
2945 b'<n64:x/>' + b'</a>'*100 + b'</root>',
2946 self.etree.tostring(root))
2947
2949 xml = ('<root>' +
2950 ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2951 '<n64:x xmlns:a="A" a:attr="X"/>' +
2952 '</a>'*100 +
2953 '</root>').encode('utf8')
2954 root = self.etree.fromstring(xml)
2955 self.assertEqual(xml, self.etree.tostring(root))
2956 self.etree.cleanup_namespaces(root, top_nsmap={'n64': 'NS64'})
2957 self.assertEqual(
2958 b'<root xmlns:n64="NS64">' + b'<a>'*100 +
2959 b'<n64:x xmlns:a="A" a:attr="X"/>' + b'</a>'*100 + b'</root>',
2960 self.etree.tostring(root))
2961
2963 xml = ('<root xmlns:n64="NS64" xmlns:foo="FOO" xmlns:unused1="UNUSED" xmlns:no="NO">'
2964 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2965 '<foo>foo:bar</foo>'
2966 '</root>').encode('utf8')
2967 root = self.etree.fromstring(xml)
2968 self.assertEqual(xml, self.etree.tostring(root))
2969 self.etree.cleanup_namespaces(root, keep_ns_prefixes=['foo'])
2970 self.assertEqual(
2971 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
2972 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
2973 b'<foo>foo:bar</foo>'
2974 b'</root>',
2975 self.etree.tostring(root))
2976
2978 xml = ('<root xmlns:n64="NS64" xmlns:unused1="UNUSED" xmlns:no="NO">'
2979 '<sub xmlns:foo="FOO">'
2980 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2981 '<foo>foo:bar</foo>'
2982 '</sub>'
2983 '</root>').encode('utf8')
2984 root = self.etree.fromstring(xml)
2985 self.assertEqual(xml, self.etree.tostring(root))
2986 self.etree.cleanup_namespaces(
2987 root,
2988 top_nsmap={'foo': 'FOO', 'unused1': 'UNUSED'},
2989 keep_ns_prefixes=['foo'])
2990 self.assertEqual(
2991 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
2992 b'<sub>'
2993 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
2994 b'<foo>foo:bar</foo>'
2995 b'</sub>'
2996 b'</root>',
2997 self.etree.tostring(root))
2998
3000 etree = self.etree
3001
3002 r = {None: 'http://ns.infrae.com/foo',
3003 'hoi': 'http://ns.infrae.com/hoi'}
3004 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
3005 self.assertEqual(
3006 r,
3007 e.nsmap)
3008
3010 etree = self.etree
3011
3012 re = {None: 'http://ns.infrae.com/foo',
3013 'hoi': 'http://ns.infrae.com/hoi'}
3014 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=re)
3015
3016 rs = {None: 'http://ns.infrae.com/honk',
3017 'top': 'http://ns.infrae.com/top'}
3018 s = etree.SubElement(e, '{http://ns.infrae.com/honk}bar', nsmap=rs)
3019
3020 r = re.copy()
3021 r.update(rs)
3022 self.assertEqual(re, e.nsmap)
3023 self.assertEqual(r, s.nsmap)
3024
3026 etree = self.etree
3027 el = etree.HTML('<hha:page-description>aa</hha:page-description>').find('.//page-description')
3028 self.assertEqual({'hha': None}, el.nsmap)
3029
3031 Element = self.etree.Element
3032 SubElement = self.etree.SubElement
3033
3034 a = Element('a')
3035 b = SubElement(a, 'b')
3036 c = SubElement(a, 'c')
3037 d = SubElement(b, 'd')
3038 e = SubElement(c, 'e')
3039 f = SubElement(c, 'f')
3040
3041 self.assertEqual(
3042 [a, b],
3043 list(a.getiterator('a', 'b')))
3044 self.assertEqual(
3045 [],
3046 list(a.getiterator('x', 'y')))
3047 self.assertEqual(
3048 [a, f],
3049 list(a.getiterator('f', 'a')))
3050 self.assertEqual(
3051 [c, e, f],
3052 list(c.getiterator('c', '*', 'a')))
3053 self.assertEqual(
3054 [],
3055 list(a.getiterator( (), () )))
3056
3058 Element = self.etree.Element
3059 SubElement = self.etree.SubElement
3060
3061 a = Element('a')
3062 b = SubElement(a, 'b')
3063 c = SubElement(a, 'c')
3064 d = SubElement(b, 'd')
3065 e = SubElement(c, 'e')
3066 f = SubElement(c, 'f')
3067
3068 self.assertEqual(
3069 [a, b],
3070 list(a.getiterator( ('a', 'b') )))
3071 self.assertEqual(
3072 [],
3073 list(a.getiterator( ('x', 'y') )))
3074 self.assertEqual(
3075 [a, f],
3076 list(a.getiterator( ('f', 'a') )))
3077 self.assertEqual(
3078 [c, e, f],
3079 list(c.getiterator( ('c', '*', 'a') )))
3080 self.assertEqual(
3081 [],
3082 list(a.getiterator( () )))
3083
3085 Element = self.etree.Element
3086 SubElement = self.etree.SubElement
3087
3088 a = Element('{a}a')
3089 b = SubElement(a, '{a}b')
3090 c = SubElement(a, '{a}c')
3091 d = SubElement(b, '{b}d')
3092 e = SubElement(c, '{a}e')
3093 f = SubElement(c, '{b}f')
3094 g = SubElement(c, 'g')
3095
3096 self.assertEqual(
3097 [a],
3098 list(a.getiterator('{a}a')))
3099 self.assertEqual(
3100 [],
3101 list(a.getiterator('{b}a')))
3102 self.assertEqual(
3103 [],
3104 list(a.getiterator('a')))
3105 self.assertEqual(
3106 [a,b,d,c,e,f,g],
3107 list(a.getiterator('*')))
3108 self.assertEqual(
3109 [f],
3110 list(c.getiterator('{b}*')))
3111 self.assertEqual(
3112 [d, f],
3113 list(a.getiterator('{b}*')))
3114 self.assertEqual(
3115 [g],
3116 list(a.getiterator('g')))
3117 self.assertEqual(
3118 [g],
3119 list(a.getiterator('{}g')))
3120 self.assertEqual(
3121 [g],
3122 list(a.getiterator('{}*')))
3123
3125 Element = self.etree.Element
3126 Comment = self.etree.Comment
3127 SubElement = self.etree.SubElement
3128
3129 a = Element('{a}a')
3130 b = SubElement(a, '{nsA}b')
3131 c = SubElement(b, '{nsB}b')
3132 d = SubElement(a, 'b')
3133 e = SubElement(a, '{nsA}e')
3134 f = SubElement(e, '{nsB}e')
3135 g = SubElement(e, 'e')
3136 a.append(Comment('test'))
3137
3138 self.assertEqual(
3139 [b, c, d],
3140 list(a.getiterator('{*}b')))
3141 self.assertEqual(
3142 [e, f, g],
3143 list(a.getiterator('{*}e')))
3144 self.assertEqual(
3145 [a, b, c, d, e, f, g],
3146 list(a.getiterator('{*}*')))
3147
3149 Element = self.etree.Element
3150 Entity = self.etree.Entity
3151 SubElement = self.etree.SubElement
3152
3153 a = Element('a')
3154 b = SubElement(a, 'b')
3155 entity_b = Entity("TEST-b")
3156 b.append(entity_b)
3157
3158 self.assertEqual(
3159 [entity_b],
3160 list(a.getiterator(Entity)))
3161
3162 entity_a = Entity("TEST-a")
3163 a.append(entity_a)
3164
3165 self.assertEqual(
3166 [entity_b, entity_a],
3167 list(a.getiterator(Entity)))
3168
3169 self.assertEqual(
3170 [entity_b],
3171 list(b.getiterator(Entity)))
3172
3174 Element = self.etree.Element
3175 Comment = self.etree.Comment
3176 PI = self.etree.PI
3177 SubElement = self.etree.SubElement
3178
3179 a = Element('a')
3180 b = SubElement(a, 'b')
3181 a.append(Comment("test"))
3182 a.append(PI("pi", "content"))
3183 c = SubElement(a, 'c')
3184
3185 self.assertEqual(
3186 [a, b, c],
3187 list(a.getiterator(Element)))
3188
3190 # ElementTree iterates over everything here
3191 Element = self.etree.Element
3192 Comment = self.etree.Comment
3193 PI = self.etree.PI
3194 SubElement = self.etree.SubElement
3195
3196 a = Element('a')
3197 b = SubElement(a, 'b')
3198 a.append(Comment("test"))
3199 a.append(PI("pi", "content"))
3200 c = SubElement(a, 'c')
3201
3202 self.assertEqual(
3203 [a, b, c],
3204 list(a.getiterator('*')))
3205
3207 a = etree.Element("a")
3208 b = etree.SubElement(a, "b")
3209 c = etree.SubElement(a, "c")
3210 d1 = etree.SubElement(c, "d")
3211 d2 = etree.SubElement(c, "d")
3212 c.text = d1.text = 'TEXT'
3213
3214 tree = etree.ElementTree(a)
3215 self.assertEqual('.', tree.getelementpath(a))
3216 self.assertEqual('c/d[1]', tree.getelementpath(d1))
3217 self.assertEqual('c/d[2]', tree.getelementpath(d2))
3218
3219 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3220 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3221
3222 tree = etree.ElementTree(c)
3223 self.assertEqual('.', tree.getelementpath(c))
3224 self.assertEqual('d[2]', tree.getelementpath(d2))
3225 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3226
3227 tree = etree.ElementTree(b) # not a parent of a/c/d1/d2
3228 self.assertEqual('.', tree.getelementpath(b))
3229 self.assertRaises(ValueError, tree.getelementpath, a)
3230 self.assertRaises(ValueError, tree.getelementpath, c)
3231 self.assertRaises(ValueError, tree.getelementpath, d2)
3232
3234 a = etree.Element("{http://ns1/}a")
3235 b = etree.SubElement(a, "{http://ns1/}b")
3236 c = etree.SubElement(a, "{http://ns1/}c")
3237 d1 = etree.SubElement(c, "{http://ns1/}d")
3238 d2 = etree.SubElement(c, "{http://ns2/}d")
3239 d3 = etree.SubElement(c, "{http://ns1/}d")
3240
3241 tree = etree.ElementTree(a)
3242 self.assertEqual('.', tree.getelementpath(a))
3243 self.assertEqual('{http://ns1/}c/{http://ns1/}d[1]',
3244 tree.getelementpath(d1))
3245 self.assertEqual('{http://ns1/}c/{http://ns2/}d',
3246 tree.getelementpath(d2))
3247 self.assertEqual('{http://ns1/}c/{http://ns1/}d[2]',
3248 tree.getelementpath(d3))
3249
3250 self.assertEqual(a, tree.find(tree.getelementpath(a)))
3251 self.assertEqual(b, tree.find(tree.getelementpath(b)))
3252 self.assertEqual(c, tree.find(tree.getelementpath(c)))
3253 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3254 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3255 self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
3256
3257 tree = etree.ElementTree(c)
3258 self.assertEqual('{http://ns1/}d[1]', tree.getelementpath(d1))
3259 self.assertEqual('{http://ns2/}d', tree.getelementpath(d2))
3260 self.assertEqual('{http://ns1/}d[2]', tree.getelementpath(d3))
3261 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3262 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3263 self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
3264
3265 tree = etree.ElementTree(b) # not a parent of d1/d2
3266 self.assertRaises(ValueError, tree.getelementpath, d1)
3267 self.assertRaises(ValueError, tree.getelementpath, d2)
3268
3270 XML = self.etree.XML
3271 ElementTree = self.etree.ElementTree
3272 QName = self.etree.QName
3273 tree = ElementTree(XML(_bytes('<a><b><c/></b><b/><c><b/></c></a>')))
3274 self.assertEqual(tree.find(QName("c")), tree.getroot()[2])
3275
3277 XML = self.etree.XML
3278 ElementTree = self.etree.ElementTree
3279 QName = self.etree.QName
3280 tree = ElementTree(XML(_bytes('<a><b><c/></b><b/><c><b/></c></a>')))
3281 self.assertEqual(len(list(tree.findall(QName("c")))), 1)
3282
3284 XML = self.etree.XML
3285 ElementTree = self.etree.ElementTree
3286 QName = self.etree.QName
3287 tree = ElementTree(XML(
3288 _bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>')))
3289 self.assertEqual(len(list(tree.findall(QName("b")))), 2)
3290 self.assertEqual(len(list(tree.findall(QName("X", "b")))), 1)
3291
3293 XML = self.etree.XML
3294 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>'))
3295 self.assertEqual(len(root.findall(".//{X}b")), 2)
3296 self.assertEqual(len(root.findall(".//{X}*")), 2)
3297 self.assertEqual(len(root.findall(".//b")), 3)
3298
3300 XML = self.etree.XML
3301 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
3302 nsmap = {'xx': 'X'}
3303 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
3304 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 2)
3305 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3306 nsmap = {'xx': 'Y'}
3307 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
3308 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 1)
3309 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3310
3312 XML = self.etree.XML
3313 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
3314 nsmap = {'xx': 'X'}
3315 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
3316 nsmap = {'xx': 'X', None: 'Y'}
3317 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)
3318 nsmap = {'xx': 'X', '': 'Y'}
3319 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)
3320
3322 XML = self.etree.XML
3323 root = XML(_bytes('<a><b><c/></b><b/><c><b/><b/></c><b/></a>'))
3324 self.assertRaises(SyntaxError, root.findall, '')
3325 self.assertRaises(SyntaxError, root.findall, '//') # absolute path on Element
3326 self.assertRaises(SyntaxError, root.findall, './//')
3327
3329 etree = self.etree
3330 e = etree.Element('foo')
3331 for i in range(10):
3332 etree.SubElement(e, 'a%s' % i)
3333 for i in range(10):
3334 self.assertEqual(
3335 i,
3336 e.index(e[i]))
3337 self.assertEqual(
3338 3, e.index(e[3], 3))
3339 self.assertRaises(
3340 ValueError, e.index, e[3], 4)
3341 self.assertRaises(
3342 ValueError, e.index, e[3], 0, 2)
3343 self.assertRaises(
3344 ValueError, e.index, e[8], 0, -3)
3345 self.assertRaises(
3346 ValueError, e.index, e[8], -5, -3)
3347 self.assertEqual(
3348 8, e.index(e[8], 0, -1))
3349 self.assertEqual(
3350 8, e.index(e[8], -12, -1))
3351 self.assertEqual(
3352 0, e.index(e[0], -12, -1))
3353
3355 etree = self.etree
3356 e = etree.Element('foo')
3357 for i in range(10):
3358 el = etree.SubElement(e, 'a%s' % i)
3359 el.text = "text%d" % i
3360 el.tail = "tail%d" % i
3361
3362 child0 = e[0]
3363 child1 = e[1]
3364 child2 = e[2]
3365
3366 e.replace(e[0], e[1])
3367 self.assertEqual(
3368 9, len(e))
3369 self.assertEqual(
3370 child1, e[0])
3371 self.assertEqual(
3372 child1.text, "text1")
3373 self.assertEqual(
3374 child1.tail, "tail1")
3375 self.assertEqual(
3376 child0.tail, "tail0")
3377 self.assertEqual(
3378 child2, e[1])
3379
3380 e.replace(e[-1], e[0])
3381 self.assertEqual(
3382 child1, e[-1])
3383 self.assertEqual(
3384 child1.text, "text1")
3385 self.assertEqual(
3386 child1.tail, "tail1")
3387 self.assertEqual(
3388 child2, e[0])
3389
3391 etree = self.etree
3392 e = etree.Element('foo')
3393 for i in range(10):
3394 etree.SubElement(e, 'a%s' % i)
3395
3396 new_element = etree.Element("test")
3397 new_element.text = "TESTTEXT"
3398 new_element.tail = "TESTTAIL"
3399 child1 = e[1]
3400 e.replace(e[0], new_element)
3401 self.assertEqual(
3402 new_element, e[0])
3403 self.assertEqual(
3404 "TESTTEXT",
3405 e[0].text)
3406 self.assertEqual(
3407 "TESTTAIL",
3408 e[0].tail)
3409 self.assertEqual(
3410 child1, e[1])
3411
3413 Element = self.etree.Element
3414 SubElement = self.etree.SubElement
3415
3416 a = Element('a')
3417
3418 e = Element('e')
3419 f = Element('f')
3420 g = Element('g')
3421
3422 a[:] = [e, f, g]
3423 self.assertEqual(
3424 [e, f, g],
3425 list(a))
3426
3427 a[::-1] = [e, f, g]
3428 self.assertEqual(
3429 [g, f, e],
3430 list(a))
3431
3433 Element = self.etree.Element
3434 SubElement = self.etree.SubElement
3435
3436 a = Element('a')
3437 b = SubElement(a, 'b')
3438 c = SubElement(a, 'c')
3439 d = SubElement(a, 'd')
3440 e = SubElement(a, 'e')
3441
3442 x = Element('x')
3443 y = Element('y')
3444
3445 a[1::2] = [x, y]
3446 self.assertEqual(
3447 [b, x, d, y],
3448 list(a))
3449
3451 Element = self.etree.Element
3452 SubElement = self.etree.SubElement
3453
3454 a = Element('a')
3455 b = SubElement(a, 'b')
3456 c = SubElement(a, 'c')
3457 d = SubElement(a, 'd')
3458 e = SubElement(a, 'e')
3459
3460 x = Element('x')
3461 y = Element('y')
3462
3463 a[1::-1] = [x, y]
3464 self.assertEqual(
3465 [y, x, d, e],
3466 list(a))
3467
3469 Element = self.etree.Element
3470 SubElement = self.etree.SubElement
3471
3472 a = Element('a')
3473 b = SubElement(a, 'b')
3474 c = SubElement(a, 'c')
3475 d = SubElement(a, 'd')
3476 e = SubElement(a, 'e')
3477
3478 x = Element('x')
3479 y = Element('y')
3480
3481 a[::-2] = [x, y]
3482 self.assertEqual(
3483 [b, y, d, x],
3484 list(a))
3485
3487 Element = self.etree.Element
3488 SubElement = self.etree.SubElement
3489 try:
3490 slice
3491 except NameError:
3492 print("slice() not found")
3493 return
3494
3495 a = Element('a')
3496 b = SubElement(a, 'b')
3497 c = SubElement(a, 'c')
3498 d = SubElement(a, 'd')
3499 e = SubElement(a, 'e')
3500
3501 x = Element('x')
3502 y = Element('y')
3503 z = Element('z')
3504
3505 self.assertRaises(
3506 ValueError,
3507 operator.setitem, a, slice(1,None,2), [x, y, z])
3508
3509 self.assertEqual(
3510 [b, c, d, e],
3511 list(a))
3512
3514 XML = self.etree.XML
3515 root = XML(_bytes('''<?xml version="1.0"?>
3516 <root><test>
3517
3518 <bla/></test>
3519 </root>
3520 '''))
3521
3522 self.assertEqual(
3523 [2, 2, 4],
3524 [ el.sourceline for el in root.getiterator() ])
3525
3527 XML = self.etree.XML
3528 root = XML(_bytes(
3529 '<?xml version="1.0"?>\n'
3530 '<root>' + '\n' * 65536 +
3531 '<p>' + '\n' * 65536 + '</p>\n' +
3532 '<br/>\n'
3533 '</root>'))
3534
3535 if self.etree.LIBXML_VERSION >= (2, 9):
3536 expected = [2, 131074, 131076]
3537 else:
3538 expected = [2, 65535, 65535]
3539
3540 self.assertEqual(expected, [el.sourceline for el in root.iter()])
3541
3543 parse = self.etree.parse
3544 tree = parse(fileInTestDir('include/test_xinclude.xml'))
3545
3546 self.assertEqual(
3547 [1, 2, 3],
3548 [ el.sourceline for el in tree.getiterator() ])
3549
3551 iterparse = self.etree.iterparse
3552 lines = [ el.sourceline for (event, el) in
3553 iterparse(fileInTestDir('include/test_xinclude.xml')) ]
3554
3555 self.assertEqual(
3556 [2, 3, 1],
3557 lines)
3558
3560 iterparse = self.etree.iterparse
3561 lines = [ el.sourceline for (event, el) in
3562 iterparse(fileInTestDir('include/test_xinclude.xml'),
3563 events=("start",)) ]
3564
3565 self.assertEqual(
3566 [1, 2, 3],
3567 lines)
3568
3570 Element = self.etree.Element
3571 SubElement = self.etree.SubElement
3572 el = Element("test")
3573 self.assertEqual(None, el.sourceline)
3574
3575 child = SubElement(el, "test")
3576 self.assertEqual(None, el.sourceline)
3577 self.assertEqual(None, child.sourceline)
3578
3580 etree = self.etree
3581 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3582 docinfo = root.getroottree().docinfo
3583 self.assertEqual(docinfo.URL, "http://no/such/url")
3584
3586 etree = self.etree
3587 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3588 docinfo = root.getroottree().docinfo
3589 self.assertEqual(docinfo.URL, "http://no/such/url")
3590 docinfo.URL = "https://secret/url"
3591 self.assertEqual(docinfo.URL, "https://secret/url")
3592
3594 etree = self.etree
3595 tree = etree.parse(BytesIO("<root/>"), base_url="http://no/such/url")
3596 docinfo = tree.docinfo
3597 self.assertEqual(docinfo.URL, "http://no/such/url")
3598
3600 etree = self.etree
3601 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
3602 base_url="http://no/such/url")
3603 docinfo = tree.docinfo
3604 self.assertEqual(docinfo.URL, "http://no/such/url")
3605
3607 etree = self.etree
3608 root = etree.HTML(_bytes("<html/>"), base_url="http://no/such/url")
3609 docinfo = root.getroottree().docinfo
3610 self.assertEqual(docinfo.URL, "http://no/such/url")
3611
3613 etree = self.etree
3614 xml_header = '<?xml version="1.0" encoding="ascii"?>'
3615 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3616 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3617 doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id)
3618
3619 xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')
3620
3621 tree = etree.parse(BytesIO(xml))
3622 docinfo = tree.docinfo
3623 self.assertEqual(docinfo.encoding, "ascii")
3624 self.assertEqual(docinfo.xml_version, "1.0")
3625 self.assertEqual(docinfo.public_id, pub_id)
3626 self.assertEqual(docinfo.system_url, sys_id)
3627 self.assertEqual(docinfo.root_name, 'html')
3628 self.assertEqual(docinfo.doctype, doctype_string)
3629
3631 etree = self.etree
3632 xml_header = '<?xml version="1.0" encoding="UTF-8"?>'
3633 sys_id = "some.dtd"
3634 doctype_string = '<!DOCTYPE html SYSTEM "%s">' % sys_id
3635 xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')
3636
3637 tree = etree.parse(BytesIO(xml))
3638 docinfo = tree.docinfo
3639 self.assertEqual(docinfo.encoding, "UTF-8")
3640 self.assertEqual(docinfo.xml_version, "1.0")
3641 self.assertEqual(docinfo.public_id, None)
3642 self.assertEqual(docinfo.system_url, sys_id)
3643 self.assertEqual(docinfo.root_name, 'html')
3644 self.assertEqual(docinfo.doctype, doctype_string)
3645
3647 etree = self.etree
3648 xml = _bytes('<html><body></body></html>')
3649 tree = etree.parse(BytesIO(xml))
3650 docinfo = tree.docinfo
3651 self.assertEqual(docinfo.encoding, "UTF-8")
3652 self.assertEqual(docinfo.xml_version, "1.0")
3653 self.assertEqual(docinfo.public_id, None)
3654 self.assertEqual(docinfo.system_url, None)
3655 self.assertEqual(docinfo.root_name, 'html')
3656 self.assertEqual(docinfo.doctype, '')
3657
3659 etree = self.etree
3660 xml = _bytes('<!DOCTYPE root><root></root>')
3661 tree = etree.parse(BytesIO(xml))
3662 docinfo = tree.docinfo
3663 self.assertEqual(docinfo.encoding, "UTF-8")
3664 self.assertEqual(docinfo.xml_version, "1.0")
3665 self.assertEqual(docinfo.public_id, None)
3666 self.assertEqual(docinfo.system_url, None)
3667 self.assertEqual(docinfo.root_name, 'root')
3668 self.assertEqual(docinfo.doctype, '<!DOCTYPE root>')
3669
3671 etree = self.etree
3672 xml = _bytes('<!DOCTYPE root>\n<root/>')
3673 tree = etree.parse(BytesIO(xml))
3674 self.assertEqual(xml, etree.tostring(tree))
3675
3677 etree = self.etree
3678 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3679 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3680 doctype_string = _bytes('<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id))
3681
3682 xml = _bytes('<!DOCTYPE root>\n<root/>')
3683 tree = etree.parse(BytesIO(xml))
3684 self.assertEqual(xml.replace(_bytes('<!DOCTYPE root>'), doctype_string),
3685 etree.tostring(tree, doctype=doctype_string))
3686
3688 etree = self.etree
3689 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3690 self.assertEqual(root.base, "http://no/such/url")
3691 self.assertEqual(
3692 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3693 root.base = "https://secret/url"
3694 self.assertEqual(root.base, "https://secret/url")
3695 self.assertEqual(
3696 root.get('{http://www.w3.org/XML/1998/namespace}base'),
3697 "https://secret/url")
3698
3700 etree = self.etree
3701 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3702 self.assertEqual(root.base, "http://no/such/url")
3703 self.assertEqual(
3704 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3705 root.set('{http://www.w3.org/XML/1998/namespace}base',
3706 "https://secret/url")
3707 self.assertEqual(root.base, "https://secret/url")
3708 self.assertEqual(
3709 root.get('{http://www.w3.org/XML/1998/namespace}base'),
3710 "https://secret/url")
3711
3713 etree = self.etree
3714 root = etree.HTML(_bytes("<html><body></body></html>"),
3715 base_url="http://no/such/url")
3716 self.assertEqual(root.base, "http://no/such/url")
3717
3719 etree = self.etree
3720 root = etree.HTML(_bytes('<html><head><base href="http://no/such/url"></head></html>'))
3721 self.assertEqual(root.base, "http://no/such/url")
3722
3724 ET = self.etree
3725 elem = ET.XML("<root></root>")
3726 ET.indent(elem)
3727 self.assertEqual(ET.tostring(elem), b'<root/>')
3728
3729 elem = ET.XML("<html><body>text</body></html>")
3730 ET.indent(elem)
3731 self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>\n</html>')
3732
3733 elem = ET.XML("<html> <body>text</body> </html>")
3734 ET.indent(elem)
3735 self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>\n</html>')
3736
3737 elem = ET.XML("<html> <body>text</body> </html>")
3738 ET.indent(elem)
3739 self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>\n</html>')
3740
3741 elem = ET.XML("<html><body>text</body>tail</html>")
3742 ET.indent(elem)
3743 self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>tail</html>')
3744
3745 elem = ET.XML("<html><body><p>par</p>\n<p>text</p>\t<p><br/></p></body></html>")
3746 ET.indent(elem)
3747 self.assertEqual(
3748 ET.tostring(elem),
3749 b'<html>\n'
3750 b' <body>\n'
3751 b' <p>par</p>\n'
3752 b' <p>text</p>\n'
3753 b' <p>\n'
3754 b' <br/>\n'
3755 b' </p>\n'
3756 b' </body>\n'
3757 b'</html>'
3758 )
3759
3760 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
3761 ET.indent(elem)
3762 self.assertEqual(
3763 ET.tostring(elem),
3764 b'<html>\n'
3765 b' <body>\n'
3766 b' <p>pre<br/>post</p>\n'
3767 b' <p>text</p>\n'
3768 b' </body>\n'
3769 b'</html>'
3770 )
3771
3773 ET = self.etree
3774 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
3775 ET.indent(elem, space='\t')
3776 self.assertEqual(
3777 ET.tostring(elem),
3778 b'<html>\n'
3779 b'\t<body>\n'
3780 b'\t\t<p>pre<br/>post</p>\n'
3781 b'\t\t<p>text</p>\n'
3782 b'\t</body>\n'
3783 b'</html>'
3784 )
3785
3786 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
3787 ET.indent(elem, space='')
3788 self.assertEqual(
3789 ET.tostring(elem),
3790 b'<html>\n'
3791 b'<body>\n'
3792 b'<p>pre<br/>post</p>\n'
3793 b'<p>text</p>\n'
3794 b'</body>\n'
3795 b'</html>'
3796 )
3797
3799 ET = self.etree
3800 elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p><p /></body></html>")
3801 ET.indent(elem)
3802 self.assertEqual(
3803 {el.tail for el in elem.iter()},
3804 {None, "\n", "\n ", "\n "}
3805 )
3806 self.assertEqual(
3807 {el.text for el in elem.iter()},
3808 {None, "\n ", "\n ", "\n ", "par", "text"}
3809 )
3810 # NOTE: lxml does not reuse Python text strings across elements.
3811 #self.assertEqual(
3812 # len({el.tail for el in elem.iter()}),
3813 # len({id(el.tail) for el in elem.iter()}),
3814 #)
3815
3817 ET = self.etree
3818 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
3819 try:
3820 ET.indent(elem, level=-1)
3821 except ValueError:
3822 pass
3823 else:
3824 self.assertTrue(False, "ValueError not raised")
3825 self.assertEqual(
3826 ET.tostring(elem),
3827 b"<html><body><p>pre<br/>post</p><p>text</p></body></html>"
3828 )
3829
3830 ET.indent(elem, level=2)
3831 self.assertEqual(
3832 ET.tostring(elem),
3833 b'<html>\n'
3834 b' <body>\n'
3835 b' <p>pre<br/>post</p>\n'
3836 b' <p>text</p>\n'
3837 b' </body>\n'
3838 b' </html>'
3839 )
3840
3841 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
3842 ET.indent(elem, level=1, space=' ')
3843 self.assertEqual(
3844 ET.tostring(elem),
3845 b'<html>\n'
3846 b' <body>\n'
3847 b' <p>pre<br/>post</p>\n'
3848 b' <p>text</p>\n'
3849 b' </body>\n'
3850 b' </html>'
3851 )
3852
3854 # parse from a file object that returns unicode strings
3855 f = LargeFileLikeUnicode()
3856 tree = self.etree.parse(f)
3857 root = tree.getroot()
3858 self.assertTrue(root.tag.endswith('root'))
3859
3861 # check that DTDs that go in also go back out
3862 xml = _bytes('''\
3863 <!DOCTYPE test SYSTEM "test.dtd" [
3864 <!ENTITY entity "tasty">
3865 <!ELEMENT test (a)>
3866 <!ELEMENT a (#PCDATA)>
3867 ]>
3868 <test><a>test-test</a></test>\
3869 ''')
3870 tree = self.etree.parse(BytesIO(xml))
3871 self.assertEqual(self.etree.tostring(tree).replace(_bytes(" "), _bytes("")),
3872 xml.replace(_bytes(" "), _bytes("")))
3873
3875 Element = self.etree.Element
3876
3877 a = Element('a')
3878 self.assertRaises(ValueError, setattr, a, "text", 'ha\0ho')
3879 self.assertRaises(ValueError, setattr, a, "tail", 'ha\0ho')
3880
3881 self.assertRaises(ValueError, Element, 'ha\0ho')
3882
3884 Element = self.etree.Element
3885
3886 a = Element('a')
3887 self.assertRaises(ValueError, setattr, a, "text",
3888 _str('ha\0ho'))
3889 self.assertRaises(ValueError, setattr, a, "tail",
3890 _str('ha\0ho'))
3891
3892 self.assertRaises(ValueError, Element,
3893 _str('ha\0ho'))
3894
3896 Element = self.etree.Element
3897
3898 a = Element('a')
3899 self.assertRaises(ValueError, setattr, a, "text", 'ha\x07ho')
3900 self.assertRaises(ValueError, setattr, a, "text", 'ha\x02ho')
3901
3902 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x07ho')
3903 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x02ho')
3904
3905 self.assertRaises(ValueError, Element, 'ha\x07ho')
3906 self.assertRaises(ValueError, Element, 'ha\x02ho')
3907
3909 Element = self.etree.Element
3910
3911 a = Element('a')
3912 self.assertRaises(ValueError, setattr, a, "text",
3913 _str('ha\x07ho'))
3914 self.assertRaises(ValueError, setattr, a, "text",
3915 _str('ha\x02ho'))
3916
3917 self.assertRaises(ValueError, setattr, a, "tail",
3918 _str('ha\x07ho'))
3919 self.assertRaises(ValueError, setattr, a, "tail",
3920 _str('ha\x02ho'))
3921
3922 self.assertRaises(ValueError, Element,
3923 _str('ha\x07ho'))
3924 self.assertRaises(ValueError, Element,
3925 _str('ha\x02ho'))
3926
3928 Element = self.etree.Element
3929
3930 a = Element('a')
3931 self.assertRaises(ValueError, setattr, a, "text",
3932 _str('ha\u1234\x07ho'))
3933 self.assertRaises(ValueError, setattr, a, "text",
3934 _str('ha\u1234\x02ho'))
3935
3936 self.assertRaises(ValueError, setattr, a, "tail",
3937 _str('ha\u1234\x07ho'))
3938 self.assertRaises(ValueError, setattr, a, "tail",
3939 _str('ha\u1234\x02ho'))
3940
3941 self.assertRaises(ValueError, Element,
3942 _str('ha\u1234\x07ho'))
3943 self.assertRaises(ValueError, Element,
3944 _str('ha\u1234\x02ho'))
3945
3947 # ElementTree fails to serialize this
3948 tostring = self.etree.tostring
3949 Element = self.etree.Element
3950 SubElement = self.etree.SubElement
3951
3952 a = Element('a')
3953 b = SubElement(a, 'b')
3954 c = SubElement(a, 'c')
3955
3956 result = tostring(a, encoding='UTF-16')
3957 self.assertEqual(_bytes('<a><b></b><c></c></a>'),
3958 canonicalize(result))
3959
3961 # ElementTree raises an AssertionError here
3962 tostring = self.etree.tostring
3963 self.assertRaises(TypeError, self.etree.tostring, None)
3964
3966 tostring = self.etree.tostring
3967 Element = self.etree.Element
3968 SubElement = self.etree.SubElement
3969
3970 a = Element('a')
3971 b = SubElement(a, 'b')
3972 c = SubElement(a, 'c')
3973
3974 result = tostring(a)
3975 self.assertEqual(result, _bytes("<a><b/><c/></a>"))
3976
3977 result = tostring(a, pretty_print=False)
3978 self.assertEqual(result, _bytes("<a><b/><c/></a>"))
3979
3980 result = tostring(a, pretty_print=True)
3981 self.assertEqual(result, _bytes("<a>\n <b/>\n <c/>\n</a>\n"))
3982
3984 tostring = self.etree.tostring
3985 Element = self.etree.Element
3986 SubElement = self.etree.SubElement
3987
3988 a = Element('a')
3989 a.tail = "aTAIL"
3990 b = SubElement(a, 'b')
3991 b.tail = "bTAIL"
3992 c = SubElement(a, 'c')
3993
3994 result = tostring(a)
3995 self.assertEqual(result, _bytes("<a><b/>bTAIL<c/></a>aTAIL"))
3996
3997 result = tostring(a, with_tail=False)
3998 self.assertEqual(result, _bytes("<a><b/>bTAIL<c/></a>"))
3999
4000 result = tostring(a, with_tail=True)
4001 self.assertEqual(result, _bytes("<a><b/>bTAIL<c/></a>aTAIL"))
4002
4004 tostring = self.etree.tostring
4005 html = self.etree.fromstring(
4006 '<html><body>'
4007 '<div><p>Some text<i>\r\n</i></p></div>\r\n'
4008 '</body></html>',
4009 parser=self.etree.HTMLParser())
4010 self.assertEqual(html.tag, 'html')
4011 div = html.find('.//div')
4012 self.assertEqual(div.tail, '\r\n')
4013 result = tostring(div, method='html')
4014 self.assertEqual(
4015 result,
4016 _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
4017 result = tostring(div, method='html', with_tail=True)
4018 self.assertEqual(
4019 result,
4020 _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
4021 result = tostring(div, method='html', with_tail=False)
4022 self.assertEqual(
4023 result,
4024 _bytes("<div><p>Some text<i>\r\n</i></p></div>"))
4025
4027 tostring = self.etree.tostring
4028 XML = self.etree.XML
4029 ElementTree = self.etree.ElementTree
4030 Element = self.etree.Element
4031
4032 tree = Element("root").getroottree()
4033 self.assertEqual(None, tree.docinfo.standalone)
4034
4035 tree = XML(_bytes("<root/>")).getroottree()
4036 self.assertEqual(None, tree.docinfo.standalone)
4037
4038 tree = XML(_bytes(
4039 "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"
4040 )).getroottree()
4041 self.assertEqual(True, tree.docinfo.standalone)
4042
4043 tree = XML(_bytes(
4044 "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"
4045 )).getroottree()
4046 self.assertEqual(False, tree.docinfo.standalone)
4047
4049 tostring = self.etree.tostring
4050 XML = self.etree.XML
4051 ElementTree = self.etree.ElementTree
4052
4053 root = XML(_bytes("<root/>"))
4054
4055 tree = ElementTree(root)
4056 self.assertEqual(None, tree.docinfo.standalone)
4057
4058 result = tostring(root, xml_declaration=True, encoding="ASCII")
4059 self.assertEqual(result, _bytes(
4060 "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
4061
4062 result = tostring(root, xml_declaration=True, encoding="ASCII",
4063 standalone=True)
4064 self.assertEqual(result, _bytes(
4065 "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
4066
4067 tree = ElementTree(XML(result))
4068 self.assertEqual(True, tree.docinfo.standalone)
4069
4070 result = tostring(root, xml_declaration=True, encoding="ASCII",
4071 standalone=False)
4072 self.assertEqual(result, _bytes(
4073 "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"))
4074
4075 tree = ElementTree(XML(result))
4076 self.assertEqual(False, tree.docinfo.standalone)
4077
4079 tostring = self.etree.tostring
4080 XML = self.etree.XML
4081 ElementTree = self.etree.ElementTree
4082
4083 root = XML(_bytes(
4084 "<?xml version='1.0' encoding='UTF-8' standalone='yes'?>\n<root/>"))
4085
4086 tree = ElementTree(root)
4087 self.assertEqual(True, tree.docinfo.standalone)
4088
4089 result = tostring(root, xml_declaration=True, encoding="ASCII")
4090 self.assertEqual(result, _bytes(
4091 "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
4092
4093 result = tostring(root, xml_declaration=True, encoding="ASCII",
4094 standalone=True)
4095 self.assertEqual(result, _bytes(
4096 "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
4097
4099 tostring = self.etree.tostring
4100 Element = self.etree.Element
4101 SubElement = self.etree.SubElement
4102
4103 a = Element('a')
4104 a.text = "A"
4105 a.tail = "tail"
4106 b = SubElement(a, 'b')
4107 b.text = "B"
4108 b.tail = _str("Søk på nettet")
4109 c = SubElement(a, 'c')
4110 c.text = "C"
4111
4112 result = tostring(a, method="text", encoding="UTF-16")
4113
4114 self.assertEqual(_str('ABSøk på nettetCtail').encode("UTF-16"),
4115 result)
4116
4118 tostring = self.etree.tostring
4119 Element = self.etree.Element
4120 SubElement = self.etree.SubElement
4121
4122 a = Element('a')
4123 a.text = _str('Søk på nettetA')
4124 a.tail = "tail"
4125 b = SubElement(a, 'b')
4126 b.text = "B"
4127 b.tail = _str('Søk på nettetB')
4128 c = SubElement(a, 'c')
4129 c.text = "C"
4130
4131 self.assertRaises(UnicodeEncodeError,
4132 tostring, a, method="text")
4133
4134 self.assertEqual(
4135 _str('Søk på nettetABSøk på nettetBCtail').encode('utf-8'),
4136 tostring(a, encoding="UTF-8", method="text"))
4137
4139 tounicode = self.etree.tounicode
4140 Element = self.etree.Element
4141 SubElement = self.etree.SubElement
4142
4143 a = Element('a')
4144 b = SubElement(a, 'b')
4145 c = SubElement(a, 'c')
4146
4147 self.assertTrue(isinstance(tounicode(a), _unicode))
4148 self.assertEqual(_bytes('<a><b></b><c></c></a>'),
4149 canonicalize(tounicode(a)))
4150
4152 tounicode = self.etree.tounicode
4153 Element = self.etree.Element
4154 SubElement = self.etree.SubElement
4155
4156 a = Element('a')
4157 b = SubElement(a, 'b')
4158 c = SubElement(a, 'c')
4159 d = SubElement(c, 'd')
4160 self.assertTrue(isinstance(tounicode(b), _unicode))
4161 self.assertTrue(isinstance(tounicode(c), _unicode))
4162 self.assertEqual(_bytes('<b></b>'),
4163 canonicalize(tounicode(b)))
4164 self.assertEqual(_bytes('<c><d></d></c>'),
4165 canonicalize(tounicode(c)))
4166
4170
4172 tounicode = self.etree.tounicode
4173 Element = self.etree.Element
4174 SubElement = self.etree.SubElement
4175
4176 a = Element('a')
4177 b = SubElement(a, 'b')
4178 c = SubElement(a, 'c')
4179 d = SubElement(c, 'd')
4180 b.tail = 'Foo'
4181
4182 self.assertTrue(isinstance(tounicode(b), _unicode))
4183 self.assertTrue(tounicode(b) == '<b/>Foo' or
4184 tounicode(b) == '<b />Foo')
4185
4187 tounicode = self.etree.tounicode
4188 Element = self.etree.Element
4189 SubElement = self.etree.SubElement
4190
4191 a = Element('a')
4192 b = SubElement(a, 'b')
4193 c = SubElement(a, 'c')
4194
4195 result = tounicode(a)
4196 self.assertEqual(result, "<a><b/><c/></a>")
4197
4198 result = tounicode(a, pretty_print=False)
4199 self.assertEqual(result, "<a><b/><c/></a>")
4200
4201 result = tounicode(a, pretty_print=True)
4202 self.assertEqual(result, "<a>\n <b/>\n <c/>\n</a>\n")
4203
4205 tostring = self.etree.tostring
4206 Element = self.etree.Element
4207 SubElement = self.etree.SubElement
4208
4209 a = Element('a')
4210 b = SubElement(a, 'b')
4211 c = SubElement(a, 'c')
4212
4213 self.assertTrue(isinstance(tostring(a, encoding=_unicode), _unicode))
4214 self.assertEqual(_bytes('<a><b></b><c></c></a>'),
4215 canonicalize(tostring(a, encoding=_unicode)))
4216
4218 tostring = self.etree.tostring
4219 Element = self.etree.Element
4220 SubElement = self.etree.SubElement
4221
4222 a = Element('a')
4223 b = SubElement(a, 'b')
4224 c = SubElement(a, 'c')
4225 d = SubElement(c, 'd')
4226 self.assertTrue(isinstance(tostring(b, encoding=_unicode), _unicode))
4227 self.assertTrue(isinstance(tostring(c, encoding=_unicode), _unicode))
4228 self.assertEqual(_bytes('<b></b>'),
4229 canonicalize(tostring(b, encoding=_unicode)))
4230 self.assertEqual(_bytes('<c><d></d></c>'),
4231 canonicalize(tostring(c, encoding=_unicode)))
4232
4234 tostring = self.etree.tostring
4235 self.assertRaises(TypeError, self.etree.tostring,
4236 None, encoding=_unicode)
4237
4239 tostring = self.etree.tostring
4240 Element = self.etree.Element
4241 SubElement = self.etree.SubElement
4242
4243 a = Element('a')
4244 b = SubElement(a, 'b')
4245 c = SubElement(a, 'c')
4246 d = SubElement(c, 'd')
4247 b.tail = 'Foo'
4248
4249 self.assertTrue(isinstance(tostring(b, encoding=_unicode), _unicode))
4250 self.assertTrue(tostring(b, encoding=_unicode) == '<b/>Foo' or
4251 tostring(b, encoding=_unicode) == '<b />Foo')
4252
4254 tostring = self.etree.tostring
4255 Element = self.etree.Element
4256 SubElement = self.etree.SubElement
4257
4258 a = Element('a')
4259 b = SubElement(a, 'b')
4260 c = SubElement(a, 'c')
4261
4262 result = tostring(a, encoding=_unicode)
4263 self.assertEqual(result, "<a><b/><c/></a>")
4264
4265 result = tostring(a, encoding=_unicode, pretty_print=False)
4266 self.assertEqual(result, "<a><b/><c/></a>")
4267
4268 result = tostring(a, encoding=_unicode, pretty_print=True)
4269 self.assertEqual(result, "<a>\n <b/>\n <c/>\n</a>\n")
4270
4272 root = etree.Element('parent')
4273 etree.SubElement(root, 'child')
4274
4275 self.assertEqual(len(root), 1)
4276 self.assertEqual(root[0].tag, 'child')
4277
4278 # in PyPy, GC used to kill the Python proxy instance without cleanup
4279 gc.collect()
4280 self.assertEqual(len(root), 1)
4281 self.assertEqual(root[0].tag, 'child')
4282
4286
4287 el1 = SubEl()
4288 el2 = SubEl()
4289 self.assertEqual('SubEl', el1.tag)
4290 self.assertEqual('SubEl', el2.tag)
4291 el1.other = el2
4292 el2.other = el1
4293
4294 del el1, el2
4295 gc.collect()
4296 # not really testing anything here, but it shouldn't crash
4297
4299 root = etree.Element('parent')
4300 c1 = etree.SubElement(root, 'child1')
4301 c2 = etree.SubElement(root, 'child2')
4302
4303 root.remove(c1)
4304 root.remove(c2)
4305 c1.addnext(c2)
4306 del c1
4307 # trigger deallocation attempt of c1
4308 c2.getprevious()
4309 # make sure it wasn't deallocated
4310 self.assertEqual('child1', c2.getprevious().tag)
4311
4313 root = etree.Element('parent')
4314 c1 = etree.SubElement(root, 'child1')
4315 c2 = etree.SubElement(root, 'child2')
4316
4317 root.remove(c1)
4318 root.remove(c2)
4319 c1.addnext(c2)
4320 c1.tail = 'abc'
4321 c2.tail = 'xyz'
4322 del c1
4323 # trigger deallocation attempt of c1
4324 c2.getprevious()
4325 # make sure it wasn't deallocated
4326 self.assertEqual('child1', c2.getprevious().tag)
4327 self.assertEqual('abc', c2.getprevious().tail)
4328
4329 # helper methods
4330
4332 """Write out element for comparison.
4333 """
4334 ElementTree = self.etree.ElementTree
4335 f = BytesIO()
4336 tree = ElementTree(element=element)
4337 tree.write(f, encoding=encoding, compression=compression)
4338 data = f.getvalue()
4339 if compression:
4340 data = zlib.decompress(data)
4341 return canonicalize(data)
4342
4343
4346 filename = fileInTestDir('test_broken.xml')
4347 root = etree.XML(_bytes('''\
4348 <doc xmlns:xi="http://www.w3.org/2001/XInclude">
4349 <xi:include href="%s" parse="text"/>
4350 </doc>
4351 ''' % path2url(filename)))
4352 old_text = root.text
4353 content = read_file(filename)
4354 old_tail = root[0].tail
4355
4356 self.include( etree.ElementTree(root) )
4357 self.assertEqual(old_text + content + old_tail,
4358 root.text)
4359
4361 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'))
4362 self.assertNotEqual(
4363 'a',
4364 tree.getroot()[1].tag)
4365 # process xincludes
4366 self.include( tree )
4367 # check whether we find it replaced with included data
4368 self.assertEqual(
4369 'a',
4370 tree.getroot()[1].tag)
4371
class res(etree.Resolver):
    """Resolver that records which kinds of URL it has been asked for.

    ``called`` is a class-level dict, so every instance writes into the
    same mapping.  ``include_text`` is read from ``test.xml`` in the test
    directory once, when the class body is executed.
    """

    include_text = read_file(fileInTestDir('test.xml'))
    called = {}

    def resolve(self, url, id, context):
        """Dispatch on the suffix of *url* and note the branch taken.

        * ``.dtd`` URLs are served from ``test.dtd`` in the test directory.
        * The main input document is left to the default resolver.
        * Anything else is answered with ``include_text``.
        """
        if url.endswith(".dtd"):
            self.called["dtd"] = True
            dtd_path = fileInTestDir('test.dtd')
            return self.resolve_filename(dtd_path, context)

        if url.endswith("test_xinclude.xml"):
            self.called["input"] = True
            # Returning None hands the request over to the default resolver.
            return None

        self.called["include"] = True
        return self.resolve_string(self.include_text, context)
4387
4388 res_instance = res()
4389 parser = etree.XMLParser(load_dtd = True)
4390 parser.resolvers.add(res_instance)
4391
4392 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
4393 parser = parser)
4394
4395 self.include(tree)
4396
4397 called = list(res_instance.called.items())
4398 called.sort()
4399 self.assertEqual(
4400 [("dtd", True), ("include", True), ("input", True)],
4401 called)
4402
4404 data = textwrap.dedent('''
4405 <doc xmlns:xi="http://www.w3.org/2001/XInclude">
4406 <foo/>
4407 <xi:include href="./test.xml" />
4408 </doc>
4409 ''')
4410
class Resolver(etree.Resolver):
    """Resolver that serves a chain of generated include documents.

    Every URL is expected to be requested at most once; a repeated request
    trips an ``assert``.  ``called`` is a class-level dict shared by all
    instances.  ``data`` is taken from the enclosing scope.
    """

    called = {}

    def resolve(self, url, id, context):
        """Record the request for *url* and return the matching document.

        * The main input document is left to the default resolver.
        * ``/test5.xml`` ends the chain with a bare ``<DONE/>`` document.
        * Any other URL gets a copy of ``data`` whose ``test.xml``
          reference is renamed using the number of requests recorded so
          far (including this one).
        """
        seen = self.called

        if url.endswith("test_xinclude.xml"):
            assert not seen.get("input")
            seen["input"] = True
            # Returning None hands the request over to the default resolver.
            return None

        if url.endswith('/test5.xml'):
            assert not seen.get("DONE")
            seen["DONE"] = True
            return self.resolve_string('<DONE/>', context)

        _prefix, filename = url.rsplit('/', 1)
        assert not seen.get(filename)
        seen[filename] = True
        # The counter is taken after recording this request.
        next_name = 'test%d.xml' % len(seen)
        next_data = data.replace('test.xml', next_name)
        return self.resolve_string(next_data, context)
4430
4431 res_instance = Resolver()
4432 parser = etree.XMLParser(load_dtd=True)
4433 parser.resolvers.add(res_instance)
4434
4435 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
4436 parser=parser)
4437
4438 self.include(tree)
4439
4440 called = list(res_instance.called.items())
4441 called.sort()
4442 self.assertEqual(
4443 [("DONE", True), ("input", True), ("test.xml", True),
4444 ("test2.xml", True), ("test3.xml", True), ("test4.xml", True)],
4445 called)
4446
4447
4451
4452
4454 from lxml import ElementInclude
4455
4458
4459 XINCLUDE = {}
4460
4461 XINCLUDE["Recursive1.xml"] = """\
4462 <?xml version='1.0'?>
4463 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4464 <p>The following is the source code of Recursive2.xml:</p>
4465 <xi:include href="Recursive2.xml"/>
4466 </document>
4467 """
4468
4469 XINCLUDE["Recursive2.xml"] = """\
4470 <?xml version='1.0'?>
4471 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4472 <p>The following is the source code of Recursive3.xml:</p>
4473 <xi:include href="Recursive3.xml"/>
4474 </document>
4475 """
4476
4477 XINCLUDE["Recursive3.xml"] = """\
4478 <?xml version='1.0'?>
4479 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4480 <p>The following is the source code of Recursive1.xml:</p>
4481 <xi:include href="Recursive1.xml"/>
4482 </document>
4483 """
4484
4485 XINCLUDE["NonRecursive1.xml"] = """\
4486 <?xml version='1.0'?>
4487 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4488 <p>The following is multiple times the source code of NonRecursive3.xml:</p>
4489 <xi:include href="NonRecursive3.xml"/>
4490 <xi:include href="NonRecursive3.xml"/>
4491 <p>The following is multiple times the source code of Leaf.xml:</p>
4492 <xi:include href="Leaf.xml"/>
4493 <xi:include href="Leaf.xml"/>
4494 <xi:include href="Leaf.xml"/>
4495 <p>One more time the source code of NonRecursive3.xml:</p>
4496 <xi:include href="NonRecursive3.xml"/>
4497 </document>
4498 """
4499
4500 XINCLUDE["NonRecursive2.xml"] = """\
4501 <?xml version='1.0'?>
4502 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4503 <p>The following is multiple times the source code of NonRecursive3.xml:</p>
4504 <xi:include href="NonRecursive3.xml"/>
4505 <xi:include href="NonRecursive3.xml"/>
4506 </document>
4507 """
4508
4509 XINCLUDE["NonRecursive3.xml"] = """\
4510 <?xml version='1.0'?>
4511 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4512 <p>The following is multiple times the source code of Leaf.xml:</p>
4513 <xi:include href="Leaf.xml"/>
4514 <xi:include href="Leaf.xml"/>
4515 </document>
4516 """
4517
4518 XINCLUDE["Leaf.xml"] = """\
4519 <?xml version='1.0'?>
4520 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4521 <p>No further includes</p>
4522 </document>
4523 """
4524
4526 try:
4527 data = textwrap.dedent(self.XINCLUDE[href])
4528 except KeyError:
4529 raise OSError("resource not found")
4530 if parse == "xml":
4531 data = etree.fromstring(data)
4532 return data
4533
# Test body (its `def` line is not present in this extract).
# Loads "Recursive1.xml" through self.xinclude_loader, runs self.include()
# on it with various max_depth values and checks which exception type and
# message is raised each time.
4535 # Test infinitely recursive includes.
4536 document = self.xinclude_loader("Recursive1.xml").getroottree()
4537 with self.assertRaises(self.ElementInclude.FatalIncludeError) as cm:
4538 self.include(document, self.xinclude_loader)
4539 self.assertEqual(str(cm.exception),
4540 "recursive include of 'Recursive2.xml' detected")
4541
4542 # Test 'max_depth' limitation.
# max_depth=None: the expected error is the "recursive include ... detected"
# message, not a depth-limit error.
4543 document = self.xinclude_loader("Recursive1.xml").getroottree()
4544 with self.assertRaises(self.ElementInclude.FatalIncludeError) as cm:
4545 self.include(document, self.xinclude_loader, max_depth=None)
4546 self.assertEqual(str(cm.exception),
4547 "recursive include of 'Recursive2.xml' detected")
4548
# max_depth=0, 1 and 2: a LimitedRecursiveIncludeError is expected, naming
# Recursive2.xml, Recursive3.xml and Recursive1.xml respectively.
4549 document = self.xinclude_loader("Recursive1.xml").getroottree()
4550 with self.assertRaises(self.ElementInclude.LimitedRecursiveIncludeError) as cm:
4551 self.include(document, self.xinclude_loader, max_depth=0)
4552 self.assertEqual(str(cm.exception),
4553 "maximum xinclude depth reached when including file Recursive2.xml")
4554
4555 document = self.xinclude_loader("Recursive1.xml").getroottree()
4556 with self.assertRaises(self.ElementInclude.LimitedRecursiveIncludeError) as cm:
4557 self.include(document, self.xinclude_loader, max_depth=1)
4558 self.assertEqual(str(cm.exception),
4559 "maximum xinclude depth reached when including file Recursive3.xml")
4560
4561 document = self.xinclude_loader("Recursive1.xml").getroottree()
4562 with self.assertRaises(self.ElementInclude.LimitedRecursiveIncludeError) as cm:
4563 self.include(document, self.xinclude_loader, max_depth=2)
4564 self.assertEqual(str(cm.exception),
4565 "maximum xinclude depth reached when including file Recursive1.xml")
4566
# max_depth=3: the expected error is again the "recursive include ...
# detected" FatalIncludeError.
4567 document = self.xinclude_loader("Recursive1.xml").getroottree()
4568 with self.assertRaises(self.ElementInclude.FatalIncludeError) as cm:
4569 self.include(document, self.xinclude_loader, max_depth=3)
4570 self.assertEqual(str(cm.exception),
4571 "recursive include of 'Recursive2.xml' detected")
4572
# Test body (its `def` line is not present in this extract).
# Each self.include() call below is expected to complete without raising;
# there are no further assertions on the resulting documents.
4574 # Test that including the same file multiple times, but on the same level
4575 # is not detected as recursive include
4576 document = self.xinclude_loader("NonRecursive3.xml").getroottree()
4577 self.include(document, self.xinclude_loader)
4578
4579 # same but for more than one level
4580 document = self.xinclude_loader("NonRecursive1.xml").getroottree()
4581 self.include(document, self.xinclude_loader)
4582
4583 # same but no Leaf.xml in top-level file
4584 document = self.xinclude_loader("NonRecursive2.xml").getroottree()
4585 self.include(document, self.xinclude_loader)
4586
4587
# Test body (its `def` line is not present in this extract).
# Serializes a parsed tree into a BytesIO with write_c14n() and expects the
# empty element <b/> to be written in expanded form as <b></b>.
4590 tree = self.parse(_bytes('<a><b/></a>'))
4591 f = BytesIO()
4592 tree.write_c14n(f)
4593 s = f.getvalue()
4594 self.assertEqual(_bytes('<a><b></b></a>'),
4595 s)
4596
# Test body (its `def` line is not present in this extract).
# Writes C14N output with compression=9 into a BytesIO, then reads it back
# through gzip.GzipFile and compares it with the expected uncompressed text.
4598 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4599 f = BytesIO()
4600 tree.write_c14n(f, compression=9)
4601 with gzip.GzipFile(fileobj=BytesIO(f.getvalue())) as gzfile:
4602 s = gzfile.read()
4603 self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
4604 s)
4605
# Test body (its `def` line is not present in this extract).
# Passes a temporary file name (from tmpfile()) to write_c14n() and checks
# the bytes read back from that file.
4607 tree = self.parse(_bytes('<a><b/></a>'))
4608 with tmpfile() as filename:
4609 tree.write_c14n(filename)
4610 data = read_file(filename, 'rb')
4611 self.assertEqual(_bytes('<a><b></b></a>'),
4612 data)
4613
# Test body (its `def` line is not present in this extract).
# Writes C14N output with compression=9 to a temporary file name, reads the
# file back via gzip.open() and compares the decompressed bytes.
4615 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4616 with tmpfile() as filename:
4617 tree.write_c14n(filename, compression=9)
4618 with gzip.open(filename, 'rb') as f:
4619 data = f.read()
4620 self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
4621 data)
4622
# Test body (its `def` line is not present in this extract).
# Same check as for write_c14n() with compression, but going through
# tree.write(..., method='c14n2', compression=9).
4624 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4625 with tmpfile() as filename:
4626 tree.write(filename, method='c14n2', compression=9)
4627 with gzip.open(filename, 'rb') as f:
4628 data = f.read()
4629 self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
4630 data)
4631
# Test body (its `def` line is not present in this extract).
# The input has an XML declaration, whitespace around the root element and
# whitespace-padded text and tail content.
4633 tree = self.parse(
4634 b'<?xml version="1.0"?> <a> abc \n <b> btext </b> btail <c/> ctail </a> ')
# Default c14n2 output: the declaration and the whitespace outside the root
# are gone, text/tail whitespace inside the root is kept unchanged.
4635 f = BytesIO()
4636 tree.write(f, method='c14n2')
4637 s = f.getvalue()
4638 self.assertEqual(b'<a> abc \n <b> btext </b> btail <c></c> ctail </a>',
4639 s)
4640
# With strip_text=True the surrounding whitespace of text and tail content
# is expected to be removed as well.
4641 f = BytesIO()
4642 tree.write(f, method='c14n2', strip_text=True)
4643 s = f.getvalue()
4644 self.assertEqual(b'<a>abc<b>btext</b>btail<c></c>ctail</a>',
4645 s)
4646
# Test body (its `def` line is not present in this extract).
# Checks write_c14n() comment handling: comments are kept by default and
# with with_comments=True (top-level comments separated from the root by a
# newline), and dropped with with_comments=False.
4648 tree = self.parse(_bytes('<!--hi--><a><!--ho--><b/></a><!--hu-->'))
4649 f = BytesIO()
4650 tree.write_c14n(f)
4651 s = f.getvalue()
4652 self.assertEqual(_bytes('<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
4653 s)
4654 f = BytesIO()
4655 tree.write_c14n(f, with_comments=True)
4656 s = f.getvalue()
4657 self.assertEqual(_bytes('<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
4658 s)
4659 f = BytesIO()
4660 tree.write_c14n(f, with_comments=False)
4661 s = f.getvalue()
4662 self.assertEqual(_bytes('<a><b></b></a>'),
4663 s)
4664
# Test body (its `def` line is not present in this extract).
# Checks comment handling of tree.write(..., method='c14n2'): comments are
# kept by default and with with_comments=True, dropped with
# with_comments=False.
4666 tree = self.parse(_bytes('<!--hi--><a><!--ho--><b/></a><!--hu-->'))
4667 f = BytesIO()
4668 tree.write(f, method='c14n2')
4669 s = f.getvalue()
4670 self.assertEqual(_bytes('<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
4671 s)
4672 f = BytesIO()
4673 tree.write(f, method='c14n2', with_comments=True)
4674 s = f.getvalue()
4675 self.assertEqual(_bytes('<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
4676 s)
4677 f = BytesIO()
4678 tree.write(f, method='c14n2', with_comments=False)
4679 s = f.getvalue()
4680 self.assertEqual(_bytes('<a><b></b></a>'),
4681 s)
4682
# Test body (its `def` line is not present in this extract).
# Checks comment handling of etree.tostring(tree, method='c14n'): comments
# are kept by default and with with_comments=True, dropped with
# with_comments=False.
4684 tree = self.parse(_bytes('<!--hi--><a><!--ho--><b/></a><!--hu-->'))
4685 s = etree.tostring(tree, method='c14n')
4686 self.assertEqual(_bytes('<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
4687 s)
4688 s = etree.tostring(tree, method='c14n', with_comments=True)
4689 self.assertEqual(_bytes('<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
4690 s)
4691 s = etree.tostring(tree, method='c14n', with_comments=False)
4692 self.assertEqual(_bytes('<a><b></b></a>'),
4693 s)
4694
# Test body (its `def` line is not present in this extract).
# Checks comment handling of etree.tostring(tree, method='c14n2'): comments
# are kept by default and with with_comments=True, dropped with
# with_comments=False.
4696 tree = self.parse(b'<!--hi--><a><!--ho--><b/></a><!--hu-->')
4697 s = etree.tostring(tree, method='c14n2')
4698 self.assertEqual(b'<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->',
4699 s)
4700 s = etree.tostring(tree, method='c14n2', with_comments=True)
4701 self.assertEqual(b'<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->',
4702 s)
4703 s = etree.tostring(tree, method='c14n2', with_comments=False)
4704 self.assertEqual(b'<a><b></b></a>',
4705 s)
4706
# Test body (its `def` line is not present in this extract).
# Serializes only the root element (tree.getroot()) with method='c14n': the
# comments outside the root (hi/hu) are not part of the expected output,
# while the inner comment (ho) is kept unless with_comments=False.
4708 tree = self.parse(_bytes('<!--hi--><a><!--ho--><b/></a><!--hu-->'))
4709 s = etree.tostring(tree.getroot(), method='c14n')
4710 self.assertEqual(_bytes('<a><!--ho--><b></b></a>'),
4711 s)
4712 s = etree.tostring(tree.getroot(), method='c14n', with_comments=True)
4713 self.assertEqual(_bytes('<a><!--ho--><b></b></a>'),
4714 s)
4715 s = etree.tostring(tree.getroot(), method='c14n', with_comments=False)
4716 self.assertEqual(_bytes('<a><b></b></a>'),
4717 s)
4718
# Test body (its `def` line is not present in this extract).
# Checks namespace handling of write_c14n(). The root declares a default
# namespace plus the prefixes y and z; only z is used (by the child).
4720 tree = self.parse(_bytes(
4721 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
# Default and exclusive=False: all declarations stay on the root element.
4722 f = BytesIO()
4723 tree.write_c14n(f)
4724 s = f.getvalue()
4725 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4726 s)
4727 f = BytesIO()
4728 tree.write_c14n(f, exclusive=False)
4729 s = f.getvalue()
4730 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4731 s)
# exclusive=True: the unused prefix y disappears and z is declared on the
# element that uses it.
4732 f = BytesIO()
4733 tree.write_c14n(f, exclusive=True)
4734 s = f.getvalue()
4735 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
4736 s)
4737
# inclusive_ns_prefixes=['z']: the z declaration is expected on the root
# again, even in exclusive mode.
4738 f = BytesIO()
4739 tree.write_c14n(f, exclusive=True, inclusive_ns_prefixes=['z'])
4740 s = f.getvalue()
4741 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:z="http://cde"><z:b></z:b></a>'),
4742 s)
4743
# Test body (its `def` line is not present in this extract).
# Checks namespace handling of etree.tostring(tree, method='c14n') in
# default, exclusive=False and exclusive=True mode.
4745 tree = self.parse(_bytes(
4746 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4747 s = etree.tostring(tree, method='c14n')
4748 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4749 s)
4750 s = etree.tostring(tree, method='c14n', exclusive=False)
4751 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4752 s)
4753 s = etree.tostring(tree, method='c14n', exclusive=True)
4754 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
4755 s)
4756
# inclusive_ns_prefixes=['y']: the otherwise unused prefix y is expected to
# be kept on the root, while z is still declared on the child that uses it.
4757 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
4758 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd"><z:b xmlns:z="http://cde"></z:b></a>'),
4759 s)
4760
# Test body (its `def` line is not present in this extract).
# Checks namespace handling of etree.tostring(..., method='c14n') when an
# element is passed instead of the tree: first the root, then its child.
4762 tree = self.parse(_bytes(
4763 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4764 s = etree.tostring(tree.getroot(), method='c14n')
4765 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4766 s)
4767 s = etree.tostring(tree.getroot(), method='c14n', exclusive=False)
4768 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4769 s)
4770 s = etree.tostring(tree.getroot(), method='c14n', exclusive=True)
4771 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
4772 s)
4773
# Child element only: in inclusive mode all declarations from the parent are
# expected on it, in exclusive mode only the z declaration it uses.
4774 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=False)
4775 self.assertEqual(_bytes('<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
4776 s)
4777 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True)
4778 self.assertEqual(_bytes('<z:b xmlns:z="http://cde"></z:b>'),
4779 s)
4780
# inclusive_ns_prefixes=['y'] additionally keeps the y declaration.
4781 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
4782 self.assertEqual(_bytes('<z:b xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
4783 s)
4784
# Test body (its `def` line is not present in this extract).
# Passes three prefixes (x, y, z) as inclusive_ns_prefixes in exclusive mode
# and expects all three declarations to stay on the root element.
4786 """ Regression test to fix memory allocation issues (use 3+ inclusive NS spaces)"""
4787 tree = self.parse(_bytes(
4788 '<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4789
4790 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['x', 'y', 'z'])
4791 self.assertEqual(_bytes('<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4792 s)
4793
4794
# Test body (its `def` line is not present in this extract).
# Plain tree.write() into a BytesIO is expected to reproduce the input,
# including the self-closing <b/>.
4797 tree = self.parse(_bytes('<a><b/></a>'))
4798 f = BytesIO()
4799 tree.write(f)
4800 s = f.getvalue()
4801 self.assertEqual(_bytes('<a><b/></a>'),
4802 s)
4803
# Test body (its `def` line is not present in this extract).
# The `doctype` argument of tree.write() is expected to be emitted verbatim
# (here the arbitrary string 'HUHU') on its own line before the root element.
4805 tree = self.parse(_bytes('<a><b/></a>'))
4806 f = BytesIO()
4807 tree.write(f, doctype='HUHU')
4808 s = f.getvalue()
4809 self.assertEqual(_bytes('HUHU\n<a><b/></a>'),
4810 s)
4811
# Test body (its `def` line is not present in this extract).
# Writes with compression=9 into a BytesIO and checks that the result,
# read back through gzip.GzipFile, equals the original serialization.
4813 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4814 f = BytesIO()
4815 tree.write(f, compression=9)
4816 with gzip.GzipFile(fileobj=BytesIO(f.getvalue())) as gzfile:
4817 s = gzfile.read()
4818 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4819 s)
4820
# Test body (its `def` line is not present in this extract).
# Combines compression=9 with a doctype argument; the decompressed output is
# expected to start with the doctype line followed by the document.
4822 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4823 f = BytesIO()
4824 tree.write(f, compression=9, doctype='<!DOCTYPE a>')
4825 with gzip.GzipFile(fileobj=BytesIO(f.getvalue())) as gzfile:
4826 s = gzfile.read()
4827 self.assertEqual(_bytes('<!DOCTYPE a>\n<a>'+'<b/>'*200+'</a>'),
4828 s)
4829
# Test body (its `def` line is not present in this extract).
# Compares the output of tree.write() for several compression levels.
4831 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
# compression=0 yields the reference output s0 ...
4832 f = BytesIO()
4833 tree.write(f, compression=0)
4834 s0 = f.getvalue()
4835
# ... which must be identical to the output without a compression argument.
4836 f = BytesIO()
4837 tree.write(f)
4838 self.assertEqual(f.getvalue(), s0)
4839
# Levels 1 and 9: the written bytes must not be longer than s0 and are
# decompressed with gzip for the content comparison below.
4840 f = BytesIO()
4841 tree.write(f, compression=1)
4842 s = f.getvalue()
4843 self.assertTrue(len(s) <= len(s0))
4844 with gzip.GzipFile(fileobj=BytesIO(s)) as gzfile:
4845 s1 = gzfile.read()
4846
4847 f = BytesIO()
4848 tree.write(f, compression=9)
4849 s = f.getvalue()
4850 self.assertTrue(len(s) <= len(s0))
4851 with gzip.GzipFile(fileobj=BytesIO(s)) as gzfile:
4852 s9 = gzfile.read()
4853
# All three variants must carry the same document text.
4854 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4855 s0)
4856 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4857 s1)
4858 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4859 s9)
4860
# Test body (its `def` line is not present in this extract).
# Passes a temporary file name (from tmpfile()) to tree.write() and checks
# the bytes read back from that file.
4862 tree = self.parse(_bytes('<a><b/></a>'))
4863 with tmpfile() as filename:
4864 tree.write(filename)
4865 data = read_file(filename, 'rb')
4866 self.assertEqual(_bytes('<a><b/></a>'),
4867 data)
4868
# Test body (its `def` line is not present in this extract).
# Writes with compression=9 to a temporary file name, reads the file back
# via gzip.open() and compares the decompressed bytes.
4870 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4871 with tmpfile() as filename:
4872 tree.write(filename, compression=9)
4873 with gzip.open(filename, 'rb') as f:
4874 data = f.read()
4875 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4876 data)
4877
# Test body (its `def` line is not present in this extract).
# Writes a compressed file and hands the file name straight to
# etree.parse(); no explicit gzip step is used here, so the test relies on
# the parser reading the compressed file itself.
4879 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4880 with tmpfile() as filename:
4881 tree.write(filename, compression=9)
4882 data = etree.tostring(etree.parse(filename))
4883 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4884 data)
4885
# Test body (its `def` line is not present in this extract).
# Writes a compressed file, opens it as a gzip.GzipFile object and passes
# that file object to etree.parse().
4887 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4888 with tmpfile() as filename:
4889 tree.write(filename, compression=9)
4890 with gzip.GzipFile(filename) as f:
4891 data = etree.tostring(etree.parse(f))
4892 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4893 data)
4894
# Test body (its `def` line is not present in this extract).
# Writes to a file:// URL instead of a plain path. The temporary file name
# gets the prefix "p+%20", i.e. it contains characters ('+', '%20') that
# have a special meaning in URLs.
4896 xml = _bytes('<a>'+'<b/>'*200+'</a>')
4897 tree = self.parse(xml)
4898 with tmpfile(prefix="p+%20", suffix=".xml") as filename:
# On win32 the path gets a leading '/' and forward slashes to form the URL.
4899 url = 'file://' + (filename if sys.platform != 'win32'
4900 else '/' + filename.replace('\\', '/'))
4901 tree.write(url)
# Newlines are removed from the data read back before the comparison.
4902 data = read_file(filename, 'rb').replace(_bytes('\n'), _bytes(''))
4903 self.assertEqual(data, xml)
4904
4905
# Rebinds the imported etree module under the same name; presumably a class
# attribute of a test case whose `class` line is missing from this extract
# (the code below accesses it as self.etree) -- confirm against the original.
4907 etree = etree
4908
# Test body (its `def` line is not present in this extract).
# Parses malformed XML (mismatched </c> tag) and inspects the error log
# attached to the SyntaxError that is caught.
4910 parse = self.etree.parse
4911 f = BytesIO('<a><b></c></b></a>')
4912 self.etree.clear_error_log()
4913 try:
4914 parse(f)
4915 logs = None
4916 except SyntaxError:
4917 e = sys.exc_info()[1]
4918 logs = e.error_log
4919 f.close()
# NOTE(review): if parse() does not raise, `logs` is None and the
# comprehensions below fail with a TypeError rather than an assertion error.
# Each check passes if at least one log entry matches: a 'mismatch' message,
# the PARSER domain, the ERR_TAG_NAME_MISMATCH type, line 1 and column 15.
4920 self.assertTrue([ log for log in logs
4921 if 'mismatch' in log.message ])
4922 self.assertTrue([ log for log in logs
4923 if 'PARSER' in log.domain_name])
4924 self.assertTrue([ log for log in logs
4925 if 'ERR_TAG_NAME_MISMATCH' in log.type_name ])
4926 self.assertTrue([ log for log in logs
4927 if 1 == log.line ])
4928 self.assertTrue([ log for log in logs
4929 if 15 == log.column ])
4930
4941
# Tail of a test whose first lines are missing from this extract: `parse`,
# `messages` and the `Logger` class used below are defined in those lines.
# Installs a Logger instance via use_global_python_log() and parses
# malformed XML, ignoring the resulting SyntaxError.
4942 self.etree.use_global_python_log(Logger())
4943 f = BytesIO('<a><b></c></b></a>')
4944 try:
4945 parse(f)
4946 except SyntaxError:
4947 pass
4948 f.close()
4949
# `messages` is presumably filled by the Logger (TODO confirm). At least one
# entry must contain each of the fragments checked below: 'mismatch',
# ':PARSER:', ':ERR_TAG_NAME_MISMATCH:' and the position ':1:15:'.
4950 self.assertTrue([ message for message in messages
4951 if 'mismatch' in message ])
4952 self.assertTrue([ message for message in messages
4953 if ':PARSER:' in message])
4954 self.assertTrue([ message for message in messages
4955 if ':ERR_TAG_NAME_MISMATCH:' in message ])
4956 self.assertTrue([ message for message in messages
4957 if ':1:15:' in message ])
4958
4959
# Tail of a test whose first lines are missing from this extract, including
# the header of the `Target` class that the close() method below presumably
# belongs to (TODO confirm).
4973 def close(self):
4974 return 'close()'
4975
# No event types are passed to XMLPullParser here; judging by the
# expectations below, only 'end' events are reported in that case.
4976 parser = self.etree.XMLPullParser(target=Target())
4977 events = parser.read_events()
4978
# Events are consumed after each feed() call. The expected payloads are
# strings such as 'end(element)' -- presumably the return value of the
# target's end() method, which is not visible here.
4979 parser.feed('<root><element>')
4980 self.assertFalse(list(events))
4981 self.assertFalse(list(events))
4982 parser.feed('</element><child>')
4983 self.assertEqual([('end', 'end(element)')], list(events))
4984 parser.feed('</child>')
4985 self.assertEqual([('end', 'end(child)')], list(events))
4986 parser.feed('</root>')
4987 self.assertEqual([('end', 'end(root)')], list(events))
4988 self.assertFalse(list(events))
# parser.close() is expected to hand back the value returned by the target's
# close() method.
4989 self.assertEqual('close()', parser.close())
4990
# Tail of a test whose first lines are missing from this extract, including
# the header of the `Target` class that end()/close() below presumably
# belong to; a start() method is presumably defined there too (TODO confirm).
4995 def end(self, tag):
4996 return 'end(%s)' % tag
4997 def close(self):
4998 return 'close()'
4999
# Both 'start' and 'end' events are requested explicitly.
5000 parser = self.etree.XMLPullParser(
5001 ['start', 'end'], target=Target())
5002 events = parser.read_events()
5003
# The expected event payloads are the strings returned by the target's
# callbacks, e.g. 'end(element)' as produced by end() above.
5004 parser.feed('<root><element>')
5005 self.assertEqual(
5006 [('start', 'start(root)'), ('start', 'start(element)')],
5007 list(events))
5008 self.assertFalse(list(events))
5009 parser.feed('</element><child>')
5010 self.assertEqual(
5011 [('end', 'end(element)'), ('start', 'start(child)')],
5012 list(events))
5013 parser.feed('</child>')
5014 self.assertEqual(
5015 [('end', 'end(child)')],
5016 list(events))
5017 parser.feed('</root>')
5018 self.assertEqual(
5019 [('end', 'end(root)')],
5020 list(events))
5021 self.assertFalse(list(events))
# parser.close() is expected to hand back the target's close() result.
5022 self.assertEqual('close()', parser.close())
5023
# Test body (its `def` line is not present in this extract).
# Uses an etree.TreeBuilder as the pull parser's target and requests both
# 'start' and 'end' events.
5025 parser = self.etree.XMLPullParser(
5026 ['start', 'end'], target=etree.TreeBuilder())
5027 events = parser.read_events()
5028
# self.assert_event_tags is a helper defined outside this extract; it is
# called with the event iterator and the expected (event, tag) pairs.
5029 parser.feed('<root><element>')
5030 self.assert_event_tags(
5031 events, [('start', 'root'), ('start', 'element')])
5032 self.assertFalse(list(events))
5033 parser.feed('</element><child>')
5034 self.assert_event_tags(
5035 events, [('end', 'element'), ('start', 'child')])
5036 parser.feed('</child>')
5037 self.assert_event_tags(
5038 events, [('end', 'child')])
5039 parser.feed('</root>')
5040 self.assert_event_tags(
5041 events, [('end', 'root')])
5042 self.assertFalse(list(events))
# close() is expected to return the element built for <root>.
5043 root = parser.close()
5044 self.assertEqual('root', root.tag)
5045
# Test body (its `def` line is not present in this extract).
# TreeBuilder subclass whose end() appends '-huhu' to the tag of the element
# returned by the base class before passing it on.
5047 class Target(etree.TreeBuilder):
5048 def end(self, tag):
5049 el = super(Target, self).end(tag)
5050 el.tag += '-huhu'
5051 return el
5052
5053 parser = self.etree.XMLPullParser(
5054 ['start', 'end'], target=Target())
5055 events = parser.read_events()
5056
# 'start' events are expected with the original tag names, 'end' events with
# the renamed ones. self.assert_event_tags is defined outside this extract.
5057 parser.feed('<root><element>')
5058 self.assert_event_tags(
5059 events, [('start', 'root'), ('start', 'element')])
5060 self.assertFalse(list(events))
5061 parser.feed('</element><child>')
5062 self.assert_event_tags(
5063 events, [('end', 'element-huhu'), ('start', 'child')])
5064 parser.feed('</child>')
5065 self.assert_event_tags(
5066 events, [('end', 'child-huhu')])
5067 parser.feed('</root>')
5068 self.assert_event_tags(
5069 events, [('end', 'root-huhu')])
5070 self.assertFalse(list(events))
# The root element returned by close() carries the renamed tag as well.
5071 root = parser.close()
5072 self.assertEqual('root-huhu', root.tag)
5073
5074
# Body of the suite-building function (its `def` line is not present in this
# extract). Collects the test case classes named below into one TestSuite.
5076 suite = unittest.TestSuite()
5077 suite.addTests([unittest.makeSuite(ETreeOnlyTestCase)])
5078 suite.addTests([unittest.makeSuite(ETreeXIncludeTestCase)])
5079 suite.addTests([unittest.makeSuite(ElementIncludeTestCase)])
5080 suite.addTests([unittest.makeSuite(ETreeC14NTestCase)])
5081 suite.addTests([unittest.makeSuite(ETreeWriteTestCase)])
5082 suite.addTests([unittest.makeSuite(ETreeErrorLogTest)])
5083 suite.addTests([unittest.makeSuite(XMLPullParserTest)])
5084
5085 # add original doctests from ElementTree selftest modules
5086 from . import selftest, selftest2
5087 suite.addTests(doctest.DocTestSuite(selftest))
5088 suite.addTests(doctest.DocTestSuite(selftest2))
5089
5090 # add doctests
# Doctests from the etree module itself, followed by the documentation files
# referenced by relative path through the make_doctest helper.
5091 suite.addTests(doctest.DocTestSuite(etree))
5092 suite.addTests(
5093 [make_doctest('../../../doc/tutorial.txt')])
5094 suite.addTests(
5095 [make_doctest('../../../doc/api.txt')])
5096 suite.addTests(
5097 [make_doctest('../../../doc/FAQ.txt')])
5098 suite.addTests(
5099 [make_doctest('../../../doc/parsing.txt')])
5100 suite.addTests(
5101 [make_doctest('../../../doc/resolvers.txt')])
5102 return suite
5103
5104
# Running this module directly does not execute the tests; it only prints a
# hint to run them through test.py.
5105 if __name__ == '__main__':
5106 print('to test use test.py %s' % __file__)
5107
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Wed Feb 12 18:28:51 2020 | http://epydoc.sourceforge.net |