| Home | Trees | Indices | Help |
|
|---|
|
|
1 # -*- coding: utf-8 -*-
2
3 """
4 Tests specific to the extended etree API
5
6 Tests that apply to the general ElementTree API should go into
7 test_elementtree
8 """
9
10 from __future__ import absolute_import
11
12 from collections import OrderedDict
13 import os.path
14 import unittest
15 import copy
16 import sys
17 import re
18 import gc
19 import operator
20 import textwrap
21 import zlib
22 import gzip
23
24 from .common_imports import etree, StringIO, BytesIO, HelperTestCase
25 from .common_imports import fileInTestDir, fileUrlInTestDir, read_file, path2url, tmpfile
26 from .common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest
27 from .common_imports import canonicalize, _str, _bytes
28
# Print a banner at import time listing the lxml.etree, libxml2 and libxslt versions under test, plus the interpreter's version and encoding settings.
29 print("""
30 TESTED VERSION: %s""" % etree.__version__ + """
31 Python: %r""" % (sys.version_info,) + """
32 lxml.etree: %r""" % (etree.LXML_VERSION,) + """
33 libxml used: %r""" % (etree.LIBXML_VERSION,) + """
34 libxml compiled: %r""" % (etree.LIBXML_COMPILED_VERSION,) + """
35 libxslt used: %r""" % (etree.LIBXSLT_VERSION,) + """
36 libxslt compiled: %r""" % (etree.LIBXSLT_COMPILED_VERSION,) + """
37 FS encoding: %s""" % (sys.getfilesystemencoding(),) + """
38 Default encoding: %s""" % (sys.getdefaultencoding(),) + """
39 Max Unicode: %s""" % (sys.maxunicode,) + """
40 """)
41
42 try:
43 _unicode = unicode
44 except NameError:
45 # Python 3
46 _unicode = str
47
48
50 """Tests only for etree, not ElementTree"""
51 etree = etree
52
54 self.assertTrue(isinstance(etree.__version__, _unicode))
55 self.assertTrue(isinstance(etree.LXML_VERSION, tuple))
56 self.assertEqual(len(etree.LXML_VERSION), 4)
57 self.assertTrue(isinstance(etree.LXML_VERSION[0], int))
58 self.assertTrue(isinstance(etree.LXML_VERSION[1], int))
59 self.assertTrue(isinstance(etree.LXML_VERSION[2], int))
60 self.assertTrue(isinstance(etree.LXML_VERSION[3], int))
61 self.assertTrue(etree.__version__.startswith(
62 str(etree.LXML_VERSION[0])))
63
65 if hasattr(self.etree, '__pyx_capi__'):
66 # newer Pyrex compatible C-API
67 self.assertTrue(isinstance(self.etree.__pyx_capi__, dict))
68 self.assertTrue(len(self.etree.__pyx_capi__) > 0)
69 else:
70 # older C-API mechanism
71 self.assertTrue(hasattr(self.etree, '_import_c_api'))
72
74 import lxml
75 includes = lxml.get_include()
76 self.assertTrue(includes)
77 self.assertTrue(len(includes) >= 2)
78 self.assertTrue(os.path.join(os.path.dirname(lxml.__file__), 'includes') in includes,
79 includes)
80
82 Element = self.etree.Element
83 el = Element('name')
84 self.assertEqual(el.tag, 'name')
85 el = Element('{}name')
86 self.assertEqual(el.tag, 'name')
87
89 Element = self.etree.Element
90 el = Element('name')
91 self.assertRaises(ValueError, Element, '{}')
92 self.assertRaises(ValueError, setattr, el, 'tag', '{}')
93
94 self.assertRaises(ValueError, Element, '{test}')
95 self.assertRaises(ValueError, setattr, el, 'tag', '{test}')
96
98 Element = self.etree.Element
99 self.assertRaises(ValueError, Element, 'p:name')
100 self.assertRaises(ValueError, Element, '{test}p:name')
101
102 el = Element('name')
103 self.assertRaises(ValueError, setattr, el, 'tag', 'p:name')
104
106 Element = self.etree.Element
107 self.assertRaises(ValueError, Element, "p'name")
108 self.assertRaises(ValueError, Element, 'p"name')
109
110 self.assertRaises(ValueError, Element, "{test}p'name")
111 self.assertRaises(ValueError, Element, '{test}p"name')
112
113 el = Element('name')
114 self.assertRaises(ValueError, setattr, el, 'tag', "p'name")
115 self.assertRaises(ValueError, setattr, el, 'tag', 'p"name')
116
118 Element = self.etree.Element
119 self.assertRaises(ValueError, Element, ' name ')
120 self.assertRaises(ValueError, Element, 'na me')
121 self.assertRaises(ValueError, Element, '{test} name')
122
123 el = Element('name')
124 self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
125
127 Element = self.etree.Element
128 SubElement = self.etree.SubElement
129
130 el = Element('name')
131 self.assertRaises(ValueError, SubElement, el, '{}')
132 self.assertRaises(ValueError, SubElement, el, '{test}')
133
135 Element = self.etree.Element
136 SubElement = self.etree.SubElement
137
138 el = Element('name')
139 self.assertRaises(ValueError, SubElement, el, 'p:name')
140 self.assertRaises(ValueError, SubElement, el, '{test}p:name')
141
143 Element = self.etree.Element
144 SubElement = self.etree.SubElement
145
146 el = Element('name')
147 self.assertRaises(ValueError, SubElement, el, "p'name")
148 self.assertRaises(ValueError, SubElement, el, "{test}p'name")
149
150 self.assertRaises(ValueError, SubElement, el, 'p"name')
151 self.assertRaises(ValueError, SubElement, el, '{test}p"name')
152
154 Element = self.etree.Element
155 SubElement = self.etree.SubElement
156
157 el = Element('name')
158 self.assertRaises(ValueError, SubElement, el, ' name ')
159 self.assertRaises(ValueError, SubElement, el, 'na me')
160 self.assertRaises(ValueError, SubElement, el, '{test} name')
161
163 Element = self.etree.Element
164 SubElement = self.etree.SubElement
165
166 el = Element('name')
167 self.assertRaises(ValueError, SubElement, el, 'name', {'a b c' : 'abc'})
168 self.assertRaises(ValueError, SubElement, el, 'name', {'a' : 'a\0\n'})
169 self.assertEqual(0, len(el))
170
172 QName = self.etree.QName
173 self.assertRaises(ValueError, QName, '')
174 self.assertRaises(ValueError, QName, None)
175 self.assertRaises(ValueError, QName, None, None)
176 self.assertRaises(ValueError, QName, 'test', '')
177
179 QName = self.etree.QName
180 q = QName(None, 'TAG')
181 self.assertEqual('TAG', q)
182 self.assertEqual('TAG', q.localname)
183 self.assertEqual(None, q.namespace)
184
186 QName = self.etree.QName
187 self.assertRaises(ValueError, QName, 'p:name')
188 self.assertRaises(ValueError, QName, 'test', 'p:name')
189
191 QName = self.etree.QName
192 self.assertRaises(ValueError, QName, ' name ')
193 self.assertRaises(ValueError, QName, 'na me')
194 self.assertRaises(ValueError, QName, 'test', ' name')
195
197 # ET doesn't have namespace/localname properties on QNames
198 QName = self.etree.QName
199 namespace, localname = 'http://myns', 'a'
200 qname = QName(namespace, localname)
201 self.assertEqual(namespace, qname.namespace)
202 self.assertEqual(localname, qname.localname)
203
205 # ET doesn't have namespace/localname properties on QNames
206 QName = self.etree.QName
207 qname1 = QName('http://myns', 'a')
208 a = self.etree.Element(qname1, nsmap={'p' : 'http://myns'})
209
210 qname2 = QName(a)
211 self.assertEqual(a.tag, qname1.text)
212 self.assertEqual(a.tag, qname1)
213 self.assertEqual(qname1.text, qname2.text)
214 self.assertEqual(qname1, qname2.text)
215 self.assertEqual(qname1.text, qname2)
216 self.assertEqual(qname1, qname2)
217
219 # ET doesn't resolve QNames as text values
220 etree = self.etree
221 qname = etree.QName('http://myns', 'a')
222 a = etree.Element(qname, nsmap={'p' : 'http://myns'})
223 a.text = qname
224
225 self.assertEqual("p:a", a.text)
226
228 etree = self.etree
229 self.assertRaises(ValueError,
230 etree.Element, "root", nsmap={'"' : 'testns'})
231 self.assertRaises(ValueError,
232 etree.Element, "root", nsmap={'&' : 'testns'})
233 self.assertRaises(ValueError,
234 etree.Element, "root", nsmap={'a:b' : 'testns'})
235
237 XML = self.etree.XML
238 tostring = self.etree.tostring
239 a = XML('<a aa="A"><b ba="B">B1</b>B2<c ca="C">C1</c>C2</a>')
240 a[0].clear(keep_tail=True)
241 self.assertEqual(_bytes('<a aa="A"><b/>B2<c ca="C">C1</c>C2</a>'), tostring(a))
242
244 # ET in Py 3.x has no "attrib.has_key()" method
245 XML = self.etree.XML
246
247 root = XML(_bytes('<foo bar="Bar" xmlns:ns="http://ns.codespeak.net/test" ns:baz="Baz" />'))
248 self.assertEqual(
249 True, root.attrib.has_key('bar'))
250 self.assertEqual(
251 False, root.attrib.has_key('baz'))
252 self.assertEqual(
253 False, root.attrib.has_key('hah'))
254 self.assertEqual(
255 True,
256 root.attrib.has_key('{http://ns.codespeak.net/test}baz'))
257
259 Element = self.etree.Element
260 root = Element("root")
261 root.set("attr", "TEST")
262 self.assertEqual("TEST", root.get("attr"))
263
265 # ElementTree accepts arbitrary attribute values
266 # lxml.etree allows only strings
267 Element = self.etree.Element
268
269 root = Element("root")
270 root.set("attr", "TEST")
271 self.assertEqual("TEST", root.get("attr"))
272 self.assertRaises(TypeError, root.set, "newattr", 5)
273
275 Element = self.etree.Element
276
277 root = Element("root")
278 root.set("attr", "TEST")
279 self.assertEqual("TEST", root.attrib["attr"])
280
281 root2 = Element("root2", root.attrib, attr2='TOAST')
282 self.assertEqual("TEST", root2.attrib["attr"])
283 self.assertEqual("TOAST", root2.attrib["attr2"])
284 self.assertEqual(None, root.attrib.get("attr2"))
285
287 Element = self.etree.Element
288
289 keys = ["attr%d" % i for i in range(12, 4, -1)]
290 values = ["TEST-%d" % i for i in range(12, 4, -1)]
291 items = list(zip(keys, values))
292
293 root = Element("root")
294 for key, value in items:
295 root.set(key, value)
296 self.assertEqual(keys, root.attrib.keys())
297 self.assertEqual(values, root.attrib.values())
298
299 attr_order = [
300 ('attr_99', 'TOAST-1'),
301 ('attr_98', 'TOAST-2'),
302 ]
303 ordered_dict_types = [OrderedDict, lambda x:x]
304 if sys.version_info >= (3, 6):
305 ordered_dict_types.append(dict)
306 else:
307 # Keyword arguments are not ordered in Py<3.6, and thus get sorted.
308 attr_order.sort()
309 attr_order += items
310 expected_keys = [attr[0] for attr in attr_order]
311 expected_values = [attr[1] for attr in attr_order]
312 expected_items = list(zip(expected_keys, expected_values))
313
314 for dict_type in ordered_dict_types:
315 root2 = Element("root2", dict_type(root.attrib),
316 attr_99='TOAST-1', attr_98='TOAST-2')
317
318 try:
319 self.assertSequenceEqual(expected_keys, root2.attrib.keys())
320 self.assertSequenceEqual(expected_values, root2.attrib.values())
321 self.assertSequenceEqual(expected_items, root2.attrib.items())
322 except AssertionError as exc:
323 exc.args = ("Order of '%s': %s" % (dict_type.__name__, exc.args[0]),) + exc.args[1:]
324 raise
325
326 self.assertEqual(keys, root.attrib.keys())
327 self.assertEqual(values, root.attrib.values())
328
330 # ElementTree accepts arbitrary attribute values
331 # lxml.etree allows only strings, or None for (html5) boolean attributes
332 Element = self.etree.Element
333 root = Element("root")
334 self.assertRaises(TypeError, root.set, "newattr", 5)
335 self.assertRaises(TypeError, root.set, "newattr", object)
336 self.assertRaises(TypeError, root.set, "newattr", None)
337 self.assertRaises(TypeError, root.set, "newattr")
338
340 XML = self.etree.XML
341 xml = _bytes('<test a="5" b="10" c="20"><x a="4" b="2"/></test>')
342
343 root = XML(xml)
344 self.etree.strip_attributes(root, 'a')
345 self.assertEqual(_bytes('<test b="10" c="20"><x b="2"></x></test>'),
346 self._writeElement(root))
347
348 root = XML(xml)
349 self.etree.strip_attributes(root, 'b', 'c')
350 self.assertEqual(_bytes('<test a="5"><x a="4"></x></test>'),
351 self._writeElement(root))
352
354 XML = self.etree.XML
355 xml = _bytes('<test xmlns:n="http://test/ns" a="6" b="10" c="20" n:a="5"><x a="4" n:b="2"/></test>')
356
357 root = XML(xml)
358 self.etree.strip_attributes(root, 'a')
359 self.assertEqual(
360 _bytes('<test xmlns:n="http://test/ns" b="10" c="20" n:a="5"><x n:b="2"></x></test>'),
361 self._writeElement(root))
362
363 root = XML(xml)
364 self.etree.strip_attributes(root, '{http://test/ns}a', 'c')
365 self.assertEqual(
366 _bytes('<test xmlns:n="http://test/ns" a="6" b="10"><x a="4" n:b="2"></x></test>'),
367 self._writeElement(root))
368
369 root = XML(xml)
370 self.etree.strip_attributes(root, '{http://test/ns}*')
371 self.assertEqual(
372 _bytes('<test xmlns:n="http://test/ns" a="6" b="10" c="20"><x a="4"></x></test>'),
373 self._writeElement(root))
374
376 XML = self.etree.XML
377 xml = _bytes('<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>')
378
379 root = XML(xml)
380 self.etree.strip_elements(root, 'a')
381 self.assertEqual(_bytes('<test><x></x></test>'),
382 self._writeElement(root))
383
384 root = XML(xml)
385 self.etree.strip_elements(root, 'b', 'c', 'X', 'Y', 'Z')
386 self.assertEqual(_bytes('<test><a></a><x><a></a></x></test>'),
387 self._writeElement(root))
388
389 root = XML(xml)
390 self.etree.strip_elements(root, 'c')
391 self.assertEqual(_bytes('<test><a><b></b></a><x><a><b></b></a></x></test>'),
392 self._writeElement(root))
393
395 XML = self.etree.XML
396 xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
397
398 root = XML(xml)
399 self.etree.strip_elements(root, 'a')
400 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>'),
401 self._writeElement(root))
402
403 root = XML(xml)
404 self.etree.strip_elements(root, '{urn:a}b', 'c')
405 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
406 self._writeElement(root))
407
408 root = XML(xml)
409 self.etree.strip_elements(root, '{urn:a}*', 'c')
410 self.assertEqual(_bytes('<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
411 self._writeElement(root))
412
413 root = XML(xml)
414 self.etree.strip_elements(root, '{urn:a}*', 'c', with_tail=False)
415 self.assertEqual(_bytes('<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
416 self._writeElement(root))
417
436
462
489
516
535
548
550 # lxml.etree separates target and text
551 Element = self.etree.Element
552 SubElement = self.etree.SubElement
553 ProcessingInstruction = self.etree.ProcessingInstruction
554
555 a = Element('a')
556 a.append(ProcessingInstruction('foo', 'some more text'))
557 self.assertEqual(a[0].target, 'foo')
558 self.assertEqual(a[0].text, 'some more text')
559
561 XML = self.etree.XML
562 root = XML(_bytes("<test><?mypi my test ?></test>"))
563 self.assertEqual(root[0].target, "mypi")
564 self.assertEqual(root[0].text, "my test ")
565
567 XML = self.etree.XML
568 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
569 self.assertEqual(root[0].target, "mypi")
570 self.assertEqual(root[0].get('my'), "1")
571 self.assertEqual(root[0].get('test'), " abc ")
572 self.assertEqual(root[0].get('quotes'), "' '")
573 self.assertEqual(root[0].get('only'), None)
574 self.assertEqual(root[0].get('names'), None)
575 self.assertEqual(root[0].get('nope'), None)
576
578 XML = self.etree.XML
579 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
580 self.assertEqual(root[0].target, "mypi")
581 self.assertEqual(root[0].attrib['my'], "1")
582 self.assertEqual(root[0].attrib['test'], " abc ")
583 self.assertEqual(root[0].attrib['quotes'], "' '")
584 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only')
585 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names')
586 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope')
587
589 # previously caused a crash
590 ProcessingInstruction = self.etree.ProcessingInstruction
591
592 a = ProcessingInstruction("PI", "ONE")
593 b = copy.deepcopy(a)
594 b.text = "ANOTHER"
595
596 self.assertEqual('ONE', a.text)
597 self.assertEqual('ANOTHER', b.text)
598
600 XML = self.etree.XML
601 tostring = self.etree.tostring
602 root = XML(_bytes("<?mypi my test ?><test/><!--comment -->"))
603 tree1 = self.etree.ElementTree(root)
604 self.assertEqual(_bytes("<?mypi my test ?><test/><!--comment -->"),
605 tostring(tree1))
606
607 tree2 = copy.deepcopy(tree1)
608 self.assertEqual(_bytes("<?mypi my test ?><test/><!--comment -->"),
609 tostring(tree2))
610
611 root2 = copy.deepcopy(tree1.getroot())
612 self.assertEqual(_bytes("<test/>"),
613 tostring(root2))
614
616 XML = self.etree.XML
617 tostring = self.etree.tostring
618 xml = _bytes('<!DOCTYPE test [\n<!ENTITY entity "tasty">\n]>\n<test/>')
619 root = XML(xml)
620 tree1 = self.etree.ElementTree(root)
621 self.assertEqual(xml, tostring(tree1))
622
623 tree2 = copy.deepcopy(tree1)
624 self.assertEqual(xml, tostring(tree2))
625
626 root2 = copy.deepcopy(tree1.getroot())
627 self.assertEqual(_bytes("<test/>"),
628 tostring(root2))
629
631 XML = self.etree.XML
632 tostring = self.etree.tostring
633 xml = _bytes('<!-- comment --><!DOCTYPE test [\n<!ENTITY entity "tasty">\n]>\n<test/>')
634 root = XML(xml)
635 tree1 = self.etree.ElementTree(root)
636 self.assertEqual(xml, tostring(tree1))
637
638 tree2 = copy.deepcopy(tree1)
639 self.assertEqual(xml, tostring(tree2))
640
642 fromstring = self.etree.fromstring
643 tostring = self.etree.tostring
644 XMLParser = self.etree.XMLParser
645
646 xml = _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
647 parser = XMLParser(remove_comments=True)
648 root = fromstring(xml, parser)
649 self.assertEqual(
650 _bytes('<a><b><c/></b></a>'),
651 tostring(root))
652
654 parse = self.etree.parse
655 tostring = self.etree.tostring
656 XMLParser = self.etree.XMLParser
657
658 xml = _bytes('<?test?><a><?A?><b><?B?><c/></b><?C?></a><?tail?>')
659
660 f = BytesIO(xml)
661 tree = parse(f)
662 self.assertEqual(
663 xml,
664 tostring(tree))
665
666 parser = XMLParser(remove_pis=True)
667 tree = parse(f, parser)
668 self.assertEqual(
669 _bytes('<a><b><c/></b></a>'),
670 tostring(tree))
671
673 # ET raises IOError only
674 parse = self.etree.parse
675 self.assertRaises(TypeError, parse, 'notthere.xml', object())
676
678 iterparse = self.etree.iterparse
679 f = BytesIO('<a><b><d/></b><c/></a>')
680
681 counts = []
682 for event, elem in iterparse(f):
683 counts.append(len(list(elem.getiterator())))
684 self.assertEqual(
685 [1,2,1,4],
686 counts)
687
689 # ET removes comments
690 iterparse = self.etree.iterparse
691 tostring = self.etree.tostring
692
693 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
694 events = list(iterparse(f))
695 root = events[-1][1]
696 self.assertEqual(3, len(events))
697 self.assertEqual(
698 _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
699 tostring(root))
700
702 # ET removes comments
703 iterparse = self.etree.iterparse
704 tostring = self.etree.tostring
705
def name(event, el):
    """Return a short label for an iterparse event.

    For 'comment' events the label is the node's text; for every other
    event it is the node's tag.
    """
    return el.text if event == 'comment' else el.tag
711
712 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
713 events = list(iterparse(f, events=('end', 'comment')))
714 root = events[-1][1]
715 self.assertEqual(6, len(events))
716 self.assertEqual(['A', ' B ', 'c', 'b', 'C', 'a'],
717 [ name(*item) for item in events ])
718 self.assertEqual(
719 _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
720 tostring(root))
721
723 # ET removes pis
724 iterparse = self.etree.iterparse
725 tostring = self.etree.tostring
726 ElementTree = self.etree.ElementTree
727
def name(event, el):
    """Return a short label for an iterparse event.

    For 'pi' events the label is the ``(target, text)`` pair of the
    processing instruction; for every other event it is the node's tag.
    """
    if event != 'pi':
        return el.tag
    return el.target, el.text
733
734 f = BytesIO('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>')
735 events = list(iterparse(f, events=('end', 'pi')))
736 root = events[-2][1]
737 self.assertEqual(8, len(events))
738 self.assertEqual([('pia','a'), ('pib','b'), ('pic','c'), 'c', 'b',
739 ('pid','d'), 'a', ('pie','e')],
740 [ name(*item) for item in events ])
741 self.assertEqual(
742 _bytes('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>'),
743 tostring(ElementTree(root)))
744
746 iterparse = self.etree.iterparse
747 tostring = self.etree.tostring
748
749 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
750 events = list(iterparse(f, remove_comments=True,
751 events=('end', 'comment')))
752 root = events[-1][1]
753 self.assertEqual(3, len(events))
754 self.assertEqual(['c', 'b', 'a'],
755 [ el.tag for (event, el) in events ])
756 self.assertEqual(
757 _bytes('<a><b><c/></b></a>'),
758 tostring(root))
759
761 iterparse = self.etree.iterparse
762 f = BytesIO('<a><b><c/></a>')
763 # ET raises ExpatError, lxml raises XMLSyntaxError
764 self.assertRaises(self.etree.XMLSyntaxError, list, iterparse(f))
765
767 iterparse = self.etree.iterparse
768 f = BytesIO('<a><b><c/></a>')
769 it = iterparse(f, events=('start', 'end'), recover=True)
770 events = [(ev, el.tag) for ev, el in it]
771 root = it.root
772 self.assertTrue(root is not None)
773
774 self.assertEqual(1, events.count(('start', 'a')))
775 self.assertEqual(1, events.count(('end', 'a')))
776
777 self.assertEqual(1, events.count(('start', 'b')))
778 self.assertEqual(1, events.count(('end', 'b')))
779
780 self.assertEqual(1, events.count(('start', 'c')))
781 self.assertEqual(1, events.count(('end', 'c')))
782
784 iterparse = self.etree.iterparse
785 f = BytesIO('<a><b><c/></d><b><c/></a></b>')
786 it = iterparse(f, events=('start', 'end'), recover=True)
787 events = [(ev, el.tag) for ev, el in it]
788 root = it.root
789 self.assertTrue(root is not None)
790
791 self.assertEqual(1, events.count(('start', 'a')))
792 self.assertEqual(1, events.count(('end', 'a')))
793
794 self.assertEqual(2, events.count(('start', 'b')))
795 self.assertEqual(2, events.count(('end', 'b')))
796
797 self.assertEqual(2, events.count(('start', 'c')))
798 self.assertEqual(2, events.count(('end', 'c')))
799
801 iterparse = self.etree.iterparse
802 f = BytesIO("""
803 <a> \n \n <b> b test </b> \n
804
805 \n\t <c> \n </c> </a> \n """)
806 iterator = iterparse(f, remove_blank_text=True)
807 text = [ (element.text, element.tail)
808 for event, element in iterator ]
809 self.assertEqual(
810 [(" b test ", None), (" \n ", None), (None, None)],
811 text)
812
814 iterparse = self.etree.iterparse
815 f = BytesIO('<a><b><d/></b><c/></a>')
816
817 iterator = iterparse(f, tag="b", events=('start', 'end'))
818 events = list(iterator)
819 root = iterator.root
820 self.assertEqual(
821 [('start', root[0]), ('end', root[0])],
822 events)
823
825 iterparse = self.etree.iterparse
826 f = BytesIO('<a><b><d/></b><c/></a>')
827
828 iterator = iterparse(f, tag="*", events=('start', 'end'))
829 events = list(iterator)
830 self.assertEqual(
831 8,
832 len(events))
833
835 iterparse = self.etree.iterparse
836 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
837
838 iterator = iterparse(f, tag="{urn:test:1}b", events=('start', 'end'))
839 events = list(iterator)
840 root = iterator.root
841 self.assertEqual(
842 [('start', root[0]), ('end', root[0])],
843 events)
844
846 iterparse = self.etree.iterparse
847 f = BytesIO('<a><b><d/></b><c/></a>')
848 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
849 events = list(iterator)
850 root = iterator.root
851 self.assertEqual(
852 [('start', root[0]), ('end', root[0])],
853 events)
854
855 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
856 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
857 events = list(iterator)
858 root = iterator.root
859 self.assertEqual([], events)
860
862 iterparse = self.etree.iterparse
863 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
864 iterator = iterparse(f, tag="{urn:test:1}*", events=('start', 'end'))
865 events = list(iterator)
866 self.assertEqual(8, len(events))
867
869 iterparse = self.etree.iterparse
870 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
871 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
872 events = list(iterator)
873 self.assertEqual([], events)
874
875 f = BytesIO('<a><b><d/></b><c/></a>')
876 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
877 events = list(iterator)
878 self.assertEqual(8, len(events))
879
881 text = _str('Søk på nettet')
882 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
883 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
884 ).encode('iso-8859-1')
885
886 self.assertRaises(self.etree.ParseError,
887 list, self.etree.iterparse(BytesIO(xml_latin1)))
888
890 text = _str('Søk på nettet', encoding="UTF-8")
891 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
892 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
893 ).encode('iso-8859-1')
894
895 iterator = self.etree.iterparse(BytesIO(xml_latin1),
896 encoding="iso-8859-1")
897 self.assertEqual(1, len(list(iterator)))
898
899 a = iterator.root
900 self.assertEqual(a.text, text)
901
903 tostring = self.etree.tostring
904 f = BytesIO('<root><![CDATA[test]]></root>')
905 context = self.etree.iterparse(f, strip_cdata=False)
906 content = [ el.text for event,el in context ]
907
908 self.assertEqual(['test'], content)
909 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
910 tostring(context.root))
911
915
917 self.etree.XMLParser(encoding="ascii")
918 self.etree.XMLParser(encoding="utf-8")
919 self.etree.XMLParser(encoding="iso-8859-1")
920
922 parser = self.etree.XMLParser(recover=True)
923
924 parser.feed('<?xml version=')
925 parser.feed('"1.0"?><ro')
926 parser.feed('ot><')
927 parser.feed('a test="works"')
928 parser.feed('><othertag/></root') # <a> not closed!
929 parser.feed('>')
930
931 root = parser.close()
932
933 self.assertEqual(root.tag, "root")
934 self.assertEqual(len(root), 1)
935 self.assertEqual(root[0].tag, "a")
936 self.assertEqual(root[0].get("test"), "works")
937 self.assertEqual(len(root[0]), 1)
938 self.assertEqual(root[0][0].tag, "othertag")
939 # FIXME: would be nice to get some errors logged ...
940 #self.assertTrue(len(parser.error_log) > 0, "error log is empty")
941
943 # test that recover mode plays nicely with the no-id-dict setup
944 parser = self.etree.XMLParser(recover=True, collect_ids=False)
945
946 parser.feed('<?xml version=')
947 parser.feed('"1.0"?><ro')
948 parser.feed('ot xml:id="123"><')
949 parser.feed('a test="works" xml:id=')
950 parser.feed('"321"><othertag/></root') # <a> not closed!
951 parser.feed('>')
952
953 root = parser.close()
954
955 self.assertEqual(root.tag, "root")
956 self.assertEqual(len(root), 1)
957 self.assertEqual(root[0].tag, "a")
958 self.assertEqual(root[0].get("test"), "works")
959 self.assertEqual(root[0].attrib, {
960 'test': 'works',
961 '{http://www.w3.org/XML/1998/namespace}id': '321'})
962 self.assertEqual(len(root[0]), 1)
963 self.assertEqual(root[0][0].tag, "othertag")
964 # FIXME: would be nice to get some errors logged ...
965 #self.assertTrue(len(parser.error_log) > 0, "error log is empty")
966
968 assertEqual = self.assertEqual
969 assertFalse = self.assertFalse
970
971 events = []
class Target(object):
    """Parser target that checks each callback and closes with a string.

    Every callback is logged in the enclosing ``events`` list and verified
    with the enclosing test's ``assertEqual`` / ``assertFalse`` helpers.
    """

    def start(self, tag, attrib):
        # Log first, then verify: the element must be <TAG> with no attributes.
        events.append("start")
        assertFalse(attrib)
        assertEqual("TAG", tag)

    def end(self, tag):
        events.append("end")
        assertEqual("TAG", tag)

    def close(self):
        # Deliberately a plain string rather than an Element.
        return "DONE"
982
983 parser = self.etree.XMLParser(target=Target())
984 tree = self.etree.ElementTree()
985
986 self.assertRaises(TypeError,
987 tree.parse, BytesIO("<TAG/>"), parser=parser)
988 self.assertEqual(["start", "end"], events)
989
991 # ET doesn't call .close() on errors
992 events = []
class Target(object):
    """Event-recording parser target whose ``end()`` fails on tag 'a'.

    Each callback appends a "<kind>-<value>" label to the enclosing
    ``events`` list; ``close()`` appends "close".
    """

    def start(self, tag, attrib):
        events.append("-".join(("start", tag)))

    def end(self, tag):
        # The event is logged before the error is raised.
        events.append("-".join(("end", tag)))
        if tag != 'a':
            return
        raise ValueError("dead and gone")

    def data(self, data):
        events.append("-".join(("data", data)))

    def close(self):
        events.append("close")
        return "DONE"
1005
1006 parser = self.etree.XMLParser(target=Target())
1007
1008 try:
1009 parser.feed(_bytes('<root>A<a>ca</a>B</root>'))
1010 done = parser.close()
1011 self.fail("error expected, but parsing succeeded")
1012 except ValueError:
1013 done = 'value error received as expected'
1014
1015 self.assertEqual(["start-root", "data-A", "start-a",
1016 "data-ca", "end-a", "close"],
1017 events)
1018
1020 # ET doesn't call .close() on errors
1021 events = []
class Target(object):
    """Event-recording parser target whose ``end()`` fails on tag 'a'.

    Each callback appends a "<kind>-<value>" label to the enclosing
    ``events`` list; ``close()`` appends "close".
    """

    def start(self, tag, attrib):
        events.append("-".join(("start", tag)))

    def end(self, tag):
        # The event is logged before the error is raised.
        events.append("-".join(("end", tag)))
        if tag != 'a':
            return
        raise ValueError("dead and gone")

    def data(self, data):
        events.append("-".join(("data", data)))

    def close(self):
        events.append("close")
        return "DONE"
1034
1035 parser = self.etree.XMLParser(target=Target())
1036
1037 try:
1038 done = self.etree.fromstring(_bytes('<root>A<a>ca</a>B</root>'),
1039 parser=parser)
1040 self.fail("error expected, but parsing succeeded")
1041 except ValueError:
1042 done = 'value error received as expected'
1043
1044 self.assertEqual(["start-root", "data-A", "start-a",
1045 "data-ca", "end-a", "close"],
1046 events)
1047
1049 # test that target parsing works nicely with the no-id-hash setup
1050 events = []
class Target(object):
    """Parser target recording start/end/data/comment callbacks.

    Each callback appends a "<kind>-<value>" label to the enclosing
    ``events`` list; ``close()`` records nothing.
    """

    def start(self, tag, attrib):
        events.append("-".join(("start", tag)))

    def end(self, tag):
        events.append("-".join(("end", tag)))

    def data(self, data):
        events.append("-".join(("data", data)))

    def comment(self, text):
        events.append("-".join(("comment", text)))

    def close(self):
        return "DONE"
1062
1063 parser = self.etree.XMLParser(target=Target(), collect_ids=False)
1064
1065 parser.feed(_bytes('<!--a--><root xml:id="123">A<!--b-->'))
1066 parser.feed(_bytes('<sub xml:id="321"/>B</root>'))
1067 done = parser.close()
1068
1069 self.assertEqual("DONE", done)
1070 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1071 "start-sub", "end-sub", "data-B", "end-root"],
1072 events)
1073
1075 events = []
class Target(object):
    """Parser target recording start/end/data/comment callbacks.

    Each callback appends a "<kind>-<value>" label to the enclosing
    ``events`` list; ``close()`` records nothing.
    """

    def start(self, tag, attrib):
        events.append("-".join(("start", tag)))

    def end(self, tag):
        events.append("-".join(("end", tag)))

    def data(self, data):
        events.append("-".join(("data", data)))

    def comment(self, text):
        events.append("-".join(("comment", text)))

    def close(self):
        return "DONE"
1087
1088 parser = self.etree.XMLParser(target=Target())
1089
1090 parser.feed(_bytes('<!--a--><root>A<!--b--><sub/><!--c-->B</root><!--d-->'))
1091 done = parser.close()
1092
1093 self.assertEqual("DONE", done)
1094 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1095 "start-sub", "end-sub", "comment-c", "data-B",
1096 "end-root", "comment-d"],
1097 events)
1098
1100 events = []
class Target(object):
    """Parser target recording start/end/data/pi callbacks.

    Each callback appends a label to the enclosing ``events`` list;
    processing instructions are recorded as "pi-<target>-<data>".
    """

    def start(self, tag, attrib):
        events.append("-".join(("start", tag)))

    def end(self, tag):
        events.append("-".join(("end", tag)))

    def data(self, data):
        events.append("-".join(("data", data)))

    def pi(self, target, data):
        events.append("-".join(("pi", target, data)))

    def close(self):
        return "DONE"
1112
1113 parser = self.etree.XMLParser(target=Target())
1114
1115 parser.feed(_bytes('<?test a?><root>A<?test b?>B</root><?test c?>'))
1116 done = parser.close()
1117
1118 self.assertEqual("DONE", done)
1119 self.assertEqual(["pi-test-a", "start-root", "data-A", "pi-test-b",
1120 "data-B", "end-root", "pi-test-c"],
1121 events)
1122
1124 events = []
class Target(object):
    """Parser target recording start/end/data callbacks.

    Each callback appends a "<kind>-<value>" label to the enclosing
    ``events`` list; ``close()`` records nothing.
    """

    def start(self, tag, attrib):
        events.append("-".join(("start", tag)))

    def end(self, tag):
        events.append("-".join(("end", tag)))

    def data(self, data):
        events.append("-".join(("data", data)))

    def close(self):
        return "DONE"
1134
1135 parser = self.etree.XMLParser(target=Target(),
1136 strip_cdata=False)
1137
1138 parser.feed(_bytes('<root>A<a><![CDATA[ca]]></a>B</root>'))
1139 done = parser.close()
1140
1141 self.assertEqual("DONE", done)
1142 self.assertEqual(["start-root", "data-A", "start-a",
1143 "data-ca", "end-a", "data-B", "end-root"],
1144 events)
1145
1147 events = []
class Target(object):
    """Parser target recording start/end/data callbacks and the close call.

    Each callback appends a "<kind>-<value>" label to the enclosing
    ``events`` list; ``close()`` appends "close".
    """

    def start(self, tag, attrib):
        events.append("-".join(("start", tag)))

    def end(self, tag):
        events.append("-".join(("end", tag)))

    def data(self, data):
        events.append("-".join(("data", data)))

    def close(self):
        events.append("close")
        return "DONE"
1158
1159 parser = self.etree.XMLParser(target=Target(),
1160 recover=True)
1161
1162 parser.feed(_bytes('<root>A<a>ca</a>B</not-root>'))
1163 done = parser.close()
1164
1165 self.assertEqual("DONE", done)
1166 self.assertEqual(["start-root", "data-A", "start-a",
1167 "data-ca", "end-a", "data-B",
1168 "end-root", "close"],
1169 events)
1170
1172 iterwalk = self.etree.iterwalk
1173 root = self.etree.XML(_bytes('<a><b><d/></b><c/></a>'))
1174
1175 iterator = iterwalk(root, tag="b", events=('start', 'end'))
1176 events = list(iterator)
1177 self.assertEqual(
1178 [('start', root[0]), ('end', root[0])],
1179 events)
1180
1182 iterwalk = self.etree.iterwalk
1183 root = self.etree.XML(_bytes('<a><b><d/></b><c/></a>'))
1184
1185 iterator = iterwalk(root, tag="*", events=('start', 'end'))
1186 events = list(iterator)
1187 self.assertEqual(
1188 8,
1189 len(events))
1190
1192 iterwalk = self.etree.iterwalk
1193 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1194
1195 events = list(iterwalk(root))
1196 self.assertEqual(
1197 [('end', root[0]), ('end', root[1]), ('end', root)],
1198 events)
1199
1201 iterwalk = self.etree.iterwalk
1202 root = self.etree.XML(
1203 b'<!--C0--><a><!--Ca--><b><!--Cb--></b><!--Cc--><c/></a><!--C99-->')
1204
1205 iterator = iterwalk(root, events=('start', 'end', 'comment'))
1206 events = list(iterator)
1207 self.assertEqual(
1208 [('start', root), ('comment', root[0]),
1209 ('start', root[1]), ('comment', root[1][0]), ('end', root[1]),
1210 ('comment', root[2]), ('start', root[3]), ('end', root[3]),
1211 ('end', root),
1212 ],
1213 events)
1214
1216 iterwalk = self.etree.iterwalk
1217 root = self.etree.XML(
1218 b'<!--C0--><a><!--Ca--><b><!--Cb--></b><!--Cc--><c/></a><!--C99-->')
1219
1220 iterator = iterwalk(self.etree.ElementTree(root), events=('start', 'end', 'comment'))
1221 events = list(iterator)
1222 self.assertEqual(
1223 [('comment', root.getprevious()),
1224 ('start', root), ('comment', root[0]), # <a>
1225 ('start', root[1]), ('comment', root[1][0]), ('end', root[1]), # <b>
1226 ('comment', root[2]), ('start', root[3]), ('end', root[3]), # <c>
1227 ('end', root), ('comment', root.getnext()),
1228 ],
1229 events)
1230
1232 iterwalk = self.etree.iterwalk
1233 root = self.etree.XML(
1234 b'<?C0?><a><?Ca?><b><?Cb?></b><?Cc?><c/></a><?C99?>')
1235
1236 iterator = iterwalk(root, events=('start', 'end', 'pi'))
1237 events = list(iterator)
1238 self.assertEqual(
1239 [('start', root), ('pi', root[0]),
1240 ('start', root[1]), ('pi', root[1][0]), ('end', root[1]),
1241 ('pi', root[2]), ('start', root[3]), ('end', root[3]),
1242 ('end', root),
1243 ],
1244 events)
1245
1247 iterwalk = self.etree.iterwalk
1248 root = self.etree.XML(
1249 b'<?C0?><a><?Ca?><b><?Cb?></b><?Cc?><c/></a><?C99?>')
1250
1251 iterator = iterwalk(self.etree.ElementTree(root), events=('start', 'end', 'pi'))
1252 events = list(iterator)
1253 self.assertEqual(
1254 [('pi', root.getprevious()),
1255 ('start', root), ('pi', root[0]), # <a>
1256 ('start', root[1]), ('pi', root[1][0]), ('end', root[1]), # <b>
1257 ('pi', root[2]), ('start', root[3]), ('end', root[3]), # <c>
1258 ('end', root), ('pi', root.getnext()),
1259 ],
1260 events)
1261
1263 iterwalk = self.etree.iterwalk
1264 root = self.etree.XML(
1265 b'<!--C0--><?C0?><!--C1--><a><?Ca?><b><!--Cb--></b><?Cc?><c/></a><!--C99--><?C99?>')
1266
1267 iterator = iterwalk(self.etree.ElementTree(root), events=('start', 'end', 'pi', 'comment'))
1268 events = list(iterator)
1269 self.assertEqual(
1270 [('comment', root.getprevious().getprevious().getprevious()),
1271 ('pi', root.getprevious().getprevious()),
1272 ('comment', root.getprevious()),
1273 ('start', root), ('pi', root[0]), # <a>
1274 ('start', root[1]), ('comment', root[1][0]), ('end', root[1]), # <b>
1275 ('pi', root[2]), ('start', root[3]), ('end', root[3]), # <c>
1276 ('end', root), ('comment', root.getnext()), ('pi', root.getnext().getnext()),
1277 ],
1278 events)
1279
1281 iterwalk = self.etree.iterwalk
1282 root = self.etree.XML(
1283 b'<!--C0--><?C0?><!--C1--><a><?Ca?><b><!--Cb--></b><?Cc?><c/></a><!--C99--><?C99?>')
1284
1285 iterator = iterwalk(self.etree.ElementTree(root), events=('start', 'end'))
1286 events = list(iterator)
1287 self.assertEqual(
1288 [('start', root), # <a>
1289 ('start', root[1]), ('end', root[1]), # <b>
1290 ('start', root[3]), ('end', root[3]), # <c>
1291 ('end', root),
1292 ],
1293 events)
1294
1296 iterwalk = self.etree.iterwalk
1297 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1298
1299 iterator = iterwalk(root, events=('start',))
1300 events = list(iterator)
1301 self.assertEqual(
1302 [('start', root), ('start', root[0]), ('start', root[1])],
1303 events)
1304
1306 iterwalk = self.etree.iterwalk
1307 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1308
1309 iterator = iterwalk(root, events=('start','end'))
1310 events = list(iterator)
1311 self.assertEqual(
1312 [('start', root), ('start', root[0]), ('end', root[0]),
1313 ('start', root[1]), ('end', root[1]), ('end', root)],
1314 events)
1315
1325
1335
1349
1351 iterwalk = self.etree.iterwalk
1352 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1353
1354 iterator = iterwalk(root)
1355 for event, elem in iterator:
1356 elem.clear()
1357
1358 self.assertEqual(0,
1359 len(root))
1360
1362 iterwalk = self.etree.iterwalk
1363 root = self.etree.XML(_bytes('<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>'))
1364
1365 attr_name = '{testns}bla'
1366 events = []
1367 iterator = iterwalk(root, events=('start','end','start-ns','end-ns'))
1368 for event, elem in iterator:
1369 events.append(event)
1370 if event == 'start':
1371 if elem.tag != '{ns1}a':
1372 elem.set(attr_name, 'value')
1373
1374 self.assertEqual(
1375 ['start-ns', 'start', 'start', 'start-ns', 'start',
1376 'end', 'end-ns', 'end', 'end', 'end-ns'],
1377 events)
1378
1379 self.assertEqual(
1380 None,
1381 root.get(attr_name))
1382 self.assertEqual(
1383 'value',
1384 root[0].get(attr_name))
1385
1387 iterwalk = self.etree.iterwalk
1388 root = self.etree.XML(_bytes('<a><b><c/></b><d><e/></d></a>'))
1389
1390 iterator = iterwalk(root)
1391 tags = []
1392 for event, elem in iterator:
1393 tags.append(elem.tag)
1394 # requesting a skip after an 'end' event should never have an effect
1395 iterator.skip_subtree()
1396
1397 self.assertEqual(['c', 'b', 'e', 'd', 'a'], tags)
1398
1400 iterwalk = self.etree.iterwalk
1401 root = self.etree.XML(_bytes('<a><b><c/></b><d><e/></d></a>'))
1402
1403 iterator = iterwalk(root, events=('start', 'end'))
1404 tags = []
1405 for event, elem in iterator:
1406 tags.append((event, elem.tag))
1407 if elem.tag in ('b', 'e'):
1408 # skipping should only have an effect on 'start', not on 'end'
1409 iterator.skip_subtree()
1410
1411 self.assertEqual(
1412 [('start', 'a'),
1413 ('start', 'b'), ('end', 'b'), # ignored child 'c'
1414 ('start', 'd'),
1415 ('start', 'e'), ('end', 'e'),
1416 ('end', 'd'),
1417 ('end', 'a')],
1418 tags)
1419
1421 iterwalk = self.etree.iterwalk
1422 root = self.etree.XML(_bytes(
1423 '<a xmlns="ns1"><b xmlns="nsb"><c xmlns="ns2"/></b><d xmlns="ns2"><e/></d></a>'))
1424
1425 events = []
1426 iterator = iterwalk(root, events=('start','start-ns','end-ns'))
1427 for event, elem in iterator:
1428 if event in ('start-ns', 'end-ns'):
1429 events.append((event, elem))
1430 if event == 'start-ns' and elem == ('', 'nsb'):
1431 events.append('skip')
1432 iterator.skip_subtree()
1433 else:
1434 events.append((event, elem.tag))
1435
1436 self.assertEqual(
1437 [('start-ns', ('', 'ns1')),
1438 ('start', '{ns1}a'),
1439 ('start-ns', ('', 'nsb')),
1440 'skip',
1441 ('start', '{nsb}b'),
1442 ('end-ns', None),
1443 ('start-ns', ('', 'ns2')),
1444 ('start', '{ns2}d'),
1445 ('start', '{ns2}e'),
1446 ('end-ns', None),
1447 ('end-ns', None)
1448 ],
1449 events)
1450
1452 iterwalk = self.etree.iterwalk
1453 root = self.etree.XML(_bytes('<a><b><d/></b><c/></a>'))
1454
1455 counts = []
1456 for event, elem in iterwalk(root):
1457 counts.append(len(list(elem.getiterator())))
1458 self.assertEqual(
1459 [1,2,1,4],
1460 counts)
1461
1463 # https://bugs.launchpad.net/lxml/+bug/1844674
1464 XML = self.etree.XML
1465 root = XML(_bytes(
1466 "<root>RTEXT<a></a>ATAIL<b/><!-- COMMENT -->CTAIL<?PI PITEXT?> PITAIL </root>"
1467 ))
1468
1469 text = list(root.itertext())
1470 self.assertEqual(["RTEXT", "ATAIL", "CTAIL", " PITAIL "],
1471 text)
1472
1474 parse = self.etree.parse
1475 parser = self.etree.XMLParser(dtd_validation=True)
1476 assertEqual = self.assertEqual
1477 test_url = _str("__nosuch.dtd")
1478
1479 class MyResolver(self.etree.Resolver):
1480 def resolve(self, url, id, context):
1481 assertEqual(url, test_url)
1482 return self.resolve_string(
1483 _str('''<!ENTITY myentity "%s">
1484 <!ELEMENT doc ANY>''') % url, context)
1485
1486 parser.resolvers.add(MyResolver())
1487
1488 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1489 tree = parse(StringIO(xml), parser)
1490 root = tree.getroot()
1491 self.assertEqual(root.text, test_url)
1492
1494 parse = self.etree.parse
1495 parser = self.etree.XMLParser(dtd_validation=True)
1496 assertEqual = self.assertEqual
1497 test_url = _str("__nosuch.dtd")
1498
1499 class MyResolver(self.etree.Resolver):
1500 def resolve(self, url, id, context):
1501 assertEqual(url, test_url)
1502 return self.resolve_string(
1503 (_str('''<!ENTITY myentity "%s">
1504 <!ELEMENT doc ANY>''') % url).encode('utf-8'),
1505 context)
1506
1507 parser.resolvers.add(MyResolver())
1508
1509 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1510 tree = parse(StringIO(xml), parser)
1511 root = tree.getroot()
1512 self.assertEqual(root.text, test_url)
1513
1515 parse = self.etree.parse
1516 parser = self.etree.XMLParser(dtd_validation=True)
1517 assertEqual = self.assertEqual
1518 test_url = _str("__nosuch.dtd")
1519
1520 class MyResolver(self.etree.Resolver):
1521 def resolve(self, url, id, context):
1522 assertEqual(url, test_url)
1523 return self.resolve_file(
1524 SillyFileLike(
1525 _str('''<!ENTITY myentity "%s">
1526 <!ELEMENT doc ANY>''') % url), context)
1527
1528 parser.resolvers.add(MyResolver())
1529
1530 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1531 tree = parse(StringIO(xml), parser)
1532 root = tree.getroot()
1533 self.assertEqual(root.text, test_url)
1534
1536 parse = self.etree.parse
1537 parser = self.etree.XMLParser(attribute_defaults=True)
1538 assertEqual = self.assertEqual
1539 test_url = _str("__nosuch.dtd")
1540
1541 class MyResolver(self.etree.Resolver):
1542 def resolve(self, url, id, context):
1543 assertEqual(url, test_url)
1544 return self.resolve_filename(
1545 fileInTestDir('test.dtd'), context)
1546
1547 parser.resolvers.add(MyResolver())
1548
1549 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1550 tree = parse(StringIO(xml), parser)
1551 root = tree.getroot()
1552 self.assertEqual(
1553 root.attrib, {'default': 'valueA'})
1554 self.assertEqual(
1555 root[0].attrib, {'default': 'valueB'})
1556
1558 parse = self.etree.parse
1559 parser = self.etree.XMLParser(attribute_defaults=True)
1560 assertEqual = self.assertEqual
1561 test_url = _str("__nosuch.dtd")
1562
1563 class MyResolver(self.etree.Resolver):
1564 def resolve(self, url, id, context):
1565 expected = fileUrlInTestDir(test_url)
1566 url = url.replace('file://', 'file:') # depends on libxml2 version
1567 expected = expected.replace('file://', 'file:')
1568 assertEqual(url, expected)
1569 return self.resolve_filename(
1570 fileUrlInTestDir('test.dtd'), context)
1571
1572 parser.resolvers.add(MyResolver())
1573
1574 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1575 tree = parse(StringIO(xml), parser,
1576 base_url=fileUrlInTestDir('__test.xml'))
1577 root = tree.getroot()
1578 self.assertEqual(
1579 root.attrib, {'default': 'valueA'})
1580 self.assertEqual(
1581 root[0].attrib, {'default': 'valueB'})
1582
1584 parse = self.etree.parse
1585 parser = self.etree.XMLParser(attribute_defaults=True)
1586 assertEqual = self.assertEqual
1587 test_url = _str("__nosuch.dtd")
1588
1589 class MyResolver(self.etree.Resolver):
1590 def resolve(self, url, id, context):
1591 assertEqual(url, test_url)
1592 return self.resolve_file(
1593 open(fileInTestDir('test.dtd'), 'rb'), context)
1594
1595 parser.resolvers.add(MyResolver())
1596
1597 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1598 tree = parse(StringIO(xml), parser)
1599 root = tree.getroot()
1600 self.assertEqual(
1601 root.attrib, {'default': 'valueA'})
1602 self.assertEqual(
1603 root[0].attrib, {'default': 'valueB'})
1604
1606 parse = self.etree.parse
1607 parser = self.etree.XMLParser(load_dtd=True)
1608 assertEqual = self.assertEqual
1609 test_url = _str("__nosuch.dtd")
1610
1611 class check(object):
1612 resolved = False
1613
1614 class MyResolver(self.etree.Resolver):
1615 def resolve(self, url, id, context):
1616 assertEqual(url, test_url)
1617 check.resolved = True
1618 return self.resolve_empty(context)
1619
1620 parser.resolvers.add(MyResolver())
1621
1622 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1623 self.assertRaises(etree.XMLSyntaxError, parse, StringIO(xml), parser)
1624 self.assertTrue(check.resolved)
1625
1627 parse = self.etree.parse
1628 parser = self.etree.XMLParser(dtd_validation=True)
1629
1630 class _LocalException(Exception):
1631 pass
1632
1633 class MyResolver(self.etree.Resolver):
1634 def resolve(self, url, id, context):
1635 raise _LocalException
1636
1637 parser.resolvers.add(MyResolver())
1638
1639 xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
1640 self.assertRaises(_LocalException, parse, BytesIO(xml), parser)
1641
1643 parse = self.etree.parse
1644 tostring = self.etree.tostring
1645 parser = self.etree.XMLParser(resolve_entities=False)
1646 Entity = self.etree.Entity
1647
1648 xml = _bytes('<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>')
1649 tree = parse(BytesIO(xml), parser)
1650 root = tree.getroot()
1651 self.assertEqual(root[0].tag, Entity)
1652 self.assertEqual(root[0].text, "&myentity;")
1653 self.assertEqual(root[0].tail, None)
1654 self.assertEqual(root[0].name, "myentity")
1655
1656 self.assertEqual(_bytes('<doc>&myentity;</doc>'),
1657 tostring(root))
1658
1660 xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp "&#160;"> ]>
1661 <root>
1662 <child1/>
1663 <child2/>
1664 <child3>&nbsp;</child3>
1665 </root>''')
1666
1667 parser = self.etree.XMLParser(resolve_entities=False)
1668 root = etree.fromstring(xml, parser)
1669 self.assertEqual([ el.tag for el in root ],
1670 ['child1', 'child2', 'child3'])
1671
1672 root[0] = root[-1]
1673 self.assertEqual([ el.tag for el in root ],
1674 ['child3', 'child2'])
1675 self.assertEqual(root[0][0].text, '&nbsp;')
1676 self.assertEqual(root[0][0].name, 'nbsp')
1677
1679 Entity = self.etree.Entity
1680 Element = self.etree.Element
1681 tostring = self.etree.tostring
1682
1683 root = Element("root")
1684 root.append( Entity("test") )
1685
1686 self.assertEqual(root[0].tag, Entity)
1687 self.assertEqual(root[0].text, "&test;")
1688 self.assertEqual(root[0].tail, None)
1689 self.assertEqual(root[0].name, "test")
1690
1691 self.assertEqual(_bytes('<root>&test;</root>'),
1692 tostring(root))
1693
1695 Entity = self.etree.Entity
1696 Element = self.etree.Element
1697 parser = self.etree.XMLParser(resolve_entities=False)
1698 entity = self.etree.XML('''<!DOCTYPE data [
1699 <!ENTITY a "a">
1700 <!ENTITY b "&a;">
1701 ]>
1702 <data>&b;</data>
1703 ''', parser)
1704
1705 el = Element('test')
1706 el.append(entity)
1707 self.assertEqual(el.tag, 'test')
1708 self.assertEqual(el[0].tag, 'data')
1709 self.assertEqual(el[0][0].tag, Entity)
1710 self.assertEqual(el[0][0].name, 'b')
1711
1713 Entity = self.etree.Entity
1714 self.assertEqual(Entity("test").text, '&test;')
1715 self.assertEqual(Entity("#17683").text, '&#17683;')
1716 self.assertEqual(Entity("#x1768").text, '&#x1768;')
1717 self.assertEqual(Entity("#x98AF").text, '&#x98AF;')
1718
1720 Entity = self.etree.Entity
1721 self.assertRaises(ValueError, Entity, 'a b c')
1722 self.assertRaises(ValueError, Entity, 'a,b')
1723 self.assertRaises(ValueError, Entity, 'a\0b')
1724 self.assertRaises(ValueError, Entity, '#abc')
1725 self.assertRaises(ValueError, Entity, '#xxyz')
1726
1728 CDATA = self.etree.CDATA
1729 Element = self.etree.Element
1730 tostring = self.etree.tostring
1731
1732 root = Element("root")
1733 root.text = CDATA('test')
1734
1735 self.assertEqual('test',
1736 root.text)
1737 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
1738 tostring(root))
1739
1741 CDATA = self.etree.CDATA
1742 Element = self.etree.Element
1743 SubElement = self.etree.SubElement
1744 tostring = self.etree.tostring
1745
1746 root = Element("root")
1747 child = SubElement(root, 'child')
1748 child.tail = CDATA('test')
1749
1750 self.assertEqual('test', child.tail)
1751 self.assertEqual(_bytes('<root><child/><![CDATA[test]]></root>'),
1752 tostring(root))
1753
1754 root = Element("root")
1755 root.tail = CDATA('test')
1756
1757 self.assertEqual('test', root.tail)
1758 self.assertEqual(_bytes('<root/><![CDATA[test]]>'),
1759 tostring(root))
1760
1762 CDATA = self.etree.CDATA
1763 Element = self.etree.Element
1764 root = Element("root")
1765
1766 root.text = CDATA("test")
1767 self.assertEqual('test', root.text)
1768
1769 root.text = CDATA(_str("test"))
1770 self.assertEqual('test', root.text)
1771
1772 self.assertRaises(TypeError, CDATA, 1)
1773
1775 CDATA = self.etree.CDATA
1776 Element = self.etree.Element
1777
1778 root = Element("root")
1779 cdata = CDATA('test')
1780
1781 self.assertRaises(TypeError,
1782 root.set, 'attr', cdata)
1783 self.assertRaises(TypeError,
1784 operator.setitem, root.attrib, 'attr', cdata)
1785
1787 tostring = self.etree.tostring
1788 parser = self.etree.XMLParser(strip_cdata=False)
1789 root = self.etree.XML(_bytes('<root><![CDATA[test]]></root>'), parser)
1790
1791 self.assertEqual('test', root.text)
1792 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
1793 tostring(root))
1794
1796 tostring = self.etree.tostring
1797 parser = self.etree.XMLParser(strip_cdata=False)
1798 root = self.etree.XML(_bytes('<root><![CDATA[test]]></root>'), parser)
1799 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
1800 tostring(root))
1801
1802 self.assertEqual(['test'], root.xpath('//text()'))
1803
1804 # TypeError in etree, AssertionError in ElementTree;
1806 Element = self.etree.Element
1807 SubElement = self.etree.SubElement
1808
1809 a = Element('a')
1810 b = SubElement(a, 'b')
1811
1812 self.assertRaises(TypeError,
1813 a.__setitem__, 0, 'foo')
1814
1816 Element = self.etree.Element
1817 root = Element('root')
1818 # raises AssertionError in ElementTree
1819 self.assertRaises(TypeError, root.append, None)
1820 self.assertRaises(TypeError, root.extend, [None])
1821 self.assertRaises(TypeError, root.extend, [Element('one'), None])
1822 self.assertEqual('one', root[0].tag)
1823
1825 Element = self.etree.Element
1826 SubElement = self.etree.SubElement
1827 root = Element('root')
1828 self.assertRaises(ValueError, root.append, root)
1829 child = SubElement(root, 'child')
1830 self.assertRaises(ValueError, child.append, root)
1831 child2 = SubElement(child, 'child2')
1832 self.assertRaises(ValueError, child2.append, root)
1833 self.assertRaises(ValueError, child2.append, child)
1834 self.assertEqual('child2', root[0][0].tag)
1835
1837 Element = self.etree.Element
1838 SubElement = self.etree.SubElement
1839 root = Element('root')
1840 SubElement(root, 'a')
1841 SubElement(root, 'b')
1842
1843 self.assertEqual(['a', 'b'],
1844 [c.tag for c in root])
1845 root[1].addnext(root[0])
1846 self.assertEqual(['b', 'a'],
1847 [c.tag for c in root])
1848
1850 Element = self.etree.Element
1851 SubElement = self.etree.SubElement
1852 root = Element('root')
1853 SubElement(root, 'a')
1854 SubElement(root, 'b')
1855
1856 self.assertEqual(['a', 'b'],
1857 [c.tag for c in root])
1858 root[0].addprevious(root[1])
1859 self.assertEqual(['b', 'a'],
1860 [c.tag for c in root])
1861
1863 Element = self.etree.Element
1864 SubElement = self.etree.SubElement
1865 root = Element('root')
1866 a = SubElement(root, 'a')
1867 b = SubElement(a, 'b')
1868 # appending parent as sibling is forbidden
1869 self.assertRaises(ValueError, b.addnext, a)
1870 self.assertEqual(['a'], [c.tag for c in root])
1871 self.assertEqual(['b'], [c.tag for c in a])
1872
1874 Element = self.etree.Element
1875 SubElement = self.etree.SubElement
1876 root = Element('root')
1877 a = SubElement(root, 'a')
1878 b = SubElement(a, 'b')
1879 # appending parent as sibling is forbidden
1880 self.assertRaises(ValueError, b.addprevious, a)
1881 self.assertEqual(['a'], [c.tag for c in root])
1882 self.assertEqual(['b'], [c.tag for c in a])
1883
1885 Element = self.etree.Element
1886 SubElement = self.etree.SubElement
1887 root = Element('root')
1888 a = SubElement(root, 'a')
1889 b = SubElement(a, 'b')
1890 c = SubElement(b, 'c')
1891 # appending an ancestor as sibling is forbidden
1892 self.assertRaises(ValueError, c.addnext, a)
1893
1895 Element = self.etree.Element
1896 SubElement = self.etree.SubElement
1897 root = Element('root')
1898 a = SubElement(root, 'a')
1899 b = SubElement(a, 'b')
1900 c = SubElement(b, 'c')
1901 # appending an ancestor as sibling is forbidden
1902 self.assertRaises(ValueError, c.addprevious, a)
1903
1905 Element = self.etree.Element
1906 SubElement = self.etree.SubElement
1907 root = Element('root')
1908 a = SubElement(root, 'a')
1909 b = SubElement(root, 'b')
1910 a.addprevious(a)
1911 self.assertEqual('a', root[0].tag)
1912 self.assertEqual('b', root[1].tag)
1913 b.addprevious(b)
1914 self.assertEqual('a', root[0].tag)
1915 self.assertEqual('b', root[1].tag)
1916 b.addprevious(a)
1917 self.assertEqual('a', root[0].tag)
1918 self.assertEqual('b', root[1].tag)
1919
1921 Element = self.etree.Element
1922 SubElement = self.etree.SubElement
1923 root = Element('root')
1924 a = SubElement(root, 'a')
1925 b = SubElement(root, 'b')
1926 a.addnext(a)
1927 self.assertEqual('a', root[0].tag)
1928 self.assertEqual('b', root[1].tag)
1929 b.addnext(b)
1930 self.assertEqual('a', root[0].tag)
1931 self.assertEqual('b', root[1].tag)
1932 a.addnext(b)
1933 self.assertEqual('a', root[0].tag)
1934 self.assertEqual('b', root[1].tag)
1935
1937 Element = self.etree.Element
1938 a = Element('a')
1939 b = Element('b')
1940 self.assertRaises(TypeError, a.addnext, b)
1941
1943 Element = self.etree.Element
1944 SubElement = self.etree.SubElement
1945 PI = self.etree.PI
1946 root = Element('root')
1947 SubElement(root, 'a')
1948 pi = PI('TARGET', 'TEXT')
1949 pi.tail = "TAIL"
1950
1951 self.assertEqual(_bytes('<root><a></a></root>'),
1952 self._writeElement(root))
1953 root[0].addprevious(pi)
1954 self.assertEqual(_bytes('<root><?TARGET TEXT?>TAIL<a></a></root>'),
1955 self._writeElement(root))
1956
1958 Element = self.etree.Element
1959 PI = self.etree.PI
1960 root = Element('root')
1961 pi = PI('TARGET', 'TEXT')
1962 pi.tail = "TAIL"
1963
1964 self.assertEqual(_bytes('<root></root>'),
1965 self._writeElement(root))
1966 root.addprevious(pi)
1967 self.assertEqual(_bytes('<?TARGET TEXT?>\n<root></root>'),
1968 self._writeElement(root))
1969
1971 Element = self.etree.Element
1972 SubElement = self.etree.SubElement
1973 PI = self.etree.PI
1974 root = Element('root')
1975 SubElement(root, 'a')
1976 pi = PI('TARGET', 'TEXT')
1977 pi.tail = "TAIL"
1978
1979 self.assertEqual(_bytes('<root><a></a></root>'),
1980 self._writeElement(root))
1981 root[0].addnext(pi)
1982 self.assertEqual(_bytes('<root><a></a><?TARGET TEXT?>TAIL</root>'),
1983 self._writeElement(root))
1984
1986 Element = self.etree.Element
1987 PI = self.etree.PI
1988 root = Element('root')
1989 pi = PI('TARGET', 'TEXT')
1990 pi.tail = "TAIL"
1991
1992 self.assertEqual(_bytes('<root></root>'),
1993 self._writeElement(root))
1994 root.addnext(pi)
1995 self.assertEqual(_bytes('<root></root>\n<?TARGET TEXT?>'),
1996 self._writeElement(root))
1997
1999 Element = self.etree.Element
2000 SubElement = self.etree.SubElement
2001 Comment = self.etree.Comment
2002 root = Element('root')
2003 SubElement(root, 'a')
2004 comment = Comment('TEXT ')
2005 comment.tail = "TAIL"
2006
2007 self.assertEqual(_bytes('<root><a></a></root>'),
2008 self._writeElement(root))
2009 root[0].addnext(comment)
2010 self.assertEqual(_bytes('<root><a></a><!--TEXT -->TAIL</root>'),
2011 self._writeElement(root))
2012
2014 Element = self.etree.Element
2015 Comment = self.etree.Comment
2016 root = Element('root')
2017 comment = Comment('TEXT ')
2018 comment.tail = "TAIL"
2019
2020 self.assertEqual(_bytes('<root></root>'),
2021 self._writeElement(root))
2022 root.addnext(comment)
2023 self.assertEqual(_bytes('<root></root>\n<!--TEXT -->'),
2024 self._writeElement(root))
2025
2027 Element = self.etree.Element
2028 SubElement = self.etree.SubElement
2029 Comment = self.etree.Comment
2030 root = Element('root')
2031 SubElement(root, 'a')
2032 comment = Comment('TEXT ')
2033 comment.tail = "TAIL"
2034
2035 self.assertEqual(_bytes('<root><a></a></root>'),
2036 self._writeElement(root))
2037 root[0].addprevious(comment)
2038 self.assertEqual(_bytes('<root><!--TEXT -->TAIL<a></a></root>'),
2039 self._writeElement(root))
2040
2042 Element = self.etree.Element
2043 Comment = self.etree.Comment
2044 root = Element('root')
2045 comment = Comment('TEXT ')
2046 comment.tail = "TAIL"
2047
2048 self.assertEqual(_bytes('<root></root>'),
2049 self._writeElement(root))
2050 root.addprevious(comment)
2051 self.assertEqual(_bytes('<!--TEXT -->\n<root></root>'),
2052 self._writeElement(root))
2053
2054 # ET's Elements have items() and keys(), but not values()
2056 XML = self.etree.XML
2057
2058 root = XML(_bytes('<doc alpha="Alpha" beta="Beta" gamma="Gamma"/>'))
2059 values = root.values()
2060 values.sort()
2061 self.assertEqual(['Alpha', 'Beta', 'Gamma'], values)
2062
2063 # gives error in ElementTree
2065 Element = self.etree.Element
2066 Comment = self.etree.Comment
2067
2068 a = Element('a')
2069 a.append(Comment())
2070 self.assertEqual(
2071 _bytes('<a><!----></a>'),
2072 self._writeElement(a))
2073
2074 # ElementTree ignores comments
2076 ElementTree = self.etree.ElementTree
2077 tostring = self.etree.tostring
2078
2079 xml = _bytes('<a><b/><!----><c/></a>')
2080 f = BytesIO(xml)
2081 doc = ElementTree(file=f)
2082 a = doc.getroot()
2083 self.assertEqual(
2084 '',
2085 a[1].text)
2086 self.assertEqual(
2087 xml,
2088 tostring(a))
2089
2090 # ElementTree ignores comments
2092 ElementTree = self.etree.ElementTree
2093
2094 f = BytesIO('<a><b></b><!-- hoi --><c></c></a>')
2095 doc = ElementTree(file=f)
2096 a = doc.getroot()
2097 self.assertEqual(
2098 ' hoi ',
2099 a[1].text)
2100
2101 # does not raise an exception in ElementTree
2103 Element = self.etree.Element
2104 Comment = self.etree.Comment
2105
2106 c = Comment()
2107 el = Element('myel')
2108
2109 self.assertRaises(TypeError, c.append, el)
2110 self.assertRaises(TypeError, c.insert, 0, el)
2111 self.assertRaises(TypeError, c.set, "myattr", "test")
2112
2114 c = self.etree.Comment()
2115 self.assertEqual(0, len(c.attrib))
2116
2117 self.assertFalse(c.attrib.__contains__('nope'))
2118 self.assertFalse('nope' in c.attrib)
2119 self.assertFalse('nope' in c.attrib.keys())
2120 self.assertFalse('nope' in c.attrib.values())
2121 self.assertFalse(('nope', 'huhu') in c.attrib.items())
2122
2123 self.assertEqual([], list(c.attrib))
2124 self.assertEqual([], list(c.attrib.keys()))
2125 self.assertEqual([], list(c.attrib.items()))
2126 self.assertEqual([], list(c.attrib.values()))
2127 self.assertEqual([], list(c.attrib.iterkeys()))
2128 self.assertEqual([], list(c.attrib.iteritems()))
2129 self.assertEqual([], list(c.attrib.itervalues()))
2130
2131 self.assertEqual('HUHU', c.attrib.pop('nope', 'HUHU'))
2132 self.assertRaises(KeyError, c.attrib.pop, 'nope')
2133
2134 self.assertRaises(KeyError, c.attrib.__getitem__, 'only')
2135 self.assertRaises(KeyError, c.attrib.__getitem__, 'names')
2136 self.assertRaises(KeyError, c.attrib.__getitem__, 'nope')
2137 self.assertRaises(KeyError, c.attrib.__setitem__, 'nope', 'yep')
2138 self.assertRaises(KeyError, c.attrib.__delitem__, 'nope')
2139
2140 # test passing 'None' to dump()
2143
2145 ElementTree = self.etree.ElementTree
2146
2147 f = BytesIO('<a xmlns:foo="http://www.infrae.com/ns/1"><foo:b/></a>')
2148 doc = ElementTree(file=f)
2149 a = doc.getroot()
2150 self.assertEqual(
2151 None,
2152 a.prefix)
2153 self.assertEqual(
2154 'foo',
2155 a[0].prefix)
2156
2158 ElementTree = self.etree.ElementTree
2159
2160 f = BytesIO('<a xmlns="http://www.infrae.com/ns/1"><b/></a>')
2161 doc = ElementTree(file=f)
2162 a = doc.getroot()
2163 self.assertEqual(
2164 None,
2165 a.prefix)
2166 self.assertEqual(
2167 None,
2168 a[0].prefix)
2169
2171 Element = self.etree.Element
2172 SubElement = self.etree.SubElement
2173
2174 a = Element('a')
2175 b = SubElement(a, 'b')
2176 c = SubElement(a, 'c')
2177 d = SubElement(b, 'd')
2178 self.assertEqual(
2179 None,
2180 a.getparent())
2181 self.assertEqual(
2182 a,
2183 b.getparent())
2184 self.assertEqual(
2185 b.getparent(),
2186 c.getparent())
2187 self.assertEqual(
2188 b,
2189 d.getparent())
2190
2192 XML = self.etree.XML
2193
2194 root = XML(_bytes('<doc><one/><two>Two</two>Hm<three/></doc>'))
2195 result = []
2196 for el in root.iterchildren():
2197 result.append(el.tag)
2198 self.assertEqual(['one', 'two', 'three'], result)
2199
2201 XML = self.etree.XML
2202
2203 root = XML(_bytes('<doc><one/><two>Two</two>Hm<three/></doc>'))
2204 result = []
2205 for el in root.iterchildren(reversed=True):
2206 result.append(el.tag)
2207 self.assertEqual(['three', 'two', 'one'], result)
2208
2210 XML = self.etree.XML
2211
2212 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two></doc>'))
2213 result = []
2214 for el in root.iterchildren(tag='two'):
2215 result.append(el.text)
2216 self.assertEqual(['Two', 'Bla'], result)
2217
2219 XML = self.etree.XML
2220
2221 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two></doc>'))
2222 result = []
2223 for el in root.iterchildren('two'):
2224 result.append(el.text)
2225 self.assertEqual(['Two', 'Bla'], result)
2226
2228 XML = self.etree.XML
2229
2230 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two></doc>'))
2231 result = []
2232 for el in root.iterchildren(reversed=True, tag='two'):
2233 result.append(el.text)
2234 self.assertEqual(['Bla', 'Two'], result)
2235
2237 XML = self.etree.XML
2238
2239 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2240 result = []
2241 for el in root.iterchildren(tag=['two', 'three']):
2242 result.append(el.text)
2243 self.assertEqual(['Two', 'Bla', None], result)
2244
2246 XML = self.etree.XML
2247
2248 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2249 result = []
2250 for el in root.iterchildren('two', 'three'):
2251 result.append(el.text)
2252 self.assertEqual(['Two', 'Bla', None], result)
2253
2255 XML = self.etree.XML
2256
2257 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2258 result = []
2259 for el in root.iterchildren(reversed=True, tag=['two', 'three']):
2260 result.append(el.text)
2261 self.assertEqual([None, 'Bla', 'Two'], result)
2262
2264 Element = self.etree.Element
2265 SubElement = self.etree.SubElement
2266
2267 a = Element('a')
2268 b = SubElement(a, 'b')
2269 c = SubElement(a, 'c')
2270 d = SubElement(b, 'd')
2271 self.assertEqual(
2272 [],
2273 list(a.iterancestors()))
2274 self.assertEqual(
2275 [a],
2276 list(b.iterancestors()))
2277 self.assertEqual(
2278 [a],
2279 list(c.iterancestors()))
2280 self.assertEqual(
2281 [b, a],
2282 list(d.iterancestors()))
2283
2285 Element = self.etree.Element
2286 SubElement = self.etree.SubElement
2287
2288 a = Element('a')
2289 b = SubElement(a, 'b')
2290 c = SubElement(a, 'c')
2291 d = SubElement(b, 'd')
2292 self.assertEqual(
2293 [a],
2294 list(d.iterancestors('a')))
2295 self.assertEqual(
2296 [a],
2297 list(d.iterancestors(tag='a')))
2298
2299 self.assertEqual(
2300 [b, a],
2301 list(d.iterancestors('*')))
2302 self.assertEqual(
2303 [b, a],
2304 list(d.iterancestors(tag='*')))
2305
# Test body: iterancestors() with several tags, each case checked once with
# a tuple passed as tag=... and once with positional arguments.
# Cases for element d (ancestors b, a):
#   - ('a', 'b') matches both ancestors, in ancestor order [b, a];
#   - unknown names match nothing;
#   - ('d', 'x') is empty because d itself is not among its ancestors;
#   - a '*' among the tags matches every ancestor;
#   - ('b', 'c') yields only [b] since c is a sibling branch, not an ancestor.
2307 Element = self.etree.Element
2308 SubElement = self.etree.SubElement
2309
2310 a = Element('a')
2311 b = SubElement(a, 'b')
2312 c = SubElement(a, 'c')
2313 d = SubElement(b, 'd')
2314 self.assertEqual(
2315 [b, a],
2316 list(d.iterancestors(tag=('a', 'b'))))
2317 self.assertEqual(
2318 [b, a],
2319 list(d.iterancestors('a', 'b')))
2320
2321 self.assertEqual(
2322 [],
2323 list(d.iterancestors(tag=('w', 'x', 'y', 'z'))))
2324 self.assertEqual(
2325 [],
2326 list(d.iterancestors('w', 'x', 'y', 'z')))
2327
2328 self.assertEqual(
2329 [],
2330 list(d.iterancestors(tag=('d', 'x'))))
2331 self.assertEqual(
2332 [],
2333 list(d.iterancestors('d', 'x')))
2334
2335 self.assertEqual(
2336 [b, a],
2337 list(d.iterancestors(tag=('b', '*'))))
2338 self.assertEqual(
2339 [b, a],
2340 list(d.iterancestors('b', '*')))
2341
2342 self.assertEqual(
2343 [b],
2344 list(d.iterancestors(tag=('b', 'c'))))
2345 self.assertEqual(
2346 [b],
2347 list(d.iterancestors('b', 'c')))
2348
# Test body: iterdescendants() without a filter, on a -> (b -> d, c -> e).
# From the root the expected order is [b, d, c, e] (the start element itself
# is not included); the leaf d has no descendants.
2350 Element = self.etree.Element
2351 SubElement = self.etree.SubElement
2352
2353 a = Element('a')
2354 b = SubElement(a, 'b')
2355 c = SubElement(a, 'c')
2356 d = SubElement(b, 'd')
2357 e = SubElement(c, 'e')
2358
2359 self.assertEqual(
2360 [b, d, c, e],
2361 list(a.iterdescendants()))
2362 self.assertEqual(
2363 [],
2364 list(d.iterdescendants()))
2365
# Test body: iterdescendants() with a single tag filter (positional and
# tag=...).  Filtering the root 'a' by 'a' is empty at first because the
# start element is excluded; once a second 'a' (a2) is added below e, it is
# found both from the root and from c.
2367 Element = self.etree.Element
2368 SubElement = self.etree.SubElement
2369
2370 a = Element('a')
2371 b = SubElement(a, 'b')
2372 c = SubElement(a, 'c')
2373 d = SubElement(b, 'd')
2374 e = SubElement(c, 'e')
2375
2376 self.assertEqual(
2377 [],
2378 list(a.iterdescendants('a')))
2379 self.assertEqual(
2380 [],
2381 list(a.iterdescendants(tag='a')))
2382
2383 a2 = SubElement(e, 'a')
2384 self.assertEqual(
2385 [a2],
2386 list(a.iterdescendants('a')))
2387
2388 self.assertEqual(
2389 [a2],
2390 list(c.iterdescendants('a')))
2391 self.assertEqual(
2392 [a2],
2393 list(c.iterdescendants(tag='a')))
2394
# Test body: iterdescendants() with several tags, each case checked with a
# tuple passed as tag=... and with positional arguments.  Results come back
# in document order, the start element is never included, unknown names
# match nothing, and a '*' among the tags matches every descendant.
2396 Element = self.etree.Element
2397 SubElement = self.etree.SubElement
2398
2399 a = Element('a')
2400 b = SubElement(a, 'b')
2401 c = SubElement(a, 'c')
2402 d = SubElement(b, 'd')
2403 e = SubElement(c, 'e')
2404
2405 self.assertEqual(
2406 [b, e],
2407 list(a.iterdescendants(tag=('a', 'b', 'e'))))
2408 self.assertEqual(
2409 [b, e],
2410 list(a.iterdescendants('a', 'b', 'e')))
2411
2412 a2 = SubElement(e, 'a')
2413 self.assertEqual(
2414 [b, a2],
2415 list(a.iterdescendants(tag=('a', 'b'))))
2416 self.assertEqual(
2417 [b, a2],
2418 list(a.iterdescendants('a', 'b')))
2419
2420 self.assertEqual(
2421 [],
2422 list(c.iterdescendants(tag=('x', 'y', 'z'))))
2423 self.assertEqual(
2424 [],
2425 list(c.iterdescendants('x', 'y', 'z')))
2426
2427 self.assertEqual(
2428 [b, d, c, e, a2],
2429 list(a.iterdescendants(tag=('x', 'y', 'z', '*'))))
2430 self.assertEqual(
2431 [b, d, c, e, a2],
2432 list(a.iterdescendants('x', 'y', 'z', '*')))
2433
# Test body: getroottree().getroot() returns the top element 'a' no matter
# whether it is called on the root itself, a child (b) or a grandchild (d).
2435 Element = self.etree.Element
2436 SubElement = self.etree.SubElement
2437
2438 a = Element('a')
2439 b = SubElement(a, 'b')
2440 c = SubElement(a, 'c')
2441 d = SubElement(b, 'd')
2442 self.assertEqual(
2443 a,
2444 a.getroottree().getroot())
2445 self.assertEqual(
2446 a,
2447 b.getroottree().getroot())
2448 self.assertEqual(
2449 a,
2450 d.getroottree().getroot())
2451
# Test body: getnext() returns the following sibling or None.  The root and
# the last child c have no next sibling; b is followed by c.
2453 Element = self.etree.Element
2454 SubElement = self.etree.SubElement
2455
2456 a = Element('a')
2457 b = SubElement(a, 'b')
2458 c = SubElement(a, 'c')
2459 self.assertEqual(
2460 None,
2461 a.getnext())
2462 self.assertEqual(
2463 c,
2464 b.getnext())
2465 self.assertEqual(
2466 None,
2467 c.getnext())
2468
# Test body: getprevious() returns the preceding sibling or None.  The root
# and the first child b have none; c is preceded by b.
# (d is created below b but is not referenced by any assertion here.)
2470 Element = self.etree.Element
2471 SubElement = self.etree.SubElement
2472
2473 a = Element('a')
2474 b = SubElement(a, 'b')
2475 c = SubElement(a, 'c')
2476 d = SubElement(b, 'd')
2477 self.assertEqual(
2478 None,
2479 a.getprevious())
2480 self.assertEqual(
2481 b,
2482 c.getprevious())
2483 self.assertEqual(
2484 None,
2485 b.getprevious())
2486
# Test body: itersiblings() yields the following siblings by default and the
# preceding ones with preceding=True.  Checked on a -> (b -> d, c): the root
# has no siblings, b is followed by c, and c is preceded by b.
2488 Element = self.etree.Element
2489 SubElement = self.etree.SubElement
2490
2491 a = Element('a')
2492 b = SubElement(a, 'b')
2493 c = SubElement(a, 'c')
2494 d = SubElement(b, 'd')
2495 self.assertEqual(
2496 [],
2497 list(a.itersiblings()))
2498 self.assertEqual(
2499 [c],
2500 list(b.itersiblings()))
2501 self.assertEqual(
2502 [],
2503 list(c.itersiblings()))
2504 self.assertEqual(
2505 [b],
2506 list(c.itersiblings(preceding=True)))
2507 self.assertEqual(
2508 [],
2509 list(b.itersiblings(preceding=True)))
2510
# Test body: itersiblings() with a tag= filter, in both directions.  An
# unknown tag gives [], 'c' and '*' both find c after b, and with
# preceding=True the filter 'b' finds b before c while 'c' finds nothing
# (c is not its own sibling).
2512 Element = self.etree.Element
2513 SubElement = self.etree.SubElement
2514
2515 a = Element('a')
2516 b = SubElement(a, 'b')
2517 c = SubElement(a, 'c')
2518 d = SubElement(b, 'd')
2519 self.assertEqual(
2520 [],
2521 list(a.itersiblings(tag='XXX')))
2522 self.assertEqual(
2523 [c],
2524 list(b.itersiblings(tag='c')))
2525 self.assertEqual(
2526 [c],
2527 list(b.itersiblings(tag='*')))
2528 self.assertEqual(
2529 [b],
2530 list(c.itersiblings(preceding=True, tag='b')))
2531 self.assertEqual(
2532 [],
2533 list(c.itersiblings(preceding=True, tag='c')))
2534
# Test body: itersiblings() with a tuple of tags.  The children of a are
# b, c, e (d lives below b, so it is never a sibling here).  Duplicate names
# in the tuple do not duplicate results, and with preceding=True the
# siblings are expected nearest-first ([c, b] when starting from e).
2536 Element = self.etree.Element
2537 SubElement = self.etree.SubElement
2538
2539 a = Element('a')
2540 b = SubElement(a, 'b')
2541 c = SubElement(a, 'c')
2542 d = SubElement(b, 'd')
2543 e = SubElement(a, 'e')
2544 self.assertEqual(
2545 [],
2546 list(a.itersiblings(tag=('XXX', 'YYY'))))
2547 self.assertEqual(
2548 [c, e],
2549 list(b.itersiblings(tag=('c', 'd', 'e'))))
2550 self.assertEqual(
2551 [b],
2552 list(c.itersiblings(preceding=True, tag=('b', 'b', 'c', 'd'))))
2553 self.assertEqual(
2554 [c, b],
2555 list(e.itersiblings(preceding=True, tag=('c', '*'))))
2556
# Test body: parseid() on a file-like object returns (tree, id_dict).
# The DTD declares h1/@myid and p/@someid as ID attributes, and one <p>
# carries xml:id.  The expected dict therefore maps 'chapter1', 'xmlid' and
# 'warn1' to root[0], root[3] and root[4]; the plain id="note1" attribute is
# not in the expected mapping.  The parsed tree must also serialise the same
# as the result of XML() on the same bytes.
# self._writeElement and self._checkIDDict are helpers defined elsewhere.
2558 parseid = self.etree.parseid
2559 XML = self.etree.XML
2560 xml_text = _bytes('''
2561 <!DOCTYPE document [
2562 <!ELEMENT document (h1,p)*>
2563 <!ELEMENT h1 (#PCDATA)>
2564 <!ATTLIST h1 myid ID #REQUIRED>
2565 <!ELEMENT p (#PCDATA)>
2566 <!ATTLIST p someid ID #REQUIRED>
2567 ]>
2568 <document>
2569 <h1 myid="chapter1">...</h1>
2570 <p id="note1" class="note">...</p>
2571 <p>Regular paragraph.</p>
2572 <p xml:id="xmlid">XML:ID paragraph.</p>
2573 <p someid="warn1" class="warning">...</p>
2574 </document>
2575 ''')
2576
2577 tree, dic = parseid(BytesIO(xml_text))
2578 root = tree.getroot()
2579 root2 = XML(xml_text)
2580 self.assertEqual(self._writeElement(root),
2581 self._writeElement(root2))
2582 expected = {
2583 "chapter1" : root[0],
2584 "xmlid" : root[3],
2585 "warn1" : root[4]
2586 }
2587 self.assertTrue("chapter1" in dic)
2588 self.assertTrue("warn1" in dic)
2589 self.assertTrue("xmlid" in dic)
2590 self._checkIDDict(dic, expected)
2591
# Test body: XMLDTDID() parses a byte string and returns (root, id_dict).
# Same document and expectations as the parseid() check: the DTD-declared ID
# attributes (myid, someid) and xml:id give the keys 'chapter1', 'warn1' and
# 'xmlid', mapped to root[0], root[4] and root[3]; the serialised root must
# equal that of a plain XML() parse.
2593 XMLDTDID = self.etree.XMLDTDID
2594 XML = self.etree.XML
2595 xml_text = _bytes('''
2596 <!DOCTYPE document [
2597 <!ELEMENT document (h1,p)*>
2598 <!ELEMENT h1 (#PCDATA)>
2599 <!ATTLIST h1 myid ID #REQUIRED>
2600 <!ELEMENT p (#PCDATA)>
2601 <!ATTLIST p someid ID #REQUIRED>
2602 ]>
2603 <document>
2604 <h1 myid="chapter1">...</h1>
2605 <p id="note1" class="note">...</p>
2606 <p>Regular paragraph.</p>
2607 <p xml:id="xmlid">XML:ID paragraph.</p>
2608 <p someid="warn1" class="warning">...</p>
2609 </document>
2610 ''')
2611
2612 root, dic = XMLDTDID(xml_text)
2613 root2 = XML(xml_text)
2614 self.assertEqual(self._writeElement(root),
2615 self._writeElement(root2))
2616 expected = {
2617 "chapter1" : root[0],
2618 "xmlid" : root[3],
2619 "warn1" : root[4]
2620 }
2621 self.assertTrue("chapter1" in dic)
2622 self.assertTrue("warn1" in dic)
2623 self.assertTrue("xmlid" in dic)
2624 self._checkIDDict(dic, expected)
2625
# Test body: XMLDTDID() on a document without any DTD.  No attribute is
# declared as an ID (and there is no xml:id), so the returned ID dict is
# expected to be empty, while the parsed root still serialises like XML()'s.
2627 XMLDTDID = self.etree.XMLDTDID
2628 XML = self.etree.XML
2629 xml_text = _bytes('''
2630 <document>
2631 <h1 myid="chapter1">...</h1>
2632 <p id="note1" class="note">...</p>
2633 <p>Regular paragraph.</p>
2634 <p someid="warn1" class="warning">...</p>
2635 </document>
2636 ''')
2637
2638 root, dic = XMLDTDID(xml_text)
2639 root2 = XML(xml_text)
2640 self.assertEqual(self._writeElement(root),
2641 self._writeElement(root2))
2642 expected = {}
2643 self._checkIDDict(dic, expected)
2644
# Test body: XMLDTDID() with a parser created with collect_ids=False.
# Although the DTD declares ID attributes, the returned dict is expected to
# be falsy and to pass _checkIDDict against {}; the parsed root still
# serialises like a plain XML() parse.
# Note: the parser comes from the module-level `etree`, not `self.etree`.
2646 XMLDTDID = self.etree.XMLDTDID
2647 XML = self.etree.XML
2648 xml_text = _bytes('''
2649 <!DOCTYPE document [
2650 <!ELEMENT document (h1,p)*>
2651 <!ELEMENT h1 (#PCDATA)>
2652 <!ATTLIST h1 myid ID #REQUIRED>
2653 <!ELEMENT p (#PCDATA)>
2654 <!ATTLIST p someid ID #REQUIRED>
2655 ]>
2656 <document>
2657 <h1 myid="chapter1">...</h1>
2658 <p id="note1" class="note">...</p>
2659 <p>Regular paragraph.</p>
2660 <p xml:id="xmlid">XML:ID paragraph.</p>
2661 <p someid="warn1" class="warning">...</p>
2662 </document>
2663 ''')
2664
2665 parser = etree.XMLParser(collect_ids=False)
2666 root, dic = XMLDTDID(xml_text, parser=parser)
2667 root2 = XML(xml_text)
2668 self.assertEqual(self._writeElement(root),
2669 self._writeElement(root2))
2670 self.assertFalse(dic)
2671 self._checkIDDict(dic, {})
2672
# Helper body (called above as self._checkIDDict(dic, expected)): compares
# an ID dict against an expected mapping by length and by the sorted
# results of items(), keys() and values(), plus the iteritems()/iterkeys()/
# itervalues() variants guarded by `sys.version_info < (3,)`.
# NOTE(review): indentation is lost in this listing, so exactly which
# assertions sit under each version guard cannot be read off here -- confirm
# against the real file before editing.
2674 self.assertEqual(len(dic),
2675 len(expected))
2676 self.assertEqual(sorted(dic.items()),
2677 sorted(expected.items()))
2678 if sys.version_info < (3,):
2679 self.assertEqual(sorted(dic.iteritems()),
2680 sorted(expected.iteritems()))
2681 self.assertEqual(sorted(dic.keys()),
2682 sorted(expected.keys()))
2683 if sys.version_info < (3,):
2684 self.assertEqual(sorted(dic.iterkeys()),
2685 sorted(expected.iterkeys()))
2686 if sys.version_info < (3,):
2687 self.assertEqual(sorted(dic.values()),
2688 sorted(expected.values()))
2689 self.assertEqual(sorted(dic.itervalues()),
2690 sorted(expected.itervalues()))
2691
# Test body: register_namespace() and the reserved 'xml' prefix.  Both the
# upper-case prefix "XML" with the XML namespace URI and the prefix "xml"
# with a different URI must raise ValueError; registering "xml" with its
# standard URI is accepted.
2693 self.assertRaises(ValueError, self.etree.register_namespace,
2694 "XML", "http://www.w3.org/XML/1998/namespace")
2695 self.assertRaises(ValueError, self.etree.register_namespace,
2696 "xml", "http://www.w3.org/XML/2345")
2697 self.etree.register_namespace("xml", "http://www.w3.org/XML/1998/namespace") # ok
2698
# Test body: creating an element with nsmap={'foo': uri} gives it the prefix
# 'foo', and self._writeElement (helper defined elsewhere) is expected to
# produce the element with an xmlns:foo declaration.
2700 etree = self.etree
2701
2702 r = {'foo': 'http://ns.infrae.com/foo'}
2703 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2704 self.assertEqual(
2705 'foo',
2706 e.prefix)
2707 self.assertEqual(
2708 _bytes('<foo:bar xmlns:foo="http://ns.infrae.com/foo"></foo:bar>'),
2709 self._writeElement(e))
2710
# Test body: mapping the namespace to the None key makes it the default
# namespace: the element has no prefix, keeps its fully qualified tag, and
# is written with a plain xmlns="..." declaration.
2712 etree = self.etree
2713
2714 r = {None: 'http://ns.infrae.com/foo'}
2715 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2716 self.assertEqual(
2717 None,
2718 e.prefix)
2719 self.assertEqual(
2720 '{http://ns.infrae.com/foo}bar',
2721 e.tag)
2722 self.assertEqual(
2723 _bytes('<bar xmlns="http://ns.infrae.com/foo"></bar>'),
2724 self._writeElement(e))
2725
# Test body: an nsmap with a default namespace plus an extra prefix 'p'.
# The element stays unprefixed and both declarations (xmlns and xmlns:p) are
# expected in the written output even though 'p' is not used.
2727 etree = self.etree
2728
2729 r = {None: 'http://ns.infrae.com/foo', 'p': 'http://test/'}
2730 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2731 self.assertEqual(None, e.prefix)
2732 self.assertEqual('{http://ns.infrae.com/foo}bar', e.tag)
2733 self.assertEqual(
2734 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:p="http://test/"></bar>'),
2735 self._writeElement(e))
2736
# Test body: an attribute set with a '{uri}name' key picks up the prefix
# registered for that URI in the element's nsmap ('hoi'), while the element
# itself uses the default namespace.
2738 etree = self.etree
2739
2740 r = {None: 'http://ns.infrae.com/foo',
2741 'hoi': 'http://ns.infrae.com/hoi'}
2742 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2743 e.set('{http://ns.infrae.com/hoi}test', 'value')
2744 self.assertEqual(
2745 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi" hoi:test="value"></bar>'),
2746 self._writeElement(e))
2747
# Test body: appending a prefixed element to a root that declares the same
# URI as its default namespace.  Standalone, sub and its attribute both use
# the 'test' prefix.  After root.append(sub) the expected output shows <sub>
# without a prefix (it now uses the root's default namespace) while the
# attribute keeps 'test:' and its xmlns:test declaration.
2749 etree = self.etree
2750
2751 root = etree.Element('{http://test/ns}root',
2752 nsmap={None: 'http://test/ns'})
2753 sub = etree.Element('{http://test/ns}sub',
2754 nsmap={'test': 'http://test/ns'})
2755
2756 sub.attrib['{http://test/ns}attr'] = 'value'
2757 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2758 self.assertEqual(
2759 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2760 etree.tostring(sub))
2761
2762 root.append(sub)
2763 self.assertEqual(
2764 _bytes('<root xmlns="http://test/ns">'
2765 '<sub xmlns:test="http://test/ns" test:attr="value"/>'
2766 '</root>'),
2767 etree.tostring(root))
2768
# Test body: appending a prefixed element to a root that has no namespace at
# all.  The serialisation of sub is expected to be unchanged by the move:
# element and attribute both keep the 'test' prefix and its declaration.
2770 etree = self.etree
2771
2772 root = etree.Element('root')
2773 sub = etree.Element('{http://test/ns}sub',
2774 nsmap={'test': 'http://test/ns'})
2775
2776 sub.attrib['{http://test/ns}attr'] = 'value'
2777 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2778 self.assertEqual(
2779 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2780 etree.tostring(sub))
2781
2782 root.append(sub)
2783 self.assertEqual(
2784 _bytes('<root>'
2785 '<test:sub xmlns:test="http://test/ns" test:attr="value"/>'
2786 '</root>'),
2787 etree.tostring(root))
2788
# Test body: an element whose nsmap only has a default namespace gets a
# namespaced attribute.  The expected output gives the attribute a generated
# prefix 'ns0' declared next to the default namespace, and this stays the
# same after the element is appended to a namespace-less root.
2790 etree = self.etree
2791
2792 root = etree.Element('root')
2793 sub = etree.Element('{http://test/ns}sub',
2794 nsmap={None: 'http://test/ns'})
2795
2796 sub.attrib['{http://test/ns}attr'] = 'value'
2797 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2798 self.assertEqual(
2799 _bytes('<sub xmlns="http://test/ns" '
2800 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2801 etree.tostring(sub))
2802
2803 root.append(sub)
2804 self.assertEqual(
2805 _bytes('<root>'
2806 '<sub xmlns="http://test/ns"'
2807 ' xmlns:ns0="http://test/ns" ns0:attr="value"/>'
2808 '</root>'),
2809 etree.tostring(root))
2810
# Test body: the root maps one URI both to the prefix 'test' and to the
# default namespace; sub only has it as default.  Standalone, sub's
# attribute gets the generated prefix 'ns0'.  After root.append(sub) the
# expected output uses the root's 'test' prefix for sub and for its
# attribute, with no declarations left on sub.
2812 etree = self.etree
2813
2814 root = etree.Element('{http://test/ns}root',
2815 nsmap={'test': 'http://test/ns',
2816 None: 'http://test/ns'})
2817 sub = etree.Element('{http://test/ns}sub',
2818 nsmap={None: 'http://test/ns'})
2819
2820 sub.attrib['{http://test/ns}attr'] = 'value'
2821 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2822 self.assertEqual(
2823 _bytes('<sub xmlns="http://test/ns" '
2824 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2825 etree.tostring(sub))
2826
2827 root.append(sub)
2828 self.assertEqual(
2829 _bytes('<test:root xmlns:test="http://test/ns" xmlns="http://test/ns">'
2830 '<test:sub test:attr="value"/>'
2831 '</test:root>'),
2832 etree.tostring(root))
2833
# Test body: a SubElement created in a namespace already declared on its
# parent ('hoi') is written with that prefix and without a new declaration.
# An ElementTree is wrapped around e first; the `tree` variable itself is
# not used by the assertion.
2835 etree = self.etree
2836 r = {None: 'http://ns.infrae.com/foo',
2837 'hoi': 'http://ns.infrae.com/hoi'}
2838 e = etree.Element('{http://ns.infrae.com/foo}z', nsmap=r)
2839 tree = etree.ElementTree(element=e)
2840 etree.SubElement(e, '{http://ns.infrae.com/hoi}x')
2841 self.assertEqual(
2842 _bytes('<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>'),
2843 self._writeElement(e))
2844
# Test body: two elements sharing the same default namespace are nested via
# append().  Afterwards both the parent and the moved child (e1[0]) must
# still be unprefixed and keep their fully qualified tags.
2846 etree = self.etree
2847
2848 r = {None: 'http://ns.infrae.com/foo'}
2849 e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2850 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2851
2852 e1.append(e2)
2853
2854 self.assertEqual(
2855 None,
2856 e1.prefix)
2857 self.assertEqual(
2858 None,
2859 e1[0].prefix)
2860 self.assertEqual(
2861 '{http://ns.infrae.com/foo}bar',
2862 e1.tag)
2863 self.assertEqual(
2864 '{http://ns.infrae.com/foo}bar',
2865 e1[0].tag)
2866
# Test body: e2 lives in a namespace ('.../foo') that differs from the
# default namespace in the nsmap it was given ('.../BAR').  After appending
# it to e1, e1 stays unprefixed, e2 must have some (non-None) prefix, and
# both tags are unchanged.
2868 etree = self.etree
2869
2870 r = {None: 'http://ns.infrae.com/BAR'}
2871 e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r)
2872 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2873
2874 e1.append(e2)
2875
2876 self.assertEqual(
2877 None,
2878 e1.prefix)
2879 self.assertNotEqual(
2880 None,
2881 e2.prefix)
2882 self.assertEqual(
2883 '{http://ns.infrae.com/BAR}bar',
2884 e1.tag)
2885 self.assertEqual(
2886 '{http://ns.infrae.com/foo}bar',
2887 e2.tag)
2888
# Test body: an element is moved between two separately parsed documents
# that declare the same namespace under the same prefix.  After the source
# document reference is dropped, the moved element must keep its qualified
# tag and be written using the declaration on the new root.
2890 ns_href = "http://a.b.c"
2891 one = self.etree.fromstring(
2892 _bytes('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
2893 baz = one[0][0]
2894
2895 two = self.etree.fromstring(
2896 _bytes('<root xmlns:ns="%s"/>' % ns_href))
2897 two.append(baz)
2898 del one # make sure the source document is deallocated
2899
2900 self.assertEqual('{%s}baz' % ns_href, baz.tag)
2901 self.assertEqual(
2902 _bytes('<root xmlns:ns="%s"><ns:baz/></root>' % ns_href),
2903 self.etree.tostring(two))
2904
# Test body: cleanup_namespaces() drops declarations no element uses.
# The document first round-trips unchanged; after cleanup the unused
# prefixes 'x' and 'b' are gone while the default namespaces and 'ns'
# (used by <ns:baz/>) remain.
2906 xml = _bytes(
2907 '<foo xmlns="F" xmlns:x="x">'
2908 '<bar xmlns:ns="NS" xmlns:b="b" xmlns="B">'
2909 '<ns:baz/>'
2910 '</bar></foo>'
2911 )
2912 root = self.etree.fromstring(xml)
2913 self.assertEqual(xml, self.etree.tostring(root))
2914 self.etree.cleanup_namespaces(root)
2915 self.assertEqual(
2916 _bytes('<foo xmlns="F"><bar xmlns:ns="NS" xmlns="B"><ns:baz/></bar></foo>'),
2917 self.etree.tostring(root))
2918
# Test body: cleanup_namespaces() treats a prefix as used when only an
# attribute refers to it.  'a' (used by a:test) survives on the root, while
# the unused 'x' and 'b' declarations are removed.
2920 xml = _bytes(
2921 '<foo xmlns="F" xmlns:x="X" xmlns:a="A">'
2922 '<bar xmlns:ns="NS" xmlns:b="b" xmlns="B">'
2923 '<ns:baz a:test="attr"/>'
2924 '</bar></foo>'
2925 )
2926 root = self.etree.fromstring(xml)
2927 self.assertEqual(xml, self.etree.tostring(root))
2928 self.etree.cleanup_namespaces(root)
2929 self.assertEqual(
2930 _bytes('<foo xmlns="F" xmlns:a="A">'
2931 '<bar xmlns:ns="NS" xmlns="B">'
2932 '<ns:baz a:test="attr"/>'
2933 '</bar></foo>'),
2934 self.etree.tostring(root))
2935
# Test body: cleanup_namespaces() with 100 declarations (n0..n99) on a
# single root element.  Only the two prefixes in use, n12 (root) and
# n68 (child), are expected to remain after cleanup.
2937 xml = ('<n12:foo ' +
2938 ' '.join('xmlns:n{n}="NS{n}"'.format(n=i) for i in range(100)) +
2939 '><n68:a/></n12:foo>').encode('utf8')
2940 root = self.etree.fromstring(xml)
2941 self.assertEqual(xml, self.etree.tostring(root))
2942 self.etree.cleanup_namespaces(root)
2943 self.assertEqual(
2944 b'<n12:foo xmlns:n12="NS12" xmlns:n68="NS68"><n68:a/></n12:foo>',
2945 self.etree.tostring(root))
2946
# Test body: cleanup_namespaces() on 100 nested <a> elements, each declaring
# one prefix (n0..n99), with <n64:x/> innermost.  After cleanup only the <a>
# at nesting index 64 keeps its declaration (64 bare <a> before it, 35
# after), i.e. the surviving declaration stays where it was made.
2948 xml = ('<root>' +
2949 ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2950 '<n64:x/>' + '</a>'*100 + '</root>').encode('utf8')
2951 root = self.etree.fromstring(xml)
2952 self.assertEqual(xml, self.etree.tostring(root))
2953 self.etree.cleanup_namespaces(root)
2954 self.assertEqual(
2955 b'<root>' + b'<a>'*64 + b'<a xmlns:n64="NS64">' + b'<a>'*35 +
2956 b'<n64:x/>' + b'</a>'*100 + b'</root>',
2957 self.etree.tostring(root))
2958
# Test body: cleanup_namespaces(root, top_nsmap={'n64': 'NS64'}) on the
# deeply nested document.  The n64 declaration is expected on the root
# element instead of on the nested <a>; all 100 <a> elements end up without
# declarations, and xmlns:a stays on the element whose attribute uses it.
2960 xml = ('<root>' +
2961 ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2962 '<n64:x xmlns:a="A" a:attr="X"/>' +
2963 '</a>'*100 +
2964 '</root>').encode('utf8')
2965 root = self.etree.fromstring(xml)
2966 self.assertEqual(xml, self.etree.tostring(root))
2967 self.etree.cleanup_namespaces(root, top_nsmap={'n64': 'NS64'})
2968 self.assertEqual(
2969 b'<root xmlns:n64="NS64">' + b'<a>'*100 +
2970 b'<n64:x xmlns:a="A" a:attr="X"/>' + b'</a>'*100 + b'</root>',
2971 self.etree.tostring(root))
2972
# Test body: cleanup_namespaces(root, keep_ns_prefixes=['foo']).  The 'foo'
# prefix is referenced only inside text content ('foo:bar'), not by any tag
# or attribute, yet its declaration must be kept; the unused prefixes
# 'unused1', 'no' and 'unused2' are removed.
2974 xml = ('<root xmlns:n64="NS64" xmlns:foo="FOO" xmlns:unused1="UNUSED" xmlns:no="NO">'
2975 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2976 '<foo>foo:bar</foo>'
2977 '</root>').encode('utf8')
2978 root = self.etree.fromstring(xml)
2979 self.assertEqual(xml, self.etree.tostring(root))
2980 self.etree.cleanup_namespaces(root, keep_ns_prefixes=['foo'])
2981 self.assertEqual(
2982 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
2983 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
2984 b'<foo>foo:bar</foo>'
2985 b'</root>',
2986 self.etree.tostring(root))
2987
# Test body: cleanup_namespaces() with both top_nsmap and keep_ns_prefixes.
# 'foo' is declared on <sub> in the input; in the expected output its
# declaration sits on the root and <sub> has none.  'unused1' is listed in
# top_nsmap but not in keep_ns_prefixes and is absent from the output.
2989 xml = ('<root xmlns:n64="NS64" xmlns:unused1="UNUSED" xmlns:no="NO">'
2990 '<sub xmlns:foo="FOO">'
2991 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2992 '<foo>foo:bar</foo>'
2993 '</sub>'
2994 '</root>').encode('utf8')
2995 root = self.etree.fromstring(xml)
2996 self.assertEqual(xml, self.etree.tostring(root))
2997 self.etree.cleanup_namespaces(
2998 root,
2999 top_nsmap={'foo': 'FOO', 'unused1': 'UNUSED'},
3000 keep_ns_prefixes=['foo'])
3001 self.assertEqual(
3002 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
3003 b'<sub>'
3004 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
3005 b'<foo>foo:bar</foo>'
3006 b'</sub>'
3007 b'</root>',
3008 self.etree.tostring(root))
3009
# Test body: the .nsmap property of a freshly created element equals the
# nsmap dict that was passed to Element().
3011 etree = self.etree
3012
3013 r = {None: 'http://ns.infrae.com/foo',
3014 'hoi': 'http://ns.infrae.com/hoi'}
3015 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
3016 self.assertEqual(
3017 r,
3018 e.nsmap)
3019
# Test body: a SubElement's .nsmap is the parent's mapping updated with its
# own: the default namespace is overridden, 'hoi' is inherited and 'top' is
# added.  The parent's own nsmap is unaffected.
# Note: the local name `re` shadows the `re` module imported at the top of
# the file for the duration of this block.
3021 etree = self.etree
3022
3023 re = {None: 'http://ns.infrae.com/foo',
3024 'hoi': 'http://ns.infrae.com/hoi'}
3025 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=re)
3026
3027 rs = {None: 'http://ns.infrae.com/honk',
3028 'top': 'http://ns.infrae.com/top'}
3029 s = etree.SubElement(e, '{http://ns.infrae.com/honk}bar', nsmap=rs)
3030
3031 r = re.copy()
3032 r.update(rs)
3033 self.assertEqual(re, e.nsmap)
3034 self.assertEqual(r, s.nsmap)
3035
# Test body: parsing a prefixed tag with the HTML parser.  The element is
# located by its local name 'page-description', and its nsmap is expected to
# map the undeclared prefix 'hha' to None.
3037 etree = self.etree
3038 el = etree.HTML('<hha:page-description>aa</hha:page-description>').find('.//page-description')
3039 self.assertEqual({'hha': None}, el.nsmap)
3040
# Test body: getchildren() returns the direct children as a list ([b, c] for
# the root, [d] for b, [] for the leaf d).  The tree a -> (b -> d, c -> e)
# is first checked via its c14n serialisation.
3042 Element = self.etree.Element
3043 SubElement = self.etree.SubElement
3044
3045 a = Element('a')
3046 b = SubElement(a, 'b')
3047 c = SubElement(a, 'c')
3048 d = SubElement(b, 'd')
3049 e = SubElement(c, 'e')
3050 self.assertEqual(
3051 _bytes('<a><b><d></d></b><c><e></e></c></a>'),
3052 self.etree.tostring(a, method="c14n"))
3053 self.assertEqual(
3054 [b, c],
3055 a.getchildren())
3056 self.assertEqual(
3057 [d],
3058 b.getchildren())
3059 self.assertEqual(
3060 [],
3061 d.getchildren())
3062
# Test body: getiterator() without arguments yields the start element
# followed by its descendants in document order ([a, b, d, c, e]); on a leaf
# it yields just the leaf.
3064 Element = self.etree.Element
3065 SubElement = self.etree.SubElement
3066
3067 a = Element('a')
3068 b = SubElement(a, 'b')
3069 c = SubElement(a, 'c')
3070 d = SubElement(b, 'd')
3071 e = SubElement(c, 'e')
3072
3073 self.assertEqual(
3074 [a, b, d, c, e],
3075 list(a.getiterator()))
3076 self.assertEqual(
3077 [d],
3078 list(d.getiterator()))
3079
# Test body: getiterator() with a tag that matches nothing ('none') yields
# an empty sequence from both the root and a leaf, while the unfiltered call
# on the leaf still yields the leaf itself.
3081 Element = self.etree.Element
3082 SubElement = self.etree.SubElement
3083
3084 a = Element('a')
3085 b = SubElement(a, 'b')
3086 c = SubElement(a, 'c')
3087 d = SubElement(b, 'd')
3088 e = SubElement(c, 'e')
3089
3090 self.assertEqual(
3091 [],
3092 list(a.getiterator('none')))
3093 self.assertEqual(
3094 [],
3095 list(e.getiterator('none')))
3096 self.assertEqual(
3097 [e],
3098 list(e.getiterator()))
3099
# Test body: getiterator('a') includes the start element when it matches.
# Initially only the root matches; after a second 'a' (a2) is added below e,
# the root reports [a, a2] and the subtree at c reports [a2].
3101 Element = self.etree.Element
3102 SubElement = self.etree.SubElement
3103
3104 a = Element('a')
3105 b = SubElement(a, 'b')
3106 c = SubElement(a, 'c')
3107 d = SubElement(b, 'd')
3108 e = SubElement(c, 'e')
3109
3110 self.assertEqual(
3111 [a],
3112 list(a.getiterator('a')))
3113 a2 = SubElement(e, 'a')
3114 self.assertEqual(
3115 [a, a2],
3116 list(a.getiterator('a')))
3117 self.assertEqual(
3118 [a2],
3119 list(c.getiterator('a')))
3120
# Test body: getiterator('*') matches every element, giving the same
# document-order list as the unfiltered call: [a, b, d, c, e].
3122 Element = self.etree.Element
3123 SubElement = self.etree.SubElement
3124
3125 a = Element('a')
3126 b = SubElement(a, 'b')
3127 c = SubElement(a, 'c')
3128 d = SubElement(b, 'd')
3129 e = SubElement(c, 'e')
3130
3131 self.assertEqual(
3132 [a, b, d, c, e],
3133 list(a.getiterator('*')))
3134
# Test body: passing the Comment factory as the filter makes getiterator()
# yield only comment nodes, in document order: the comment inside b comes
# before the one appended to a afterwards, and iterating from b only sees
# b's own comment.
3136 Element = self.etree.Element
3137 Comment = self.etree.Comment
3138 SubElement = self.etree.SubElement
3139
3140 a = Element('a')
3141 b = SubElement(a, 'b')
3142 comment_b = Comment("TEST-b")
3143 b.append(comment_b)
3144
3145 self.assertEqual(
3146 [comment_b],
3147 list(a.getiterator(Comment)))
3148
3149 comment_a = Comment("TEST-a")
3150 a.append(comment_a)
3151
3152 self.assertEqual(
3153 [comment_b, comment_a],
3154 list(a.getiterator(Comment)))
3155
3156 self.assertEqual(
3157 [comment_b],
3158 list(b.getiterator(Comment)))
3159
# Test body: passing the ProcessingInstruction factory as the filter makes
# getiterator() yield only processing instructions, in document order
# ([pi_b, pi_a] from the root, [pi_b] from b).
3161 Element = self.etree.Element
3162 PI = self.etree.ProcessingInstruction
3163 SubElement = self.etree.SubElement
3164
3165 a = Element('a')
3166 b = SubElement(a, 'b')
3167 pi_b = PI("TEST-b")
3168 b.append(pi_b)
3169
3170 self.assertEqual(
3171 [pi_b],
3172 list(a.getiterator(PI)))
3173
3174 pi_a = PI("TEST-a")
3175 a.append(pi_a)
3176
3177 self.assertEqual(
3178 [pi_b, pi_a],
3179 list(a.getiterator(PI)))
3180
3181 self.assertEqual(
3182 [pi_b],
3183 list(b.getiterator(PI)))
3184
# Test body: same tree as the plain getiterator() check, but every element
# also carries .text and (except the root) .tail.  The iteration result is
# still the element list [a, b, d, c, e].  The trailing check on the leaf d
# is commented out in the source.
3186 Element = self.etree.Element
3187 SubElement = self.etree.SubElement
3188
3189 a = Element('a')
3190 a.text = 'a'
3191 b = SubElement(a, 'b')
3192 b.text = 'b'
3193 b.tail = 'b1'
3194 c = SubElement(a, 'c')
3195 c.text = 'c'
3196 c.tail = 'c1'
3197 d = SubElement(b, 'd')
3198 d.text = 'd'
3199 d.tail = 'd1'
3200 e = SubElement(c, 'e')
3201 e.text = 'e'
3202 e.tail = 'e1'
3203
3204 self.assertEqual(
3205 [a, b, d, c, e],
3206 list(a.getiterator()))
3207 #self.assertEqual(
3208 # [d],
3209 # list(d.getiterator()))
3210
# Test body: tag-filtered getiterator('a') on a tree whose elements carry
# text and tail content.  Results match the text-free case: [a] at first,
# [a, a2] once a2 is added below e, and [a2] when iterating from e.
3212 Element = self.etree.Element
3213 SubElement = self.etree.SubElement
3214
3215 a = Element('a')
3216 a.text = 'a'
3217 b = SubElement(a, 'b')
3218 b.text = 'b'
3219 b.tail = 'b1'
3220 c = SubElement(a, 'c')
3221 c.text = 'c'
3222 c.tail = 'c1'
3223 d = SubElement(b, 'd')
3224 d.text = 'd'
3225 d.tail = 'd1'
3226 e = SubElement(c, 'e')
3227 e.text = 'e'
3228 e.tail = 'e1'
3229
3230 self.assertEqual(
3231 [a],
3232 list(a.getiterator('a')))
3233 a2 = SubElement(e, 'a')
3234 self.assertEqual(
3235 [a, a2],
3236 list(a.getiterator('a')))
3237 self.assertEqual(
3238 [a2],
3239 list(e.getiterator('a')))
3240
# Test body: getiterator() with several positional tag arguments.  Results
# follow document order regardless of argument order (('f', 'a') -> [a, f]),
# a '*' among the tags matches everything in the subtree, and passing two
# empty tuples as filters yields nothing.
3242 Element = self.etree.Element
3243 SubElement = self.etree.SubElement
3244
3245 a = Element('a')
3246 b = SubElement(a, 'b')
3247 c = SubElement(a, 'c')
3248 d = SubElement(b, 'd')
3249 e = SubElement(c, 'e')
3250 f = SubElement(c, 'f')
3251
3252 self.assertEqual(
3253 [a, b],
3254 list(a.getiterator('a', 'b')))
3255 self.assertEqual(
3256 [],
3257 list(a.getiterator('x', 'y')))
3258 self.assertEqual(
3259 [a, f],
3260 list(a.getiterator('f', 'a')))
3261 self.assertEqual(
3262 [c, e, f],
3263 list(c.getiterator('c', '*', 'a')))
3264 self.assertEqual(
3265 [],
3266 list(a.getiterator( (), () )))
3267
# Test body: getiterator() with the tags supplied as one tuple argument.
# Expectations mirror the positional-argument variant: document order, '*'
# matches all, and an empty tuple yields nothing.
3269 Element = self.etree.Element
3270 SubElement = self.etree.SubElement
3271
3272 a = Element('a')
3273 b = SubElement(a, 'b')
3274 c = SubElement(a, 'c')
3275 d = SubElement(b, 'd')
3276 e = SubElement(c, 'e')
3277 f = SubElement(c, 'f')
3278
3279 self.assertEqual(
3280 [a, b],
3281 list(a.getiterator( ('a', 'b') )))
3282 self.assertEqual(
3283 [],
3284 list(a.getiterator( ('x', 'y') )))
3285 self.assertEqual(
3286 [a, f],
3287 list(a.getiterator( ('f', 'a') )))
3288 self.assertEqual(
3289 [c, e, f],
3290 list(c.getiterator( ('c', '*', 'a') )))
3291 self.assertEqual(
3292 [],
3293 list(a.getiterator( () )))
3294
# Test body: namespace-aware tag filters in getiterator().
#   '{a}a'  - exact namespace + local name;
#   '{b}a', 'a' - no match (wrong / missing namespace);
#   '*'     - every element;
#   '{b}*'  - any local name within namespace 'b';
#   'g', '{}g', '{}*' - elements without a namespace (only g here).
3296 Element = self.etree.Element
3297 SubElement = self.etree.SubElement
3298
3299 a = Element('{a}a')
3300 b = SubElement(a, '{a}b')
3301 c = SubElement(a, '{a}c')
3302 d = SubElement(b, '{b}d')
3303 e = SubElement(c, '{a}e')
3304 f = SubElement(c, '{b}f')
3305 g = SubElement(c, 'g')
3306
3307 self.assertEqual(
3308 [a],
3309 list(a.getiterator('{a}a')))
3310 self.assertEqual(
3311 [],
3312 list(a.getiterator('{b}a')))
3313 self.assertEqual(
3314 [],
3315 list(a.getiterator('a')))
3316 self.assertEqual(
3317 [a,b,d,c,e,f,g],
3318 list(a.getiterator('*')))
3319 self.assertEqual(
3320 [f],
3321 list(c.getiterator('{b}*')))
3322 self.assertEqual(
3323 [d, f],
3324 list(a.getiterator('{b}*')))
3325 self.assertEqual(
3326 [g],
3327 list(a.getiterator('g')))
3328 self.assertEqual(
3329 [g],
3330 list(a.getiterator('{}g')))
3331 self.assertEqual(
3332 [g],
3333 list(a.getiterator('{}*')))
3334
# Test body: the '{*}name' filter matches a local name in any namespace,
# including no namespace ('{*}b' -> [b, c, d]).  '{*}*' matches every
# element; the Comment appended to the root is not part of that result.
3336 Element = self.etree.Element
3337 Comment = self.etree.Comment
3338 SubElement = self.etree.SubElement
3339
3340 a = Element('{a}a')
3341 b = SubElement(a, '{nsA}b')
3342 c = SubElement(b, '{nsB}b')
3343 d = SubElement(a, 'b')
3344 e = SubElement(a, '{nsA}e')
3345 f = SubElement(e, '{nsB}e')
3346 g = SubElement(e, 'e')
3347 a.append(Comment('test'))
3348
3349 self.assertEqual(
3350 [b, c, d],
3351 list(a.getiterator('{*}b')))
3352 self.assertEqual(
3353 [e, f, g],
3354 list(a.getiterator('{*}e')))
3355 self.assertEqual(
3356 [a, b, c, d, e, f, g],
3357 list(a.getiterator('{*}*')))
3358
# Test body: passing the Entity factory as the filter makes getiterator()
# yield only entity reference nodes, in document order ([entity_b, entity_a]
# from the root, [entity_b] from b).
3360 Element = self.etree.Element
3361 Entity = self.etree.Entity
3362 SubElement = self.etree.SubElement
3363
3364 a = Element('a')
3365 b = SubElement(a, 'b')
3366 entity_b = Entity("TEST-b")
3367 b.append(entity_b)
3368
3369 self.assertEqual(
3370 [entity_b],
3371 list(a.getiterator(Entity)))
3372
3373 entity_a = Entity("TEST-a")
3374 a.append(entity_a)
3375
3376 self.assertEqual(
3377 [entity_b, entity_a],
3378 list(a.getiterator(Entity)))
3379
3380 self.assertEqual(
3381 [entity_b],
3382 list(b.getiterator(Entity)))
3383
# Test body: passing the Element factory as the filter restricts
# getiterator() to real elements; the comment and processing instruction
# appended between b and c are skipped, leaving [a, b, c].
3385 Element = self.etree.Element
3386 Comment = self.etree.Comment
3387 PI = self.etree.PI
3388 SubElement = self.etree.SubElement
3389
3390 a = Element('a')
3391 b = SubElement(a, 'b')
3392 a.append(Comment("test"))
3393 a.append(PI("pi", "content"))
3394 c = SubElement(a, 'c')
3395
3396 self.assertEqual(
3397 [a, b, c],
3398 list(a.getiterator(Element)))
3399
# Test body: with the '*' filter, the expected result here is elements only
# ([a, b, c]); the comment and processing instruction among a's children are
# not yielded.  The source comment below notes that ElementTree behaves
# differently.
3401 # ElementTree iterates over everything here
3402 Element = self.etree.Element
3403 Comment = self.etree.Comment
3404 PI = self.etree.PI
3405 SubElement = self.etree.SubElement
3406
3407 a = Element('a')
3408 b = SubElement(a, 'b')
3409 a.append(Comment("test"))
3410 a.append(PI("pi", "content"))
3411 c = SubElement(a, 'c')
3412
3413 self.assertEqual(
3414 [a, b, c],
3415 list(a.getiterator('*')))
3416
# Test body: getiterator() called on an ElementTree wrapping the root yields
# the root and all descendants in document order, [a, b, d, c, e].
3418 Element = self.etree.Element
3419 SubElement = self.etree.SubElement
3420 ElementTree = self.etree.ElementTree
3421
3422 a = Element('a')
3423 b = SubElement(a, 'b')
3424 c = SubElement(a, 'c')
3425 d = SubElement(b, 'd')
3426 e = SubElement(c, 'e')
3427 t = ElementTree(element=a)
3428
3429 self.assertEqual(
3430 [a, b, d, c, e],
3431 list(t.getiterator()))
3432
# Test body: tag-filtered getiterator('a') on an ElementTree.  The tree
# reflects later modifications: after a2 is added below e, the same tree
# object reports [a, a2].
3434 Element = self.etree.Element
3435 SubElement = self.etree.SubElement
3436 ElementTree = self.etree.ElementTree
3437 a = Element('a')
3438 b = SubElement(a, 'b')
3439 c = SubElement(a, 'c')
3440 d = SubElement(b, 'd')
3441 e = SubElement(c, 'e')
3442 t = ElementTree(element=a)
3443
3444 self.assertEqual(
3445 [a],
3446 list(t.getiterator('a')))
3447 a2 = SubElement(e, 'a')
3448 self.assertEqual(
3449 [a, a2],
3450 list(t.getiterator('a')))
3451
# Test body: ElementTree.getelementpath() returns a path relative to the
# tree's root: '.' for the root itself and indexed steps such as 'c/d[1]'
# when same-named siblings exist.  The path must round-trip through
# tree.find().  For a tree rooted at b, elements outside b's subtree
# (a, c, d2) must raise ValueError.
# Note: this block uses the module-level `etree`, not `self.etree`.
3453 a = etree.Element("a")
3454 b = etree.SubElement(a, "b")
3455 c = etree.SubElement(a, "c")
3456 d1 = etree.SubElement(c, "d")
3457 d2 = etree.SubElement(c, "d")
3458 c.text = d1.text = 'TEXT'
3459
3460 tree = etree.ElementTree(a)
3461 self.assertEqual('.', tree.getelementpath(a))
3462 self.assertEqual('c/d[1]', tree.getelementpath(d1))
3463 self.assertEqual('c/d[2]', tree.getelementpath(d2))
3464
3465 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3466 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3467
3468 tree = etree.ElementTree(c)
3469 self.assertEqual('.', tree.getelementpath(c))
3470 self.assertEqual('d[2]', tree.getelementpath(d2))
3471 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3472
3473 tree = etree.ElementTree(b) # not a parent of a/c/d1/d2
3474 self.assertEqual('.', tree.getelementpath(b))
3475 self.assertRaises(ValueError, tree.getelementpath, a)
3476 self.assertRaises(ValueError, tree.getelementpath, c)
3477 self.assertRaises(ValueError, tree.getelementpath, d2)
3478
# Test body: getelementpath() with namespaced tags.  Path steps use the
# '{uri}name' form, and the [n] index counts only siblings with the same
# qualified name: the two {http://ns1/}d elements get [1] and [2], while the
# single {http://ns2/}d between them gets no index.  Every path must
# round-trip through tree.find(); for a tree rooted at b, d1/d2 raise
# ValueError.
3480 a = etree.Element("{http://ns1/}a")
3481 b = etree.SubElement(a, "{http://ns1/}b")
3482 c = etree.SubElement(a, "{http://ns1/}c")
3483 d1 = etree.SubElement(c, "{http://ns1/}d")
3484 d2 = etree.SubElement(c, "{http://ns2/}d")
3485 d3 = etree.SubElement(c, "{http://ns1/}d")
3486
3487 tree = etree.ElementTree(a)
3488 self.assertEqual('.', tree.getelementpath(a))
3489 self.assertEqual('{http://ns1/}c/{http://ns1/}d[1]',
3490 tree.getelementpath(d1))
3491 self.assertEqual('{http://ns1/}c/{http://ns2/}d',
3492 tree.getelementpath(d2))
3493 self.assertEqual('{http://ns1/}c/{http://ns1/}d[2]',
3494 tree.getelementpath(d3))
3495
3496 self.assertEqual(a, tree.find(tree.getelementpath(a)))
3497 self.assertEqual(b, tree.find(tree.getelementpath(b)))
3498 self.assertEqual(c, tree.find(tree.getelementpath(c)))
3499 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3500 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3501 self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
3502
3503 tree = etree.ElementTree(c)
3504 self.assertEqual('{http://ns1/}d[1]', tree.getelementpath(d1))
3505 self.assertEqual('{http://ns2/}d', tree.getelementpath(d2))
3506 self.assertEqual('{http://ns1/}d[2]', tree.getelementpath(d3))
3507 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3508 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3509 self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
3510
3511 tree = etree.ElementTree(b) # not a parent of d1/d2
3512 self.assertRaises(ValueError, tree.getelementpath, d1)
3513 self.assertRaises(ValueError, tree.getelementpath, d2)
3514
# Test body: ElementTree.iter() accepts QName objects as tag filters and
# treats them like the equivalent strings: QName("b") == "b" and
# QName("X", "b") == "{X}b", also when several filters are combined.  The
# combined filter yields matches in document order.
3516 XML = self.etree.XML
3517 ElementTree = self.etree.ElementTree
3518 QName = self.etree.QName
3519 tree = ElementTree(XML(
3520 _bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>')))
3521 self.assertEqual(
3522 list(tree.iter(QName("b"))),
3523 list(tree.iter("b")),
3524 )
3525 self.assertEqual(
3526 list(tree.iter(QName("X", "b"))),
3527 list(tree.iter("{X}b")),
3528 )
3529
3530 self.assertEqual(
3531 [e.tag for e in tree.iter(QName("X", "b"), QName("b"))],
3532 ['{X}b', 'b', '{X}b', 'b', 'b']
3533 )
3534 self.assertEqual(
3535 list(tree.iter(QName("X", "b"), QName("b"))),
3536 list(tree.iter("{X}b", "b"))
3537 )
3538
# ElementTree.find() with a QName returns the <c> that is a direct child
# of the root (index 2), not the one nested inside <b>.
doc = self.etree.ElementTree(
    self.etree.XML(_bytes('<a><b><c/></b><b/><c><b/></c></a>')))
found = doc.find(self.etree.QName("c"))
self.assertEqual(found, doc.getroot()[2])
3545
# ElementTree.findall() with a QName only matches the single top-level <c>.
doc = self.etree.ElementTree(
    self.etree.XML(_bytes('<a><b><c/></b><b/><c><b/></c></a>')))
matches = list(doc.findall(self.etree.QName("c")))
self.assertEqual(len(matches), 1)
3552
# findall() with plain vs. namespaced QNames counts direct children only.
QName = self.etree.QName
doc = self.etree.ElementTree(self.etree.XML(_bytes(
    '<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>')))
for qname, expected_count in [(QName("b"), 2), (QName("X", "b"), 1)]:
    self.assertEqual(len(list(doc.findall(qname))), expected_count)
3561
# Descendant searches with Clark-notation tags and a namespace wildcard.
root = self.etree.XML(_bytes(
    '<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>'))
for path, expected_count in [(".//{X}b", 2), (".//{X}*", 2), (".//b", 3)]:
    self.assertEqual(len(root.findall(path)), expected_count)
3568
# The same prefixed path must be resolved against whichever prefix mapping
# is passed in; the unprefixed path is unaffected by the mapping.
root = self.etree.XML(_bytes(
    '<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
scenarios = [
    ({'xx': 'X'}, [(".//xx:b", 2), (".//xx:*", 2), (".//b", 2)]),
    ({'xx': 'Y'}, [(".//xx:b", 1), (".//xx:*", 1), (".//b", 2)]),
]
for nsmap, cases in scenarios:
    for path, expected_count in cases:
        self.assertEqual(
            len(root.findall(path, namespaces=nsmap)), expected_count)
3580
# A default namespace can be supplied under the prefix None or ''.
root = self.etree.XML(_bytes(
    '<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
self.assertEqual(len(root.findall(".//xx:b", namespaces={'xx': 'X'})), 2)
# With namespace Y as the default, the unprefixed "b" matches exactly one
# element for either spelling of the empty prefix.
for empty_prefix in (None, ''):
    nsmap = {'xx': 'X', empty_prefix: 'Y'}
    self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)
3590
# Malformed path expressions must be rejected with SyntaxError.
root = self.etree.XML(_bytes('<a><b><c/></b><b/><c><b/><b/></c><b/></a>'))
malformed_paths = (
    '',
    '//',  # absolute path on Element
    './//',
)
for path in malformed_paths:
    self.assertRaises(SyntaxError, root.findall, path)
3597
# Element.index() with optional start/stop bounds, including negative ones.
parent = self.etree.Element('foo')
for n in range(10):
    self.etree.SubElement(parent, 'a%s' % n)

# Without bounds every child is found at its own position.
for position, child in enumerate(parent):
    self.assertEqual(position, parent.index(child))

self.assertEqual(3, parent.index(parent[3], 3))

# Child outside the requested window -> ValueError.
for child_pos, bounds in [(3, (4,)), (3, (0, 2)), (8, (0, -3)), (8, (-5, -3))]:
    self.assertRaises(ValueError, parent.index, parent[child_pos], *bounds)

# Negative bounds count from the end; a start below -len is accepted.
for child_pos, bounds in [(8, (0, -1)), (8, (-12, -1)), (0, (-12, -1))]:
    self.assertEqual(child_pos, parent.index(parent[child_pos], *bounds))
3623
# Element.replace() with a replacement that is already a child of the same
# parent: the replacement is moved and keeps its own text and tail.
parent = self.etree.Element('foo')
for n in range(10):
    child = self.etree.SubElement(parent, 'a%s' % n)
    child.text = "text%d" % n
    child.tail = "tail%d" % n

first, second, third = parent[0], parent[1], parent[2]

parent.replace(parent[0], parent[1])
self.assertEqual(9, len(parent))
self.assertEqual(second, parent[0])
self.assertEqual(second.text, "text1")
self.assertEqual(second.tail, "tail1")
# The removed element keeps its tail as well.
self.assertEqual(first.tail, "tail0")
self.assertEqual(third, parent[1])

# Now move the (new) first child to the last position.
parent.replace(parent[-1], parent[0])
self.assertEqual(second, parent[-1])
self.assertEqual(second.text, "text1")
self.assertEqual(second.tail, "tail1")
self.assertEqual(third, parent[0])
3659
# Element.replace() with a freshly created element: it takes over the slot
# and keeps its own text and tail; the following sibling is untouched.
parent = self.etree.Element('foo')
for n in range(10):
    self.etree.SubElement(parent, 'a%s' % n)

replacement = self.etree.Element("test")
replacement.text = "TESTTEXT"
replacement.tail = "TESTTAIL"
second = parent[1]

parent.replace(parent[0], replacement)

self.assertEqual(replacement, parent[0])
self.assertEqual("TESTTEXT", parent[0].text)
self.assertEqual("TESTTAIL", parent[0].tail)
self.assertEqual(second, parent[1])
3681
# Assigning to the full slice, then to the full reversed slice.
Element = self.etree.Element
parent = Element('a')
e, f, g = Element('e'), Element('f'), Element('g')

parent[:] = [e, f, g]
self.assertEqual([e, f, g], list(parent))

# A step of -1 stores the new items back to front.
parent[::-1] = [e, f, g]
self.assertEqual([g, f, e], list(parent))
3701
# Extended slice assignment with a positive step replaces every second child.
Element = self.etree.Element
parent = Element('a')
b, _c, d, _e = [self.etree.SubElement(parent, tag) for tag in ('b', 'c', 'd', 'e')]
x, y = Element('x'), Element('y')

parent[1::2] = [x, y]
self.assertEqual([b, x, d, y], list(parent))
3719
# Extended slice assignment a[1::-1]: positions 1 and 0 are replaced, in
# that order, so the new items end up reversed at the front.
Element = self.etree.Element
parent = Element('a')
_b, _c, d, e = [self.etree.SubElement(parent, tag) for tag in ('b', 'c', 'd', 'e')]
x, y = Element('x'), Element('y')

parent[1::-1] = [x, y]
self.assertEqual([y, x, d, e], list(parent))
3737
# Extended slice assignment a[::-2]: positions 3 and 1 are replaced, in
# that order.
Element = self.etree.Element
parent = Element('a')
b, _c, d, _e = [self.etree.SubElement(parent, tag) for tag in ('b', 'c', 'd', 'e')]
x, y = Element('x'), Element('y')

parent[::-2] = [x, y]
self.assertEqual([b, y, d, x], list(parent))
3755
# Extended slice assignment with the wrong number of items must fail with
# ValueError and leave the children untouched.
#
# The former "try: slice / except NameError" guard was dropped: slice() is
# a builtin on every Python version this file can run on, so the guard
# could only ever hide the test, never protect it.
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('a')
b = SubElement(a, 'b')
c = SubElement(a, 'c')
d = SubElement(a, 'd')
e = SubElement(a, 'e')

x = Element('x')
y = Element('y')
z = Element('z')

# a[1::2] addresses two children, but three replacements are offered.
self.assertRaises(
    ValueError,
    operator.setitem, a, slice(1, None, 2), [x, y, z])

self.assertEqual(
    [b, c, d, e],
    list(a))
3782
3784 XML = self.etree.XML
3785 root = XML(_bytes('''<?xml version="1.0"?>
3786 <root><test>
3787
3788 <bla/></test>
3789 </root>
3790 '''))
3791
3792 self.assertEqual(
3793 [2, 2, 4],
3794 [ el.sourceline for el in root.getiterator() ])
3795
# Source lines beyond 65535: the expected values below show exact numbers
# for libxml2 >= 2.9 and a value capped at 65535 for older versions.
many_newlines = '\n' * 65536
document = (
    '<?xml version="1.0"?>\n'
    '<root>' + many_newlines +
    '<p>' + many_newlines + '</p>\n' +
    '<br/>\n'
    '</root>')
root = self.etree.XML(_bytes(document))

if self.etree.LIBXML_VERSION < (2, 9):
    expected = [2, 65535, 65535]
else:
    expected = [2, 131074, 131076]

self.assertEqual(expected, [el.sourceline for el in root.iter()])
3811
# Elements parsed from a file report their source lines in document order.
parse = self.etree.parse
tree = parse(fileInTestDir('include/test_xinclude.xml'))

# iter() replaces the deprecated getiterator(); both walk the tree in
# document order.
self.assertEqual(
    [1, 2, 3],
    [el.sourceline for el in tree.iter()])
3819
# With iterparse()'s default events the root (line 1) is reported last,
# after its children on lines 2 and 3.
source = fileInTestDir('include/test_xinclude.xml')
seen_lines = []
for _event, element in self.etree.iterparse(source):
    seen_lines.append(element.sourceline)

self.assertEqual([2, 3, 1], seen_lines)
3828
# With "start" events iterparse() reports elements as they open, so the
# source lines come out in ascending order.
source = fileInTestDir('include/test_xinclude.xml')
seen_lines = []
for _event, element in self.etree.iterparse(source, events=("start",)):
    seen_lines.append(element.sourceline)

self.assertEqual([1, 2, 3], seen_lines)
3838
# Elements created through the API (not parsed) have no source line.
parent = self.etree.Element("test")
self.assertEqual(None, parent.sourceline)

# Adding a child changes nothing about that, for either element.
child = self.etree.SubElement(parent, "test")
self.assertEqual(None, parent.sourceline)
self.assertEqual(None, child.sourceline)
3848
# The base_url passed to XML() is exposed as docinfo.URL.
base_url = "http://no/such/url"
root = self.etree.XML(_bytes("<root/>"), base_url=base_url)
self.assertEqual(root.getroottree().docinfo.URL, base_url)
3854
# docinfo.URL starts out as the parse-time base_url and can be reassigned.
root = self.etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
info = root.getroottree().docinfo
self.assertEqual(info.URL, "http://no/such/url")

info.URL = "https://secret/url"
self.assertEqual(info.URL, "https://secret/url")
3862
# parse() from an in-memory file object honours an explicit base_url.
base_url = "http://no/such/url"
document = self.etree.parse(BytesIO("<root/>"), base_url=base_url)
self.assertEqual(document.docinfo.URL, base_url)
3868
# An explicit base_url is what docinfo.URL reports even though the
# document was read from a file path.
base_url = "http://no/such/url"
document = self.etree.parse(
    fileInTestDir('include/test_xinclude.xml'), base_url=base_url)
self.assertEqual(document.docinfo.URL, base_url)
3875
# The base_url passed to HTML() is exposed as docinfo.URL.
base_url = "http://no/such/url"
root = self.etree.HTML(_bytes("<html/>"), base_url=base_url)
self.assertEqual(root.getroottree().docinfo.URL, base_url)
3881
# docinfo for a document with an XML declaration and a PUBLIC doctype.
pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id)
document = _bytes(
    '<?xml version="1.0" encoding="ascii"?>' +
    doctype_string +
    '<html><body></body></html>')

docinfo = self.etree.parse(BytesIO(document)).docinfo
expectations = [
    ("encoding", "ascii"),
    ("xml_version", "1.0"),
    ("public_id", pub_id),
    ("system_url", sys_id),
    ("root_name", 'html'),
    ("doctype", doctype_string),
]
for attribute, expected in expectations:
    self.assertEqual(getattr(docinfo, attribute), expected)
3899
# docinfo for a SYSTEM doctype: there is a system URL but no public ID.
sys_id = "some.dtd"
doctype_string = '<!DOCTYPE html SYSTEM "%s">' % sys_id
document = _bytes(
    '<?xml version="1.0" encoding="UTF-8"?>' +
    doctype_string +
    '<html><body></body></html>')

docinfo = self.etree.parse(BytesIO(document)).docinfo
expectations = [
    ("encoding", "UTF-8"),
    ("xml_version", "1.0"),
    ("public_id", None),
    ("system_url", sys_id),
    ("root_name", 'html'),
    ("doctype", doctype_string),
]
for attribute, expected in expectations:
    self.assertEqual(getattr(docinfo, attribute), expected)
3915
# docinfo defaults for a document without XML declaration or doctype.
document = _bytes('<html><body></body></html>')
docinfo = self.etree.parse(BytesIO(document)).docinfo
expectations = [
    ("encoding", "UTF-8"),
    ("xml_version", "1.0"),
    ("public_id", None),
    ("system_url", None),
    ("root_name", 'html'),
    ("doctype", ''),
]
for attribute, expected in expectations:
    self.assertEqual(getattr(docinfo, attribute), expected)
3927
# docinfo for a doctype that only names the root element.
document = _bytes('<!DOCTYPE root><root></root>')
docinfo = self.etree.parse(BytesIO(document)).docinfo
expectations = [
    ("encoding", "UTF-8"),
    ("xml_version", "1.0"),
    ("public_id", None),
    ("system_url", None),
    ("root_name", 'root'),
    ("doctype", '<!DOCTYPE root>'),
]
for attribute, expected in expectations:
    self.assertEqual(getattr(docinfo, attribute), expected)
3939
# A name-only doctype must survive a parse/serialise round trip unchanged.
original = _bytes('<!DOCTYPE root>\n<root/>')
document = self.etree.parse(BytesIO(original))
self.assertEqual(original, self.etree.tostring(document))
3945
# tostring(doctype=...) replaces the parsed doctype in the output.
pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
override = _bytes('<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id))

original = _bytes('<!DOCTYPE root>\n<root/>')
document = self.etree.parse(BytesIO(original))

expected = original.replace(_bytes('<!DOCTYPE root>'), override)
self.assertEqual(expected, self.etree.tostring(document, doctype=override))
3956
# Element.base starts as the document URL; assigning it creates the
# xml:base attribute.
xml_base_attr = '{http://www.w3.org/XML/1998/namespace}base'
root = self.etree.XML(_bytes("<root/>"), base_url="http://no/such/url")

self.assertEqual(root.base, "http://no/such/url")
self.assertEqual(root.get(xml_base_attr), None)

root.base = "https://secret/url"
self.assertEqual(root.base, "https://secret/url")
self.assertEqual(root.get(xml_base_attr), "https://secret/url")
3968
# Setting the xml:base attribute directly is reflected by Element.base.
xml_base_attr = '{http://www.w3.org/XML/1998/namespace}base'
root = self.etree.XML(_bytes("<root/>"), base_url="http://no/such/url")

self.assertEqual(root.base, "http://no/such/url")
self.assertEqual(root.get(xml_base_attr), None)

root.set(xml_base_attr, "https://secret/url")
self.assertEqual(root.base, "https://secret/url")
self.assertEqual(root.get(xml_base_attr), "https://secret/url")
3981
# Element.base of an HTML root reflects the base_url given to HTML().
base_url = "http://no/such/url"
root = self.etree.HTML(
    _bytes("<html><body></body></html>"), base_url=base_url)
self.assertEqual(root.base, base_url)
3987
# Without an explicit base_url, the <base href> of the HTML document
# determines Element.base.
markup = _bytes('<html><head><base href="http://no/such/url"></head></html>')
root = self.etree.HTML(markup)
self.assertEqual(root.base, "http://no/such/url")
3992
# indent() with default arguments: one level is two spaces.
#
# The expected strings spell out the two-space indentation explicitly;
# with single spaces they cannot match the default `space="  "`.
ET = self.etree

# Nothing to indent in an element without children.
elem = ET.XML("<root></root>")
ET.indent(elem)
self.assertEqual(ET.tostring(elem), b'<root/>')

elem = ET.XML("<html><body>text</body></html>")
ET.indent(elem)
self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')

# Existing whitespace-only text and tails are replaced, whatever their
# width.  NOTE(review): the exact whitespace widths of the two inputs
# below are chosen here only to make the cases distinct.
elem = ET.XML("<html> <body>text</body>  </html>")
ET.indent(elem)
self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')

elem = ET.XML("<html>  <body>text</body>   </html>")
ET.indent(elem)
self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')

# A non-whitespace tail is kept as it is.
elem = ET.XML("<html><body>text</body>tail</html>")
ET.indent(elem)
self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>tail</html>')

# Newline and tab separators between siblings count as whitespace, too.
elem = ET.XML("<html><body><p>par</p>\n<p>text</p>\t<p><br/></p></body></html>")
ET.indent(elem)
self.assertEqual(
    ET.tostring(elem),
    b'<html>\n'
    b'  <body>\n'
    b'    <p>par</p>\n'
    b'    <p>text</p>\n'
    b'    <p>\n'
    b'      <br/>\n'
    b'    </p>\n'
    b'  </body>\n'
    b'</html>'
)

# Mixed content ("pre<br/>post") is left alone.
elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
ET.indent(elem)
self.assertEqual(
    ET.tostring(elem),
    b'<html>\n'
    b'  <body>\n'
    b'    <p>pre<br/>post</p>\n'
    b'    <p>text</p>\n'
    b'  </body>\n'
    b'</html>'
)
4041
# indent() with a custom `space`: a tab, and the empty string.
ET = self.etree
markup = "<html><body><p>pre<br/>post</p><p>text</p></body></html>"

tab_indented = ET.XML(markup)
ET.indent(tab_indented, space='\t')
self.assertEqual(
    ET.tostring(tab_indented),
    b'<html>\n'
    b'\t<body>\n'
    b'\t\t<p>pre<br/>post</p>\n'
    b'\t\t<p>text</p>\n'
    b'\t</body>\n'
    b'</html>'
)

# An empty `space` still breaks lines but adds no leading whitespace.
unindented = ET.XML(markup)
ET.indent(unindented, space='')
self.assertEqual(
    ET.tostring(unindented),
    b'<html>\n'
    b'<body>\n'
    b'<p>pre<br/>post</p>\n'
    b'<p>text</p>\n'
    b'</body>\n'
    b'</html>'
)
4067
# After indent(), the distinct tail/text values are exactly the
# indentation strings of the levels in use (two spaces per level).
#
# The expected sets list each depth with its full width; with collapsed
# single spaces the members would coincide and the sets would be wrong.
ET = self.etree
elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p><p /></body></html>")
ET.indent(elem)
self.assertEqual(
    {el.tail for el in elem.iter()},
    {None, "\n", "\n  ", "\n    "}
)
self.assertEqual(
    {el.text for el in elem.iter()},
    {None, "\n  ", "\n    ", "\n      ", "par", "text"}
)
# NOTE: lxml does not reuse Python text strings across elements, so an
# identity check on the tails (one string object per distinct value) is
# deliberately not part of this test.
4085
# indent() with an explicit start `level`.
#
# The expected strings spell out the full indentation widths; collapsed
# to single spaces they cannot match.  The hand-written try/except/else
# is expressed with assertRaises.
ET = self.etree
elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")

# A negative level is rejected and must not modify the tree.
self.assertRaises(ValueError, ET.indent, elem, level=-1)
self.assertEqual(
    ET.tostring(elem),
    b"<html><body><p>pre<br/>post</p><p>text</p></body></html>"
)

# level=2 with the default two-space unit: children of the root start at
# 6 spaces, and the root's closing tag sits at 4 spaces.
ET.indent(elem, level=2)
self.assertEqual(
    ET.tostring(elem),
    b'<html>\n'
    b'      <body>\n'
    b'        <p>pre<br/>post</p>\n'
    b'        <p>text</p>\n'
    b'      </body>\n'
    b'    </html>'
)

# level=1 with a one-space unit: 2 / 3 spaces for the nested levels and
# 1 space before the root's closing tag.
elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
ET.indent(elem, level=1, space=' ')
self.assertEqual(
    ET.tostring(elem),
    b'<html>\n'
    b'  <body>\n'
    b'   <p>pre<br/>post</p>\n'
    b'   <p>text</p>\n'
    b'  </body>\n'
    b' </html>'
)
4122
# parse() must cope with a file-like object whose read() yields unicode
# strings rather than bytes.
unicode_source = LargeFileLikeUnicode()
root = self.etree.parse(unicode_source).getroot()
self.assertTrue(root.tag.endswith('root'))
4129
4131 # check that DTDs that go in also go back out
4132 xml = _bytes('''\
4133 <!DOCTYPE test SYSTEM "test.dtd" [
4134 <!ENTITY entity "tasty">
4135 <!ELEMENT test (a)>
4136 <!ELEMENT a (#PCDATA)>
4137 ]>
4138 <test><a>test-test</a></test>\
4139 ''')
4140 tree = self.etree.parse(BytesIO(xml))
4141 self.assertEqual(self.etree.tostring(tree).replace(_bytes(" "), _bytes("")),
4142 xml.replace(_bytes(" "), _bytes("")))
4143
# A NUL character is rejected in text, tail and tag names.
Element = self.etree.Element
target = Element('a')
for attribute in ("text", "tail"):
    self.assertRaises(ValueError, setattr, target, attribute, 'ha\0ho')

self.assertRaises(ValueError, Element, 'ha\0ho')
4152
# A NUL character in a unicode string is rejected in text, tail and tag
# names.
Element = self.etree.Element
target = Element('a')
for attribute in ("text", "tail"):
    self.assertRaises(ValueError, setattr, target, attribute,
                      _str('ha\0ho'))

self.assertRaises(ValueError, Element, _str('ha\0ho'))
4164
# Control characters (here \x07 and \x02) are rejected in text, tail and
# tag names.
Element = self.etree.Element
target = Element('a')
invalid_values = ('ha\x07ho', 'ha\x02ho')

for attribute in ("text", "tail"):
    for value in invalid_values:
        self.assertRaises(ValueError, setattr, target, attribute, value)

for value in invalid_values:
    self.assertRaises(ValueError, Element, value)
4177
# Control characters in unicode strings are rejected in text, tail and
# tag names.
Element = self.etree.Element
target = Element('a')
invalid_values = (_str('ha\x07ho'), _str('ha\x02ho'))

for attribute in ("text", "tail"):
    for value in invalid_values:
        self.assertRaises(ValueError, setattr, target, attribute, value)

for value in invalid_values:
    self.assertRaises(ValueError, Element, value)
4196
# Control characters are still rejected when they follow a non-ASCII
# character in the string.
Element = self.etree.Element
target = Element('a')
invalid_values = (_str('ha\u1234\x07ho'), _str('ha\u1234\x02ho'))

for attribute in ("text", "tail"):
    for value in invalid_values:
        self.assertRaises(ValueError, setattr, target, attribute, value)

for value in invalid_values:
    self.assertRaises(ValueError, Element, value)
4215
# Serialising to UTF-16 works in lxml (ElementTree fails to serialize
# this); the result is compared after canonicalisation.
parent = self.etree.Element('a')
self.etree.SubElement(parent, 'b')
self.etree.SubElement(parent, 'c')

utf16_bytes = self.etree.tostring(parent, encoding='UTF-16')
self.assertEqual(_bytes('<a><b></b><c></c></a>'),
                 canonicalize(utf16_bytes))
4229
# tostring(None) raises TypeError (ElementTree raises an AssertionError
# here instead).
tostring = self.etree.tostring
self.assertRaises(TypeError, tostring, None)
4234
# tostring() with and without pretty_print.
#
# Pretty-printed output indents each nesting level by two spaces; the
# expected string spells that out (a single space cannot match).
tostring = self.etree.tostring
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('a')
SubElement(a, 'b')
SubElement(a, 'c')

# Default and explicit pretty_print=False give the compact form.
result = tostring(a)
self.assertEqual(result, _bytes("<a><b/><c/></a>"))

result = tostring(a, pretty_print=False)
self.assertEqual(result, _bytes("<a><b/><c/></a>"))

result = tostring(a, pretty_print=True)
self.assertEqual(result, _bytes("<a>\n  <b/>\n  <c/>\n</a>\n"))
4252
# with_tail controls only the tail of the serialised element itself;
# tails of its children are always written.
tostring = self.etree.tostring
parent = self.etree.Element('a')
parent.tail = "aTAIL"
first_child = self.etree.SubElement(parent, 'b')
first_child.tail = "bTAIL"
self.etree.SubElement(parent, 'c')

with_tail = _bytes("<a><b/>bTAIL<c/></a>aTAIL")
without_tail = _bytes("<a><b/>bTAIL<c/></a>")

# The tail is included by default.
self.assertEqual(tostring(parent), with_tail)
self.assertEqual(tostring(parent, with_tail=False), without_tail)
self.assertEqual(tostring(parent, with_tail=True), with_tail)
4272
# with_tail in HTML serialisation, using "\r\n" text and tail content.
tostring = self.etree.tostring
html = self.etree.fromstring(
    '<html><body>'
    '<div><p>Some text<i>\r\n</i></p></div>\r\n'
    '</body></html>',
    parser=self.etree.HTMLParser())
self.assertEqual(html.tag, 'html')

div = html.find('.//div')
self.assertEqual(div.tail, '\r\n')

div_with_tail = _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n")
div_without_tail = _bytes("<div><p>Some text<i>\r\n</i></p></div>")

# The tail is included by default.
self.assertEqual(tostring(div, method='html'), div_with_tail)
self.assertEqual(
    tostring(div, method='html', with_tail=True), div_with_tail)
self.assertEqual(
    tostring(div, method='html', with_tail=False), div_without_tail)
4295
# docinfo.standalone is None when nothing was declared, otherwise it
# mirrors the standalone flag of the XML declaration.
XML = self.etree.XML

created = self.etree.Element("root").getroottree()
self.assertEqual(None, created.docinfo.standalone)

undeclared = XML(_bytes("<root/>")).getroottree()
self.assertEqual(None, undeclared.docinfo.standalone)

declared_yes = XML(_bytes(
    "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"
    )).getroottree()
self.assertEqual(True, declared_yes.docinfo.standalone)

declared_no = XML(_bytes(
    "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"
    )).getroottree()
self.assertEqual(False, declared_no.docinfo.standalone)
4317
# tostring(standalone=...) writes the flag into the XML declaration, and
# re-parsing that output yields the same docinfo.standalone value.
tostring = self.etree.tostring
XML = self.etree.XML
ElementTree = self.etree.ElementTree

root = XML(_bytes("<root/>"))
self.assertEqual(None, ElementTree(root).docinfo.standalone)

# No standalone argument: the declaration carries no standalone flag.
plain = tostring(root, xml_declaration=True, encoding="ASCII")
self.assertEqual(plain, _bytes(
    "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))

for flag, spelled in [(True, 'yes'), (False, 'no')]:
    serialized = tostring(root, xml_declaration=True, encoding="ASCII",
                          standalone=flag)
    self.assertEqual(serialized, _bytes(
        "<?xml version='1.0' encoding='ASCII' standalone='%s'?>\n<root/>"
        % spelled))

    reparsed = ElementTree(XML(serialized))
    self.assertEqual(flag, reparsed.docinfo.standalone)
4347
# A standalone flag present in the parsed input is not written out again
# unless tostring() is asked for it explicitly.
tostring = self.etree.tostring
root = self.etree.XML(_bytes(
    "<?xml version='1.0' encoding='UTF-8' standalone='yes'?>\n<root/>"))

self.assertEqual(True, self.etree.ElementTree(root).docinfo.standalone)

implicit = tostring(root, xml_declaration=True, encoding="ASCII")
self.assertEqual(implicit, _bytes(
    "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))

explicit = tostring(root, xml_declaration=True, encoding="ASCII",
                    standalone=True)
self.assertEqual(explicit, _bytes(
    "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
4367
# method="text" concatenates text and tails in document order and
# encodes the result with the requested encoding.
parent = self.etree.Element('a')
parent.text = "A"
parent.tail = "tail"
first_child = self.etree.SubElement(parent, 'b')
first_child.text = "B"
first_child.tail = _str("Søk på nettet")
second_child = self.etree.SubElement(parent, 'c')
second_child.text = "C"

serialized = self.etree.tostring(parent, method="text", encoding="UTF-16")

expected = _str('ABSøk på nettetCtail').encode("UTF-16")
self.assertEqual(expected, serialized)
4386
# method="text" with non-ASCII content: without an explicit encoding the
# call fails with UnicodeEncodeError, with UTF-8 it succeeds.
tostring = self.etree.tostring
parent = self.etree.Element('a')
parent.text = _str('Søk på nettetA')
parent.tail = "tail"
first_child = self.etree.SubElement(parent, 'b')
first_child.text = "B"
first_child.tail = _str('Søk på nettetB')
second_child = self.etree.SubElement(parent, 'c')
second_child.text = "C"

self.assertRaises(UnicodeEncodeError,
                  tostring, parent, method="text")

expected = _str('Søk på nettetABSøk på nettetBCtail').encode('utf-8')
self.assertEqual(
    expected,
    tostring(parent, encoding="UTF-8", method="text"))
4407
# tounicode() returns a unicode string holding the serialised tree.
parent = self.etree.Element('a')
self.etree.SubElement(parent, 'b')
self.etree.SubElement(parent, 'c')

serialized = self.etree.tounicode(parent)
self.assertTrue(isinstance(serialized, _unicode))
self.assertEqual(_bytes('<a><b></b><c></c></a>'),
                 canonicalize(serialized))
4420
# tounicode() on inner elements serialises just that subtree.
tounicode = self.etree.tounicode
SubElement = self.etree.SubElement

root = self.etree.Element('a')
leaf = SubElement(root, 'b')
branch = SubElement(root, 'c')
SubElement(branch, 'd')

for element, expected in [(leaf, '<b></b>'), (branch, '<c><d></d></c>')]:
    serialized = tounicode(element)
    self.assertTrue(isinstance(serialized, _unicode))
    self.assertEqual(_bytes(expected), canonicalize(serialized))
4436
4440
# tounicode() on an element with a tail includes that tail; either
# spelling of the empty tag is accepted.
SubElement = self.etree.SubElement

root = self.etree.Element('a')
leaf = SubElement(root, 'b')
branch = SubElement(root, 'c')
SubElement(branch, 'd')
leaf.tail = 'Foo'

serialized = self.etree.tounicode(leaf)
self.assertTrue(isinstance(serialized, _unicode))
self.assertTrue(serialized in ('<b/>Foo', '<b />Foo'))
4455
# tounicode() with and without pretty_print.
#
# Pretty-printed output indents each nesting level by two spaces; the
# expected string spells that out (a single space cannot match).
tounicode = self.etree.tounicode
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('a')
SubElement(a, 'b')
SubElement(a, 'c')

# Default and explicit pretty_print=False give the compact form.
result = tounicode(a)
self.assertEqual(result, "<a><b/><c/></a>")

result = tounicode(a, pretty_print=False)
self.assertEqual(result, "<a><b/><c/></a>")

result = tounicode(a, pretty_print=True)
self.assertEqual(result, "<a>\n  <b/>\n  <c/>\n</a>\n")
4473
4475 tostring = self.etree.tostring
4476 Element = self.etree.Element
4477 SubElement = self.etree.SubElement
4478
4479 a = Element('a')
4480 b = SubElement(a, 'b')
4481 c = SubElement(a, 'c')
4482
4483 self.assertTrue(isinstance(tostring(a, encoding=_unicode), _unicode))
4484 self.assertEqual(_bytes('<a><b></b><c></c></a>'),
4485 canonicalize(tostring(a, encoding=_unicode)))
4486
4488 tostring = self.etree.tostring
4489 Element = self.etree.Element
4490 SubElement = self.etree.SubElement
4491
4492 a = Element('a')
4493 b = SubElement(a, 'b')
4494 c = SubElement(a, 'c')
4495 d = SubElement(c, 'd')
4496 self.assertTrue(isinstance(tostring(b, encoding=_unicode), _unicode))
4497 self.assertTrue(isinstance(tostring(c, encoding=_unicode), _unicode))
4498 self.assertEqual(_bytes('<b></b>'),
4499 canonicalize(tostring(b, encoding=_unicode)))
4500 self.assertEqual(_bytes('<c><d></d></c>'),
4501 canonicalize(tostring(c, encoding=_unicode)))
4502
# tostring(None) raises TypeError for unicode output as well.
tostring = self.etree.tostring
self.assertRaises(TypeError, tostring, None, encoding=_unicode)
4507
# Unicode tostring() on an element with a tail includes that tail; either
# spelling of the empty tag is accepted.
SubElement = self.etree.SubElement

root = self.etree.Element('a')
leaf = SubElement(root, 'b')
branch = SubElement(root, 'c')
SubElement(branch, 'd')
leaf.tail = 'Foo'

serialized = self.etree.tostring(leaf, encoding=_unicode)
self.assertTrue(isinstance(serialized, _unicode))
self.assertTrue(serialized in ('<b/>Foo', '<b />Foo'))
4522
# Unicode tostring() with and without pretty_print.
#
# Pretty-printed output indents each nesting level by two spaces; the
# expected string spells that out (a single space cannot match).
tostring = self.etree.tostring
Element = self.etree.Element
SubElement = self.etree.SubElement

a = Element('a')
SubElement(a, 'b')
SubElement(a, 'c')

# Default and explicit pretty_print=False give the compact form.
result = tostring(a, encoding=_unicode)
self.assertEqual(result, "<a><b/><c/></a>")

result = tostring(a, encoding=_unicode, pretty_print=False)
self.assertEqual(result, "<a><b/><c/></a>")

result = tostring(a, encoding=_unicode, pretty_print=True)
self.assertEqual(result, "<a>\n  <b/>\n  <c/>\n</a>\n")
4540
# A child that has no Python reference of its own must survive a garbage
# collection run.
parent = etree.Element('parent')
etree.SubElement(parent, 'child')

def check_child_present():
    self.assertEqual(len(parent), 1)
    self.assertEqual(parent[0].tag, 'child')

check_child_present()

# in PyPy, GC used to kill the Python proxy instance without cleanup
gc.collect()
check_child_present()
4552
4556
4557 el1 = SubEl()
4558 el2 = SubEl()
4559 self.assertEqual('SubEl', el1.tag)
4560 self.assertEqual('SubEl', el2.tag)
4561 el1.other = el2
4562 el2.other = el1
4563
4564 del el1, el2
4565 gc.collect()
4566 # not really testing anything here, but it shouldn't crash
4567
# Two elements detached from their parent and linked as siblings: dropping
# the Python reference to the first must not free it while the second
# still refers to it.
parent = etree.Element('parent')
first = etree.SubElement(parent, 'child1')
second = etree.SubElement(parent, 'child2')

for child in (first, second):
    parent.remove(child)
first.addnext(second)
del first, child

# trigger deallocation attempt of the first sibling
second.getprevious()
# make sure it wasn't deallocated
self.assertEqual('child1', second.getprevious().tag)
4581
# Same sibling scenario, but with tails on both elements: the first
# sibling and its tail must survive losing their Python reference.
parent = etree.Element('parent')
first = etree.SubElement(parent, 'child1')
second = etree.SubElement(parent, 'child2')

for child in (first, second):
    parent.remove(child)
first.addnext(second)
first.tail = 'abc'
second.tail = 'xyz'
del first, child

# trigger deallocation attempt of the first sibling
second.getprevious()
# make sure it wasn't deallocated
previous = second.getprevious()
self.assertEqual('child1', previous.tag)
self.assertEqual('abc', second.getprevious().tail)
4598
4599 # helper methods
4600
4602 """Write out element for comparison.
4603 """
4604 ElementTree = self.etree.ElementTree
4605 f = BytesIO()
4606 tree = ElementTree(element=element)
4607 tree.write(f, encoding=encoding, compression=compression)
4608 data = f.getvalue()
4609 if compression:
4610 data = zlib.decompress(data)
4611 return canonicalize(data)
4612
4613
# An xi:include with parse="text" is replaced by the raw file content,
# which ends up in the parent's text between the surrounding whitespace.
included_file = fileInTestDir('test_broken.xml')
root = etree.XML(_bytes('''\
<doc xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:include href="%s" parse="text"/>
</doc>
''' % path2url(included_file)))

# Capture the whitespace around the include element before processing.
text_before = root.text
included_content = read_file(included_file)
tail_after = root[0].tail

self.include(etree.ElementTree(root))

expected_text = text_before + included_content + tail_after
self.assertEqual(expected_text, root.text)
4629
# XInclude processing replaces the second child of the root by the
# included <a> element.
document = etree.parse(fileInTestDir('include/test_xinclude.xml'))
self.assertNotEqual('a', document.getroot()[1].tag)

# process xincludes
self.include(document)

# check whether we find it replaced with included data
self.assertEqual('a', document.getroot()[1].tag)
4641
# A custom resolver must be consulted for the DTD, the input document and
# the included resource while parsing and processing XIncludes.
class res(etree.Resolver):
    # Content handed out for every URL that is neither the DTD nor the
    # input document.
    include_text = read_file(fileInTestDir('test.xml'))
    # Records which kinds of resource were requested (class-level dict).
    called = {}

    def resolve(self, url, id, context):
        if url.endswith(".dtd"):
            self.called["dtd"] = True
            return self.resolve_filename(
                fileInTestDir('test.dtd'), context)
        if url.endswith("test_xinclude.xml"):
            self.called["input"] = True
            return None  # delegate to default resolver
        self.called["include"] = True
        return self.resolve_string(self.include_text, context)

resolver = res()
parser = etree.XMLParser(load_dtd=True)
parser.resolvers.add(resolver)

document = etree.parse(fileInTestDir('include/test_xinclude.xml'),
                       parser=parser)
self.include(document)

# All three resource kinds must have been requested.
requested = sorted(resolver.called.items())
self.assertEqual(
    [("dtd", True), ("include", True), ("input", True)],
    requested)
4672
4674 data = textwrap.dedent('''
4675 <doc xmlns:xi="http://www.w3.org/2001/XInclude">
4676 <foo/>
4677 <xi:include href="./test.xml" />
4678 </doc>
4679 ''')
4680
# Test resolver that produces a chain of nested includes.  Each URL may be
# resolved only once (enforced by the asserts below); every answer is
# recorded in the class-level `called` dict.
4681 class Resolver(etree.Resolver):
4682 called = {}
4683
4684 def resolve(self, url, id, context):
4685 if url.endswith("test_xinclude.xml"):
4686 assert not self.called.get("input")
4687 self.called["input"] = True
4688 return None # delegate to default resolver
4689 elif url.endswith('/test5.xml'):
# end of the chain: answer with a document that has no further include
4690 assert not self.called.get("DONE")
4691 self.called["DONE"] = True
4692 return self.resolve_string('<DONE/>', context)
4693 else:
4694 _, filename = url.rsplit('/', 1)
4695 assert not self.called.get(filename)
4696 self.called[filename] = True
# `data` comes from the enclosing scope; the include target in it is
# renamed to test<N>.xml, N being the number of URLs recorded so far,
# so the returned document includes the next file of the chain
4697 next_data = data.replace(
4698 'test.xml', 'test%d.xml' % len(self.called))
4699 return self.resolve_string(next_data, context)
4700
4701 res_instance = Resolver()
4702 parser = etree.XMLParser(load_dtd=True)
4703 parser.resolvers.add(res_instance)
4704
4705 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
4706 parser=parser)
4707
4708 self.include(tree)
4709
4710 called = list(res_instance.called.items())
4711 called.sort()
4712 self.assertEqual(
4713 [("DONE", True), ("input", True), ("test.xml", True),
4714 ("test2.xml", True), ("test3.xml", True), ("test4.xml", True)],
4715 called)
4716
4717
4721
4722
4724 from lxml import ElementInclude
4725
4728
# In-memory XInclude test documents, keyed by the file name used in href.
4729 XINCLUDE = {}
4730
# Recursive1 -> Recursive2 -> Recursive3 -> Recursive1: an include cycle.
4731 XINCLUDE["Recursive1.xml"] = """\
4732 <?xml version='1.0'?>
4733 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4734 <p>The following is the source code of Recursive2.xml:</p>
4735 <xi:include href="Recursive2.xml"/>
4736 </document>
4737 """
4738
4739 XINCLUDE["Recursive2.xml"] = """\
4740 <?xml version='1.0'?>
4741 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4742 <p>The following is the source code of Recursive3.xml:</p>
4743 <xi:include href="Recursive3.xml"/>
4744 </document>
4745 """
4746
4747 XINCLUDE["Recursive3.xml"] = """\
4748 <?xml version='1.0'?>
4749 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4750 <p>The following is the source code of Recursive1.xml:</p>
4751 <xi:include href="Recursive1.xml"/>
4752 </document>
4753 """
4754
# The NonRecursive documents include the same files several times, but
# none of them includes a document that leads back to itself.
4755 XINCLUDE["NonRecursive1.xml"] = """\
4756 <?xml version='1.0'?>
4757 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4758 <p>The following is multiple times the source code of NonRecursive3.xml:</p>
4759 <xi:include href="NonRecursive3.xml"/>
4760 <xi:include href="NonRecursive3.xml"/>
4761 <p>The following is multiple times the source code of Leaf.xml:</p>
4762 <xi:include href="Leaf.xml"/>
4763 <xi:include href="Leaf.xml"/>
4764 <xi:include href="Leaf.xml"/>
4765 <p>One more time the source code of NonRecursive3.xml:</p>
4766 <xi:include href="NonRecursive3.xml"/>
4767 </document>
4768 """
4769
4770 XINCLUDE["NonRecursive2.xml"] = """\
4771 <?xml version='1.0'?>
4772 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4773 <p>The following is multiple times the source code of NonRecursive3.xml:</p>
4774 <xi:include href="NonRecursive3.xml"/>
4775 <xi:include href="NonRecursive3.xml"/>
4776 </document>
4777 """
4778
4779 XINCLUDE["NonRecursive3.xml"] = """\
4780 <?xml version='1.0'?>
4781 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4782 <p>The following is multiple times the source code of Leaf.xml:</p>
4783 <xi:include href="Leaf.xml"/>
4784 <xi:include href="Leaf.xml"/>
4785 </document>
4786 """
4787
# Leaf.xml contains no xi:include element.
4788 XINCLUDE["Leaf.xml"] = """\
4789 <?xml version='1.0'?>
4790 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4791 <p>No further includes</p>
4792 </document>
4793 """
4794
4796 try:
4797 data = textwrap.dedent(self.XINCLUDE[href])
4798 except KeyError:
4799 raise OSError("resource not found")
4800 if parse == "xml":
4801 data = etree.fromstring(data)
4802 return data
4803
4805 # Test infinitely recursive includes.
4806 document = self.xinclude_loader("Recursive1.xml").getroottree()
4807 with self.assertRaises(self.ElementInclude.FatalIncludeError) as cm:
4808 self.include(document, self.xinclude_loader)
4809 self.assertEqual(str(cm.exception),
4810 "recursive include of 'Recursive2.xml' detected")
4811
4812 # Test 'max_depth' limitation.
4813 document = self.xinclude_loader("Recursive1.xml").getroottree()
4814 with self.assertRaises(self.ElementInclude.FatalIncludeError) as cm:
4815 self.include(document, self.xinclude_loader, max_depth=None)
4816 self.assertEqual(str(cm.exception),
4817 "recursive include of 'Recursive2.xml' detected")
4818
4819 document = self.xinclude_loader("Recursive1.xml").getroottree()
4820 with self.assertRaises(self.ElementInclude.LimitedRecursiveIncludeError) as cm:
4821 self.include(document, self.xinclude_loader, max_depth=0)
4822 self.assertEqual(str(cm.exception),
4823 "maximum xinclude depth reached when including file Recursive2.xml")
4824
4825 document = self.xinclude_loader("Recursive1.xml").getroottree()
4826 with self.assertRaises(self.ElementInclude.LimitedRecursiveIncludeError) as cm:
4827 self.include(document, self.xinclude_loader, max_depth=1)
4828 self.assertEqual(str(cm.exception),
4829 "maximum xinclude depth reached when including file Recursive3.xml")
4830
4831 document = self.xinclude_loader("Recursive1.xml").getroottree()
4832 with self.assertRaises(self.ElementInclude.LimitedRecursiveIncludeError) as cm:
4833 self.include(document, self.xinclude_loader, max_depth=2)
4834 self.assertEqual(str(cm.exception),
4835 "maximum xinclude depth reached when including file Recursive1.xml")
4836
4837 document = self.xinclude_loader("Recursive1.xml").getroottree()
4838 with self.assertRaises(self.ElementInclude.FatalIncludeError) as cm:
4839 self.include(document, self.xinclude_loader, max_depth=3)
4840 self.assertEqual(str(cm.exception),
4841 "recursive include of 'Recursive2.xml' detected")
4842
4844 # Test that including the same file multiple times, but on the same level
4845 # is not detected as recursive include
4846 document = self.xinclude_loader("NonRecursive3.xml").getroottree()
4847 self.include(document, self.xinclude_loader)
4848
4849 # same but for more than one level
4850 document = self.xinclude_loader("NonRecursive1.xml").getroottree()
4851 self.include(document, self.xinclude_loader)
4852
4853 # same but no Leaf.xml in top-level file
4854 document = self.xinclude_loader("NonRecursive2.xml").getroottree()
4855 self.include(document, self.xinclude_loader)
4856
4857
4860 tree = self.parse(_bytes('<a><b/></a>'))
4861 f = BytesIO()
4862 tree.write_c14n(f)
4863 s = f.getvalue()
4864 self.assertEqual(_bytes('<a><b></b></a>'),
4865 s)
4866
4868 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4869 f = BytesIO()
4870 tree.write_c14n(f, compression=9)
4871 with gzip.GzipFile(fileobj=BytesIO(f.getvalue())) as gzfile:
4872 s = gzfile.read()
4873 self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
4874 s)
4875
4877 tree = self.parse(_bytes('<a><b/></a>'))
4878 with tmpfile() as filename:
4879 tree.write_c14n(filename)
4880 data = read_file(filename, 'rb')
4881 self.assertEqual(_bytes('<a><b></b></a>'),
4882 data)
4883
4885 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4886 with tmpfile() as filename:
4887 tree.write_c14n(filename, compression=9)
4888 with gzip.open(filename, 'rb') as f:
4889 data = f.read()
4890 self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
4891 data)
4892
4894 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4895 with tmpfile() as filename:
4896 tree.write(filename, method='c14n2', compression=9)
4897 with gzip.open(filename, 'rb') as f:
4898 data = f.read()
4899 self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
4900 data)
4901
4903 tree = self.parse(
4904 b'<?xml version="1.0"?> <a> abc \n <b> btext </b> btail <c/> ctail </a> ')
4905 f = BytesIO()
4906 tree.write(f, method='c14n2')
4907 s = f.getvalue()
4908 self.assertEqual(b'<a> abc \n <b> btext </b> btail <c></c> ctail </a>',
4909 s)
4910
4911 f = BytesIO()
4912 tree.write(f, method='c14n2', strip_text=True)
4913 s = f.getvalue()
4914 self.assertEqual(b'<a>abc<b>btext</b>btail<c></c>ctail</a>',
4915 s)
4916
4918 tree = self.parse(_bytes('<!--hi--><a><!--ho--><b/></a><!--hu-->'))
4919 f = BytesIO()
4920 tree.write_c14n(f)
4921 s = f.getvalue()
4922 self.assertEqual(_bytes('<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
4923 s)
4924 f = BytesIO()
4925 tree.write_c14n(f, with_comments=True)
4926 s = f.getvalue()
4927 self.assertEqual(_bytes('<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
4928 s)
4929 f = BytesIO()
4930 tree.write_c14n(f, with_comments=False)
4931 s = f.getvalue()
4932 self.assertEqual(_bytes('<a><b></b></a>'),
4933 s)
4934
4936 tree = self.parse(b'<!--hi--> <a> <!-- ho --> <b/> </a> <!-- hu -->')
4937 self.assertEqual(
4938 b'<!--hi-->\n<a> <!-- ho --> <b></b> </a>\n<!-- hu -->',
4939 etree.tostring(tree, method='c14n2'))
4940
4941 self.assertEqual(
4942 b'<!--hi-->\n<a> <!-- ho --> <b></b> </a>\n<!-- hu -->',
4943 etree.tostring(tree, method='c14n2', with_comments=True))
4944
4945 self.assertEqual(
4946 b'<a> <b></b> </a>',
4947 etree.tostring(tree, method='c14n2', with_comments=False))
4948
4950 tree = self.parse(b'<!--hi--> <a> <!-- ho --> <b/> </a> <!-- hu -->')
4951 self.assertEqual(
4952 b'<!--hi-->\n<a><!-- ho --><b></b></a>\n<!-- hu -->',
4953 etree.tostring(tree, method='c14n2', with_comments=True, strip_text=True))
4954 self.assertEqual(
4955 b'<a><b></b></a>',
4956 etree.tostring(tree, method='c14n2', with_comments=False, strip_text=True))
4957
4959 tree = self.parse(_bytes('<!--hi--><a><!--ho--><b/></a><!--hu-->'))
4960 s = etree.tostring(tree, method='c14n')
4961 self.assertEqual(_bytes('<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
4962 s)
4963 s = etree.tostring(tree, method='c14n', with_comments=True)
4964 self.assertEqual(_bytes('<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
4965 s)
4966 s = etree.tostring(tree, method='c14n', with_comments=False)
4967 self.assertEqual(_bytes('<a><b></b></a>'),
4968 s)
4969
4971 tree = self.parse(b'<!--hi--><a><!--ho--><b/></a><!--hu-->')
4972 s = etree.tostring(tree, method='c14n2')
4973 self.assertEqual(b'<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->',
4974 s)
4975 s = etree.tostring(tree, method='c14n2', with_comments=True)
4976 self.assertEqual(b'<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->',
4977 s)
4978 s = etree.tostring(tree, method='c14n2', with_comments=False)
4979 self.assertEqual(b'<a><b></b></a>',
4980 s)
4981
4983 tree = self.parse(_bytes('<!--hi--><a><!--ho--><b/></a><!--hu-->'))
4984 s = etree.tostring(tree.getroot(), method='c14n')
4985 self.assertEqual(_bytes('<a><!--ho--><b></b></a>'),
4986 s)
4987 s = etree.tostring(tree.getroot(), method='c14n', with_comments=True)
4988 self.assertEqual(_bytes('<a><!--ho--><b></b></a>'),
4989 s)
4990 s = etree.tostring(tree.getroot(), method='c14n', with_comments=False)
4991 self.assertEqual(_bytes('<a><b></b></a>'),
4992 s)
4993
4995 tree = self.parse(_bytes(
4996 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4997 f = BytesIO()
4998 tree.write_c14n(f)
4999 s = f.getvalue()
5000 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
5001 s)
5002 f = BytesIO()
5003 tree.write_c14n(f, exclusive=False)
5004 s = f.getvalue()
5005 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
5006 s)
5007 f = BytesIO()
5008 tree.write_c14n(f, exclusive=True)
5009 s = f.getvalue()
5010 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
5011 s)
5012
5013 f = BytesIO()
5014 tree.write_c14n(f, exclusive=True, inclusive_ns_prefixes=['z'])
5015 s = f.getvalue()
5016 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:z="http://cde"><z:b></z:b></a>'),
5017 s)
5018
5020 tree = self.parse(_bytes(
5021 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
5022 s = etree.tostring(tree, method='c14n')
5023 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
5024 s)
5025 s = etree.tostring(tree, method='c14n', exclusive=False)
5026 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
5027 s)
5028 s = etree.tostring(tree, method='c14n', exclusive=True)
5029 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
5030 s)
5031
5032 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
5033 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd"><z:b xmlns:z="http://cde"></z:b></a>'),
5034 s)
5035
5037 tree = self.parse(_bytes(
5038 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
5039 s = etree.tostring(tree.getroot(), method='c14n')
5040 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
5041 s)
5042 s = etree.tostring(tree.getroot(), method='c14n', exclusive=False)
5043 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
5044 s)
5045 s = etree.tostring(tree.getroot(), method='c14n', exclusive=True)
5046 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
5047 s)
5048
5049 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=False)
5050 self.assertEqual(_bytes('<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
5051 s)
5052 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True)
5053 self.assertEqual(_bytes('<z:b xmlns:z="http://cde"></z:b>'),
5054 s)
5055
5056 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
5057 self.assertEqual(_bytes('<z:b xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
5058 s)
5059
5061 """Regression test for memory allocation issues (use 3+ inclusive namespace prefixes)."""
5062 tree = self.parse(_bytes(
5063 '<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
5064
5065 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['x', 'y', 'z'])
5066 self.assertEqual(_bytes('<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
5067 s)
5068
5069
5072 tree = self.parse(_bytes('<a><b/></a>'))
5073 f = BytesIO()
5074 tree.write(f)
5075 s = f.getvalue()
5076 self.assertEqual(_bytes('<a><b/></a>'),
5077 s)
5078
5080 tree = self.parse(_bytes('<a><b/></a>'))
5081 f = BytesIO()
5082 tree.write(f, doctype='HUHU')
5083 s = f.getvalue()
5084 self.assertEqual(_bytes('HUHU\n<a><b/></a>'),
5085 s)
5086
5088 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
5089 f = BytesIO()
5090 tree.write(f, compression=9)
5091 with gzip.GzipFile(fileobj=BytesIO(f.getvalue())) as gzfile:
5092 s = gzfile.read()
5093 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
5094 s)
5095
5097 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
5098 f = BytesIO()
5099 tree.write(f, compression=9, doctype='<!DOCTYPE a>')
5100 with gzip.GzipFile(fileobj=BytesIO(f.getvalue())) as gzfile:
5101 s = gzfile.read()
5102 self.assertEqual(_bytes('<!DOCTYPE a>\n<a>'+'<b/>'*200+'</a>'),
5103 s)
5104
5106 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
5107 f = BytesIO()
5108 tree.write(f, compression=0)
5109 s0 = f.getvalue()
5110
5111 f = BytesIO()
5112 tree.write(f)
5113 self.assertEqual(f.getvalue(), s0)
5114
5115 f = BytesIO()
5116 tree.write(f, compression=1)
5117 s = f.getvalue()
5118 self.assertTrue(len(s) <= len(s0))
5119 with gzip.GzipFile(fileobj=BytesIO(s)) as gzfile:
5120 s1 = gzfile.read()
5121
5122 f = BytesIO()
5123 tree.write(f, compression=9)
5124 s = f.getvalue()
5125 self.assertTrue(len(s) <= len(s0))
5126 with gzip.GzipFile(fileobj=BytesIO(s)) as gzfile:
5127 s9 = gzfile.read()
5128
5129 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
5130 s0)
5131 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
5132 s1)
5133 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
5134 s9)
5135
5137 tree = self.parse(_bytes('<a><b/></a>'))
5138 with tmpfile() as filename:
5139 tree.write(filename)
5140 data = read_file(filename, 'rb')
5141 self.assertEqual(_bytes('<a><b/></a>'),
5142 data)
5143
5145 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
5146 with tmpfile() as filename:
5147 tree.write(filename, compression=9)
5148 with gzip.open(filename, 'rb') as f:
5149 data = f.read()
5150 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
5151 data)
5152
5154 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
5155 with tmpfile() as filename:
5156 tree.write(filename, compression=9)
5157 data = etree.tostring(etree.parse(filename))
5158 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
5159 data)
5160
5162 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
5163 with tmpfile() as filename:
5164 tree.write(filename, compression=9)
5165 with gzip.GzipFile(filename) as f:
5166 data = etree.tostring(etree.parse(f))
5167 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
5168 data)
5169
5171 xml = _bytes('<a>'+'<b/>'*200+'</a>')
5172 tree = self.parse(xml)
5173 with tmpfile(prefix="p+%20", suffix=".xml") as filename:
5174 url = 'file://' + (filename if sys.platform != 'win32'
5175 else '/' + filename.replace('\\', '/'))
5176 tree.write(url)
5177 data = read_file(filename, 'rb').replace(_bytes('\n'), _bytes(''))
5178 self.assertEqual(data, xml)
5179
5180
5182 etree = etree
5183
5185 parse = self.etree.parse
5186 f = BytesIO('<a><b></c></b></a>')
5187 self.etree.clear_error_log()
5188 try:
5189 parse(f)
5190 logs = None
5191 except SyntaxError:
5192 e = sys.exc_info()[1]
5193 logs = e.error_log
5194 f.close()
5195 self.assertTrue([ log for log in logs
5196 if 'mismatch' in log.message ])
5197 self.assertTrue([ log for log in logs
5198 if 'PARSER' in log.domain_name])
5199 self.assertTrue([ log for log in logs
5200 if 'ERR_TAG_NAME_MISMATCH' in log.type_name ])
5201 self.assertTrue([ log for log in logs
5202 if 1 == log.line ])
5203 self.assertTrue([ log for log in logs
5204 if 15 == log.column ])
5205
5216
5217 self.etree.use_global_python_log(Logger())
5218 f = BytesIO('<a><b></c></b></a>')
5219 try:
5220 parse(f)
5221 except SyntaxError:
5222 pass
5223 f.close()
5224
5225 self.assertTrue([ message for message in messages
5226 if 'mismatch' in message ])
5227 self.assertTrue([ message for message in messages
5228 if ':PARSER:' in message])
5229 self.assertTrue([ message for message in messages
5230 if ':ERR_TAG_NAME_MISMATCH:' in message ])
5231 self.assertTrue([ message for message in messages
5232 if ':1:15:' in message ])
5233
5234
# Returns a fixed marker string rather than a tree.
5248 def close(self):
5249 return 'close()'
5250
5251 parser = self.etree.XMLPullParser(target=Target())
5252 events = parser.read_events()
5253
5254 parser.feed('<root><element>')
5255 self.assertFalse(list(events))
5256 self.assertFalse(list(events))
5257 parser.feed('</element><child>')
5258 self.assertEqual([('end', 'end(element)')], list(events))
5259 parser.feed('</child>')
5260 self.assertEqual([('end', 'end(child)')], list(events))
5261 parser.feed('</root>')
5262 self.assertEqual([('end', 'end(root)')], list(events))
5263 self.assertFalse(list(events))
5264 self.assertEqual('close()', parser.close())
5265
# Returns a marker string that names the tag passed in.
5270 def end(self, tag):
5271 return 'end(%s)' % tag
# Returns a fixed marker string rather than a tree.
5272 def close(self):
5273 return 'close()'
5274
5275 parser = self.etree.XMLPullParser(
5276 ['start', 'end'], target=Target())
5277 events = parser.read_events()
5278
5279 parser.feed('<root><element>')
5280 self.assertEqual(
5281 [('start', 'start(root)'), ('start', 'start(element)')],
5282 list(events))
5283 self.assertFalse(list(events))
5284 parser.feed('</element><child>')
5285 self.assertEqual(
5286 [('end', 'end(element)'), ('start', 'start(child)')],
5287 list(events))
5288 parser.feed('</child>')
5289 self.assertEqual(
5290 [('end', 'end(child)')],
5291 list(events))
5292 parser.feed('</root>')
5293 self.assertEqual(
5294 [('end', 'end(root)')],
5295 list(events))
5296 self.assertFalse(list(events))
5297 self.assertEqual('close()', parser.close())
5298
5300 parser = self.etree.XMLPullParser(
5301 ['start', 'end'], target=etree.TreeBuilder())
5302 events = parser.read_events()
5303
5304 parser.feed('<root><element>')
5305 self.assert_event_tags(
5306 events, [('start', 'root'), ('start', 'element')])
5307 self.assertFalse(list(events))
5308 parser.feed('</element><child>')
5309 self.assert_event_tags(
5310 events, [('end', 'element'), ('start', 'child')])
5311 parser.feed('</child>')
5312 self.assert_event_tags(
5313 events, [('end', 'child')])
5314 parser.feed('</root>')
5315 self.assert_event_tags(
5316 events, [('end', 'root')])
5317 self.assertFalse(list(events))
5318 root = parser.close()
5319 self.assertEqual('root', root.tag)
5320
# TreeBuilder subclass whose end() appends '-huhu' to the tag of the element
# returned by the base class and then returns that element.
5322 class Target(etree.TreeBuilder):
5323 def end(self, tag):
5324 el = super(Target, self).end(tag)
# rename the element in place
5325 el.tag += '-huhu'
5326 return el
5327
5328 parser = self.etree.XMLPullParser(
5329 ['start', 'end'], target=Target())
5330 events = parser.read_events()
5331
5332 parser.feed('<root><element>')
5333 self.assert_event_tags(
5334 events, [('start', 'root'), ('start', 'element')])
5335 self.assertFalse(list(events))
5336 parser.feed('</element><child>')
5337 self.assert_event_tags(
5338 events, [('end', 'element-huhu'), ('start', 'child')])
5339 parser.feed('</child>')
5340 self.assert_event_tags(
5341 events, [('end', 'child-huhu')])
5342 parser.feed('</root>')
5343 self.assert_event_tags(
5344 events, [('end', 'root-huhu')])
5345 self.assertFalse(list(events))
5346 root = parser.close()
5347 self.assertEqual('root-huhu', root.tag)
5348
5349
5351 suite = unittest.TestSuite()
5352 suite.addTests([unittest.makeSuite(ETreeOnlyTestCase)])
5353 suite.addTests([unittest.makeSuite(ETreeXIncludeTestCase)])
5354 suite.addTests([unittest.makeSuite(ElementIncludeTestCase)])
5355 suite.addTests([unittest.makeSuite(ETreeC14NTestCase)])
5356 suite.addTests([unittest.makeSuite(ETreeWriteTestCase)])
5357 suite.addTests([unittest.makeSuite(ETreeErrorLogTest)])
5358 suite.addTests([unittest.makeSuite(XMLPullParserTest)])
5359
5360 # add original doctests from ElementTree selftest modules
5361 from . import selftest, selftest2
5362 suite.addTests(doctest.DocTestSuite(selftest))
5363 suite.addTests(doctest.DocTestSuite(selftest2))
5364
5365 # add doctests
5366 suite.addTests(doctest.DocTestSuite(etree))
5367 suite.addTests(
5368 [make_doctest('../../../doc/tutorial.txt')])
5369 suite.addTests(
5370 [make_doctest('../../../doc/api.txt')])
5371 suite.addTests(
5372 [make_doctest('../../../doc/FAQ.txt')])
5373 suite.addTests(
5374 [make_doctest('../../../doc/parsing.txt')])
5375 suite.addTests(
5376 [make_doctest('../../../doc/resolvers.txt')])
5377 return suite
5378
5379
# Running this module directly does not run any tests; it only prints a
# hint that names test.py as the way to run them.
5380 if __name__ == '__main__':
5381 print('to test use test.py %s' % __file__)
5382
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Sat Jul 11 05:50:19 2020 | http://epydoc.sourceforge.net |