| Home | Trees | Indices | Help |
|
|---|
|
|
1 # -*- coding: utf-8 -*-
2
3 """
4 Tests specific to the extended etree API
5
6 Tests that apply to the general ElementTree API should go into
7 test_elementtree
8 """
9
10 from __future__ import absolute_import
11
12 import os.path
13 import unittest
14 import copy
15 import sys
16 import re
17 import gc
18 import operator
19 import tempfile
20 import textwrap
21 import zlib
22 import gzip
23 from contextlib import closing, contextmanager
24
25 from .common_imports import etree, StringIO, BytesIO, HelperTestCase
26 from .common_imports import fileInTestDir, fileUrlInTestDir, read_file, path2url
27 from .common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest
28 from .common_imports import canonicalize, _str, _bytes
29
30 print("")
31 print("TESTED VERSION: %s" % etree.__version__)
32 print(" Python: " + repr(sys.version_info))
33 print(" lxml.etree: " + repr(etree.LXML_VERSION))
34 print(" libxml used: " + repr(etree.LIBXML_VERSION))
35 print(" libxml compiled: " + repr(etree.LIBXML_COMPILED_VERSION))
36 print(" libxslt used: " + repr(etree.LIBXSLT_VERSION))
37 print(" libxslt compiled: " + repr(etree.LIBXSLT_COMPILED_VERSION))
38 print("")
39
40 try:
41 _unicode = unicode
42 except NameError:
43 # Python 3
44 _unicode = str
49 handle, filename = tempfile.mkstemp()
50 try:
51 yield filename
52 finally:
53 os.close(handle)
54 os.remove(filename)
55
58 """Tests only for etree, not ElementTree"""
59 etree = etree
60
62 self.assertTrue(isinstance(etree.__version__, _unicode))
63 self.assertTrue(isinstance(etree.LXML_VERSION, tuple))
64 self.assertEqual(len(etree.LXML_VERSION), 4)
65 self.assertTrue(isinstance(etree.LXML_VERSION[0], int))
66 self.assertTrue(isinstance(etree.LXML_VERSION[1], int))
67 self.assertTrue(isinstance(etree.LXML_VERSION[2], int))
68 self.assertTrue(isinstance(etree.LXML_VERSION[3], int))
69 self.assertTrue(etree.__version__.startswith(
70 str(etree.LXML_VERSION[0])))
71
73 if hasattr(self.etree, '__pyx_capi__'):
74 # newer Pyrex compatible C-API
75 self.assertTrue(isinstance(self.etree.__pyx_capi__, dict))
76 self.assertTrue(len(self.etree.__pyx_capi__) > 0)
77 else:
78 # older C-API mechanism
79 self.assertTrue(hasattr(self.etree, '_import_c_api'))
80
82 import lxml
83 includes = lxml.get_include()
84 self.assertTrue(includes)
85 self.assertTrue(len(includes) >= 2)
86 self.assertTrue(os.path.join(os.path.dirname(lxml.__file__), 'includes') in includes,
87 includes)
88
90 Element = self.etree.Element
91 el = Element('name')
92 self.assertEqual(el.tag, 'name')
93 el = Element('{}name')
94 self.assertEqual(el.tag, 'name')
95
97 Element = self.etree.Element
98 el = Element('name')
99 self.assertRaises(ValueError, Element, '{}')
100 self.assertRaises(ValueError, setattr, el, 'tag', '{}')
101
102 self.assertRaises(ValueError, Element, '{test}')
103 self.assertRaises(ValueError, setattr, el, 'tag', '{test}')
104
106 Element = self.etree.Element
107 self.assertRaises(ValueError, Element, 'p:name')
108 self.assertRaises(ValueError, Element, '{test}p:name')
109
110 el = Element('name')
111 self.assertRaises(ValueError, setattr, el, 'tag', 'p:name')
112
114 Element = self.etree.Element
115 self.assertRaises(ValueError, Element, "p'name")
116 self.assertRaises(ValueError, Element, 'p"name')
117
118 self.assertRaises(ValueError, Element, "{test}p'name")
119 self.assertRaises(ValueError, Element, '{test}p"name')
120
121 el = Element('name')
122 self.assertRaises(ValueError, setattr, el, 'tag', "p'name")
123 self.assertRaises(ValueError, setattr, el, 'tag', 'p"name')
124
126 Element = self.etree.Element
127 self.assertRaises(ValueError, Element, ' name ')
128 self.assertRaises(ValueError, Element, 'na me')
129 self.assertRaises(ValueError, Element, '{test} name')
130
131 el = Element('name')
132 self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
133
135 Element = self.etree.Element
136 SubElement = self.etree.SubElement
137
138 el = Element('name')
139 self.assertRaises(ValueError, SubElement, el, '{}')
140 self.assertRaises(ValueError, SubElement, el, '{test}')
141
143 Element = self.etree.Element
144 SubElement = self.etree.SubElement
145
146 el = Element('name')
147 self.assertRaises(ValueError, SubElement, el, 'p:name')
148 self.assertRaises(ValueError, SubElement, el, '{test}p:name')
149
151 Element = self.etree.Element
152 SubElement = self.etree.SubElement
153
154 el = Element('name')
155 self.assertRaises(ValueError, SubElement, el, "p'name")
156 self.assertRaises(ValueError, SubElement, el, "{test}p'name")
157
158 self.assertRaises(ValueError, SubElement, el, 'p"name')
159 self.assertRaises(ValueError, SubElement, el, '{test}p"name')
160
162 Element = self.etree.Element
163 SubElement = self.etree.SubElement
164
165 el = Element('name')
166 self.assertRaises(ValueError, SubElement, el, ' name ')
167 self.assertRaises(ValueError, SubElement, el, 'na me')
168 self.assertRaises(ValueError, SubElement, el, '{test} name')
169
171 Element = self.etree.Element
172 SubElement = self.etree.SubElement
173
174 el = Element('name')
175 self.assertRaises(ValueError, SubElement, el, 'name', {'a b c' : 'abc'})
176 self.assertRaises(ValueError, SubElement, el, 'name', {'a' : 'a\0\n'})
177 self.assertEqual(0, len(el))
178
180 QName = self.etree.QName
181 self.assertRaises(ValueError, QName, '')
182 self.assertRaises(ValueError, QName, None)
183 self.assertRaises(ValueError, QName, None, None)
184 self.assertRaises(ValueError, QName, 'test', '')
185
187 QName = self.etree.QName
188 q = QName(None, 'TAG')
189 self.assertEqual('TAG', q)
190 self.assertEqual('TAG', q.localname)
191 self.assertEqual(None, q.namespace)
192
194 QName = self.etree.QName
195 self.assertRaises(ValueError, QName, 'p:name')
196 self.assertRaises(ValueError, QName, 'test', 'p:name')
197
199 QName = self.etree.QName
200 self.assertRaises(ValueError, QName, ' name ')
201 self.assertRaises(ValueError, QName, 'na me')
202 self.assertRaises(ValueError, QName, 'test', ' name')
203
205 # ET doesn't have namespace/localname properties on QNames
206 QName = self.etree.QName
207 namespace, localname = 'http://myns', 'a'
208 qname = QName(namespace, localname)
209 self.assertEqual(namespace, qname.namespace)
210 self.assertEqual(localname, qname.localname)
211
213 # ET doesn't have namespace/localname properties on QNames
214 QName = self.etree.QName
215 qname1 = QName('http://myns', 'a')
216 a = self.etree.Element(qname1, nsmap={'p' : 'http://myns'})
217
218 qname2 = QName(a)
219 self.assertEqual(a.tag, qname1.text)
220 self.assertEqual(a.tag, qname1)
221 self.assertEqual(qname1.text, qname2.text)
222 self.assertEqual(qname1, qname2.text)
223 self.assertEqual(qname1.text, qname2)
224 self.assertEqual(qname1, qname2)
225
227 # ET doesn't resolve QNames as text values
228 etree = self.etree
229 qname = etree.QName('http://myns', 'a')
230 a = etree.Element(qname, nsmap={'p' : 'http://myns'})
231 a.text = qname
232
233 self.assertEqual("p:a", a.text)
234
236 etree = self.etree
237 self.assertRaises(ValueError,
238 etree.Element, "root", nsmap={'"' : 'testns'})
239 self.assertRaises(ValueError,
240 etree.Element, "root", nsmap={'&' : 'testns'})
241 self.assertRaises(ValueError,
242 etree.Element, "root", nsmap={'a:b' : 'testns'})
243
245 # ET in Py 3.x has no "attrib.has_key()" method
246 XML = self.etree.XML
247
248 root = XML(_bytes('<foo bar="Bar" xmlns:ns="http://ns.codespeak.net/test" ns:baz="Baz" />'))
249 self.assertEqual(
250 True, root.attrib.has_key('bar'))
251 self.assertEqual(
252 False, root.attrib.has_key('baz'))
253 self.assertEqual(
254 False, root.attrib.has_key('hah'))
255 self.assertEqual(
256 True,
257 root.attrib.has_key('{http://ns.codespeak.net/test}baz'))
258
260 Element = self.etree.Element
261 root = Element("root")
262 root.set("attr", "TEST")
263 self.assertEqual("TEST", root.get("attr"))
264
266 # ElementTree accepts arbitrary attribute values
267 # lxml.etree allows only strings
268 Element = self.etree.Element
269
270 root = Element("root")
271 root.set("attr", "TEST")
272 self.assertEqual("TEST", root.get("attr"))
273 self.assertRaises(TypeError, root.set, "newattr", 5)
274
276 Element = self.etree.Element
277
278 root = Element("root")
279 root.set("attr", "TEST")
280 self.assertEqual("TEST", root.attrib["attr"])
281
282 root2 = Element("root2", root.attrib, attr2='TOAST')
283 self.assertEqual("TEST", root2.attrib["attr"])
284 self.assertEqual("TOAST", root2.attrib["attr2"])
285 self.assertEqual(None, root.attrib.get("attr2"))
286
288 Element = self.etree.Element
289
290 keys = ["attr%d" % i for i in range(10)]
291 values = ["TEST-%d" % i for i in range(10)]
292 items = list(zip(keys, values))
293
294 root = Element("root")
295 for key, value in items:
296 root.set(key, value)
297 self.assertEqual(keys, root.attrib.keys())
298 self.assertEqual(values, root.attrib.values())
299
300 root2 = Element("root2", root.attrib,
301 attr_99='TOAST-1', attr_98='TOAST-2')
302 self.assertEqual(['attr_98', 'attr_99'] + keys,
303 root2.attrib.keys())
304 self.assertEqual(['TOAST-2', 'TOAST-1'] + values,
305 root2.attrib.values())
306
307 self.assertEqual(keys, root.attrib.keys())
308 self.assertEqual(values, root.attrib.values())
309
311 # ElementTree accepts arbitrary attribute values
312 # lxml.etree allows only strings (None, used for html5 boolean attributes, is rejected on this plain Element)
313 Element = self.etree.Element
314 root = Element("root")
315 self.assertRaises(TypeError, root.set, "newattr", 5)
316 self.assertRaises(TypeError, root.set, "newattr", object)
317 self.assertRaises(TypeError, root.set, "newattr", None)
318 self.assertRaises(TypeError, root.set, "newattr")
319
321 XML = self.etree.XML
322 xml = _bytes('<test a="5" b="10" c="20"><x a="4" b="2"/></test>')
323
324 root = XML(xml)
325 self.etree.strip_attributes(root, 'a')
326 self.assertEqual(_bytes('<test b="10" c="20"><x b="2"></x></test>'),
327 self._writeElement(root))
328
329 root = XML(xml)
330 self.etree.strip_attributes(root, 'b', 'c')
331 self.assertEqual(_bytes('<test a="5"><x a="4"></x></test>'),
332 self._writeElement(root))
333
335 XML = self.etree.XML
336 xml = _bytes('<test xmlns:n="http://test/ns" a="6" b="10" c="20" n:a="5"><x a="4" n:b="2"/></test>')
337
338 root = XML(xml)
339 self.etree.strip_attributes(root, 'a')
340 self.assertEqual(
341 _bytes('<test xmlns:n="http://test/ns" b="10" c="20" n:a="5"><x n:b="2"></x></test>'),
342 self._writeElement(root))
343
344 root = XML(xml)
345 self.etree.strip_attributes(root, '{http://test/ns}a', 'c')
346 self.assertEqual(
347 _bytes('<test xmlns:n="http://test/ns" a="6" b="10"><x a="4" n:b="2"></x></test>'),
348 self._writeElement(root))
349
350 root = XML(xml)
351 self.etree.strip_attributes(root, '{http://test/ns}*')
352 self.assertEqual(
353 _bytes('<test xmlns:n="http://test/ns" a="6" b="10" c="20"><x a="4"></x></test>'),
354 self._writeElement(root))
355
357 XML = self.etree.XML
358 xml = _bytes('<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>')
359
360 root = XML(xml)
361 self.etree.strip_elements(root, 'a')
362 self.assertEqual(_bytes('<test><x></x></test>'),
363 self._writeElement(root))
364
365 root = XML(xml)
366 self.etree.strip_elements(root, 'b', 'c', 'X', 'Y', 'Z')
367 self.assertEqual(_bytes('<test><a></a><x><a></a></x></test>'),
368 self._writeElement(root))
369
370 root = XML(xml)
371 self.etree.strip_elements(root, 'c')
372 self.assertEqual(_bytes('<test><a><b></b></a><x><a><b></b></a></x></test>'),
373 self._writeElement(root))
374
376 XML = self.etree.XML
377 xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
378
379 root = XML(xml)
380 self.etree.strip_elements(root, 'a')
381 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>'),
382 self._writeElement(root))
383
384 root = XML(xml)
385 self.etree.strip_elements(root, '{urn:a}b', 'c')
386 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
387 self._writeElement(root))
388
389 root = XML(xml)
390 self.etree.strip_elements(root, '{urn:a}*', 'c')
391 self.assertEqual(_bytes('<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
392 self._writeElement(root))
393
394 root = XML(xml)
395 self.etree.strip_elements(root, '{urn:a}*', 'c', with_tail=False)
396 self.assertEqual(_bytes('<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
397 self._writeElement(root))
398
417
443
470
497
516
529
531 # lxml.etree separates target and text
532 Element = self.etree.Element
533 SubElement = self.etree.SubElement
534 ProcessingInstruction = self.etree.ProcessingInstruction
535
536 a = Element('a')
537 a.append(ProcessingInstruction('foo', 'some more text'))
538 self.assertEqual(a[0].target, 'foo')
539 self.assertEqual(a[0].text, 'some more text')
540
542 XML = self.etree.XML
543 root = XML(_bytes("<test><?mypi my test ?></test>"))
544 self.assertEqual(root[0].target, "mypi")
545 self.assertEqual(root[0].text, "my test ")
546
548 XML = self.etree.XML
549 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
550 self.assertEqual(root[0].target, "mypi")
551 self.assertEqual(root[0].get('my'), "1")
552 self.assertEqual(root[0].get('test'), " abc ")
553 self.assertEqual(root[0].get('quotes'), "' '")
554 self.assertEqual(root[0].get('only'), None)
555 self.assertEqual(root[0].get('names'), None)
556 self.assertEqual(root[0].get('nope'), None)
557
559 XML = self.etree.XML
560 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
561 self.assertEqual(root[0].target, "mypi")
562 self.assertEqual(root[0].attrib['my'], "1")
563 self.assertEqual(root[0].attrib['test'], " abc ")
564 self.assertEqual(root[0].attrib['quotes'], "' '")
565 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only')
566 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names')
567 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope')
568
570 # previously caused a crash
571 ProcessingInstruction = self.etree.ProcessingInstruction
572
573 a = ProcessingInstruction("PI", "ONE")
574 b = copy.deepcopy(a)
575 b.text = "ANOTHER"
576
577 self.assertEqual('ONE', a.text)
578 self.assertEqual('ANOTHER', b.text)
579
581 XML = self.etree.XML
582 tostring = self.etree.tostring
583 root = XML(_bytes("<?mypi my test ?><test/><!--comment -->"))
584 tree1 = self.etree.ElementTree(root)
585 self.assertEqual(_bytes("<?mypi my test ?><test/><!--comment -->"),
586 tostring(tree1))
587
588 tree2 = copy.deepcopy(tree1)
589 self.assertEqual(_bytes("<?mypi my test ?><test/><!--comment -->"),
590 tostring(tree2))
591
592 root2 = copy.deepcopy(tree1.getroot())
593 self.assertEqual(_bytes("<test/>"),
594 tostring(root2))
595
597 XML = self.etree.XML
598 tostring = self.etree.tostring
599 xml = _bytes('<!DOCTYPE test [\n<!ENTITY entity "tasty">\n]>\n<test/>')
600 root = XML(xml)
601 tree1 = self.etree.ElementTree(root)
602 self.assertEqual(xml, tostring(tree1))
603
604 tree2 = copy.deepcopy(tree1)
605 self.assertEqual(xml, tostring(tree2))
606
607 root2 = copy.deepcopy(tree1.getroot())
608 self.assertEqual(_bytes("<test/>"),
609 tostring(root2))
610
612 XML = self.etree.XML
613 tostring = self.etree.tostring
614 xml = _bytes('<!-- comment --><!DOCTYPE test [\n<!ENTITY entity "tasty">\n]>\n<test/>')
615 root = XML(xml)
616 tree1 = self.etree.ElementTree(root)
617 self.assertEqual(xml, tostring(tree1))
618
619 tree2 = copy.deepcopy(tree1)
620 self.assertEqual(xml, tostring(tree2))
621
623 fromstring = self.etree.fromstring
624 tostring = self.etree.tostring
625 XMLParser = self.etree.XMLParser
626
627 xml = _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
628 parser = XMLParser(remove_comments=True)
629 root = fromstring(xml, parser)
630 self.assertEqual(
631 _bytes('<a><b><c/></b></a>'),
632 tostring(root))
633
635 parse = self.etree.parse
636 tostring = self.etree.tostring
637 XMLParser = self.etree.XMLParser
638
639 xml = _bytes('<?test?><a><?A?><b><?B?><c/></b><?C?></a><?tail?>')
640
641 f = BytesIO(xml)
642 tree = parse(f)
643 self.assertEqual(
644 xml,
645 tostring(tree))
646
647 parser = XMLParser(remove_pis=True)
648 tree = parse(f, parser)
649 self.assertEqual(
650 _bytes('<a><b><c/></b></a>'),
651 tostring(tree))
652
654 # ET raises IOError only
655 parse = self.etree.parse
656 self.assertRaises(TypeError, parse, 'notthere.xml', object())
657
659 # ET removes comments
660 iterparse = self.etree.iterparse
661 tostring = self.etree.tostring
662
663 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
664 events = list(iterparse(f))
665 root = events[-1][1]
666 self.assertEqual(3, len(events))
667 self.assertEqual(
668 _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
669 tostring(root))
670
672 # ET removes comments
673 iterparse = self.etree.iterparse
674 tostring = self.etree.tostring
675
676 def name(event, el):
677 if event == 'comment':
678 return el.text
679 else:
680 return el.tag
681
682 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
683 events = list(iterparse(f, events=('end', 'comment')))
684 root = events[-1][1]
685 self.assertEqual(6, len(events))
686 self.assertEqual(['A', ' B ', 'c', 'b', 'C', 'a'],
687 [ name(*item) for item in events ])
688 self.assertEqual(
689 _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
690 tostring(root))
691
693 # ET removes pis
694 iterparse = self.etree.iterparse
695 tostring = self.etree.tostring
696 ElementTree = self.etree.ElementTree
697
698 def name(event, el):
699 if event == 'pi':
700 return el.target, el.text
701 else:
702 return el.tag
703
704 f = BytesIO('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>')
705 events = list(iterparse(f, events=('end', 'pi')))
706 root = events[-2][1]
707 self.assertEqual(8, len(events))
708 self.assertEqual([('pia','a'), ('pib','b'), ('pic','c'), 'c', 'b',
709 ('pid','d'), 'a', ('pie','e')],
710 [ name(*item) for item in events ])
711 self.assertEqual(
712 _bytes('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>'),
713 tostring(ElementTree(root)))
714
716 iterparse = self.etree.iterparse
717 tostring = self.etree.tostring
718
719 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
720 events = list(iterparse(f, remove_comments=True,
721 events=('end', 'comment')))
722 root = events[-1][1]
723 self.assertEqual(3, len(events))
724 self.assertEqual(['c', 'b', 'a'],
725 [ el.tag for (event, el) in events ])
726 self.assertEqual(
727 _bytes('<a><b><c/></b></a>'),
728 tostring(root))
729
731 iterparse = self.etree.iterparse
732 f = BytesIO('<a><b><c/></a>')
733 # ET raises ExpatError, lxml raises XMLSyntaxError
734 self.assertRaises(self.etree.XMLSyntaxError, list, iterparse(f))
735
737 iterparse = self.etree.iterparse
738 f = BytesIO('<a><b><c/></a>')
739 it = iterparse(f, events=('start', 'end'), recover=True)
740 events = [(ev, el.tag) for ev, el in it]
741 root = it.root
742 self.assertTrue(root is not None)
743
744 self.assertEqual(1, events.count(('start', 'a')))
745 self.assertEqual(1, events.count(('end', 'a')))
746
747 self.assertEqual(1, events.count(('start', 'b')))
748 self.assertEqual(1, events.count(('end', 'b')))
749
750 self.assertEqual(1, events.count(('start', 'c')))
751 self.assertEqual(1, events.count(('end', 'c')))
752
754 iterparse = self.etree.iterparse
755 f = BytesIO('<a><b><c/></d><b><c/></a></b>')
756 it = iterparse(f, events=('start', 'end'), recover=True)
757 events = [(ev, el.tag) for ev, el in it]
758 root = it.root
759 self.assertTrue(root is not None)
760
761 self.assertEqual(1, events.count(('start', 'a')))
762 self.assertEqual(1, events.count(('end', 'a')))
763
764 self.assertEqual(2, events.count(('start', 'b')))
765 self.assertEqual(2, events.count(('end', 'b')))
766
767 self.assertEqual(2, events.count(('start', 'c')))
768 self.assertEqual(2, events.count(('end', 'c')))
769
771 iterparse = self.etree.iterparse
772 f = BytesIO("""
773 <a> \n \n <b> b test </b> \n
774
775 \n\t <c> \n </c> </a> \n """)
776 iterator = iterparse(f, remove_blank_text=True)
777 text = [ (element.text, element.tail)
778 for event, element in iterator ]
779 self.assertEqual(
780 [(" b test ", None), (" \n ", None), (None, None)],
781 text)
782
784 iterparse = self.etree.iterparse
785 f = BytesIO('<a><b><d/></b><c/></a>')
786
787 iterator = iterparse(f, tag="b", events=('start', 'end'))
788 events = list(iterator)
789 root = iterator.root
790 self.assertEqual(
791 [('start', root[0]), ('end', root[0])],
792 events)
793
795 iterparse = self.etree.iterparse
796 f = BytesIO('<a><b><d/></b><c/></a>')
797
798 iterator = iterparse(f, tag="*", events=('start', 'end'))
799 events = list(iterator)
800 self.assertEqual(
801 8,
802 len(events))
803
805 iterparse = self.etree.iterparse
806 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
807
808 iterator = iterparse(f, tag="{urn:test:1}b", events=('start', 'end'))
809 events = list(iterator)
810 root = iterator.root
811 self.assertEqual(
812 [('start', root[0]), ('end', root[0])],
813 events)
814
816 iterparse = self.etree.iterparse
817 f = BytesIO('<a><b><d/></b><c/></a>')
818 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
819 events = list(iterator)
820 root = iterator.root
821 self.assertEqual(
822 [('start', root[0]), ('end', root[0])],
823 events)
824
825 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
826 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
827 events = list(iterator)
828 root = iterator.root
829 self.assertEqual([], events)
830
832 iterparse = self.etree.iterparse
833 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
834 iterator = iterparse(f, tag="{urn:test:1}*", events=('start', 'end'))
835 events = list(iterator)
836 self.assertEqual(8, len(events))
837
839 iterparse = self.etree.iterparse
840 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
841 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
842 events = list(iterator)
843 self.assertEqual([], events)
844
845 f = BytesIO('<a><b><d/></b><c/></a>')
846 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
847 events = list(iterator)
848 self.assertEqual(8, len(events))
849
851 text = _str('Søk på nettet')
852 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
853 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
854 ).encode('iso-8859-1')
855
856 self.assertRaises(self.etree.ParseError,
857 list, self.etree.iterparse(BytesIO(xml_latin1)))
858
860 text = _str('Søk på nettet', encoding="UTF-8")
861 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
862 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
863 ).encode('iso-8859-1')
864
865 iterator = self.etree.iterparse(BytesIO(xml_latin1),
866 encoding="iso-8859-1")
867 self.assertEqual(1, len(list(iterator)))
868
869 a = iterator.root
870 self.assertEqual(a.text, text)
871
873 tostring = self.etree.tostring
874 f = BytesIO('<root><![CDATA[test]]></root>')
875 context = self.etree.iterparse(f, strip_cdata=False)
876 content = [ el.text for event,el in context ]
877
878 self.assertEqual(['test'], content)
879 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
880 tostring(context.root))
881
885
887 self.etree.XMLParser(encoding="ascii")
888 self.etree.XMLParser(encoding="utf-8")
889 self.etree.XMLParser(encoding="iso-8859-1")
890
892 parser = self.etree.XMLParser(recover=True)
893
894 parser.feed('<?xml version=')
895 parser.feed('"1.0"?><ro')
896 parser.feed('ot><')
897 parser.feed('a test="works"')
898 parser.feed('><othertag/></root') # <a> not closed!
899 parser.feed('>')
900
901 root = parser.close()
902
903 self.assertEqual(root.tag, "root")
904 self.assertEqual(len(root), 1)
905 self.assertEqual(root[0].tag, "a")
906 self.assertEqual(root[0].get("test"), "works")
907 self.assertEqual(len(root[0]), 1)
908 self.assertEqual(root[0][0].tag, "othertag")
909 # FIXME: would be nice to get some errors logged ...
910 #self.assertTrue(len(parser.error_log) > 0, "error log is empty")
911
913 # test that recover mode plays nicely with the no-id-dict setup
914 parser = self.etree.XMLParser(recover=True, collect_ids=False)
915
916 parser.feed('<?xml version=')
917 parser.feed('"1.0"?><ro')
918 parser.feed('ot xml:id="123"><')
919 parser.feed('a test="works" xml:id=')
920 parser.feed('"321"><othertag/></root') # <a> not closed!
921 parser.feed('>')
922
923 root = parser.close()
924
925 self.assertEqual(root.tag, "root")
926 self.assertEqual(len(root), 1)
927 self.assertEqual(root[0].tag, "a")
928 self.assertEqual(root[0].get("test"), "works")
929 self.assertEqual(root[0].attrib, {
930 'test': 'works',
931 '{http://www.w3.org/XML/1998/namespace}id': '321'})
932 self.assertEqual(len(root[0]), 1)
933 self.assertEqual(root[0][0].tag, "othertag")
934 # FIXME: would be nice to get some errors logged ...
935 #self.assertTrue(len(parser.error_log) > 0, "error log is empty")
936
938 assertEqual = self.assertEqual
939 assertFalse = self.assertFalse
940
941 events = []
942 class Target(object):
943 def start(self, tag, attrib):
944 events.append("start")
945 assertFalse(attrib)
946 assertEqual("TAG", tag)
947 def end(self, tag):
948 events.append("end")
949 assertEqual("TAG", tag)
950 def close(self):
951 return "DONE" # no Element!
952
953 parser = self.etree.XMLParser(target=Target())
954 tree = self.etree.ElementTree()
955
956 self.assertRaises(TypeError,
957 tree.parse, BytesIO("<TAG/>"), parser=parser)
958 self.assertEqual(["start", "end"], events)
959
961 # ET doesn't call .close() on errors
962 events = []
963 class Target(object):
964 def start(self, tag, attrib):
965 events.append("start-" + tag)
966 def end(self, tag):
967 events.append("end-" + tag)
968 if tag == 'a':
969 raise ValueError("dead and gone")
970 def data(self, data):
971 events.append("data-" + data)
972 def close(self):
973 events.append("close")
974 return "DONE"
975
976 parser = self.etree.XMLParser(target=Target())
977
978 try:
979 parser.feed(_bytes('<root>A<a>ca</a>B</root>'))
980 done = parser.close()
981 self.fail("error expected, but parsing succeeded")
982 except ValueError:
983 done = 'value error received as expected'
984
985 self.assertEqual(["start-root", "data-A", "start-a",
986 "data-ca", "end-a", "close"],
987 events)
988
990 # ET doesn't call .close() on errors
991 events = []
992 class Target(object):
993 def start(self, tag, attrib):
994 events.append("start-" + tag)
995 def end(self, tag):
996 events.append("end-" + tag)
997 if tag == 'a':
998 raise ValueError("dead and gone")
999 def data(self, data):
1000 events.append("data-" + data)
1001 def close(self):
1002 events.append("close")
1003 return "DONE"
1004
1005 parser = self.etree.XMLParser(target=Target())
1006
1007 try:
1008 done = self.etree.fromstring(_bytes('<root>A<a>ca</a>B</root>'),
1009 parser=parser)
1010 self.fail("error expected, but parsing succeeded")
1011 except ValueError:
1012 done = 'value error received as expected'
1013
1014 self.assertEqual(["start-root", "data-A", "start-a",
1015 "data-ca", "end-a", "close"],
1016 events)
1017
1019 # test that target parsing works nicely with the no-id-hash setup
1020 events = []
1021 class Target(object):
1022 def start(self, tag, attrib):
1023 events.append("start-" + tag)
1024 def end(self, tag):
1025 events.append("end-" + tag)
1026 def data(self, data):
1027 events.append("data-" + data)
1028 def comment(self, text):
1029 events.append("comment-" + text)
1030 def close(self):
1031 return "DONE"
1032
1033 parser = self.etree.XMLParser(target=Target(), collect_ids=False)
1034
1035 parser.feed(_bytes('<!--a--><root xml:id="123">A<!--b-->'))
1036 parser.feed(_bytes('<sub xml:id="321"/>B</root>'))
1037 done = parser.close()
1038
1039 self.assertEqual("DONE", done)
1040 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1041 "start-sub", "end-sub", "data-B", "end-root"],
1042 events)
1043
1045 events = []
1046 class Target(object):
1047 def start(self, tag, attrib):
1048 events.append("start-" + tag)
1049 def end(self, tag):
1050 events.append("end-" + tag)
1051 def data(self, data):
1052 events.append("data-" + data)
1053 def comment(self, text):
1054 events.append("comment-" + text)
1055 def close(self):
1056 return "DONE"
1057
1058 parser = self.etree.XMLParser(target=Target())
1059
1060 parser.feed(_bytes('<!--a--><root>A<!--b--><sub/><!--c-->B</root><!--d-->'))
1061 done = parser.close()
1062
1063 self.assertEqual("DONE", done)
1064 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1065 "start-sub", "end-sub", "comment-c", "data-B",
1066 "end-root", "comment-d"],
1067 events)
1068
1070 events = []
1071 class Target(object):
1072 def start(self, tag, attrib):
1073 events.append("start-" + tag)
1074 def end(self, tag):
1075 events.append("end-" + tag)
1076 def data(self, data):
1077 events.append("data-" + data)
1078 def pi(self, target, data):
1079 events.append("pi-" + target + "-" + data)
1080 def close(self):
1081 return "DONE"
1082
1083 parser = self.etree.XMLParser(target=Target())
1084
1085 parser.feed(_bytes('<?test a?><root>A<?test b?>B</root><?test c?>'))
1086 done = parser.close()
1087
1088 self.assertEqual("DONE", done)
1089 self.assertEqual(["pi-test-a", "start-root", "data-A", "pi-test-b",
1090 "data-B", "end-root", "pi-test-c"],
1091 events)
1092
1094 events = []
1095 class Target(object):
1096 def start(self, tag, attrib):
1097 events.append("start-" + tag)
1098 def end(self, tag):
1099 events.append("end-" + tag)
1100 def data(self, data):
1101 events.append("data-" + data)
1102 def close(self):
1103 return "DONE"
1104
1105 parser = self.etree.XMLParser(target=Target(),
1106 strip_cdata=False)
1107
1108 parser.feed(_bytes('<root>A<a><![CDATA[ca]]></a>B</root>'))
1109 done = parser.close()
1110
1111 self.assertEqual("DONE", done)
1112 self.assertEqual(["start-root", "data-A", "start-a",
1113 "data-ca", "end-a", "data-B", "end-root"],
1114 events)
1115
1117 events = []
1118 class Target(object):
1119 def start(self, tag, attrib):
1120 events.append("start-" + tag)
1121 def end(self, tag):
1122 events.append("end-" + tag)
1123 def data(self, data):
1124 events.append("data-" + data)
1125 def close(self):
1126 events.append("close")
1127 return "DONE"
1128
1129 parser = self.etree.XMLParser(target=Target(),
1130 recover=True)
1131
1132 parser.feed(_bytes('<root>A<a>ca</a>B</not-root>'))
1133 done = parser.close()
1134
1135 self.assertEqual("DONE", done)
1136 self.assertEqual(["start-root", "data-A", "start-a",
1137 "data-ca", "end-a", "data-B",
1138 "end-root", "close"],
1139 events)
1140
1142 iterwalk = self.etree.iterwalk
1143 root = self.etree.XML(_bytes('<a><b><d/></b><c/></a>'))
1144
1145 iterator = iterwalk(root, tag="b", events=('start', 'end'))
1146 events = list(iterator)
1147 self.assertEqual(
1148 [('start', root[0]), ('end', root[0])],
1149 events)
1150
1152 iterwalk = self.etree.iterwalk
1153 root = self.etree.XML(_bytes('<a><b><d/></b><c/></a>'))
1154
1155 iterator = iterwalk(root, tag="*", events=('start', 'end'))
1156 events = list(iterator)
1157 self.assertEqual(
1158 8,
1159 len(events))
1160
1162 iterwalk = self.etree.iterwalk
1163 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1164
1165 events = list(iterwalk(root))
1166 self.assertEqual(
1167 [('end', root[0]), ('end', root[1]), ('end', root)],
1168 events)
1169
1171 iterwalk = self.etree.iterwalk
1172 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1173
1174 iterator = iterwalk(root, events=('start',))
1175 events = list(iterator)
1176 self.assertEqual(
1177 [('start', root), ('start', root[0]), ('start', root[1])],
1178 events)
1179
# iterwalk() with both 'start' and 'end' events: events are expected to be
# properly nested, i.e. the root's 'end' comes after all child events.
1181 iterwalk = self.etree.iterwalk
1182 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1183
1184 iterator = iterwalk(root, events=('start','end'))
1185 events = list(iterator)
1186 self.assertEqual(
1187 [('start', root), ('start', root[0]), ('end', root[0]),
1188 ('start', root[1]), ('end', root[1]), ('end', root)],
1189 events)
1190
1200
1210
1224
# Call clear() on every element delivered by iterwalk() while the walk is
# still in progress; afterwards the root must have no children left.
1226 iterwalk = self.etree.iterwalk
1227 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1228
1229 iterator = iterwalk(root)
1230 for event, elem in iterator:
1231 elem.clear()
1232
1233 self.assertEqual(0,
1234 len(root))
1235
# Walk a tree with nested default-namespace declarations, requesting
# element and namespace events, and set a namespaced attribute on every
# started element except the root ({ns1}a) while iterating.
# Checks the exact event order, and that the attribute ended up on the
# child (root[0]) but not on the root.
1237 iterwalk = self.etree.iterwalk
1238 root = self.etree.XML(_bytes('<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>'))
1239
1240 attr_name = '{testns}bla'
1241 events = []
1242 iterator = iterwalk(root, events=('start','end','start-ns','end-ns'))
1243 for event, elem in iterator:
1244 events.append(event)
1245 if event == 'start':
1246 if elem.tag != '{ns1}a':
1247 elem.set(attr_name, 'value')
1248
1249 self.assertEqual(
1250 ['start-ns', 'start', 'start', 'start-ns', 'start',
1251 'end', 'end-ns', 'end', 'end', 'end-ns'],
1252 events)
1253
1254 self.assertEqual(
1255 None,
1256 root.get(attr_name))
1257 self.assertEqual(
1258 'value',
1259 root[0].get(attr_name))
1260
# With the default events, call skip_subtree() after every delivered
# element; the full list of tags must still be reported.
1262 iterwalk = self.etree.iterwalk
1263 root = self.etree.XML(_bytes('<a><b><c/></b><d><e/></d></a>'))
1264
1265 iterator = iterwalk(root)
1266 tags = []
1267 for event, elem in iterator:
1268 tags.append(elem.tag)
1269 # requesting a skip after an 'end' event should never have an effect
1270 iterator.skip_subtree()
1271
1272 self.assertEqual(['c', 'b', 'e', 'd', 'a'], tags)
1273
# With start/end events, call skip_subtree() on every event for <b> and
# <e>.  The child <c> of <b> must not be reported, while the 'end' events of
# <b> and <e> themselves still are.
1275 iterwalk = self.etree.iterwalk
1276 root = self.etree.XML(_bytes('<a><b><c/></b><d><e/></d></a>'))
1277
1278 iterator = iterwalk(root, events=('start', 'end'))
1279 tags = []
1280 for event, elem in iterator:
1281 tags.append((event, elem.tag))
1282 if elem.tag in ('b', 'e'):
1283 # skipping should only have an effect on 'start', not on 'end'
1284 iterator.skip_subtree()
1285
1286 self.assertEqual(
1287 [('start', 'a'),
1288 ('start', 'b'), ('end', 'b'), # ignored child 'c'
1289 ('start', 'd'),
1290 ('start', 'e'), ('end', 'e'),
1291 ('end', 'd'),
1292 ('end', 'a')],
1293 tags)
1294
# Request skip_subtree() on the 'start-ns' event that declares the "nsb"
# default namespace.  Per the expected list, the element carrying that
# declaration ({nsb}b) is still reported, but nothing from its subtree
# (the <c xmlns="ns2"/> child) shows up.
# For namespace events the second tuple item is the (prefix, uri) pair on
# 'start-ns' and None on 'end-ns'; for 'start' it is the element.
1296 iterwalk = self.etree.iterwalk
1297 root = self.etree.XML(_bytes(
1298 '<a xmlns="ns1"><b xmlns="nsb"><c xmlns="ns2"/></b><d xmlns="ns2"><e/></d></a>'))
1299
1300 events = []
1301 iterator = iterwalk(root, events=('start','start-ns','end-ns'))
1302 for event, elem in iterator:
1303 if event in ('start-ns', 'end-ns'):
1304 events.append((event, elem))
1305 if event == 'start-ns' and elem == ('', 'nsb'):
1306 events.append('skip')
1307 iterator.skip_subtree()
1308 else:
1309 events.append((event, elem.tag))
1310
1311 self.assertEqual(
1312 [('start-ns', ('', 'ns1')),
1313 ('start', '{ns1}a'),
1314 ('start-ns', ('', 'nsb')),
1315 'skip',
1316 ('start', '{nsb}b'),
1317 ('end-ns', None),
1318 ('start-ns', ('', 'ns2')),
1319 ('start', '{ns2}d'),
1320 ('start', '{ns2}e'),
1321 ('end-ns', None),
1322 ('end-ns', None)
1323 ],
1324 events)
1325
# For each element delivered by iterwalk(), count the elements returned by
# its getiterator() (the element itself plus its descendants).
# Delivery order is d, b, c, a, hence the counts 1, 2, 1, 4.
1327 iterwalk = self.etree.iterwalk
1328 root = self.etree.XML(_bytes('<a><b><d/></b><c/></a>'))
1329
1330 counts = []
1331 for event, elem in iterwalk(root):
1332 counts.append(len(list(elem.getiterator())))
1333 self.assertEqual(
1334 [1,2,1,4],
1335 counts)
1336
# Custom resolver that answers the DTD lookup with resolve_string() and a
# text string.  The returned DTD defines &myentity; as the requested URL, so
# the parsed root text must equal that URL.
# assertEqual is bound to a local name because `self` inside resolve() is
# the resolver instance, not the test case.
1338 parse = self.etree.parse
1339 parser = self.etree.XMLParser(dtd_validation=True)
1340 assertEqual = self.assertEqual
1341 test_url = _str("__nosuch.dtd")
1342
1343 class MyResolver(self.etree.Resolver):
1344 def resolve(self, url, id, context):
1345 assertEqual(url, test_url)
1346 return self.resolve_string(
1347 _str('''<!ENTITY myentity "%s">
1348 <!ELEMENT doc ANY>''') % url, context)
1349
1350 parser.resolvers.add(MyResolver())
1351
1352 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1353 tree = parse(StringIO(xml), parser)
1354 root = tree.getroot()
1355 self.assertEqual(root.text, test_url)
1356
# Same scenario as resolving the DTD from a text string, except that the
# DTD text is encoded to UTF-8 bytes before it is passed to
# resolve_string().
1358 parse = self.etree.parse
1359 parser = self.etree.XMLParser(dtd_validation=True)
1360 assertEqual = self.assertEqual
1361 test_url = _str("__nosuch.dtd")
1362
1363 class MyResolver(self.etree.Resolver):
1364 def resolve(self, url, id, context):
1365 assertEqual(url, test_url)
1366 return self.resolve_string(
1367 (_str('''<!ENTITY myentity "%s">
1368 <!ELEMENT doc ANY>''') % url).encode('utf-8'),
1369 context)
1370
1371 parser.resolvers.add(MyResolver())
1372
1373 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1374 tree = parse(StringIO(xml), parser)
1375 root = tree.getroot()
1376 self.assertEqual(root.text, test_url)
1377
# Custom resolver that answers the DTD lookup with resolve_file() and a
# SillyFileLike wrapper (imported from common_imports) around the DTD text.
# The entity defined by that DTD must be expanded into the root text.
1379 parse = self.etree.parse
1380 parser = self.etree.XMLParser(dtd_validation=True)
1381 assertEqual = self.assertEqual
1382 test_url = _str("__nosuch.dtd")
1383
1384 class MyResolver(self.etree.Resolver):
1385 def resolve(self, url, id, context):
1386 assertEqual(url, test_url)
1387 return self.resolve_file(
1388 SillyFileLike(
1389 _str('''<!ENTITY myentity "%s">
1390 <!ELEMENT doc ANY>''') % url), context)
1391
1392 parser.resolvers.add(MyResolver())
1393
1394 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1395 tree = parse(StringIO(xml), parser)
1396 root = tree.getroot()
1397 self.assertEqual(root.text, test_url)
1398
# Custom resolver that redirects the (non-existent) DTD URL to the file
# 'test.dtd' in the test directory via resolve_filename().
# With attribute_defaults=True, the assertions expect <a> and <b> to carry
# a 'default' attribute with the values 'valueA' and 'valueB'.
1400 parse = self.etree.parse
1401 parser = self.etree.XMLParser(attribute_defaults=True)
1402 assertEqual = self.assertEqual
1403 test_url = _str("__nosuch.dtd")
1404
1405 class MyResolver(self.etree.Resolver):
1406 def resolve(self, url, id, context):
1407 assertEqual(url, test_url)
1408 return self.resolve_filename(
1409 fileInTestDir('test.dtd'), context)
1410
1411 parser.resolvers.add(MyResolver())
1412
1413 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1414 tree = parse(StringIO(xml), parser)
1415 root = tree.getroot()
1416 self.assertEqual(
1417 root.attrib, {'default': 'valueA'})
1418 self.assertEqual(
1419 root[0].attrib, {'default': 'valueB'})
1420
# Like the resolve_filename() case, but the document is parsed with a
# base_url inside the test directory.  The resolver therefore expects the
# relative DTD name to arrive as a file URL in that directory, and it
# answers with a file URL as well.
# Both URLs are normalised from 'file://' to 'file:' before comparing.
1422 parse = self.etree.parse
1423 parser = self.etree.XMLParser(attribute_defaults=True)
1424 assertEqual = self.assertEqual
1425 test_url = _str("__nosuch.dtd")
1426
1427 class MyResolver(self.etree.Resolver):
1428 def resolve(self, url, id, context):
1429 expected = fileUrlInTestDir(test_url)
1430 url = url.replace('file://', 'file:') # depends on libxml2 version
1431 expected = expected.replace('file://', 'file:')
1432 assertEqual(url, expected)
1433 return self.resolve_filename(
1434 fileUrlInTestDir('test.dtd'), context)
1435
1436 parser.resolvers.add(MyResolver())
1437
1438 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1439 tree = parse(StringIO(xml), parser,
1440 base_url=fileUrlInTestDir('__test.xml'))
1441 root = tree.getroot()
1442 self.assertEqual(
1443 root.attrib, {'default': 'valueA'})
1444 self.assertEqual(
1445 root[0].attrib, {'default': 'valueB'})
1446
# Custom resolver that answers the DTD lookup with resolve_file() and a
# real file object opened in binary mode on 'test.dtd'.
# NOTE(review): the file object opened inside resolve() is never closed
# explicitly in this test - confirm whether resolve_file() takes ownership.
1448 parse = self.etree.parse
1449 parser = self.etree.XMLParser(attribute_defaults=True)
1450 assertEqual = self.assertEqual
1451 test_url = _str("__nosuch.dtd")
1452
1453 class MyResolver(self.etree.Resolver):
1454 def resolve(self, url, id, context):
1455 assertEqual(url, test_url)
1456 return self.resolve_file(
1457 open(fileInTestDir('test.dtd'), 'rb'), context)
1458
1459 parser.resolvers.add(MyResolver())
1460
1461 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1462 tree = parse(StringIO(xml), parser)
1463 root = tree.getroot()
1464 self.assertEqual(
1465 root.attrib, {'default': 'valueA'})
1466 self.assertEqual(
1467 root[0].attrib, {'default': 'valueB'})
1468
# Custom resolver that answers the DTD lookup with resolve_empty().
# The document references &myentity;, which is then undefined, so parsing
# is expected to raise XMLSyntaxError.  The `check` class is a mutable flag
# used to prove that the resolver was actually called.
# NOTE(review): the exception class is taken from the module-level `etree`
# while everything else uses self.etree.
1470 parse = self.etree.parse
1471 parser = self.etree.XMLParser(load_dtd=True)
1472 assertEqual = self.assertEqual
1473 test_url = _str("__nosuch.dtd")
1474
1475 class check(object):
1476 resolved = False
1477
1478 class MyResolver(self.etree.Resolver):
1479 def resolve(self, url, id, context):
1480 assertEqual(url, test_url)
1481 check.resolved = True
1482 return self.resolve_empty(context)
1483
1484 parser.resolvers.add(MyResolver())
1485
1486 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1487 self.assertRaises(etree.XMLSyntaxError, parse, StringIO(xml), parser)
1488 self.assertTrue(check.resolved)
1489
# An exception raised inside a custom resolver must propagate out of
# parse() unchanged.  A local exception class is used so that nothing else
# can raise it by accident.
# NOTE(review): `xml` is a plain string literal passed to BytesIO without
# _bytes(); presumably the BytesIO helper from common_imports accepts
# that - confirm.
1491 parse = self.etree.parse
1492 parser = self.etree.XMLParser(dtd_validation=True)
1493
1494 class _LocalException(Exception):
1495 pass
1496
1497 class MyResolver(self.etree.Resolver):
1498 def resolve(self, url, id, context):
1499 raise _LocalException
1500
1501 parser.resolvers.add(MyResolver())
1502
1503 xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
1504 self.assertRaises(_LocalException, parse, BytesIO(xml), parser)
1505
# Parse with resolve_entities=False: the entity reference must be kept as
# an Entity node (tag is the Entity factory) exposing .text and .name, and
# it must serialise back to the original reference.
1507 parse = self.etree.parse
1508 tostring = self.etree.tostring
1509 parser = self.etree.XMLParser(resolve_entities=False)
1510 Entity = self.etree.Entity
1511
1512 xml = _bytes('<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>')
1513 tree = parse(BytesIO(xml), parser)
1514 root = tree.getroot()
1515 self.assertEqual(root[0].tag, Entity)
1516 self.assertEqual(root[0].text, "&myentity;")
1517 self.assertEqual(root[0].tail, None)
1518 self.assertEqual(root[0].name, "myentity")
1519
1520 self.assertEqual(_bytes('<doc>&myentity;</doc>'),
1521 tostring(root))
1522
# Replace the first child with the last one (root[0] = root[-1]) in a tree
# parsed with resolve_entities=False, then check that the moved <child3>
# still contains its entity node named 'nbsp'.
# NOTE(review): the entity value, the <child3> content and the expected
# .text show up as whitespace in this listing; they look like entity
# references that were decoded when the listing was rendered - verify
# against the real source file.
1524 xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp " "> ]>
1525 <root>
1526 <child1/>
1527 <child2/>
1528 <child3> </child3>
1529 </root>''')
1530
1531 parser = self.etree.XMLParser(resolve_entities=False)
1532 root = etree.fromstring(xml, parser)
1533 self.assertEqual([ el.tag for el in root ],
1534 ['child1', 'child2', 'child3'])
1535
1536 root[0] = root[-1]
1537 self.assertEqual([ el.tag for el in root ],
1538 ['child3', 'child2'])
1539 self.assertEqual(root[0][0].text, ' ')
1540 self.assertEqual(root[0][0].name, 'nbsp')
1541
# Create an Entity node with the Entity() factory, append it to an element
# and check its tag/text/tail/name as well as the serialised output.
1543 Entity = self.etree.Entity
1544 Element = self.etree.Element
1545 tostring = self.etree.tostring
1546
1547 root = Element("root")
1548 root.append( Entity("test") )
1549
1550 self.assertEqual(root[0].tag, Entity)
1551 self.assertEqual(root[0].text, "&test;")
1552 self.assertEqual(root[0].tail, None)
1553 self.assertEqual(root[0].name, "test")
1554
1555 self.assertEqual(_bytes('<root>&test;</root>'),
1556 tostring(root))
1557
# Parse a document with an internal DTD subset (entity 'b' refers to entity
# 'a') using resolve_entities=False, then append the parsed <data> element
# to a freshly created element.  The entity node named 'b' must still be
# there after the move.
# Despite its name, `entity` holds the parsed <data> root element.
1559 Entity = self.etree.Entity
1560 Element = self.etree.Element
1561 parser = self.etree.XMLParser(resolve_entities=False)
1562 entity = self.etree.XML('''<!DOCTYPE data [
1563 <!ENTITY a "a">
1564 <!ENTITY b "&a;">
1565 ]>
1566 <data>&b;</data>
1567 ''', parser)
1568
1569 el = Element('test')
1570 el.append(entity)
1571 self.assertEqual(el.tag, 'test')
1572 self.assertEqual(el[0].tag, 'data')
1573 self.assertEqual(el[0][0].tag, Entity)
1574 self.assertEqual(el[0][0].name, 'b')
1575
# Check the .text of Entity nodes created from a named reference and from
# decimal / hexadecimal character references.
# NOTE(review): the first assertion expects the reference form '&test;',
# while the three numeric cases show a single literal character here.  That
# looks like the listing decoded the expected '&#...;' strings - verify
# against the real source file.
1577 Entity = self.etree.Entity
1578 self.assertEqual(Entity("test").text, '&test;')
1579 self.assertEqual(Entity("#17683").text, '䔓')
1580 self.assertEqual(Entity("#x1768").text, 'ᝨ')
1581 self.assertEqual(Entity("#x98AF").text, '颯')
1582
# Entity() must reject invalid names with ValueError: names containing
# spaces, a comma or a NUL character, and malformed numeric references.
1584 Entity = self.etree.Entity
1585 self.assertRaises(ValueError, Entity, 'a b c')
1586 self.assertRaises(ValueError, Entity, 'a,b')
1587 self.assertRaises(ValueError, Entity, 'a\0b')
1588 self.assertRaises(ValueError, Entity, '#abc')
1589 self.assertRaises(ValueError, Entity, '#xxyz')
1590
# Assigning a CDATA() wrapper to .text: reading .text gives the plain
# string back, while serialisation emits a CDATA section.
1592 CDATA = self.etree.CDATA
1593 Element = self.etree.Element
1594 tostring = self.etree.tostring
1595
1596 root = Element("root")
1597 root.text = CDATA('test')
1598
1599 self.assertEqual('test',
1600 root.text)
1601 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
1602 tostring(root))
1603
# Assigning a CDATA() wrapper to .tail, first on a child element and then
# on a parentless root element.  In both cases .tail reads back as the plain
# string and tostring() emits the CDATA section after the element.
1605 CDATA = self.etree.CDATA
1606 Element = self.etree.Element
1607 SubElement = self.etree.SubElement
1608 tostring = self.etree.tostring
1609
1610 root = Element("root")
1611 child = SubElement(root, 'child')
1612 child.tail = CDATA('test')
1613
1614 self.assertEqual('test', child.tail)
1615 self.assertEqual(_bytes('<root><child/><![CDATA[test]]></root>'),
1616 tostring(root))
1617
1618 root = Element("root")
1619 root.tail = CDATA('test')
1620
1621 self.assertEqual('test', root.tail)
1622 self.assertEqual(_bytes('<root/><![CDATA[test]]>'),
1623 tostring(root))
1624
# CDATA() accepts a plain string literal and a _str() value, and raises
# TypeError for a non-string argument (an int).
1626 CDATA = self.etree.CDATA
1627 Element = self.etree.Element
1628 root = Element("root")
1629
1630 root.text = CDATA("test")
1631 self.assertEqual('test', root.text)
1632
1633 root.text = CDATA(_str("test"))
1634 self.assertEqual('test', root.text)
1635
1636 self.assertRaises(TypeError, CDATA, 1)
1637
# A CDATA object is not a valid attribute value: both Element.set() and
# item assignment on .attrib must raise TypeError.
1639 CDATA = self.etree.CDATA
1640 Element = self.etree.Element
1641
1642 root = Element("root")
1643 cdata = CDATA('test')
1644
1645 self.assertRaises(TypeError,
1646 root.set, 'attr', cdata)
1647 self.assertRaises(TypeError,
1648 operator.setitem, root.attrib, 'attr', cdata)
1649
# With strip_cdata=False the parser keeps the CDATA section: .text is the
# plain content and tostring() reproduces the CDATA markup.
1651 tostring = self.etree.tostring
1652 parser = self.etree.XMLParser(strip_cdata=False)
1653 root = self.etree.XML(_bytes('<root><![CDATA[test]]></root>'), parser)
1654
1655 self.assertEqual('test', root.text)
1656 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
1657 tostring(root))
1658
# With strip_cdata=False, the content of a preserved CDATA section must
# still be returned by the XPath text() node test.
1660 tostring = self.etree.tostring
1661 parser = self.etree.XMLParser(strip_cdata=False)
1662 root = self.etree.XML(_bytes('<root><![CDATA[test]]></root>'), parser)
1663 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
1664 tostring(root))
1665
1666 self.assertEqual(['test'], root.xpath('//text()'))
1667
1668 # TypeError in etree, AssertionError in ElementTree
# Assigning a non-element (a plain string) to a child slot must raise
# TypeError.  The child `b` exists only so that index 0 is a valid slot.
1670 Element = self.etree.Element
1671 SubElement = self.etree.SubElement
1672
1673 a = Element('a')
1674 b = SubElement(a, 'b')
1675
1676 self.assertRaises(TypeError,
1677 a.__setitem__, 0, 'foo')
1678
# append(None) and extend() with a None item must raise TypeError.
# The final assertion shows that extend() had already added the valid
# element 'one' before it failed on the None that followed it.
1680 Element = self.etree.Element
1681 root = Element('root')
1682 # raises AssertionError in ElementTree
1683 self.assertRaises(TypeError, root.append, None)
1684 self.assertRaises(TypeError, root.extend, [None])
1685 self.assertRaises(TypeError, root.extend, [Element('one'), None])
1686 self.assertEqual('one', root[0].tag)
1687
# Appending an element to itself or to one of its own descendants would
# create a cycle and must raise ValueError.  The last assertion checks that
# the tree structure is unchanged after the failed attempts.
1689 Element = self.etree.Element
1690 SubElement = self.etree.SubElement
1691 root = Element('root')
1692 self.assertRaises(ValueError, root.append, root)
1693 child = SubElement(root, 'child')
1694 self.assertRaises(ValueError, child.append, root)
1695 child2 = SubElement(child, 'child2')
1696 self.assertRaises(ValueError, child2.append, root)
1697 self.assertRaises(ValueError, child2.append, child)
1698 self.assertEqual('child2', root[0][0].tag)
1699
# addnext() with an element that is already a sibling moves it: adding 'a'
# after 'b' turns the child order a, b into b, a.
1701 Element = self.etree.Element
1702 SubElement = self.etree.SubElement
1703 root = Element('root')
1704 SubElement(root, 'a')
1705 SubElement(root, 'b')
1706
1707 self.assertEqual(['a', 'b'],
1708 [c.tag for c in root])
1709 root[1].addnext(root[0])
1710 self.assertEqual(['b', 'a'],
1711 [c.tag for c in root])
1712
# addprevious() with an element that is already a sibling moves it: adding
# 'b' before 'a' turns the child order a, b into b, a.
1714 Element = self.etree.Element
1715 SubElement = self.etree.SubElement
1716 root = Element('root')
1717 SubElement(root, 'a')
1718 SubElement(root, 'b')
1719
1720 self.assertEqual(['a', 'b'],
1721 [c.tag for c in root])
1722 root[0].addprevious(root[1])
1723 self.assertEqual(['b', 'a'],
1724 [c.tag for c in root])
1725
# addnext() must refuse to insert an element's own parent as its sibling
# (ValueError) and must leave the tree untouched.
1727 Element = self.etree.Element
1728 SubElement = self.etree.SubElement
1729 root = Element('root')
1730 a = SubElement(root, 'a')
1731 b = SubElement(a, 'b')
1732 # appending parent as sibling is forbidden
1733 self.assertRaises(ValueError, b.addnext, a)
1734 self.assertEqual(['a'], [c.tag for c in root])
1735 self.assertEqual(['b'], [c.tag for c in a])
1736
# addprevious() must refuse to insert an element's own parent as its
# sibling (ValueError) and must leave the tree untouched.
1738 Element = self.etree.Element
1739 SubElement = self.etree.SubElement
1740 root = Element('root')
1741 a = SubElement(root, 'a')
1742 b = SubElement(a, 'b')
1743 # appending parent as sibling is forbidden
1744 self.assertRaises(ValueError, b.addprevious, a)
1745 self.assertEqual(['a'], [c.tag for c in root])
1746 self.assertEqual(['b'], [c.tag for c in a])
1747
# Same cycle check for addnext(), with a more distant ancestor: 'a' is the
# grandparent of 'c', not its direct parent.
1749 Element = self.etree.Element
1750 SubElement = self.etree.SubElement
1751 root = Element('root')
1752 a = SubElement(root, 'a')
1753 b = SubElement(a, 'b')
1754 c = SubElement(b, 'c')
1755 # appending parent as sibling is forbidden
1756 self.assertRaises(ValueError, c.addnext, a)
1757
# Same cycle check for addprevious(), with a more distant ancestor: 'a' is
# the grandparent of 'c', not its direct parent.
1759 Element = self.etree.Element
1760 SubElement = self.etree.SubElement
1761 root = Element('root')
1762 a = SubElement(root, 'a')
1763 b = SubElement(a, 'b')
1764 c = SubElement(b, 'c')
1765 # appending parent as sibling is forbidden
1766 self.assertRaises(ValueError, c.addprevious, a)
1767
# addprevious() calls that must not change anything: adding an element
# before itself (first and last child), and adding 'a' before 'b' when it
# already is the previous sibling.  The order stays a, b throughout.
1769 Element = self.etree.Element
1770 SubElement = self.etree.SubElement
1771 root = Element('root')
1772 a = SubElement(root, 'a')
1773 b = SubElement(root, 'b')
1774 a.addprevious(a)
1775 self.assertEqual('a', root[0].tag)
1776 self.assertEqual('b', root[1].tag)
1777 b.addprevious(b)
1778 self.assertEqual('a', root[0].tag)
1779 self.assertEqual('b', root[1].tag)
1780 b.addprevious(a)
1781 self.assertEqual('a', root[0].tag)
1782 self.assertEqual('b', root[1].tag)
1783
# addnext() calls that must not change anything: adding an element after
# itself (first and last child), and adding 'b' after 'a' when it already is
# the next sibling.  The order stays a, b throughout.
1785 Element = self.etree.Element
1786 SubElement = self.etree.SubElement
1787 root = Element('root')
1788 a = SubElement(root, 'a')
1789 b = SubElement(root, 'b')
1790 a.addnext(a)
1791 self.assertEqual('a', root[0].tag)
1792 self.assertEqual('b', root[1].tag)
1793 b.addnext(b)
1794 self.assertEqual('a', root[0].tag)
1795 self.assertEqual('b', root[1].tag)
1796 a.addnext(b)
1797 self.assertEqual('a', root[0].tag)
1798 self.assertEqual('b', root[1].tag)
1799
# Adding a regular element as the next sibling of a parentless element
# must raise TypeError.
1801 Element = self.etree.Element
1802 a = Element('a')
1803 b = Element('b')
1804 self.assertRaises(TypeError, a.addnext, b)
1805
# Insert a processing instruction (with tail text) before a child element;
# the PI and its tail must show up in front of <a> in the output.
# self._writeElement is a helper that is not visible in this part of the
# file.
1807 Element = self.etree.Element
1808 SubElement = self.etree.SubElement
1809 PI = self.etree.PI
1810 root = Element('root')
1811 SubElement(root, 'a')
1812 pi = PI('TARGET', 'TEXT')
1813 pi.tail = "TAIL"
1814
1815 self.assertEqual(_bytes('<root><a></a></root>'),
1816 self._writeElement(root))
1817 root[0].addprevious(pi)
1818 self.assertEqual(_bytes('<root><?TARGET TEXT?>TAIL<a></a></root>'),
1819 self._writeElement(root))
1820
# Insert a processing instruction before the root element.  In the
# expected output the PI sits on its own line ahead of <root>, and the tail
# text "TAIL" assigned to the PI does not appear.
1822 Element = self.etree.Element
1823 PI = self.etree.PI
1824 root = Element('root')
1825 pi = PI('TARGET', 'TEXT')
1826 pi.tail = "TAIL"
1827
1828 self.assertEqual(_bytes('<root></root>'),
1829 self._writeElement(root))
1830 root.addprevious(pi)
1831 self.assertEqual(_bytes('<?TARGET TEXT?>\n<root></root>'),
1832 self._writeElement(root))
1833
# Insert a processing instruction (with tail text) after a child element;
# the PI and its tail must follow <a> in the output.
1835 Element = self.etree.Element
1836 SubElement = self.etree.SubElement
1837 PI = self.etree.PI
1838 root = Element('root')
1839 SubElement(root, 'a')
1840 pi = PI('TARGET', 'TEXT')
1841 pi.tail = "TAIL"
1842
1843 self.assertEqual(_bytes('<root><a></a></root>'),
1844 self._writeElement(root))
1845 root[0].addnext(pi)
1846 self.assertEqual(_bytes('<root><a></a><?TARGET TEXT?>TAIL</root>'),
1847 self._writeElement(root))
1848
# Insert a processing instruction after the root element.  In the expected
# output the PI sits on its own line after <root>, and the tail text "TAIL"
# assigned to the PI does not appear.
1850 Element = self.etree.Element
1851 PI = self.etree.PI
1852 root = Element('root')
1853 pi = PI('TARGET', 'TEXT')
1854 pi.tail = "TAIL"
1855
1856 self.assertEqual(_bytes('<root></root>'),
1857 self._writeElement(root))
1858 root.addnext(pi)
1859 self.assertEqual(_bytes('<root></root>\n<?TARGET TEXT?>'),
1860 self._writeElement(root))
1861
# Insert a comment (with tail text) after a child element; the comment and
# its tail must follow <a> in the output.
1863 Element = self.etree.Element
1864 SubElement = self.etree.SubElement
1865 Comment = self.etree.Comment
1866 root = Element('root')
1867 SubElement(root, 'a')
1868 comment = Comment('TEXT ')
1869 comment.tail = "TAIL"
1870
1871 self.assertEqual(_bytes('<root><a></a></root>'),
1872 self._writeElement(root))
1873 root[0].addnext(comment)
1874 self.assertEqual(_bytes('<root><a></a><!--TEXT -->TAIL</root>'),
1875 self._writeElement(root))
1876
# Insert a comment after the root element.  In the expected output the
# comment sits on its own line after <root>, and the tail text "TAIL"
# assigned to the comment does not appear.
1878 Element = self.etree.Element
1879 Comment = self.etree.Comment
1880 root = Element('root')
1881 comment = Comment('TEXT ')
1882 comment.tail = "TAIL"
1883
1884 self.assertEqual(_bytes('<root></root>'),
1885 self._writeElement(root))
1886 root.addnext(comment)
1887 self.assertEqual(_bytes('<root></root>\n<!--TEXT -->'),
1888 self._writeElement(root))
1889
# Insert a comment (with tail text) before a child element; the comment
# and its tail must show up in front of <a> in the output.
1891 Element = self.etree.Element
1892 SubElement = self.etree.SubElement
1893 Comment = self.etree.Comment
1894 root = Element('root')
1895 SubElement(root, 'a')
1896 comment = Comment('TEXT ')
1897 comment.tail = "TAIL"
1898
1899 self.assertEqual(_bytes('<root><a></a></root>'),
1900 self._writeElement(root))
1901 root[0].addprevious(comment)
1902 self.assertEqual(_bytes('<root><!--TEXT -->TAIL<a></a></root>'),
1903 self._writeElement(root))
1904
# Insert a comment before the root element.  In the expected output the
# comment sits on its own line ahead of <root>, and the tail text "TAIL"
# assigned to the comment does not appear.
1906 Element = self.etree.Element
1907 Comment = self.etree.Comment
1908 root = Element('root')
1909 comment = Comment('TEXT ')
1910 comment.tail = "TAIL"
1911
1912 self.assertEqual(_bytes('<root></root>'),
1913 self._writeElement(root))
1914 root.addprevious(comment)
1915 self.assertEqual(_bytes('<!--TEXT -->\n<root></root>'),
1916 self._writeElement(root))
1917
1918 # ET's Elements have items() and keys(), but not values()
# Element.values() returns the attribute values.  The result is sorted
# in place before comparing so the check does not depend on attribute order.
1920 XML = self.etree.XML
1921
1922 root = XML(_bytes('<doc alpha="Alpha" beta="Beta" gamma="Gamma"/>'))
1923 values = root.values()
1924 values.sort()
1925 self.assertEqual(['Alpha', 'Beta', 'Gamma'], values)
1926
1927 # gives error in ElementTree
# Comment() called without text creates an empty comment that serialises
# as '<!---->'.
1929 Element = self.etree.Element
1930 Comment = self.etree.Comment
1931
1932 a = Element('a')
1933 a.append(Comment())
1934 self.assertEqual(
1935 _bytes('<a><!----></a>'),
1936 self._writeElement(a))
1937
1938 # ElementTree ignores comments
# Parse a document containing an empty comment: the comment is kept as the
# second child (a[1]) with '' as its text, and the document round-trips
# unchanged through tostring().
1940 ElementTree = self.etree.ElementTree
1941 tostring = self.etree.tostring
1942
1943 xml = _bytes('<a><b/><!----><c/></a>')
1944 f = BytesIO(xml)
1945 doc = ElementTree(file=f)
1946 a = doc.getroot()
1947 self.assertEqual(
1948 '',
1949 a[1].text)
1950 self.assertEqual(
1951 xml,
1952 tostring(a))
1953
1954 # ElementTree ignores comments
# Parse a document containing a comment and read the comment's text
# through child indexing (a[1]) right after parsing.
# NOTE(review): a plain string literal is passed to BytesIO without
# _bytes(); presumably the BytesIO helper from common_imports accepts
# that - confirm.
1956 ElementTree = self.etree.ElementTree
1957
1958 f = BytesIO('<a><b></b><!-- hoi --><c></c></a>')
1959 doc = ElementTree(file=f)
1960 a = doc.getroot()
1961 self.assertEqual(
1962 ' hoi ',
1963 a[1].text)
1964
1965 # does not raise an exception in ElementTree
# A comment node cannot take children or attributes: append(), insert()
# and set() must all raise TypeError.
1967 Element = self.etree.Element
1968 Comment = self.etree.Comment
1969
1970 c = Comment()
1971 el = Element('myel')
1972
1973 self.assertRaises(TypeError, c.append, el)
1974 self.assertRaises(TypeError, c.insert, 0, el)
1975 self.assertRaises(TypeError, c.set, "myattr", "test")
1976
# The .attrib mapping of a comment is always empty and cannot be modified:
# membership tests are false, all views and iterators are empty, pop() only
# returns the supplied default, and item get/set/delete raise KeyError.
1978 c = self.etree.Comment()
1979 self.assertEqual(0, len(c.attrib))
1980
1981 self.assertFalse(c.attrib.__contains__('nope'))
1982 self.assertFalse('nope' in c.attrib)
1983 self.assertFalse('nope' in c.attrib.keys())
1984 self.assertFalse('nope' in c.attrib.values())
1985 self.assertFalse(('nope', 'huhu') in c.attrib.items())
1986
1987 self.assertEqual([], list(c.attrib))
1988 self.assertEqual([], list(c.attrib.keys()))
1989 self.assertEqual([], list(c.attrib.items()))
1990 self.assertEqual([], list(c.attrib.values()))
1991 self.assertEqual([], list(c.attrib.iterkeys()))
1992 self.assertEqual([], list(c.attrib.iteritems()))
1993 self.assertEqual([], list(c.attrib.itervalues()))
1994
1995 self.assertEqual('HUHU', c.attrib.pop('nope', 'HUHU'))
1996 self.assertRaises(KeyError, c.attrib.pop, 'nope')
1997
1998 self.assertRaises(KeyError, c.attrib.__getitem__, 'only')
1999 self.assertRaises(KeyError, c.attrib.__getitem__, 'names')
2000 self.assertRaises(KeyError, c.attrib.__getitem__, 'nope')
2001 self.assertRaises(KeyError, c.attrib.__setitem__, 'nope', 'yep')
2002 self.assertRaises(KeyError, c.attrib.__delitem__, 'nope')
2003
2004 # test passing 'None' to dump()
2007
# Element.prefix is None for an element without a namespace prefix and
# 'foo' for the child written as <foo:b/>.
2009 ElementTree = self.etree.ElementTree
2010
2011 f = BytesIO('<a xmlns:foo="http://www.infrae.com/ns/1"><foo:b/></a>')
2012 doc = ElementTree(file=f)
2013 a = doc.getroot()
2014 self.assertEqual(
2015 None,
2016 a.prefix)
2017 self.assertEqual(
2018 'foo',
2019 a[0].prefix)
2020
# Elements that are in a default namespace (declared without a prefix)
# report None as their prefix, both on the root and on its child.
2022 ElementTree = self.etree.ElementTree
2023
2024 f = BytesIO('<a xmlns="http://www.infrae.com/ns/1"><b/></a>')
2025 doc = ElementTree(file=f)
2026 a = doc.getroot()
2027 self.assertEqual(
2028 None,
2029 a.prefix)
2030 self.assertEqual(
2031 None,
2032 a[0].prefix)
2033
# getparent() returns None for the root and the direct parent otherwise.
# Tree used: a -> (b -> d, c).
# NOTE(review): the check for 'c' compares c.getparent() with b.getparent()
# instead of with `a` directly.
2035 Element = self.etree.Element
2036 SubElement = self.etree.SubElement
2037
2038 a = Element('a')
2039 b = SubElement(a, 'b')
2040 c = SubElement(a, 'c')
2041 d = SubElement(b, 'd')
2042 self.assertEqual(
2043 None,
2044 a.getparent())
2045 self.assertEqual(
2046 a,
2047 b.getparent())
2048 self.assertEqual(
2049 b.getparent(),
2050 c.getparent())
2051 self.assertEqual(
2052 b,
2053 d.getparent())
2054
# iterchildren() yields the direct child elements in document order; the
# text between them ("Hm") is not an item of its own.
2056 XML = self.etree.XML
2057
2058 root = XML(_bytes('<doc><one/><two>Two</two>Hm<three/></doc>'))
2059 result = []
2060 for el in root.iterchildren():
2061 result.append(el.tag)
2062 self.assertEqual(['one', 'two', 'three'], result)
2063
# iterchildren(reversed=True) yields the direct children last to first.
2065 XML = self.etree.XML
2066
2067 root = XML(_bytes('<doc><one/><two>Two</two>Hm<three/></doc>'))
2068 result = []
2069 for el in root.iterchildren(reversed=True):
2070 result.append(el.tag)
2071 self.assertEqual(['three', 'two', 'one'], result)
2072
# iterchildren(tag='two') yields only the children with that tag; their
# text is collected to tell the two <two> elements apart.
2074 XML = self.etree.XML
2075
2076 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two></doc>'))
2077 result = []
2078 for el in root.iterchildren(tag='two'):
2079 result.append(el.text)
2080 self.assertEqual(['Two', 'Bla'], result)
2081
# Same tag filter as the keyword form, with the tag name passed as a
# positional argument.
2083 XML = self.etree.XML
2084
2085 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two></doc>'))
2086 result = []
2087 for el in root.iterchildren('two'):
2088 result.append(el.text)
2089 self.assertEqual(['Two', 'Bla'], result)
2090
# Tag filter combined with reversed=True: matching children are yielded
# last to first.
2092 XML = self.etree.XML
2093
2094 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two></doc>'))
2095 result = []
2096 for el in root.iterchildren(reversed=True, tag='two'):
2097 result.append(el.text)
2098 self.assertEqual(['Bla', 'Two'], result)
2099
# The tag keyword also accepts a list of tag names.  <three/> has no text,
# hence the trailing None in the expected result.
2101 XML = self.etree.XML
2102
2103 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2104 result = []
2105 for el in root.iterchildren(tag=['two', 'three']):
2106 result.append(el.text)
2107 self.assertEqual(['Two', 'Bla', None], result)
2108
# Several tag names may also be given as separate positional arguments.
2110 XML = self.etree.XML
2111
2112 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2113 result = []
2114 for el in root.iterchildren('two', 'three'):
2115 result.append(el.text)
2116 self.assertEqual(['Two', 'Bla', None], result)
2117
# A list of tag names combined with reversed=True: matching children are
# yielded last to first.
2119 XML = self.etree.XML
2120
2121 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2122 result = []
2123 for el in root.iterchildren(reversed=True, tag=['two', 'three']):
2124 result.append(el.text)
2125 self.assertEqual([None, 'Bla', 'Two'], result)
2126
# iterancestors() yields the ancestors from the direct parent up to the
# root; it is empty for the root itself.  Tree used: a -> (b -> d, c).
2128 Element = self.etree.Element
2129 SubElement = self.etree.SubElement
2130
2131 a = Element('a')
2132 b = SubElement(a, 'b')
2133 c = SubElement(a, 'c')
2134 d = SubElement(b, 'd')
2135 self.assertEqual(
2136 [],
2137 list(a.iterancestors()))
2138 self.assertEqual(
2139 [a],
2140 list(b.iterancestors()))
2141 self.assertEqual(
2142 [a],
2143 list(c.iterancestors()))
2144 self.assertEqual(
2145 [b, a],
2146 list(d.iterancestors()))
2147
# iterancestors() with a tag filter, given positionally and as the tag
# keyword.  The wildcard '*' matches every ancestor.
2149 Element = self.etree.Element
2150 SubElement = self.etree.SubElement
2151
2152 a = Element('a')
2153 b = SubElement(a, 'b')
2154 c = SubElement(a, 'c')
2155 d = SubElement(b, 'd')
2156 self.assertEqual(
2157 [a],
2158 list(d.iterancestors('a')))
2159 self.assertEqual(
2160 [a],
2161 list(d.iterancestors(tag='a')))
2162
2163 self.assertEqual(
2164 [b, a],
2165 list(d.iterancestors('*')))
2166 self.assertEqual(
2167 [b, a],
2168 list(d.iterancestors(tag='*')))
2169
# iterancestors() with several tag names, each case checked both as a
# tuple passed via tag= and as separate positional arguments.
# Covers: all ancestors matched, nothing matched, the element's own tag
# ('d') not counting as an ancestor, a wildcard mixed with a name, and a
# name ('c') that exists in the tree but is not an ancestor of 'd'.
2171 Element = self.etree.Element
2172 SubElement = self.etree.SubElement
2173
2174 a = Element('a')
2175 b = SubElement(a, 'b')
2176 c = SubElement(a, 'c')
2177 d = SubElement(b, 'd')
2178 self.assertEqual(
2179 [b, a],
2180 list(d.iterancestors(tag=('a', 'b'))))
2181 self.assertEqual(
2182 [b, a],
2183 list(d.iterancestors('a', 'b')))
2184
2185 self.assertEqual(
2186 [],
2187 list(d.iterancestors(tag=('w', 'x', 'y', 'z'))))
2188 self.assertEqual(
2189 [],
2190 list(d.iterancestors('w', 'x', 'y', 'z')))
2191
2192 self.assertEqual(
2193 [],
2194 list(d.iterancestors(tag=('d', 'x'))))
2195 self.assertEqual(
2196 [],
2197 list(d.iterancestors('d', 'x')))
2198
2199 self.assertEqual(
2200 [b, a],
2201 list(d.iterancestors(tag=('b', '*'))))
2202 self.assertEqual(
2203 [b, a],
2204 list(d.iterancestors('b', '*')))
2205
2206 self.assertEqual(
2207 [b],
2208 list(d.iterancestors(tag=('b', 'c'))))
2209 self.assertEqual(
2210 [b],
2211 list(d.iterancestors('b', 'c')))
2212
# iterdescendants() yields all descendants in document order, without the
# element itself; it is empty for a leaf.
# Tree used: a -> (b -> d, c -> e).
2214 Element = self.etree.Element
2215 SubElement = self.etree.SubElement
2216
2217 a = Element('a')
2218 b = SubElement(a, 'b')
2219 c = SubElement(a, 'c')
2220 d = SubElement(b, 'd')
2221 e = SubElement(c, 'e')
2222
2223 self.assertEqual(
2224 [b, d, c, e],
2225 list(a.iterdescendants()))
2226 self.assertEqual(
2227 [],
2228 list(d.iterdescendants()))
2229
# iterdescendants() with a tag filter.  The starting element never matches
# its own tag; once a nested <a> (a2) is added below <e>, it is found from
# both the root and from <c>.
2231 Element = self.etree.Element
2232 SubElement = self.etree.SubElement
2233
2234 a = Element('a')
2235 b = SubElement(a, 'b')
2236 c = SubElement(a, 'c')
2237 d = SubElement(b, 'd')
2238 e = SubElement(c, 'e')
2239
2240 self.assertEqual(
2241 [],
2242 list(a.iterdescendants('a')))
2243 self.assertEqual(
2244 [],
2245 list(a.iterdescendants(tag='a')))
2246
2247 a2 = SubElement(e, 'a')
2248 self.assertEqual(
2249 [a2],
2250 list(a.iterdescendants('a')))
2251
2252 self.assertEqual(
2253 [a2],
2254 list(c.iterdescendants('a')))
2255 self.assertEqual(
2256 [a2],
2257 list(c.iterdescendants(tag='a')))
2258
# iterdescendants() with several tag names, each case checked both as a
# tuple passed via tag= and as separate positional arguments.
# Results stay in document order regardless of the order of the names; a
# '*' among the names matches every descendant.
2260 Element = self.etree.Element
2261 SubElement = self.etree.SubElement
2262
2263 a = Element('a')
2264 b = SubElement(a, 'b')
2265 c = SubElement(a, 'c')
2266 d = SubElement(b, 'd')
2267 e = SubElement(c, 'e')
2268
2269 self.assertEqual(
2270 [b, e],
2271 list(a.iterdescendants(tag=('a', 'b', 'e'))))
2272 self.assertEqual(
2273 [b, e],
2274 list(a.iterdescendants('a', 'b', 'e')))
2275
2276 a2 = SubElement(e, 'a')
2277 self.assertEqual(
2278 [b, a2],
2279 list(a.iterdescendants(tag=('a', 'b'))))
2280 self.assertEqual(
2281 [b, a2],
2282 list(a.iterdescendants('a', 'b')))
2283
2284 self.assertEqual(
2285 [],
2286 list(c.iterdescendants(tag=('x', 'y', 'z'))))
2287 self.assertEqual(
2288 [],
2289 list(c.iterdescendants('x', 'y', 'z')))
2290
2291 self.assertEqual(
2292 [b, d, c, e, a2],
2293 list(a.iterdescendants(tag=('x', 'y', 'z', '*'))))
2294 self.assertEqual(
2295 [b, d, c, e, a2],
2296 list(a.iterdescendants('x', 'y', 'z', '*')))
2297
# getroottree().getroot() leads back to the same root element 'a' no
# matter which element of the tree it is called on (root, child or
# grandchild).
2299 Element = self.etree.Element
2300 SubElement = self.etree.SubElement
2301
2302 a = Element('a')
2303 b = SubElement(a, 'b')
2304 c = SubElement(a, 'c')
2305 d = SubElement(b, 'd')
2306 self.assertEqual(
2307 a,
2308 a.getroottree().getroot())
2309 self.assertEqual(
2310 a,
2311 b.getroottree().getroot())
2312 self.assertEqual(
2313 a,
2314 d.getroottree().getroot())
2315
# getnext() returns the following sibling, or None for the root and for
# the last child.
2317 Element = self.etree.Element
2318 SubElement = self.etree.SubElement
2319
2320 a = Element('a')
2321 b = SubElement(a, 'b')
2322 c = SubElement(a, 'c')
2323 self.assertEqual(
2324 None,
2325 a.getnext())
2326 self.assertEqual(
2327 c,
2328 b.getnext())
2329 self.assertEqual(
2330 None,
2331 c.getnext())
2332
# getprevious() returns the preceding sibling, or None for the root and
# for the first child.
2334 Element = self.etree.Element
2335 SubElement = self.etree.SubElement
2336
2337 a = Element('a')
2338 b = SubElement(a, 'b')
2339 c = SubElement(a, 'c')
2340 d = SubElement(b, 'd')
2341 self.assertEqual(
2342 None,
2343 a.getprevious())
2344 self.assertEqual(
2345 b,
2346 c.getprevious())
2347 self.assertEqual(
2348 None,
2349 b.getprevious())
2350
# itersiblings() yields the following siblings by default and the
# preceding ones with preceding=True; it is empty for the root and at the
# respective end of the sibling list.
2352 Element = self.etree.Element
2353 SubElement = self.etree.SubElement
2354
2355 a = Element('a')
2356 b = SubElement(a, 'b')
2357 c = SubElement(a, 'c')
2358 d = SubElement(b, 'd')
2359 self.assertEqual(
2360 [],
2361 list(a.itersiblings()))
2362 self.assertEqual(
2363 [c],
2364 list(b.itersiblings()))
2365 self.assertEqual(
2366 [],
2367 list(c.itersiblings()))
2368 self.assertEqual(
2369 [b],
2370 list(c.itersiblings(preceding=True)))
2371 self.assertEqual(
2372 [],
2373 list(b.itersiblings(preceding=True)))
2374
# itersiblings() with a tag filter, in both directions.  The last check
# shows that the starting element itself is never returned, even when the
# filter names its own tag.
2376 Element = self.etree.Element
2377 SubElement = self.etree.SubElement
2378
2379 a = Element('a')
2380 b = SubElement(a, 'b')
2381 c = SubElement(a, 'c')
2382 d = SubElement(b, 'd')
2383 self.assertEqual(
2384 [],
2385 list(a.itersiblings(tag='XXX')))
2386 self.assertEqual(
2387 [c],
2388 list(b.itersiblings(tag='c')))
2389 self.assertEqual(
2390 [c],
2391 list(b.itersiblings(tag='*')))
2392 self.assertEqual(
2393 [b],
2394 list(c.itersiblings(preceding=True, tag='b')))
2395 self.assertEqual(
2396 [],
2397 list(c.itersiblings(preceding=True, tag='c')))
2398
# Test body; the enclosing def line is not visible in this listing.
# Checks itersiblings() with a tuple of tags. `a` has children b, c, e (d is
# nested under b, so it is never a sibling here). Duplicate tags in the tuple
# are accepted, and with preceding=True the expected order is nearest sibling
# first ([c, b] when starting from `e`).
2400 Element = self.etree.Element
2401 SubElement = self.etree.SubElement
2402
2403 a = Element('a')
2404 b = SubElement(a, 'b')
2405 c = SubElement(a, 'c')
2406 d = SubElement(b, 'd')
2407 e = SubElement(a, 'e')
2408 self.assertEqual(
2409 [],
2410 list(a.itersiblings(tag=('XXX', 'YYY'))))
2411 self.assertEqual(
2412 [c, e],
2413 list(b.itersiblings(tag=('c', 'd', 'e'))))
2414 self.assertEqual(
2415 [b],
2416 list(c.itersiblings(preceding=True, tag=('b', 'b', 'c', 'd'))))
2417 self.assertEqual(
2418 [c, b],
2419 list(e.itersiblings(preceding=True, tag=('c', '*'))))
2420
# Test body; the enclosing def line is not visible in this listing.
# Parses a document whose internal DTD declares `myid` (on h1) and `someid`
# (on p) as ID attributes, using parseid(), which returns (tree, id-dict).
# Checks that the tree serialises like a plain XML() parse and that the dict
# maps "chapter1", "xmlid" and "warn1" to root[0], root[3] and root[4].
# The expected mapping has no entry for the plain, undeclared id="note1".
2422 parseid = self.etree.parseid
2423 XML = self.etree.XML
2424 xml_text = _bytes('''
2425 <!DOCTYPE document [
2426 <!ELEMENT document (h1,p)*>
2427 <!ELEMENT h1 (#PCDATA)>
2428 <!ATTLIST h1 myid ID #REQUIRED>
2429 <!ELEMENT p (#PCDATA)>
2430 <!ATTLIST p someid ID #REQUIRED>
2431 ]>
2432 <document>
2433 <h1 myid="chapter1">...</h1>
2434 <p id="note1" class="note">...</p>
2435 <p>Regular paragraph.</p>
2436 <p xml:id="xmlid">XML:ID paragraph.</p>
2437 <p someid="warn1" class="warning">...</p>
2438 </document>
2439 ''')
2440
2441 tree, dic = parseid(BytesIO(xml_text))
2442 root = tree.getroot()
2443 root2 = XML(xml_text)
2444 self.assertEqual(self._writeElement(root),
2445 self._writeElement(root2))
2446 expected = {
2447 "chapter1" : root[0],
2448 "xmlid" : root[3],
2449 "warn1" : root[4]
2450 }
2451 self.assertTrue("chapter1" in dic)
2452 self.assertTrue("warn1" in dic)
2453 self.assertTrue("xmlid" in dic)
2454 self._checkIDDict(dic, expected)
2455
# Test body; the enclosing def line is not visible in this listing.
# Same document as used with parseid(), but parsed from bytes with
# XMLDTDID(), which returns (root, id-dict). Checks that the root serialises
# like a plain XML() parse and that the dict maps "chapter1", "xmlid" and
# "warn1" to root[0], root[3] and root[4].
2457 XMLDTDID = self.etree.XMLDTDID
2458 XML = self.etree.XML
2459 xml_text = _bytes('''
2460 <!DOCTYPE document [
2461 <!ELEMENT document (h1,p)*>
2462 <!ELEMENT h1 (#PCDATA)>
2463 <!ATTLIST h1 myid ID #REQUIRED>
2464 <!ELEMENT p (#PCDATA)>
2465 <!ATTLIST p someid ID #REQUIRED>
2466 ]>
2467 <document>
2468 <h1 myid="chapter1">...</h1>
2469 <p id="note1" class="note">...</p>
2470 <p>Regular paragraph.</p>
2471 <p xml:id="xmlid">XML:ID paragraph.</p>
2472 <p someid="warn1" class="warning">...</p>
2473 </document>
2474 ''')
2475
2476 root, dic = XMLDTDID(xml_text)
2477 root2 = XML(xml_text)
2478 self.assertEqual(self._writeElement(root),
2479 self._writeElement(root2))
2480 expected = {
2481 "chapter1" : root[0],
2482 "xmlid" : root[3],
2483 "warn1" : root[4]
2484 }
2485 self.assertTrue("chapter1" in dic)
2486 self.assertTrue("warn1" in dic)
2487 self.assertTrue("xmlid" in dic)
2488 self._checkIDDict(dic, expected)
2489
# Test body; the enclosing def line is not visible in this listing.
# Parses a document with no DTD and no xml:id attributes through XMLDTDID()
# and checks that the returned ID dict compares equal to an empty mapping,
# while the tree still serialises like a plain XML() parse.
2491 XMLDTDID = self.etree.XMLDTDID
2492 XML = self.etree.XML
2493 xml_text = _bytes('''
2494 <document>
2495 <h1 myid="chapter1">...</h1>
2496 <p id="note1" class="note">...</p>
2497 <p>Regular paragraph.</p>
2498 <p someid="warn1" class="warning">...</p>
2499 </document>
2500 ''')
2501
2502 root, dic = XMLDTDID(xml_text)
2503 root2 = XML(xml_text)
2504 self.assertEqual(self._writeElement(root),
2505 self._writeElement(root2))
2506 expected = {}
2507 self._checkIDDict(dic, expected)
2508
# Test body; the enclosing def line is not visible in this listing.
# Parses the DTD/ID document with a parser created with collect_ids=False
# and checks that the ID dict returned by XMLDTDID() is falsy and compares
# equal to an empty mapping, even though the document declares ID attributes.
# Note: the parser is built from the bare name `etree`, not `self.etree`.
2510 XMLDTDID = self.etree.XMLDTDID
2511 XML = self.etree.XML
2512 xml_text = _bytes('''
2513 <!DOCTYPE document [
2514 <!ELEMENT document (h1,p)*>
2515 <!ELEMENT h1 (#PCDATA)>
2516 <!ATTLIST h1 myid ID #REQUIRED>
2517 <!ELEMENT p (#PCDATA)>
2518 <!ATTLIST p someid ID #REQUIRED>
2519 ]>
2520 <document>
2521 <h1 myid="chapter1">...</h1>
2522 <p id="note1" class="note">...</p>
2523 <p>Regular paragraph.</p>
2524 <p xml:id="xmlid">XML:ID paragraph.</p>
2525 <p someid="warn1" class="warning">...</p>
2526 </document>
2527 ''')
2528
2529 parser = etree.XMLParser(collect_ids=False)
2530 root, dic = XMLDTDID(xml_text, parser=parser)
2531 root2 = XML(xml_text)
2532 self.assertEqual(self._writeElement(root),
2533 self._writeElement(root2))
2534 self.assertFalse(dic)
2535 self._checkIDDict(dic, {})
2536
# Helper body; the def line is not visible in this listing. Presumably this
# is the `_checkIDDict(dic, expected)` helper called by the ID-dict tests,
# since it reads `dic` and `expected` -- confirm against the real file.
# Compares an ID dict with a plain dict through its mapping interface:
# length, sorted items and sorted keys, plus the iter*() variants when
# sys.version_info < (3,).
# NOTE(review): indentation is lost in this listing, so it cannot be seen
# which statements each `if sys.version_info < (3,)` guards. The final guard
# is followed by both the values() and the itervalues() comparison;
# presumably both sit under it -- verify in the real file.
2538 self.assertEqual(len(dic),
2539 len(expected))
2540 self.assertEqual(sorted(dic.items()),
2541 sorted(expected.items()))
2542 if sys.version_info < (3,):
2543 self.assertEqual(sorted(dic.iteritems()),
2544 sorted(expected.iteritems()))
2545 self.assertEqual(sorted(dic.keys()),
2546 sorted(expected.keys()))
2547 if sys.version_info < (3,):
2548 self.assertEqual(sorted(dic.iterkeys()),
2549 sorted(expected.iterkeys()))
2550 if sys.version_info < (3,):
2551 self.assertEqual(sorted(dic.values()),
2552 sorted(expected.values()))
2553 self.assertEqual(sorted(dic.itervalues()),
2554 sorted(expected.itervalues()))
2555
# Test body; the enclosing def line is not visible in this listing.
# Creates an element with nsmap {'foo': <ns>} and checks that its prefix is
# 'foo' and that self._writeElement() emits the xmlns:foo declaration.
2557 etree = self.etree
2558
2559 r = {'foo': 'http://ns.infrae.com/foo'}
2560 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2561 self.assertEqual(
2562 'foo',
2563 e.prefix)
2564 self.assertEqual(
2565 _bytes('<foo:bar xmlns:foo="http://ns.infrae.com/foo"></foo:bar>'),
2566 self._writeElement(e))
2567
# Test body; the enclosing def line is not visible in this listing.
# Creates an element whose namespace is mapped to the None (default) prefix
# and checks: prefix is None, the tag keeps its '{uri}name' form, and the
# serialisation uses a plain xmlns="..." declaration.
2569 etree = self.etree
2570
2571 r = {None: 'http://ns.infrae.com/foo'}
2572 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2573 self.assertEqual(
2574 None,
2575 e.prefix)
2576 self.assertEqual(
2577 '{http://ns.infrae.com/foo}bar',
2578 e.tag)
2579 self.assertEqual(
2580 _bytes('<bar xmlns="http://ns.infrae.com/foo"></bar>'),
2581 self._writeElement(e))
2582
# Test body; the enclosing def line is not visible in this listing.
# nsmap holds a default namespace plus an extra prefix 'p'. Checks that the
# element uses the default (prefix None) and that both declarations are
# serialised, including the one for the otherwise unused prefix 'p'.
2584 etree = self.etree
2585
2586 r = {None: 'http://ns.infrae.com/foo', 'p': 'http://test/'}
2587 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2588 self.assertEqual(None, e.prefix)
2589 self.assertEqual('{http://ns.infrae.com/foo}bar', e.tag)
2590 self.assertEqual(
2591 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:p="http://test/"></bar>'),
2592 self._writeElement(e))
2593
# Test body; the enclosing def line is not visible in this listing.
# Sets an attribute in the 'hoi' namespace on an element in the default
# namespace and checks that the attribute is serialised with the hoi: prefix
# taken from the element's nsmap.
2595 etree = self.etree
2596
2597 r = {None: 'http://ns.infrae.com/foo',
2598 'hoi': 'http://ns.infrae.com/hoi'}
2599 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2600 e.set('{http://ns.infrae.com/hoi}test', 'value')
2601 self.assertEqual(
2602 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi" hoi:test="value"></bar>'),
2603 self._writeElement(e))
2604
# Test body; the enclosing def line is not visible in this listing.
# `root` declares http://test/ns as its default namespace; `sub` declares the
# same URI under the prefix 'test' and carries a namespaced attribute.
# Standalone, `sub` serialises as test:sub. After root.append(sub) the
# expected output has the element as unprefixed <sub> (root's default
# namespace) while the attribute keeps its test: prefix and declaration.
2606 etree = self.etree
2607
2608 root = etree.Element('{http://test/ns}root',
2609 nsmap={None: 'http://test/ns'})
2610 sub = etree.Element('{http://test/ns}sub',
2611 nsmap={'test': 'http://test/ns'})
2612
2613 sub.attrib['{http://test/ns}attr'] = 'value'
2614 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2615 self.assertEqual(
2616 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2617 etree.tostring(sub))
2618
2619 root.append(sub)
2620 self.assertEqual(
2621 _bytes('<root xmlns="http://test/ns">'
2622 '<sub xmlns:test="http://test/ns" test:attr="value"/>'
2623 '</root>'),
2624 etree.tostring(root))
2625
# Test body; the enclosing def line is not visible in this listing.
# `root` has no namespace; `sub` uses the prefix 'test' for both its tag and
# a namespaced attribute. Checks that the serialisation of `sub` is the same
# before and after it is appended to `root` (prefix and declaration kept).
2627 etree = self.etree
2628
2629 root = etree.Element('root')
2630 sub = etree.Element('{http://test/ns}sub',
2631 nsmap={'test': 'http://test/ns'})
2632
2633 sub.attrib['{http://test/ns}attr'] = 'value'
2634 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2635 self.assertEqual(
2636 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2637 etree.tostring(sub))
2638
2639 root.append(sub)
2640 self.assertEqual(
2641 _bytes('<root>'
2642 '<test:sub xmlns:test="http://test/ns" test:attr="value"/>'
2643 '</root>'),
2644 etree.tostring(root))
2645
# Test body; the enclosing def line is not visible in this listing.
# `sub` maps its namespace only to the default (None) prefix and then gets an
# attribute in that namespace. The expected output shows the attribute under
# a separate 'ns0' prefix declared next to the default namespace, both
# standalone and after `sub` is appended to an un-namespaced `root`.
2647 etree = self.etree
2648
2649 root = etree.Element('root')
2650 sub = etree.Element('{http://test/ns}sub',
2651 nsmap={None: 'http://test/ns'})
2652
2653 sub.attrib['{http://test/ns}attr'] = 'value'
2654 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2655 self.assertEqual(
2656 _bytes('<sub xmlns="http://test/ns" '
2657 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2658 etree.tostring(sub))
2659
2660 root.append(sub)
2661 self.assertEqual(
2662 _bytes('<root>'
2663 '<sub xmlns="http://test/ns"'
2664 ' xmlns:ns0="http://test/ns" ns0:attr="value"/>'
2665 '</root>'),
2666 etree.tostring(root))
2667
# Test body; the enclosing def line is not visible in this listing.
# `root` declares http://test/ns both as prefix 'test' and as the default
# namespace; `sub` declares it only as default and gets a namespaced
# attribute (serialised standalone with an 'ns0' prefix). After
# root.append(sub) the expected output shows `sub` and its attribute using
# root's 'test' prefix, with no namespace declarations left on `sub`.
2669 etree = self.etree
2670
2671 root = etree.Element('{http://test/ns}root',
2672 nsmap={'test': 'http://test/ns',
2673 None: 'http://test/ns'})
2674 sub = etree.Element('{http://test/ns}sub',
2675 nsmap={None: 'http://test/ns'})
2676
2677 sub.attrib['{http://test/ns}attr'] = 'value'
2678 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2679 self.assertEqual(
2680 _bytes('<sub xmlns="http://test/ns" '
2681 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2682 etree.tostring(sub))
2683
2684 root.append(sub)
2685 self.assertEqual(
2686 _bytes('<test:root xmlns:test="http://test/ns" xmlns="http://test/ns">'
2687 '<test:sub test:attr="value"/>'
2688 '</test:root>'),
2689 etree.tostring(root))
2690
# Test body; the enclosing def line is not visible in this listing.
# Wraps a namespaced element in an ElementTree, then adds a SubElement in the
# 'hoi' namespace and checks that the child is serialised with the hoi:
# prefix declared on the parent. `tree` is not referenced again in the
# visible lines.
2692 etree = self.etree
2693 r = {None: 'http://ns.infrae.com/foo',
2694 'hoi': 'http://ns.infrae.com/hoi'}
2695 e = etree.Element('{http://ns.infrae.com/foo}z', nsmap=r)
2696 tree = etree.ElementTree(element=e)
2697 etree.SubElement(e, '{http://ns.infrae.com/hoi}x')
2698 self.assertEqual(
2699 _bytes('<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>'),
2700 self._writeElement(e))
2701
# Test body; the enclosing def line is not visible in this listing.
# Appends one element to another where both use the same default namespace,
# then checks that parent and child still have prefix None and unchanged
# '{uri}bar' tags.
2703 etree = self.etree
2704
2705 r = {None: 'http://ns.infrae.com/foo'}
2706 e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2707 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2708
2709 e1.append(e2)
2710
2711 self.assertEqual(
2712 None,
2713 e1.prefix)
2714 self.assertEqual(
2715 None,
2716 e1[0].prefix)
2717 self.assertEqual(
2718 '{http://ns.infrae.com/foo}bar',
2719 e1.tag)
2720 self.assertEqual(
2721 '{http://ns.infrae.com/foo}bar',
2722 e1[0].tag)
2723
# Test body; the enclosing def line is not visible in this listing.
# Both elements receive an nsmap whose default namespace is .../BAR, but
# `e2` is created in the .../foo namespace. After e1.append(e2), checks that
# `e1` has prefix None, `e2` has some non-None prefix, and both tags are
# unchanged.
2725 etree = self.etree
2726
2727 r = {None: 'http://ns.infrae.com/BAR'}
2728 e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r)
2729 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2730
2731 e1.append(e2)
2732
2733 self.assertEqual(
2734 None,
2735 e1.prefix)
2736 self.assertNotEqual(
2737 None,
2738 e2.prefix)
2739 self.assertEqual(
2740 '{http://ns.infrae.com/BAR}bar',
2741 e1.tag)
2742 self.assertEqual(
2743 '{http://ns.infrae.com/foo}bar',
2744 e2.tag)
2745
# Test body; the enclosing def line is not visible in this listing.
# Moves a namespaced element (<ns:baz/>) from one parsed document into a
# second one that declares the same namespace on its root, drops the
# reference to the first root, and checks that the moved element keeps its
# tag and is serialised using the target root's declaration.
2747 ns_href = "http://a.b.c"
2748 one = self.etree.fromstring(
2749 _bytes('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
2750 baz = one[0][0]
2751
2752 two = self.etree.fromstring(
2753 _bytes('<root xmlns:ns="%s"/>' % ns_href))
2754 two.append(baz)
2755 del one # make sure the source document is deallocated
2756
2757 self.assertEqual('{%s}baz' % ns_href, baz.tag)
2758 self.assertEqual(
2759 _bytes('<root xmlns:ns="%s"><ns:baz/></root>' % ns_href),
2760 self.etree.tostring(two))
2761
# Test body; the enclosing def line is not visible in this listing.
# Round-trips a document, then calls cleanup_namespaces(root) and checks
# that the declarations for the unused prefixes 'x' and 'b' are gone while
# the default namespaces and the used prefix 'ns' remain.
2763 xml = _bytes(
2764 '<foo xmlns="F" xmlns:x="x">'
2765 '<bar xmlns:ns="NS" xmlns:b="b" xmlns="B">'
2766 '<ns:baz/>'
2767 '</bar></foo>'
2768 )
2769 root = self.etree.fromstring(xml)
2770 self.assertEqual(xml, self.etree.tostring(root))
2771 self.etree.cleanup_namespaces(root)
2772 self.assertEqual(
2773 _bytes('<foo xmlns="F"><bar xmlns:ns="NS" xmlns="B"><ns:baz/></bar></foo>'),
2774 self.etree.tostring(root))
2775
# Test body; the enclosing def line is not visible in this listing.
# Like the plain cleanup case, but prefix 'a' is used only by an attribute
# (a:test). Checks that cleanup_namespaces() keeps xmlns:a while removing
# the unused 'x' and 'b' declarations.
2777 xml = _bytes(
2778 '<foo xmlns="F" xmlns:x="X" xmlns:a="A">'
2779 '<bar xmlns:ns="NS" xmlns:b="b" xmlns="B">'
2780 '<ns:baz a:test="attr"/>'
2781 '</bar></foo>'
2782 )
2783 root = self.etree.fromstring(xml)
2784 self.assertEqual(xml, self.etree.tostring(root))
2785 self.etree.cleanup_namespaces(root)
2786 self.assertEqual(
2787 _bytes('<foo xmlns="F" xmlns:a="A">'
2788 '<bar xmlns:ns="NS" xmlns="B">'
2789 '<ns:baz a:test="attr"/>'
2790 '</bar></foo>'),
2791 self.etree.tostring(root))
2792
# Test body; the enclosing def line is not visible in this listing.
# Declares 100 prefixes (n0..n99) on the root element, of which only n12
# (root tag) and n68 (child tag) are used. Checks that cleanup_namespaces()
# leaves exactly those two declarations.
2794 xml = ('<n12:foo ' +
2795 ' '.join('xmlns:n{n}="NS{n}"'.format(n=i) for i in range(100)) +
2796 '><n68:a/></n12:foo>').encode('utf8')
2797 root = self.etree.fromstring(xml)
2798 self.assertEqual(xml, self.etree.tostring(root))
2799 self.etree.cleanup_namespaces(root)
2800 self.assertEqual(
2801 b'<n12:foo xmlns:n12="NS12" xmlns:n68="NS68"><n68:a/></n12:foo>',
2802 self.etree.tostring(root))
2803
# Test body; the enclosing def line is not visible in this listing.
# Nests 100 <a> elements, each declaring one prefix n0..n99, around a single
# <n64:x/>. After cleanup_namespaces() only the <a> that declared n64 (the
# 65th, preceded by 64 bare <a> tags) keeps its declaration.
2805 xml = ('<root>' +
2806 ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2807 '<n64:x/>' + '</a>'*100 + '</root>').encode('utf8')
2808 root = self.etree.fromstring(xml)
2809 self.assertEqual(xml, self.etree.tostring(root))
2810 self.etree.cleanup_namespaces(root)
2811 self.assertEqual(
2812 b'<root>' + b'<a>'*64 + b'<a xmlns:n64="NS64">' + b'<a>'*35 +
2813 b'<n64:x/>' + b'</a>'*100 + b'</root>',
2814 self.etree.tostring(root))
2815
# Test body; the enclosing def line is not visible in this listing.
# Same deep nesting, but cleanup_namespaces() is called with
# top_nsmap={'n64': 'NS64'}. The expected output declares n64 on <root>, has
# no declarations on any of the 100 <a> elements, and keeps the locally
# declared and used xmlns:a on <n64:x>.
2817 xml = ('<root>' +
2818 ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2819 '<n64:x xmlns:a="A" a:attr="X"/>' +
2820 '</a>'*100 +
2821 '</root>').encode('utf8')
2822 root = self.etree.fromstring(xml)
2823 self.assertEqual(xml, self.etree.tostring(root))
2824 self.etree.cleanup_namespaces(root, top_nsmap={'n64': 'NS64'})
2825 self.assertEqual(
2826 b'<root xmlns:n64="NS64">' + b'<a>'*100 +
2827 b'<n64:x xmlns:a="A" a:attr="X"/>' + b'</a>'*100 + b'</root>',
2828 self.etree.tostring(root))
2829
# Test body; the enclosing def line is not visible in this listing.
# Calls cleanup_namespaces() with keep_ns_prefixes=['foo']. Prefix 'foo' is
# not used by any tag or attribute here (it only occurs inside text content
# and as an un-namespaced tag name), yet its declaration is expected to stay;
# the unused 'unused1', 'no' and 'unused2' declarations are removed.
2831 xml = ('<root xmlns:n64="NS64" xmlns:foo="FOO" xmlns:unused1="UNUSED" xmlns:no="NO">'
2832 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2833 '<foo>foo:bar</foo>'
2834 '</root>').encode('utf8')
2835 root = self.etree.fromstring(xml)
2836 self.assertEqual(xml, self.etree.tostring(root))
2837 self.etree.cleanup_namespaces(root, keep_ns_prefixes=['foo'])
2838 self.assertEqual(
2839 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
2840 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
2841 b'<foo>foo:bar</foo>'
2842 b'</root>',
2843 self.etree.tostring(root))
2844
# Test body; the enclosing def line is not visible in this listing.
# Combines top_nsmap and keep_ns_prefixes. 'foo' is declared on <sub> in the
# input, listed in both arguments, and is expected to end up declared on
# <root>. 'unused1' is listed in top_nsmap only and is expected to be
# removed, as are 'no' and 'unused2'.
2846 xml = ('<root xmlns:n64="NS64" xmlns:unused1="UNUSED" xmlns:no="NO">'
2847 '<sub xmlns:foo="FOO">'
2848 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2849 '<foo>foo:bar</foo>'
2850 '</sub>'
2851 '</root>').encode('utf8')
2852 root = self.etree.fromstring(xml)
2853 self.assertEqual(xml, self.etree.tostring(root))
2854 self.etree.cleanup_namespaces(
2855 root,
2856 top_nsmap={'foo': 'FOO', 'unused1': 'UNUSED'},
2857 keep_ns_prefixes=['foo'])
2858 self.assertEqual(
2859 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
2860 b'<sub>'
2861 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
2862 b'<foo>foo:bar</foo>'
2863 b'</sub>'
2864 b'</root>',
2865 self.etree.tostring(root))
2866
# Test body; the enclosing def line is not visible in this listing.
# Checks that Element.nsmap compares equal to the nsmap dict passed at
# construction (default namespace plus the 'hoi' prefix).
2868 etree = self.etree
2869
2870 r = {None: 'http://ns.infrae.com/foo',
2871 'hoi': 'http://ns.infrae.com/hoi'}
2872 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2873 self.assertEqual(
2874 r,
2875 e.nsmap)
2876
# Test body; the enclosing def line is not visible in this listing.
# Checks that a SubElement's nsmap is the parent's mapping updated with its
# own (the child's None entry overrides the parent's), while the parent's
# nsmap stays as originally given.
# NOTE: the local name `re` shadows the `re` module imported at the top of
# the file for the rest of this body.
2878 etree = self.etree
2879
2880 re = {None: 'http://ns.infrae.com/foo',
2881 'hoi': 'http://ns.infrae.com/hoi'}
2882 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=re)
2883
2884 rs = {None: 'http://ns.infrae.com/honk',
2885 'top': 'http://ns.infrae.com/top'}
2886 s = etree.SubElement(e, '{http://ns.infrae.com/honk}bar', nsmap=rs)
2887
2888 r = re.copy()
2889 r.update(rs)
2890 self.assertEqual(re, e.nsmap)
2891 self.assertEqual(r, s.nsmap)
2892
# Test body; the enclosing def line is not visible in this listing.
# Parses HTML containing a tag with an undeclared 'hha:' prefix, looks the
# element up by its local name 'page-description', and checks that its nsmap
# is {'hha': None}.
2894 etree = self.etree
2895 el = etree.HTML('<hha:page-description>aa</hha:page-description>').find('.//page-description')
2896 self.assertEqual({'hha': None}, el.nsmap)
2897
# Test body; the enclosing def line is not visible in this listing.
# Checks getiterator() with several tags as separate positional arguments.
# Expected results follow document order regardless of argument order
# (('f', 'a') yields [a, f]), '*' matches every element of the subtree, and
# two empty tuples yield nothing.
2899 Element = self.etree.Element
2900 SubElement = self.etree.SubElement
2901
2902 a = Element('a')
2903 b = SubElement(a, 'b')
2904 c = SubElement(a, 'c')
2905 d = SubElement(b, 'd')
2906 e = SubElement(c, 'e')
2907 f = SubElement(c, 'f')
2908
2909 self.assertEqual(
2910 [a, b],
2911 list(a.getiterator('a', 'b')))
2912 self.assertEqual(
2913 [],
2914 list(a.getiterator('x', 'y')))
2915 self.assertEqual(
2916 [a, f],
2917 list(a.getiterator('f', 'a')))
2918 self.assertEqual(
2919 [c, e, f],
2920 list(c.getiterator('c', '*', 'a')))
2921 self.assertEqual(
2922 [],
2923 list(a.getiterator( (), () )))
2924
# Test body; the enclosing def line is not visible in this listing.
# Same checks as the multi-argument getiterator() case, but the tags are
# passed as a single tuple argument; an empty tuple yields nothing.
2926 Element = self.etree.Element
2927 SubElement = self.etree.SubElement
2928
2929 a = Element('a')
2930 b = SubElement(a, 'b')
2931 c = SubElement(a, 'c')
2932 d = SubElement(b, 'd')
2933 e = SubElement(c, 'e')
2934 f = SubElement(c, 'f')
2935
2936 self.assertEqual(
2937 [a, b],
2938 list(a.getiterator( ('a', 'b') )))
2939 self.assertEqual(
2940 [],
2941 list(a.getiterator( ('x', 'y') )))
2942 self.assertEqual(
2943 [a, f],
2944 list(a.getiterator( ('f', 'a') )))
2945 self.assertEqual(
2946 [c, e, f],
2947 list(c.getiterator( ('c', '*', 'a') )))
2948 self.assertEqual(
2949 [],
2950 list(a.getiterator( () )))
2951
# Test body; the enclosing def line is not visible in this listing.
# Checks namespace-aware tag filters in getiterator(): an exact '{ns}name'
# match, no match for a wrong or missing namespace, '*' for everything,
# '{b}*' for any element in namespace 'b', and 'g', '{}g' and '{}*' all
# selecting only the un-namespaced element `g`.
2953 Element = self.etree.Element
2954 SubElement = self.etree.SubElement
2955
2956 a = Element('{a}a')
2957 b = SubElement(a, '{a}b')
2958 c = SubElement(a, '{a}c')
2959 d = SubElement(b, '{b}d')
2960 e = SubElement(c, '{a}e')
2961 f = SubElement(c, '{b}f')
2962 g = SubElement(c, 'g')
2963
2964 self.assertEqual(
2965 [a],
2966 list(a.getiterator('{a}a')))
2967 self.assertEqual(
2968 [],
2969 list(a.getiterator('{b}a')))
2970 self.assertEqual(
2971 [],
2972 list(a.getiterator('a')))
2973 self.assertEqual(
2974 [a,b,d,c,e,f,g],
2975 list(a.getiterator('*')))
2976 self.assertEqual(
2977 [f],
2978 list(c.getiterator('{b}*')))
2979 self.assertEqual(
2980 [d, f],
2981 list(a.getiterator('{b}*')))
2982 self.assertEqual(
2983 [g],
2984 list(a.getiterator('g')))
2985 self.assertEqual(
2986 [g],
2987 list(a.getiterator('{}g')))
2988 self.assertEqual(
2989 [g],
2990 list(a.getiterator('{}*')))
2991
# Test body; the enclosing def line is not visible in this listing.
# Checks the '{*}name' filter of getiterator(): it selects elements by local
# name in any namespace, including none ('{*}b' yields b, c, d), and '{*}*'
# yields every element in document order.
2993 Element = self.etree.Element
2994 SubElement = self.etree.SubElement
2995
2996 a = Element('{a}a')
2997 b = SubElement(a, '{nsA}b')
2998 c = SubElement(b, '{nsB}b')
2999 d = SubElement(a, 'b')
3000 e = SubElement(a, '{nsA}e')
3001 f = SubElement(e, '{nsB}e')
3002 g = SubElement(e, 'e')
3003
3004 self.assertEqual(
3005 [b, c, d],
3006 list(a.getiterator('{*}b')))
3007 self.assertEqual(
3008 [e, f, g],
3009 list(a.getiterator('{*}e')))
3010 self.assertEqual(
3011 [a, b, c, d, e, f, g],
3012 list(a.getiterator('{*}*')))
3013
# Test body; the enclosing def line is not visible in this listing.
# Checks getiterator(Entity): passing the Entity factory as the filter
# yields only entity nodes, in document order, limited to the subtree of the
# element being iterated.
3015 Element = self.etree.Element
3016 Entity = self.etree.Entity
3017 SubElement = self.etree.SubElement
3018
3019 a = Element('a')
3020 b = SubElement(a, 'b')
3021 entity_b = Entity("TEST-b")
3022 b.append(entity_b)
3023
3024 self.assertEqual(
3025 [entity_b],
3026 list(a.getiterator(Entity)))
3027
3028 entity_a = Entity("TEST-a")
3029 a.append(entity_a)
3030
3031 self.assertEqual(
3032 [entity_b, entity_a],
3033 list(a.getiterator(Entity)))
3034
3035 self.assertEqual(
3036 [entity_b],
3037 list(b.getiterator(Entity)))
3038
# Test body; the enclosing def line is not visible in this listing.
# Checks getiterator(Element): with the Element factory as the filter, the
# comment and processing instruction appended to `a` are skipped and only
# a, b, c are returned.
3040 Element = self.etree.Element
3041 Comment = self.etree.Comment
3042 PI = self.etree.PI
3043 SubElement = self.etree.SubElement
3044
3045 a = Element('a')
3046 b = SubElement(a, 'b')
3047 a.append(Comment("test"))
3048 a.append(PI("pi", "content"))
3049 c = SubElement(a, 'c')
3050
3051 self.assertEqual(
3052 [a, b, c],
3053 list(a.getiterator(Element)))
3054
# Test body; the enclosing def line is not visible in this listing.
# Checks getiterator('*') on a tree that also contains a comment and a
# processing instruction: the expected result lists only a, b, c.
3056 # ElementTree iterates over everything here
3057 Element = self.etree.Element
3058 Comment = self.etree.Comment
3059 PI = self.etree.PI
3060 SubElement = self.etree.SubElement
3061
3062 a = Element('a')
3063 b = SubElement(a, 'b')
3064 a.append(Comment("test"))
3065 a.append(PI("pi", "content"))
3066 c = SubElement(a, 'c')
3067
3068 self.assertEqual(
3069 [a, b, c],
3070 list(a.getiterator('*')))
3071
# Test body; the enclosing def line is not visible in this listing.
# Checks ElementTree.getelementpath(): the tree's root maps to '.', repeated
# sibling tags get 1-based [n] positions, and the returned path finds the
# same element again via tree.find(). Paths are relative to the tree's own
# root (a tree built on `c` gives 'd[2]'), and elements outside the tree's
# root raise ValueError.
# Note: this body uses the bare name `etree`, not `self.etree`.
3073 a = etree.Element("a")
3074 b = etree.SubElement(a, "b")
3075 c = etree.SubElement(a, "c")
3076 d1 = etree.SubElement(c, "d")
3077 d2 = etree.SubElement(c, "d")
3078 c.text = d1.text = 'TEXT'
3079
3080 tree = etree.ElementTree(a)
3081 self.assertEqual('.', tree.getelementpath(a))
3082 self.assertEqual('c/d[1]', tree.getelementpath(d1))
3083 self.assertEqual('c/d[2]', tree.getelementpath(d2))
3084
3085 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3086 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3087
3088 tree = etree.ElementTree(c)
3089 self.assertEqual('.', tree.getelementpath(c))
3090 self.assertEqual('d[2]', tree.getelementpath(d2))
3091 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3092
3093 tree = etree.ElementTree(b) # not a parent of a/c/d1/d2
3094 self.assertEqual('.', tree.getelementpath(b))
3095 self.assertRaises(ValueError, tree.getelementpath, a)
3096 self.assertRaises(ValueError, tree.getelementpath, c)
3097 self.assertRaises(ValueError, tree.getelementpath, d2)
3098
# Test body; the enclosing def line is not visible in this listing.
# Namespaced variant of the getelementpath() checks. Path steps use
# '{uri}name' notation; the two {http://ns1/}d siblings get [1] and [2],
# while the single {http://ns2/}d between them gets no index. Every path is
# verified by a tree.find() round-trip, and elements outside the tree's root
# raise ValueError.
# Note: this body uses the bare name `etree`, not `self.etree`.
3100 a = etree.Element("{http://ns1/}a")
3101 b = etree.SubElement(a, "{http://ns1/}b")
3102 c = etree.SubElement(a, "{http://ns1/}c")
3103 d1 = etree.SubElement(c, "{http://ns1/}d")
3104 d2 = etree.SubElement(c, "{http://ns2/}d")
3105 d3 = etree.SubElement(c, "{http://ns1/}d")
3106
3107 tree = etree.ElementTree(a)
3108 self.assertEqual('.', tree.getelementpath(a))
3109 self.assertEqual('{http://ns1/}c/{http://ns1/}d[1]',
3110 tree.getelementpath(d1))
3111 self.assertEqual('{http://ns1/}c/{http://ns2/}d',
3112 tree.getelementpath(d2))
3113 self.assertEqual('{http://ns1/}c/{http://ns1/}d[2]',
3114 tree.getelementpath(d3))
3115
3116 self.assertEqual(a, tree.find(tree.getelementpath(a)))
3117 self.assertEqual(b, tree.find(tree.getelementpath(b)))
3118 self.assertEqual(c, tree.find(tree.getelementpath(c)))
3119 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3120 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3121 self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
3122
3123 tree = etree.ElementTree(c)
3124 self.assertEqual('{http://ns1/}d[1]', tree.getelementpath(d1))
3125 self.assertEqual('{http://ns2/}d', tree.getelementpath(d2))
3126 self.assertEqual('{http://ns1/}d[2]', tree.getelementpath(d3))
3127 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3128 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3129 self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
3130
3131 tree = etree.ElementTree(b) # not a parent of d1/d2
3132 self.assertRaises(ValueError, tree.getelementpath, d1)
3133 self.assertRaises(ValueError, tree.getelementpath, d2)
3134
# Test body; the enclosing def line is not visible in this listing.
# Checks that ElementTree.find() accepts a QName object as the path and
# returns the direct child <c> (root[2]), not the <c> nested inside <b>.
3136 XML = self.etree.XML
3137 ElementTree = self.etree.ElementTree
3138 QName = self.etree.QName
3139 tree = ElementTree(XML(_bytes('<a><b><c/></b><b/><c><b/></c></a>')))
3140 self.assertEqual(tree.find(QName("c")), tree.getroot()[2])
3141
# Test body; the enclosing def line is not visible in this listing.
# Checks that ElementTree.findall() accepts a QName object and finds exactly
# one <c> (the direct child of the root).
3143 XML = self.etree.XML
3144 ElementTree = self.etree.ElementTree
3145 QName = self.etree.QName
3146 tree = ElementTree(XML(_bytes('<a><b><c/></b><b/><c><b/></c></a>')))
3147 self.assertEqual(len(list(tree.findall(QName("c")))), 1)
3148
# Test body; the enclosing def line is not visible in this listing.
# Checks findall() with QName objects on a namespaced document: QName("b")
# finds the two un-namespaced <b> children of the root, and QName("X", "b")
# finds the single direct child in namespace X.
3150 XML = self.etree.XML
3151 ElementTree = self.etree.ElementTree
3152 QName = self.etree.QName
3153 tree = ElementTree(XML(
3154 _bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>')))
3155 self.assertEqual(len(list(tree.findall(QName("b")))), 2)
3156 self.assertEqual(len(list(tree.findall(QName("X", "b")))), 1)
3157
# Test body; the enclosing def line is not visible in this listing.
# Checks descendant searches with '{uri}name' notation: './/{X}b' and
# './/{X}*' each find the two elements in namespace X, and './/b' finds the
# three un-namespaced <b> elements.
3159 XML = self.etree.XML
3160 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>'))
3161 self.assertEqual(len(root.findall(".//{X}b")), 2)
3162 self.assertEqual(len(root.findall(".//{X}*")), 2)
3163 self.assertEqual(len(root.findall(".//b")), 3)
3164
# Test body; the enclosing def line is not visible in this listing.
# Checks findall() with a `namespaces` prefix mapping. The same path
# './/xx:b' gives 2 hits when 'xx' maps to X and 1 when it maps to Y, while
# the unprefixed './/b' gives 2 hits under both mappings.
3166 XML = self.etree.XML
3167 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
3168 nsmap = {'xx': 'X'}
3169 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
3170 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 2)
3171 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3172 nsmap = {'xx': 'Y'}
3173 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
3174 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 1)
3175 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3176
# Test body; the enclosing def line is not visible in this listing.
# Checks how findall() treats "no prefix" keys in `namespaces`: with a None
# key mapped to Y, the unprefixed './/b' finds the single <y:b>; a mapping
# containing an empty-string key makes findall() raise ValueError.
3178 XML = self.etree.XML
3179 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
3180 nsmap = {'xx': 'X'}
3181 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
3182 nsmap = {'xx': 'X', None: 'Y'}
3183 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)
3184 nsmap = {'xx': 'X', '': 'Y'}
3185 self.assertRaises(ValueError, root.findall, ".//xx:b", namespaces=nsmap)
3186
# Test body; the enclosing def line is not visible in this listing.
# Checks that Element.findall() raises SyntaxError for an empty path, for
# '//' and for a path ending in './/' with no step after it.
3188 XML = self.etree.XML
3189 root = XML(_bytes('<a><b><c/></b><b/><c><b/><b/></c><b/></a>'))
3190 self.assertRaises(SyntaxError, root.findall, '')
3191 self.assertRaises(SyntaxError, root.findall, '//') # absolute path on Element
3192 self.assertRaises(SyntaxError, root.findall, './/')
3193
# Test body; the enclosing def line is not visible in this listing.
# Checks Element.index(child[, start[, stop]]) on an element with 10
# children: every child is found at its own position, a child outside the
# start/stop window raises ValueError, and negative start/stop values are
# accepted (including a start below -len, as in -12).
3195 etree = self.etree
3196 e = etree.Element('foo')
3197 for i in range(10):
3198 etree.SubElement(e, 'a%s' % i)
3199 for i in range(10):
3200 self.assertEqual(
3201 i,
3202 e.index(e[i]))
3203 self.assertEqual(
3204 3, e.index(e[3], 3))
3205 self.assertRaises(
3206 ValueError, e.index, e[3], 4)
3207 self.assertRaises(
3208 ValueError, e.index, e[3], 0, 2)
3209 self.assertRaises(
3210 ValueError, e.index, e[8], 0, -3)
3211 self.assertRaises(
3212 ValueError, e.index, e[8], -5, -3)
3213 self.assertEqual(
3214 8, e.index(e[8], 0, -1))
3215 self.assertEqual(
3216 8, e.index(e[8], -12, -1))
3217 self.assertEqual(
3218 0, e.index(e[0], -12, -1))
3219
# Test body; the enclosing def line is not visible in this listing.
# Checks Element.replace(old, new) when `new` is already a child of the same
# parent: the child count drops from 10 to 9, the moved element takes the
# replaced position and keeps its own text and tail, and the replaced element
# keeps its tail. A second replace moves the first child to the last slot.
3221 etree = self.etree
3222 e = etree.Element('foo')
3223 for i in range(10):
3224 el = etree.SubElement(e, 'a%s' % i)
3225 el.text = "text%d" % i
3226 el.tail = "tail%d" % i
3227
3228 child0 = e[0]
3229 child1 = e[1]
3230 child2 = e[2]
3231
3232 e.replace(e[0], e[1])
3233 self.assertEqual(
3234 9, len(e))
3235 self.assertEqual(
3236 child1, e[0])
3237 self.assertEqual(
3238 child1.text, "text1")
3239 self.assertEqual(
3240 child1.tail, "tail1")
3241 self.assertEqual(
3242 child0.tail, "tail0")
3243 self.assertEqual(
3244 child2, e[1])
3245
3246 e.replace(e[-1], e[0])
3247 self.assertEqual(
3248 child1, e[-1])
3249 self.assertEqual(
3250 child1.text, "text1")
3251 self.assertEqual(
3252 child1.tail, "tail1")
3253 self.assertEqual(
3254 child2, e[0])
3255
# Test body; the enclosing def line is not visible in this listing.
# Checks Element.replace(old, new) with a freshly created element: it takes
# the first position with its own text and tail intact, and the following
# sibling stays at index 1.
3257 etree = self.etree
3258 e = etree.Element('foo')
3259 for i in range(10):
3260 etree.SubElement(e, 'a%s' % i)
3261
3262 new_element = etree.Element("test")
3263 new_element.text = "TESTTEXT"
3264 new_element.tail = "TESTTAIL"
3265 child1 = e[1]
3266 e.replace(e[0], new_element)
3267 self.assertEqual(
3268 new_element, e[0])
3269 self.assertEqual(
3270 "TESTTEXT",
3271 e[0].text)
3272 self.assertEqual(
3273 "TESTTAIL",
3274 e[0].tail)
3275 self.assertEqual(
3276 child1, e[1])
3277
# Test body; the enclosing def line is not visible in this listing.
# Assigns [e, f, g] to the reversed full slice a[::-1] of an element with no
# children and checks that the children end up as [g, f, e].
# `SubElement` is bound but not used in the visible lines.
3279 Element = self.etree.Element
3280 SubElement = self.etree.SubElement
3281
3282 a = Element('a')
3283
3284 e = Element('e')
3285 f = Element('f')
3286 g = Element('g')
3287
3288 s = [e, f, g]
3289 a[::-1] = s
3290 self.assertEqual(
3291 [g, f, e],
3292 list(a))
3293
# Test body; the enclosing def line is not visible in this listing.
# Assigns [x, y] to the extended slice a[1::2] of children [b, c, d, e] and
# checks that the odd positions are replaced, giving [b, x, d, y].
3295 Element = self.etree.Element
3296 SubElement = self.etree.SubElement
3297
3298 a = Element('a')
3299 b = SubElement(a, 'b')
3300 c = SubElement(a, 'c')
3301 d = SubElement(a, 'd')
3302 e = SubElement(a, 'e')
3303
3304 x = Element('x')
3305 y = Element('y')
3306
3307 a[1::2] = [x, y]
3308 self.assertEqual(
3309 [b, x, d, y],
3310 list(a))
3311
# Test body; the enclosing def line is not visible in this listing.
# Assigns [x, y] to a[1::-1] (positions 1 then 0) of children [b, c, d, e]
# and checks the result [y, x, d, e].
3313 Element = self.etree.Element
3314 SubElement = self.etree.SubElement
3315
3316 a = Element('a')
3317 b = SubElement(a, 'b')
3318 c = SubElement(a, 'c')
3319 d = SubElement(a, 'd')
3320 e = SubElement(a, 'e')
3321
3322 x = Element('x')
3323 y = Element('y')
3324
3325 a[1::-1] = [x, y]
3326 self.assertEqual(
3327 [y, x, d, e],
3328 list(a))
3329
# Test body; the enclosing def line is not visible in this listing.
# Assigns [x, y] to a[::-2] (positions 3 then 1) of children [b, c, d, e]
# and checks the result [b, y, d, x].
3331 Element = self.etree.Element
3332 SubElement = self.etree.SubElement
3333
3334 a = Element('a')
3335 b = SubElement(a, 'b')
3336 c = SubElement(a, 'c')
3337 d = SubElement(a, 'd')
3338 e = SubElement(a, 'e')
3339
3340 x = Element('x')
3341 y = Element('y')
3342
3343 a[::-2] = [x, y]
3344 self.assertEqual(
3345 [b, y, d, x],
3346 list(a))
3347
# Test body; the enclosing def line is not visible in this listing.
# Checks that assigning three elements to the two-position extended slice
# a[1::2] raises ValueError and leaves the children unchanged.
# The leading try/except returns early, after printing a note, if the name
# `slice` is not defined.
3349 Element = self.etree.Element
3350 SubElement = self.etree.SubElement
3351 try:
3352 slice
3353 except NameError:
3354 print("slice() not found")
3355 return
3356
3357 a = Element('a')
3358 b = SubElement(a, 'b')
3359 c = SubElement(a, 'c')
3360 d = SubElement(a, 'd')
3361 e = SubElement(a, 'e')
3362
3363 x = Element('x')
3364 y = Element('y')
3365 z = Element('z')
3366
3367 self.assertRaises(
3368 ValueError,
3369 operator.setitem, a, slice(1,None,2), [x, y, z])
3370
3371 self.assertEqual(
3372 [b, c, d, e],
3373 list(a))
3374
# Test body; the enclosing def line is not visible in this listing.
# Parses a small multi-line document from bytes and checks the `sourceline`
# of each element in iteration order: <root> and <test> on line 2, <bla/> on
# line 4.
3376 XML = self.etree.XML
3377 root = XML(_bytes('''<?xml version="1.0"?>
3378 <root><test>
3379
3380 <bla/></test>
3381 </root>
3382 '''))
3383
3384 self.assertEqual(
3385 [2, 2, 4],
3386 [ el.sourceline for el in root.getiterator() ])
3387
# Test body; the enclosing def line is not visible in this listing.
# Builds a document with two runs of 65536 newlines so that later elements
# sit past line 65535. For LIBXML_VERSION >= (2, 9) the exact line numbers
# are expected; for older versions both later elements are expected to
# report 65535.
3389 XML = self.etree.XML
3390 root = XML(_bytes(
3391 '<?xml version="1.0"?>\n'
3392 '<root>' + '\n' * 65536 +
3393 '<p>' + '\n' * 65536 + '</p>\n' +
3394 '<br/>\n'
3395 '</root>'))
3396
3397 if self.etree.LIBXML_VERSION >= (2, 9):
3398 expected = [2, 131074, 131076]
3399 else:
3400 expected = [2, 65535, 65535]
3401
3402 self.assertEqual(expected, [el.sourceline for el in root.iter()])
3403
# Test body; the enclosing def line is not visible in this listing.
# Parses include/test_xinclude.xml from the test directory and checks that
# its elements report source lines 1, 2, 3 in iteration order.
3405 parse = self.etree.parse
3406 tree = parse(fileInTestDir('include/test_xinclude.xml'))
3407
3408 self.assertEqual(
3409 [1, 2, 3],
3410 [ el.sourceline for el in tree.getiterator() ])
3411
# Test body; the enclosing def line is not visible in this listing.
# Runs iterparse() on include/test_xinclude.xml without an `events` argument
# and checks that the delivered elements report source lines 2, 3, 1, in
# that order.
3413 iterparse = self.etree.iterparse
3414 lines = [ el.sourceline for (event, el) in
3415 iterparse(fileInTestDir('include/test_xinclude.xml')) ]
3416
3417 self.assertEqual(
3418 [2, 3, 1],
3419 lines)
3420
# Test body; the enclosing def line is not visible in this listing.
# Runs iterparse() on include/test_xinclude.xml with events=("start",) and
# checks that the elements are delivered with source lines 1, 2, 3.
3422 iterparse = self.etree.iterparse
3423 lines = [ el.sourceline for (event, el) in
3424 iterparse(fileInTestDir('include/test_xinclude.xml'),
3425 events=("start",)) ]
3426
3427 self.assertEqual(
3428 [1, 2, 3],
3429 lines)
3430
# Test body; the enclosing def line is not visible in this listing.
# Checks that elements created through the API (Element / SubElement)
# report sourceline None, both before and after a child is added.
3432 Element = self.etree.Element
3433 SubElement = self.etree.SubElement
3434 el = Element("test")
3435 self.assertEqual(None, el.sourceline)
3436
3437 child = SubElement(el, "test")
3438 self.assertEqual(None, el.sourceline)
3439 self.assertEqual(None, child.sourceline)
3440
# Test body; the enclosing def line is not visible in this listing.
# Checks that the base_url passed to XML() is reported as docinfo.URL of the
# resulting tree.
3442 etree = self.etree
3443 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3444 docinfo = root.getroottree().docinfo
3445 self.assertEqual(docinfo.URL, "http://no/such/url")
3446
# Test body; the enclosing def line is not visible in this listing.
# Checks that docinfo.URL starts out as the base_url given to XML() and can
# be reassigned, with the new value read back afterwards.
3448 etree = self.etree
3449 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3450 docinfo = root.getroottree().docinfo
3451 self.assertEqual(docinfo.URL, "http://no/such/url")
3452 docinfo.URL = "https://secret/url"
3453 self.assertEqual(docinfo.URL, "https://secret/url")
3454
# Test body; the enclosing def line is not visible in this listing.
# Checks that base_url passed to parse() for an in-memory file object is
# reported as docinfo.URL.
# NOTE(review): `BytesIO` comes from .common_imports and is given a str
# literal here; presumably that helper accepts text -- confirm.
3456 etree = self.etree
3457 tree = etree.parse(BytesIO("<root/>"), base_url="http://no/such/url")
3458 docinfo = tree.docinfo
3459 self.assertEqual(docinfo.URL, "http://no/such/url")
3460
# Test body; the enclosing def line is not visible in this listing.
# Parses a file from the test directory with an explicit base_url and checks
# that docinfo.URL reports that base_url.
3462 etree = self.etree
3463 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
3464 base_url="http://no/such/url")
3465 docinfo = tree.docinfo
3466 self.assertEqual(docinfo.URL, "http://no/such/url")
3467
# Test body; the enclosing def line is not visible in this listing.
# Checks that the base_url passed to HTML() is reported as docinfo.URL of
# the resulting tree.
3469 etree = self.etree
3470 root = etree.HTML(_bytes("<html/>"), base_url="http://no/such/url")
3471 docinfo = root.getroottree().docinfo
3472 self.assertEqual(docinfo.URL, "http://no/such/url")
3473
# Test body; the enclosing def line is not visible in this listing.
# Parses a document with an XML declaration (encoding "ascii") and a PUBLIC
# DOCTYPE, then checks the docinfo fields: encoding, xml_version, public_id,
# system_url, root_name and the reconstructed doctype string.
3475 etree = self.etree
3476 xml_header = '<?xml version="1.0" encoding="ascii"?>'
3477 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3478 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3479 doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id)
3480
3481 xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')
3482
3483 tree = etree.parse(BytesIO(xml))
3484 docinfo = tree.docinfo
3485 self.assertEqual(docinfo.encoding, "ascii")
3486 self.assertEqual(docinfo.xml_version, "1.0")
3487 self.assertEqual(docinfo.public_id, pub_id)
3488 self.assertEqual(docinfo.system_url, sys_id)
3489 self.assertEqual(docinfo.root_name, 'html')
3490 self.assertEqual(docinfo.doctype, doctype_string)
3491
# Test body; the enclosing def line is not visible in this listing.
# Parses a document with a SYSTEM-only DOCTYPE and checks the docinfo
# fields: public_id is None, system_url is the given id, and the doctype
# string is reproduced.
3493 etree = self.etree
3494 xml_header = '<?xml version="1.0" encoding="UTF-8"?>'
3495 sys_id = "some.dtd"
3496 doctype_string = '<!DOCTYPE html SYSTEM "%s">' % sys_id
3497 xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')
3498
3499 tree = etree.parse(BytesIO(xml))
3500 docinfo = tree.docinfo
3501 self.assertEqual(docinfo.encoding, "UTF-8")
3502 self.assertEqual(docinfo.xml_version, "1.0")
3503 self.assertEqual(docinfo.public_id, None)
3504 self.assertEqual(docinfo.system_url, sys_id)
3505 self.assertEqual(docinfo.root_name, 'html')
3506 self.assertEqual(docinfo.doctype, doctype_string)
3507
# Test body; the enclosing def line is not visible in this listing.
# Parses a document with neither XML declaration nor DOCTYPE and checks the
# docinfo values: encoding "UTF-8", xml_version "1.0", no public or system
# id, root_name 'html', and an empty doctype string.
3509 etree = self.etree
3510 xml = _bytes('<html><body></body></html>')
3511 tree = etree.parse(BytesIO(xml))
3512 docinfo = tree.docinfo
3513 self.assertEqual(docinfo.encoding, "UTF-8")
3514 self.assertEqual(docinfo.xml_version, "1.0")
3515 self.assertEqual(docinfo.public_id, None)
3516 self.assertEqual(docinfo.system_url, None)
3517 self.assertEqual(docinfo.root_name, 'html')
3518 self.assertEqual(docinfo.doctype, '')
3519
3521 etree = self.etree
3522 xml = _bytes('<!DOCTYPE root><root></root>')
3523 tree = etree.parse(BytesIO(xml))
3524 docinfo = tree.docinfo
3525 self.assertEqual(docinfo.encoding, "UTF-8")
3526 self.assertEqual(docinfo.xml_version, "1.0")
3527 self.assertEqual(docinfo.public_id, None)
3528 self.assertEqual(docinfo.system_url, None)
3529 self.assertEqual(docinfo.root_name, 'root')
3530 self.assertEqual(docinfo.doctype, '<!DOCTYPE root>')
3531
3533 etree = self.etree
3534 xml = _bytes('<!DOCTYPE root>\n<root/>')
3535 tree = etree.parse(BytesIO(xml))
3536 self.assertEqual(xml, etree.tostring(tree))
3537
3539 etree = self.etree
3540 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3541 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3542 doctype_string = _bytes('<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id))
3543
3544 xml = _bytes('<!DOCTYPE root>\n<root/>')
3545 tree = etree.parse(BytesIO(xml))
3546 self.assertEqual(xml.replace(_bytes('<!DOCTYPE root>'), doctype_string),
3547 etree.tostring(tree, doctype=doctype_string))
3548
3550 etree = self.etree
3551 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3552 self.assertEqual(root.base, "http://no/such/url")
3553 self.assertEqual(
3554 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3555 root.base = "https://secret/url"
3556 self.assertEqual(root.base, "https://secret/url")
3557 self.assertEqual(
3558 root.get('{http://www.w3.org/XML/1998/namespace}base'),
3559 "https://secret/url")
3560
3562 etree = self.etree
3563 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3564 self.assertEqual(root.base, "http://no/such/url")
3565 self.assertEqual(
3566 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3567 root.set('{http://www.w3.org/XML/1998/namespace}base',
3568 "https://secret/url")
3569 self.assertEqual(root.base, "https://secret/url")
3570 self.assertEqual(
3571 root.get('{http://www.w3.org/XML/1998/namespace}base'),
3572 "https://secret/url")
3573
3575 etree = self.etree
3576 root = etree.HTML(_bytes("<html><body></body></html>"),
3577 base_url="http://no/such/url")
3578 self.assertEqual(root.base, "http://no/such/url")
3579
3581 etree = self.etree
3582 root = etree.HTML(_bytes('<html><head><base href="http://no/such/url"></head></html>'))
3583 self.assertEqual(root.base, "http://no/such/url")
3584
3586 # parse from a file object that returns unicode strings
3587 f = LargeFileLikeUnicode()
3588 tree = self.etree.parse(f)
3589 root = tree.getroot()
3590 self.assertTrue(root.tag.endswith('root'))
3591
3593 # check that DTDs that go in also go back out
3594 xml = _bytes('''\
3595 <!DOCTYPE test SYSTEM "test.dtd" [
3596 <!ENTITY entity "tasty">
3597 <!ELEMENT test (a)>
3598 <!ELEMENT a (#PCDATA)>
3599 ]>
3600 <test><a>test-test</a></test>\
3601 ''')
3602 tree = self.etree.parse(BytesIO(xml))
3603 self.assertEqual(self.etree.tostring(tree).replace(_bytes(" "), _bytes("")),
3604 xml.replace(_bytes(" "), _bytes("")))
3605
3607 Element = self.etree.Element
3608
3609 a = Element('a')
3610 self.assertRaises(ValueError, setattr, a, "text", 'ha\0ho')
3611 self.assertRaises(ValueError, setattr, a, "tail", 'ha\0ho')
3612
3613 self.assertRaises(ValueError, Element, 'ha\0ho')
3614
3616 Element = self.etree.Element
3617
3618 a = Element('a')
3619 self.assertRaises(ValueError, setattr, a, "text",
3620 _str('ha\0ho'))
3621 self.assertRaises(ValueError, setattr, a, "tail",
3622 _str('ha\0ho'))
3623
3624 self.assertRaises(ValueError, Element,
3625 _str('ha\0ho'))
3626
3628 Element = self.etree.Element
3629
3630 a = Element('a')
3631 self.assertRaises(ValueError, setattr, a, "text", 'ha\x07ho')
3632 self.assertRaises(ValueError, setattr, a, "text", 'ha\x02ho')
3633
3634 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x07ho')
3635 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x02ho')
3636
3637 self.assertRaises(ValueError, Element, 'ha\x07ho')
3638 self.assertRaises(ValueError, Element, 'ha\x02ho')
3639
3641 Element = self.etree.Element
3642
3643 a = Element('a')
3644 self.assertRaises(ValueError, setattr, a, "text",
3645 _str('ha\x07ho'))
3646 self.assertRaises(ValueError, setattr, a, "text",
3647 _str('ha\x02ho'))
3648
3649 self.assertRaises(ValueError, setattr, a, "tail",
3650 _str('ha\x07ho'))
3651 self.assertRaises(ValueError, setattr, a, "tail",
3652 _str('ha\x02ho'))
3653
3654 self.assertRaises(ValueError, Element,
3655 _str('ha\x07ho'))
3656 self.assertRaises(ValueError, Element,
3657 _str('ha\x02ho'))
3658
3660 Element = self.etree.Element
3661
3662 a = Element('a')
3663 self.assertRaises(ValueError, setattr, a, "text",
3664 _str('ha\u1234\x07ho'))
3665 self.assertRaises(ValueError, setattr, a, "text",
3666 _str('ha\u1234\x02ho'))
3667
3668 self.assertRaises(ValueError, setattr, a, "tail",
3669 _str('ha\u1234\x07ho'))
3670 self.assertRaises(ValueError, setattr, a, "tail",
3671 _str('ha\u1234\x02ho'))
3672
3673 self.assertRaises(ValueError, Element,
3674 _str('ha\u1234\x07ho'))
3675 self.assertRaises(ValueError, Element,
3676 _str('ha\u1234\x02ho'))
3677
3679 # ElementTree fails to serialize this
3680 tostring = self.etree.tostring
3681 Element = self.etree.Element
3682 SubElement = self.etree.SubElement
3683
3684 a = Element('a')
3685 b = SubElement(a, 'b')
3686 c = SubElement(a, 'c')
3687
3688 result = tostring(a, encoding='UTF-16')
3689 self.assertEqual(_bytes('<a><b></b><c></c></a>'),
3690 canonicalize(result))
3691
3693 # ElementTree raises an AssertionError here
3694 tostring = self.etree.tostring
3695 self.assertRaises(TypeError, self.etree.tostring, None)
3696
3698 tostring = self.etree.tostring
3699 Element = self.etree.Element
3700 SubElement = self.etree.SubElement
3701
3702 a = Element('a')
3703 b = SubElement(a, 'b')
3704 c = SubElement(a, 'c')
3705
3706 result = tostring(a)
3707 self.assertEqual(result, _bytes("<a><b/><c/></a>"))
3708
3709 result = tostring(a, pretty_print=False)
3710 self.assertEqual(result, _bytes("<a><b/><c/></a>"))
3711
3712 result = tostring(a, pretty_print=True)
3713 self.assertEqual(result, _bytes("<a>\n <b/>\n <c/>\n</a>\n"))
3714
3716 tostring = self.etree.tostring
3717 Element = self.etree.Element
3718 SubElement = self.etree.SubElement
3719
3720 a = Element('a')
3721 a.tail = "aTAIL"
3722 b = SubElement(a, 'b')
3723 b.tail = "bTAIL"
3724 c = SubElement(a, 'c')
3725
3726 result = tostring(a)
3727 self.assertEqual(result, _bytes("<a><b/>bTAIL<c/></a>aTAIL"))
3728
3729 result = tostring(a, with_tail=False)
3730 self.assertEqual(result, _bytes("<a><b/>bTAIL<c/></a>"))
3731
3732 result = tostring(a, with_tail=True)
3733 self.assertEqual(result, _bytes("<a><b/>bTAIL<c/></a>aTAIL"))
3734
3736 tostring = self.etree.tostring
3737 html = self.etree.fromstring(
3738 '<html><body>'
3739 '<div><p>Some text<i>\r\n</i></p></div>\r\n'
3740 '</body></html>',
3741 parser=self.etree.HTMLParser())
3742 self.assertEqual(html.tag, 'html')
3743 div = html.find('.//div')
3744 self.assertEqual(div.tail, '\r\n')
3745 result = tostring(div, method='html')
3746 self.assertEqual(
3747 result,
3748 _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
3749 result = tostring(div, method='html', with_tail=True)
3750 self.assertEqual(
3751 result,
3752 _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
3753 result = tostring(div, method='html', with_tail=False)
3754 self.assertEqual(
3755 result,
3756 _bytes("<div><p>Some text<i>\r\n</i></p></div>"))
3757
3759 tostring = self.etree.tostring
3760 XML = self.etree.XML
3761 ElementTree = self.etree.ElementTree
3762 Element = self.etree.Element
3763
3764 tree = Element("root").getroottree()
3765 self.assertEqual(None, tree.docinfo.standalone)
3766
3767 tree = XML(_bytes("<root/>")).getroottree()
3768 self.assertEqual(None, tree.docinfo.standalone)
3769
3770 tree = XML(_bytes(
3771 "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"
3772 )).getroottree()
3773 self.assertEqual(True, tree.docinfo.standalone)
3774
3775 tree = XML(_bytes(
3776 "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"
3777 )).getroottree()
3778 self.assertEqual(False, tree.docinfo.standalone)
3779
3781 tostring = self.etree.tostring
3782 XML = self.etree.XML
3783 ElementTree = self.etree.ElementTree
3784
3785 root = XML(_bytes("<root/>"))
3786
3787 tree = ElementTree(root)
3788 self.assertEqual(None, tree.docinfo.standalone)
3789
3790 result = tostring(root, xml_declaration=True, encoding="ASCII")
3791 self.assertEqual(result, _bytes(
3792 "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
3793
3794 result = tostring(root, xml_declaration=True, encoding="ASCII",
3795 standalone=True)
3796 self.assertEqual(result, _bytes(
3797 "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
3798
3799 tree = ElementTree(XML(result))
3800 self.assertEqual(True, tree.docinfo.standalone)
3801
3802 result = tostring(root, xml_declaration=True, encoding="ASCII",
3803 standalone=False)
3804 self.assertEqual(result, _bytes(
3805 "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"))
3806
3807 tree = ElementTree(XML(result))
3808 self.assertEqual(False, tree.docinfo.standalone)
3809
3811 tostring = self.etree.tostring
3812 XML = self.etree.XML
3813 ElementTree = self.etree.ElementTree
3814
3815 root = XML(_bytes(
3816 "<?xml version='1.0' encoding='UTF-8' standalone='yes'?>\n<root/>"))
3817
3818 tree = ElementTree(root)
3819 self.assertEqual(True, tree.docinfo.standalone)
3820
3821 result = tostring(root, xml_declaration=True, encoding="ASCII")
3822 self.assertEqual(result, _bytes(
3823 "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
3824
3825 result = tostring(root, xml_declaration=True, encoding="ASCII",
3826 standalone=True)
3827 self.assertEqual(result, _bytes(
3828 "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
3829
3831 tostring = self.etree.tostring
3832 Element = self.etree.Element
3833 SubElement = self.etree.SubElement
3834
3835 a = Element('a')
3836 a.text = "A"
3837 a.tail = "tail"
3838 b = SubElement(a, 'b')
3839 b.text = "B"
3840 b.tail = _str("Søk på nettet")
3841 c = SubElement(a, 'c')
3842 c.text = "C"
3843
3844 result = tostring(a, method="text", encoding="UTF-16")
3845
3846 self.assertEqual(_str('ABSøk på nettetCtail').encode("UTF-16"),
3847 result)
3848
3850 tostring = self.etree.tostring
3851 Element = self.etree.Element
3852 SubElement = self.etree.SubElement
3853
3854 a = Element('a')
3855 a.text = _str('Søk på nettetA')
3856 a.tail = "tail"
3857 b = SubElement(a, 'b')
3858 b.text = "B"
3859 b.tail = _str('Søk på nettetB')
3860 c = SubElement(a, 'c')
3861 c.text = "C"
3862
3863 self.assertRaises(UnicodeEncodeError,
3864 tostring, a, method="text")
3865
3866 self.assertEqual(
3867 _str('Søk på nettetABSøk på nettetBCtail').encode('utf-8'),
3868 tostring(a, encoding="UTF-8", method="text"))
3869
3871 tounicode = self.etree.tounicode
3872 Element = self.etree.Element
3873 SubElement = self.etree.SubElement
3874
3875 a = Element('a')
3876 b = SubElement(a, 'b')
3877 c = SubElement(a, 'c')
3878
3879 self.assertTrue(isinstance(tounicode(a), _unicode))
3880 self.assertEqual(_bytes('<a><b></b><c></c></a>'),
3881 canonicalize(tounicode(a)))
3882
3884 tounicode = self.etree.tounicode
3885 Element = self.etree.Element
3886 SubElement = self.etree.SubElement
3887
3888 a = Element('a')
3889 b = SubElement(a, 'b')
3890 c = SubElement(a, 'c')
3891 d = SubElement(c, 'd')
3892 self.assertTrue(isinstance(tounicode(b), _unicode))
3893 self.assertTrue(isinstance(tounicode(c), _unicode))
3894 self.assertEqual(_bytes('<b></b>'),
3895 canonicalize(tounicode(b)))
3896 self.assertEqual(_bytes('<c><d></d></c>'),
3897 canonicalize(tounicode(c)))
3898
3902
3904 tounicode = self.etree.tounicode
3905 Element = self.etree.Element
3906 SubElement = self.etree.SubElement
3907
3908 a = Element('a')
3909 b = SubElement(a, 'b')
3910 c = SubElement(a, 'c')
3911 d = SubElement(c, 'd')
3912 b.tail = 'Foo'
3913
3914 self.assertTrue(isinstance(tounicode(b), _unicode))
3915 self.assertTrue(tounicode(b) == '<b/>Foo' or
3916 tounicode(b) == '<b />Foo')
3917
3919 tounicode = self.etree.tounicode
3920 Element = self.etree.Element
3921 SubElement = self.etree.SubElement
3922
3923 a = Element('a')
3924 b = SubElement(a, 'b')
3925 c = SubElement(a, 'c')
3926
3927 result = tounicode(a)
3928 self.assertEqual(result, "<a><b/><c/></a>")
3929
3930 result = tounicode(a, pretty_print=False)
3931 self.assertEqual(result, "<a><b/><c/></a>")
3932
3933 result = tounicode(a, pretty_print=True)
3934 self.assertEqual(result, "<a>\n <b/>\n <c/>\n</a>\n")
3935
3937 tostring = self.etree.tostring
3938 Element = self.etree.Element
3939 SubElement = self.etree.SubElement
3940
3941 a = Element('a')
3942 b = SubElement(a, 'b')
3943 c = SubElement(a, 'c')
3944
3945 self.assertTrue(isinstance(tostring(a, encoding=_unicode), _unicode))
3946 self.assertEqual(_bytes('<a><b></b><c></c></a>'),
3947 canonicalize(tostring(a, encoding=_unicode)))
3948
3950 tostring = self.etree.tostring
3951 Element = self.etree.Element
3952 SubElement = self.etree.SubElement
3953
3954 a = Element('a')
3955 b = SubElement(a, 'b')
3956 c = SubElement(a, 'c')
3957 d = SubElement(c, 'd')
3958 self.assertTrue(isinstance(tostring(b, encoding=_unicode), _unicode))
3959 self.assertTrue(isinstance(tostring(c, encoding=_unicode), _unicode))
3960 self.assertEqual(_bytes('<b></b>'),
3961 canonicalize(tostring(b, encoding=_unicode)))
3962 self.assertEqual(_bytes('<c><d></d></c>'),
3963 canonicalize(tostring(c, encoding=_unicode)))
3964
3966 tostring = self.etree.tostring
3967 self.assertRaises(TypeError, self.etree.tostring,
3968 None, encoding=_unicode)
3969
3971 tostring = self.etree.tostring
3972 Element = self.etree.Element
3973 SubElement = self.etree.SubElement
3974
3975 a = Element('a')
3976 b = SubElement(a, 'b')
3977 c = SubElement(a, 'c')
3978 d = SubElement(c, 'd')
3979 b.tail = 'Foo'
3980
3981 self.assertTrue(isinstance(tostring(b, encoding=_unicode), _unicode))
3982 self.assertTrue(tostring(b, encoding=_unicode) == '<b/>Foo' or
3983 tostring(b, encoding=_unicode) == '<b />Foo')
3984
3986 tostring = self.etree.tostring
3987 Element = self.etree.Element
3988 SubElement = self.etree.SubElement
3989
3990 a = Element('a')
3991 b = SubElement(a, 'b')
3992 c = SubElement(a, 'c')
3993
3994 result = tostring(a, encoding=_unicode)
3995 self.assertEqual(result, "<a><b/><c/></a>")
3996
3997 result = tostring(a, encoding=_unicode, pretty_print=False)
3998 self.assertEqual(result, "<a><b/><c/></a>")
3999
4000 result = tostring(a, encoding=_unicode, pretty_print=True)
4001 self.assertEqual(result, "<a>\n <b/>\n <c/>\n</a>\n")
4002
4004 root = etree.Element('parent')
4005 etree.SubElement(root, 'child')
4006
4007 self.assertEqual(len(root), 1)
4008 self.assertEqual(root[0].tag, 'child')
4009
4010 # in PyPy, GC used to kill the Python proxy instance without cleanup
4011 gc.collect()
4012 self.assertEqual(len(root), 1)
4013 self.assertEqual(root[0].tag, 'child')
4014
4018
4019 el1 = SubEl()
4020 el2 = SubEl()
4021 self.assertEqual('SubEl', el1.tag)
4022 self.assertEqual('SubEl', el2.tag)
4023 el1.other = el2
4024 el2.other = el1
4025
4026 del el1, el2
4027 gc.collect()
4028 # not really testing anything here, but it shouldn't crash
4029
4031 root = etree.Element('parent')
4032 c1 = etree.SubElement(root, 'child1')
4033 c2 = etree.SubElement(root, 'child2')
4034
4035 root.remove(c1)
4036 root.remove(c2)
4037 c1.addnext(c2)
4038 del c1
4039 # trigger deallocation attempt of c1
4040 c2.getprevious()
4041 # make sure it wasn't deallocated
4042 self.assertEqual('child1', c2.getprevious().tag)
4043
4045 root = etree.Element('parent')
4046 c1 = etree.SubElement(root, 'child1')
4047 c2 = etree.SubElement(root, 'child2')
4048
4049 root.remove(c1)
4050 root.remove(c2)
4051 c1.addnext(c2)
4052 c1.tail = 'abc'
4053 c2.tail = 'xyz'
4054 del c1
4055 # trigger deallocation attempt of c1
4056 c2.getprevious()
4057 # make sure it wasn't deallocated
4058 self.assertEqual('child1', c2.getprevious().tag)
4059 self.assertEqual('abc', c2.getprevious().tail)
4060
4061 # helper methods
4062
4064 """Write out element for comparison.
4065 """
4066 ElementTree = self.etree.ElementTree
4067 f = BytesIO()
4068 tree = ElementTree(element=element)
4069 tree.write(f, encoding=encoding, compression=compression)
4070 data = f.getvalue()
4071 if compression:
4072 data = zlib.decompress(data)
4073 return canonicalize(data)
4074
4078 filename = fileInTestDir('test_broken.xml')
4079 root = etree.XML(_bytes('''\
4080 <doc xmlns:xi="http://www.w3.org/2001/XInclude">
4081 <xi:include href="%s" parse="text"/>
4082 </doc>
4083 ''' % path2url(filename)))
4084 old_text = root.text
4085 content = read_file(filename)
4086 old_tail = root[0].tail
4087
4088 self.include( etree.ElementTree(root) )
4089 self.assertEqual(old_text + content + old_tail,
4090 root.text)
4091
4093 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'))
4094 self.assertNotEqual(
4095 'a',
4096 tree.getroot()[1].tag)
4097 # process xincludes
4098 self.include( tree )
4099 # check whether we find it replaced with included data
4100 self.assertEqual(
4101 'a',
4102 tree.getroot()[1].tag)
4103
class res(etree.Resolver):
    """Resolver that records which kinds of documents were requested.

    Each request sets one of the flags "dtd", "input" or "include" in
    ``called`` so the surrounding test can check afterwards which kinds
    of lookups went through this resolver.
    """
    # Text returned for every request that is neither a DTD nor the main
    # input document.
    include_text = read_file(fileInTestDir('test.xml'))
    # Class-level dict; resolve() mutates it in place via ``self.called``.
    called = {}

    def resolve(self, url, id, context):
        """Dispatch on the suffix of *url* and note the request kind."""
        if url.endswith(".dtd"):
            self.called["dtd"] = True
            return self.resolve_filename(
                fileInTestDir('test.dtd'), context)
        if url.endswith("test_xinclude.xml"):
            self.called["input"] = True
            # Returning None delegates to the default resolver.
            return None
        self.called["include"] = True
        return self.resolve_string(self.include_text, context)
4119
4120 res_instance = res()
4121 parser = etree.XMLParser(load_dtd = True)
4122 parser.resolvers.add(res_instance)
4123
4124 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
4125 parser = parser)
4126
4127 self.include(tree)
4128
4129 called = list(res_instance.called.items())
4130 called.sort()
4131 self.assertEqual(
4132 [("dtd", True), ("include", True), ("input", True)],
4133 called)
4134
4136 data = textwrap.dedent('''
4137 <doc xmlns:xi="http://www.w3.org/2001/XInclude">
4138 <foo/>
4139 <xi:include href="./test.xml" />
4140 </doc>
4141 ''')
4142
class Resolver(etree.Resolver):
    """Resolver that serves a chain of documents including one another.

    Every distinct request is recorded in ``called`` and asserted to
    happen at most once.  ``data`` is the XML template taken from the
    enclosing scope.
    """
    # Class-level dict; resolve() mutates it in place.
    called = {}

    def resolve(self, url, id, context):
        """Answer *url* with the main input, the final doc or the next link."""
        seen = self.called
        if url.endswith("test_xinclude.xml"):
            assert not seen.get("input")
            seen["input"] = True
            # Returning None delegates to the default resolver.
            return None
        if url.endswith('/test5.xml'):
            # Last link of the chain: no further include is handed out.
            assert not seen.get("DONE")
            seen["DONE"] = True
            return self.resolve_string('<DONE/>', context)
        _prefix, filename = url.rsplit('/', 1)
        assert not seen.get(filename)
        seen[filename] = True
        # The successor's number is the count of requests recorded so far.
        successor = 'test%d.xml' % len(seen)
        next_data = data.replace('test.xml', successor)
        return self.resolve_string(next_data, context)
4162
4163 res_instance = Resolver()
4164 parser = etree.XMLParser(load_dtd=True)
4165 parser.resolvers.add(res_instance)
4166
4167 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
4168 parser=parser)
4169
4170 self.include(tree)
4171
4172 called = list(res_instance.called.items())
4173 called.sort()
4174 self.assertEqual(
4175 [("DONE", True), ("input", True), ("test.xml", True),
4176 ("test2.xml", True), ("test3.xml", True), ("test4.xml", True)],
4177 called)
4178
4183
4189
4193 tree = self.parse(_bytes('<a><b/></a>'))
4194 f = BytesIO()
4195 tree.write_c14n(f)
4196 s = f.getvalue()
4197 self.assertEqual(_bytes('<a><b></b></a>'),
4198 s)
4199
4201 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4202 f = BytesIO()
4203 tree.write_c14n(f, compression=9)
4204 with closing(gzip.GzipFile(fileobj=BytesIO(f.getvalue()))) as gzfile:
4205 s = gzfile.read()
4206 self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
4207 s)
4208
4210 tree = self.parse(_bytes('<a><b/></a>'))
4211 with tmpfile() as filename:
4212 tree.write_c14n(filename)
4213 data = read_file(filename, 'rb')
4214 self.assertEqual(_bytes('<a><b></b></a>'),
4215 data)
4216
4218 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4219 with tmpfile() as filename:
4220 tree.write_c14n(filename, compression=9)
4221 with closing(gzip.open(filename, 'rb')) as f:
4222 data = f.read()
4223 self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
4224 data)
4225
4227 tree = self.parse(_bytes('<!--hi--><a><!--ho--><b/></a><!--hu-->'))
4228 f = BytesIO()
4229 tree.write_c14n(f)
4230 s = f.getvalue()
4231 self.assertEqual(_bytes('<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
4232 s)
4233 f = BytesIO()
4234 tree.write_c14n(f, with_comments=True)
4235 s = f.getvalue()
4236 self.assertEqual(_bytes('<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
4237 s)
4238 f = BytesIO()
4239 tree.write_c14n(f, with_comments=False)
4240 s = f.getvalue()
4241 self.assertEqual(_bytes('<a><b></b></a>'),
4242 s)
4243
4245 tree = self.parse(_bytes('<!--hi--><a><!--ho--><b/></a><!--hu-->'))
4246 s = etree.tostring(tree, method='c14n')
4247 self.assertEqual(_bytes('<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
4248 s)
4249 s = etree.tostring(tree, method='c14n', with_comments=True)
4250 self.assertEqual(_bytes('<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
4251 s)
4252 s = etree.tostring(tree, method='c14n', with_comments=False)
4253 self.assertEqual(_bytes('<a><b></b></a>'),
4254 s)
4255
4257 tree = self.parse(_bytes('<!--hi--><a><!--ho--><b/></a><!--hu-->'))
4258 s = etree.tostring(tree.getroot(), method='c14n')
4259 self.assertEqual(_bytes('<a><!--ho--><b></b></a>'),
4260 s)
4261 s = etree.tostring(tree.getroot(), method='c14n', with_comments=True)
4262 self.assertEqual(_bytes('<a><!--ho--><b></b></a>'),
4263 s)
4264 s = etree.tostring(tree.getroot(), method='c14n', with_comments=False)
4265 self.assertEqual(_bytes('<a><b></b></a>'),
4266 s)
4267
4269 tree = self.parse(_bytes(
4270 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4271 f = BytesIO()
4272 tree.write_c14n(f)
4273 s = f.getvalue()
4274 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4275 s)
4276 f = BytesIO()
4277 tree.write_c14n(f, exclusive=False)
4278 s = f.getvalue()
4279 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4280 s)
4281 f = BytesIO()
4282 tree.write_c14n(f, exclusive=True)
4283 s = f.getvalue()
4284 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
4285 s)
4286
4287 f = BytesIO()
4288 tree.write_c14n(f, exclusive=True, inclusive_ns_prefixes=['z'])
4289 s = f.getvalue()
4290 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:z="http://cde"><z:b></z:b></a>'),
4291 s)
4292
4294 tree = self.parse(_bytes(
4295 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4296 s = etree.tostring(tree, method='c14n')
4297 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4298 s)
4299 s = etree.tostring(tree, method='c14n', exclusive=False)
4300 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4301 s)
4302 s = etree.tostring(tree, method='c14n', exclusive=True)
4303 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
4304 s)
4305
4306 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
4307 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd"><z:b xmlns:z="http://cde"></z:b></a>'),
4308 s)
4309
4311 tree = self.parse(_bytes(
4312 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4313 s = etree.tostring(tree.getroot(), method='c14n')
4314 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4315 s)
4316 s = etree.tostring(tree.getroot(), method='c14n', exclusive=False)
4317 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4318 s)
4319 s = etree.tostring(tree.getroot(), method='c14n', exclusive=True)
4320 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
4321 s)
4322
4323 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=False)
4324 self.assertEqual(_bytes('<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
4325 s)
4326 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True)
4327 self.assertEqual(_bytes('<z:b xmlns:z="http://cde"></z:b>'),
4328 s)
4329
4330 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
4331 self.assertEqual(_bytes('<z:b xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
4332 s)
4333
4335 """ Regression test to fix memory allocation issues (use 3+ inclusive NS spaces)"""
4336 tree = self.parse(_bytes(
4337 '<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
4338
4339 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['x', 'y', 'z'])
4340 self.assertEqual(_bytes('<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
4341 s)
4342
4346 tree = self.parse(_bytes('<a><b/></a>'))
4347 f = BytesIO()
4348 tree.write(f)
4349 s = f.getvalue()
4350 self.assertEqual(_bytes('<a><b/></a>'),
4351 s)
4352
4354 tree = self.parse(_bytes('<a><b/></a>'))
4355 f = BytesIO()
4356 tree.write(f, doctype='HUHU')
4357 s = f.getvalue()
4358 self.assertEqual(_bytes('HUHU\n<a><b/></a>'),
4359 s)
4360
4362 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4363 f = BytesIO()
4364 tree.write(f, compression=9)
4365 with closing(gzip.GzipFile(fileobj=BytesIO(f.getvalue()))) as gzfile:
4366 s = gzfile.read()
4367 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4368 s)
4369
4371 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4372 f = BytesIO()
4373 tree.write(f, compression=9, doctype='<!DOCTYPE a>')
4374 with closing(gzip.GzipFile(fileobj=BytesIO(f.getvalue()))) as gzfile:
4375 s = gzfile.read()
4376 self.assertEqual(_bytes('<!DOCTYPE a>\n<a>'+'<b/>'*200+'</a>'),
4377 s)
4378
4380 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4381 f = BytesIO()
4382 tree.write(f, compression=0)
4383 s0 = f.getvalue()
4384
4385 f = BytesIO()
4386 tree.write(f)
4387 self.assertEqual(f.getvalue(), s0)
4388
4389 f = BytesIO()
4390 tree.write(f, compression=1)
4391 s = f.getvalue()
4392 self.assertTrue(len(s) <= len(s0))
4393 with closing(gzip.GzipFile(fileobj=BytesIO(s))) as gzfile:
4394 s1 = gzfile.read()
4395
4396 f = BytesIO()
4397 tree.write(f, compression=9)
4398 s = f.getvalue()
4399 self.assertTrue(len(s) <= len(s0))
4400 with closing(gzip.GzipFile(fileobj=BytesIO(s))) as gzfile:
4401 s9 = gzfile.read()
4402
4403 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4404 s0)
4405 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4406 s1)
4407 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4408 s9)
4409
4411 tree = self.parse(_bytes('<a><b/></a>'))
4412 with tmpfile() as filename:
4413 tree.write(filename)
4414 data = read_file(filename, 'rb')
4415 self.assertEqual(_bytes('<a><b/></a>'),
4416 data)
4417
4419 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4420 with tmpfile() as filename:
4421 tree.write(filename, compression=9)
4422 with closing(gzip.open(filename, 'rb')) as f:
4423 data = f.read()
4424 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4425 data)
4426
4428 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
4429 with tmpfile() as filename:
4430 tree.write(filename, compression=9)
4431 data = etree.tostring(etree.parse(filename))
4432 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
4433 data)
4434
4443
4446 etree = etree
4447
4449 parse = self.etree.parse
4450 f = BytesIO('<a><b></c></b></a>')
4451 self.etree.clear_error_log()
4452 try:
4453 parse(f)
4454 logs = None
4455 except SyntaxError:
4456 e = sys.exc_info()[1]
4457 logs = e.error_log
4458 f.close()
4459 self.assertTrue([ log for log in logs
4460 if 'mismatch' in log.message ])
4461 self.assertTrue([ log for log in logs
4462 if 'PARSER' in log.domain_name])
4463 self.assertTrue([ log for log in logs
4464 if 'ERR_TAG_NAME_MISMATCH' in log.type_name ])
4465 self.assertTrue([ log for log in logs
4466 if 1 == log.line ])
4467 self.assertTrue([ log for log in logs
4468 if 15 == log.column ])
4469
4480
4481 self.etree.use_global_python_log(Logger())
4482 f = BytesIO('<a><b></c></b></a>')
4483 try:
4484 parse(f)
4485 except SyntaxError:
4486 pass
4487 f.close()
4488
4489 self.assertTrue([ message for message in messages
4490 if 'mismatch' in message ])
4491 self.assertTrue([ message for message in messages
4492 if ':PARSER:' in message])
4493 self.assertTrue([ message for message in messages
4494 if ':ERR_TAG_NAME_MISMATCH:' in message ])
4495 self.assertTrue([ message for message in messages
4496 if ':1:15:' in message ])
4497
def close(self):
    """Return the fixed marker string ``'close()'``."""
    marker = 'close()'
    return marker
4514
4515 parser = self.etree.XMLPullParser(target=Target())
4516 events = parser.read_events()
4517
4518 parser.feed('<root><element>')
4519 self.assertFalse(list(events))
4520 self.assertFalse(list(events))
4521 parser.feed('</element><child>')
4522 self.assertEqual([('end', 'end(element)')], list(events))
4523 parser.feed('</child>')
4524 self.assertEqual([('end', 'end(child)')], list(events))
4525 parser.feed('</root>')
4526 self.assertEqual([('end', 'end(root)')], list(events))
4527 self.assertFalse(list(events))
4528 self.assertEqual('close()', parser.close())
4529
def end(self, tag):
    """Return a string of the form ``end(<tag>)`` for the given *tag*."""
    described = 'end(%s)' % tag
    return described
def close(self):
    """Return the fixed marker string ``'close()'``."""
    marker = 'close()'
    return marker
4538
4539 parser = self.etree.XMLPullParser(
4540 ['start', 'end'], target=Target())
4541 events = parser.read_events()
4542
4543 parser.feed('<root><element>')
4544 self.assertEqual(
4545 [('start', 'start(root)'), ('start', 'start(element)')],
4546 list(events))
4547 self.assertFalse(list(events))
4548 parser.feed('</element><child>')
4549 self.assertEqual(
4550 [('end', 'end(element)'), ('start', 'start(child)')],
4551 list(events))
4552 parser.feed('</child>')
4553 self.assertEqual(
4554 [('end', 'end(child)')],
4555 list(events))
4556 parser.feed('</root>')
4557 self.assertEqual(
4558 [('end', 'end(root)')],
4559 list(events))
4560 self.assertFalse(list(events))
4561 self.assertEqual('close()', parser.close())
4562
4564 parser = self.etree.XMLPullParser(
4565 ['start', 'end'], target=etree.TreeBuilder())
4566 events = parser.read_events()
4567
4568 parser.feed('<root><element>')
4569 self.assert_event_tags(
4570 events, [('start', 'root'), ('start', 'element')])
4571 self.assertFalse(list(events))
4572 parser.feed('</element><child>')
4573 self.assert_event_tags(
4574 events, [('end', 'element'), ('start', 'child')])
4575 parser.feed('</child>')
4576 self.assert_event_tags(
4577 events, [('end', 'child')])
4578 parser.feed('</root>')
4579 self.assert_event_tags(
4580 events, [('end', 'root')])
4581 self.assertFalse(list(events))
4582 root = parser.close()
4583 self.assertEqual('root', root.tag)
4584
class Target(etree.TreeBuilder):
    """TreeBuilder that appends ``-huhu`` to the tag of each closed element."""

    def end(self, tag):
        """Close the element via the base class, then rename it."""
        closed = super(Target, self).end(tag)
        closed.tag = closed.tag + '-huhu'
        return closed
4591
4592 parser = self.etree.XMLPullParser(
4593 ['start', 'end'], target=Target())
4594 events = parser.read_events()
4595
4596 parser.feed('<root><element>')
4597 self.assert_event_tags(
4598 events, [('start', 'root'), ('start', 'element')])
4599 self.assertFalse(list(events))
4600 parser.feed('</element><child>')
4601 self.assert_event_tags(
4602 events, [('end', 'element-huhu'), ('start', 'child')])
4603 parser.feed('</child>')
4604 self.assert_event_tags(
4605 events, [('end', 'child-huhu')])
4606 parser.feed('</root>')
4607 self.assert_event_tags(
4608 events, [('end', 'root-huhu')])
4609 self.assertFalse(list(events))
4610 root = parser.close()
4611 self.assertEqual('root-huhu', root.tag)
4612
4615 suite = unittest.TestSuite()
4616 suite.addTests([unittest.makeSuite(ETreeOnlyTestCase)])
4617 suite.addTests([unittest.makeSuite(ETreeXIncludeTestCase)])
4618 suite.addTests([unittest.makeSuite(ElementIncludeTestCase)])
4619 suite.addTests([unittest.makeSuite(ETreeC14NTestCase)])
4620 suite.addTests([unittest.makeSuite(ETreeWriteTestCase)])
4621 suite.addTests([unittest.makeSuite(ETreeErrorLogTest)])
4622 suite.addTests([unittest.makeSuite(XMLPullParserTest)])
4623
4624 # add original doctests from ElementTree selftest modules
4625 from . import selftest, selftest2
4626 suite.addTests(doctest.DocTestSuite(selftest))
4627 suite.addTests(doctest.DocTestSuite(selftest2))
4628
4629 # add doctests
4630 suite.addTests(doctest.DocTestSuite(etree))
4631 suite.addTests(
4632 [make_doctest('../../../doc/tutorial.txt')])
4633 suite.addTests(
4634 [make_doctest('../../../doc/api.txt')])
4635 suite.addTests(
4636 [make_doctest('../../../doc/FAQ.txt')])
4637 suite.addTests(
4638 [make_doctest('../../../doc/parsing.txt')])
4639 suite.addTests(
4640 [make_doctest('../../../doc/resolvers.txt')])
4641 return suite
4642
4643
if __name__ == '__main__':
    # Running this module directly only prints a hint naming test.py.
    hint = 'to test use test.py %s' % __file__
    print(hint)
4646
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Fri Mar 1 01:51:11 2019 | http://epydoc.sourceforge.net |