| Home | Trees | Indices | Help |
|
|---|
|
|
1 # -*- coding: utf-8 -*-
2 from __future__ import absolute_import
3
4 import unittest
5 import sys
6
7 from .common_imports import StringIO, etree, HelperTestCase, _str, _bytes, _chr
8
# Compatibility shim: when the name `unicode` is not defined, alias it to str.
9 try:
10 unicode
11 except NameError:
12 unicode = str
13
# Decoded text for the single ASCII letter 'a'.
14 ascii_uni = _bytes('a').decode('utf8')
15
# The literal escape text \uF8D2, turned into the character U+F8D2 by the
# unicode_escape codec.
16 klingon = _bytes("\\uF8D2").decode("unicode_escape") # not valid for XML names
17
# "test" followed by U+F8D2; used where a tag name is expected to be rejected.
18 invalid_tag = _bytes("test").decode('utf8') + klingon
19
# Three characters after unicode_escape decoding: U+00C3, U+0680, U+3120.
20 uni = _bytes('\\xc3\\u0680\\u3120').decode("unicode_escape") # some non-ASCII characters
21
# Sample XML document as text. Note that unicode_escape turns \xc3\xa1 into the
# two characters U+00C3 U+00A1, not into a single UTF-8 decoded character.
22 uxml = _bytes("<test><title>test \\xc3\\xa1\\u3120</title><h1>page \\xc3\\xa1\\u3120 title</h1></test>"
23 ).decode("unicode_escape")
24
25
# NOTE(review): the enclosing def line is not visible in this listing.
# Each pair spells the same code point with a different escape form
# (\x, \u, \U); _str must yield equal strings for both spellings.
28 # test the testing framework, namely _str from common_imports
29 self.assertEqual(_str('\x10'), _str('\u0010'))
30 self.assertEqual(_str('\x10'), _str('\U00000010'))
31 self.assertEqual(_str('\u1234'), _str('\U00001234'))
32
36
# NOTE(review): the enclosing def line is not visible in this listing.
# 1114111 == 0x10FFFF: interpreters whose sys.maxunicode is smaller return
# early instead of running the checks below.
38 if sys.maxunicode < 1114111:
39 return # skip test
# Parse a document whose text is the single non-BMP character U+26007 and
# check that it comes back as exactly one character equal to the input.
40 tree = etree.XML(_bytes('<p>\\U00026007</p>').decode('unicode_escape'))
41 self.assertEqual(1, len(tree.text))
42 self.assertEqual(_bytes('\\U00026007').decode('unicode_escape'),
43 tree.text)
44
# NOTE(review): the enclosing def line is not visible in this listing.
# Builds a text (not bytes) document that carries an XML encoding declaration
# and expects etree.XML to reject it with ValueError.
# This local `uxml` shadows the module-level constant of the same name.
46 uxml = ('<?xml version="1.0" encoding="UTF-8"?>' +
47 '<p>%s</p>' % uni)
48 self.assertRaises(ValueError, etree.XML, uxml)
49
53
57
62
# NOTE(review): the enclosing def line is not visible in this listing.
# A Clark-notation tag whose namespace URI contains the non-ASCII characters
# from `uni` must make etree.Element raise ValueError.
64 # namespace URIs must conform to RFC 3986
65 tag = "{http://%s/}abc" % uni
66 self.assertRaises(ValueError, etree.Element, tag)
67
# NOTE(review): the enclosing def line is not visible in this listing.
# Valid ASCII namespace, but the local name ends in U+F8D2 (see invalid_tag),
# so etree.Element is expected to raise ValueError.
69 # sadly, Klingon is not well-formed
70 tag = "{http://abc/}%s" % invalid_tag
71 self.assertRaises(ValueError, etree.Element, tag)
72
# NOTE(review): the enclosing def line is not visible in this listing.
# A QName built from (namespace, local name) = (uni, uni) must expose the
# Clark notation "{uni}uni" both as .text and through unicode().
74 qname = etree.QName(uni, uni)
75 tag = "{%s}%s" % (uni, uni)
76 self.assertEqual(qname.text, tag)
77 self.assertEqual(unicode(qname), tag)
78
81
85
89
94
98
102
106
112
113 self.assertRaises(ValueError, settext, _str('ab\ufffe'))
114 self.assertRaises(ValueError, settext, _str('ö\ffff'))
115 self.assertRaises(ValueError, settext, _str('\u0123\ud800'))
116 self.assertRaises(ValueError, settext, _str('x\ud8ff'))
117 self.assertRaises(ValueError, settext, _str('\U00010000\udfff'))
118 self.assertRaises(ValueError, settext, _str('abd\x00def'))
119 # should not Raise
120 settext(_str('\ud7ff\ue000\U00010000\U0010FFFFäöas'))
121
# Every code point in the surrogate range 0xD800..0xDFFF (inclusive) must be
# rejected with ValueError whether it appears at the end, alone, or at the
# start of the text.
122 for char_val in range(0xD800, 0xDFFF+1):
123 self.assertRaises(ValueError, settext, 'abc' + _chr(char_val))
124 self.assertRaises(ValueError, settext, _chr(char_val))
125 self.assertRaises(ValueError, settext, _chr(char_val) + 'abc')
126
# Byte-string inputs: the non-ASCII bytes (0xE4, 0x80, 0xFF) and the control
# bytes (0x08, 0x19, 0x00) are each expected to raise ValueError.
127 self.assertRaises(ValueError, settext, _bytes('\xe4'))
128 self.assertRaises(ValueError, settext, _bytes('\x80'))
129 self.assertRaises(ValueError, settext, _bytes('\xff'))
130 self.assertRaises(ValueError, settext, _bytes('\x08'))
131 self.assertRaises(ValueError, settext, _bytes('\x19'))
132 self.assertRaises(ValueError, settext, _bytes('\x20\x00'))
# Tab, LF, CR, space, backtick and DEL (0x7F) must be accepted.
133 # should not Raise
134 settext(_bytes('\x09\x0A\x0D\x20\x60\x7f'))
135
140
# NOTE(review): the enclosing def line and the definition of `el` are not
# visible in this listing; `el` presumably creates an element from a tag
# name -- confirm against the full source.
# A lone colon, a name starting with a digit, and U+203F are each expected to
# raise ValueError.
141 self.assertRaises(ValueError, el, ':')
142 self.assertRaises(ValueError, el, '0a')
143 self.assertRaises(ValueError, el, _str('\u203f'))
# U+0132 must be accepted as a name.
144 # should not Raise
145 el(_str('\u0132'))
146
147
148
152
153 ## def test_parse_fileobject_unicode(self):
154 ## # parse unicode from unnamed file object (not supported by ElementTree)
155 ## f = SillyFileLike(uxml)
156 ## root = etree.parse(f).getroot()
157 ## self.assertEqual(unicode(etree.tostring(root, 'UTF-8'), 'UTF-8'),
158 ## uxml)
159
160
# NOTE(review): the enclosing def line is not visible in this listing.
# Document with no encoding declaration whose text is three \x80 characters,
# produced via _bytes(..., encoding='iso8859-1') (presumably three raw 0x80
# bytes -- confirm against common_imports); parsing must raise XMLSyntaxError.
163 data = _bytes('<test>\x80\x80\x80</test>', encoding='iso8859-1')
164 self.assertRaises(etree.XMLSyntaxError, etree.fromstring, data)
165
# NOTE(review): the enclosing def line is not visible in this listing.
# Same malformed input as the non-recovering case, but parsed with
# XMLParser(recover=True); XMLSyntaxError is still expected.
167 data = _bytes('<test>\x80\x80\x80</test>', encoding='iso8859-1')
168 parser = etree.XMLParser(recover=True)
169 self.assertRaises(etree.XMLSyntaxError, etree.fromstring, data, parser)
170
172 foo = """<?xml version='1.0' encoding='%s'?>\n<tag attrib='123'></tag>""" % (
173 xml_encoding_name or encoding)
174 root = etree.fromstring(foo.encode(encoding))
175 self.assertEqual('tag', root.tag)
176
177 doc_encoding = root.getroottree().docinfo.encoding
178 self.assertTrue(
179 doc_encoding.lower().rstrip('lbe'),
180 (xml_encoding_name or encoding).lower().rstrip('lbe'))
181
# NOTE(review): each call below is the visible body of a separate test whose
# def line is missing from this listing. The first argument is presumably the
# codec used to encode the document and the optional second argument the name
# written into the XML declaration -- confirm against _test_encoding's def.
183 self._test_encoding('utf-8')
184
# UTF-8 with a BOM (utf_8_sig codec), declared as plain utf-8.
186 self._test_encoding('utf_8_sig', 'utf-8')
187
189 self._test_encoding('utf-16')
190
# Explicit byte orders, declared under the generic encoding name.
192 self._test_encoding('utf-16le', 'utf-16')
193
195 self._test_encoding('utf-16be', 'utf-16')
196
198 self._test_encoding('utf-32', 'utf-32')
199
201 self._test_encoding('utf-32le', 'utf-32')
202
204 self._test_encoding('utf-32be', 'utf-32')
205
206
208 suite = unittest.TestSuite()
209 suite.addTests([unittest.makeSuite(UnicodeTestCase)])
210 suite.addTests([unittest.makeSuite(EncodingsTestCase)])
211 return suite
212
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Sat Jul 11 05:50:06 2020 | http://epydoc.sourceforge.net |