| Home | Trees | Indices | Help |
|
|---|
|
|
1 # -*- coding: utf-8 -*-
2 import unittest
3 import sys
4 import os.path
5
6 this_dir = os.path.dirname(__file__)
7 if this_dir not in sys.path:
8 sys.path.insert(0, this_dir) # needed for Py3
9
10 from common_imports import StringIO, etree, SillyFileLike, HelperTestCase
11 from common_imports import _str, _bytes, _chr
12
13 try:
14 unicode
15 except NameError:
16 unicode = str
17
18 ascii_uni = _bytes('a').decode('utf8')
19
20 klingon = _bytes("\\uF8D2").decode("unicode_escape") # not valid for XML names
21
22 invalid_tag = _bytes("test").decode('utf8') + klingon
23
24 uni = _bytes('\\xc3\\u0680\\u3120').decode("unicode_escape") # some non-ASCII characters
25
26 uxml = _bytes("<test><title>test \\xc3\\xa1\\u3120</title><h1>page \\xc3\\xa1\\u3120 title</h1></test>"
27 ).decode("unicode_escape")
28
29
32 # test the testing framework, namely _str from common_imports
33 self.assertEqual(_str('\x10'), _str('\u0010'))
34 self.assertEqual(_str('\x10'), _str('\U00000010'))
35 self.assertEqual(_str('\u1234'), _str('\U00001234'))
36
40
42 if sys.maxunicode < 1114111:
43 return # skip test
44 tree = etree.XML(_bytes('<p>\\U00026007</p>').decode('unicode_escape'))
45 self.assertEqual(1, len(tree.text))
46 self.assertEqual(_bytes('\\U00026007').decode('unicode_escape'),
47 tree.text)
48
50 uxml = ('<?xml version="1.0" encoding="UTF-8"?>' +
51 '<p>%s</p>' % uni)
52 self.assertRaises(ValueError, etree.XML, uxml)
53
57
61
66
68 # namespace URIs must conform to RFC 3986
69 tag = "{http://%s/}abc" % uni
70 self.assertRaises(ValueError, etree.Element, tag)
71
73 # sadly, Klingon is not well-formed
74 tag = "{http://abc/}%s" % invalid_tag
75 self.assertRaises(ValueError, etree.Element, tag)
76
78 qname = etree.QName(uni, uni)
79 tag = "{%s}%s" % (uni, uni)
80 self.assertEqual(qname.text, tag)
81 self.assertEqual(unicode(qname), tag)
82
85
89
93
98
102
106
110
116
# NOTE(review): ``settext`` is defined earlier in the enclosing test and is not
# visible in this listing; these checks only rely on it raising ValueError
# for input it rejects.

# Unicode strings containing U+FFFE/U+FFFF, lone surrogates or NUL must be
# rejected.
self.assertRaises(ValueError, settext, _str('ab\ufffe'))
# U+FFFF needs the \u escape: '\ffff' would be a form feed followed by "fff".
self.assertRaises(ValueError, settext, _str('ö\uffff'))
self.assertRaises(ValueError, settext, _str('\u0123\ud800'))
self.assertRaises(ValueError, settext, _str('x\ud8ff'))
self.assertRaises(ValueError, settext, _str('\U00010000\udfff'))
self.assertRaises(ValueError, settext, _str('abd\x00def'))
# should not raise: code points just outside the surrogate range and the
# first/last supplementary-plane code points
settext(_str('\ud7ff\ue000\U00010000\U0010FFFFäöas'))

# Every code point of the surrogate block must be rejected regardless of
# where it appears in the string.
for char_val in range(0xD800, 0xDFFF + 1):
    self.assertRaises(ValueError, settext, 'abc' + _chr(char_val))
    self.assertRaises(ValueError, settext, _chr(char_val))
    self.assertRaises(ValueError, settext, _chr(char_val) + 'abc')

# Byte strings: non-ASCII bytes, control characters and NUL must be rejected.
self.assertRaises(ValueError, settext, _bytes('\xe4'))
self.assertRaises(ValueError, settext, _bytes('\x80'))
self.assertRaises(ValueError, settext, _bytes('\xff'))
self.assertRaises(ValueError, settext, _bytes('\x08'))
self.assertRaises(ValueError, settext, _bytes('\x19'))
self.assertRaises(ValueError, settext, _bytes('\x20\x00'))
# should not raise: tab, LF, CR, space, backtick and DEL
settext(_bytes('\x09\x0A\x0D\x20\x60\x7f'))
139
144
145 self.assertRaises(ValueError, el, ':')
146 self.assertRaises(ValueError, el, '0a')
147 self.assertRaises(ValueError, el, _str('\u203f'))
148 # should not Raise
149 el(_str('\u0132'))
150
151
152
156
157 ## def test_parse_fileobject_unicode(self):
158 ## # parse unicode from unnamed file object (not supported by ElementTree)
159 ## f = SillyFileLike(uxml)
160 ## root = etree.parse(f).getroot()
161 ## self.assertEqual(unicode(etree.tostring(root, 'UTF-8'), 'UTF-8'),
162 ## uxml)
163
164
167 data = _bytes('<test>\x80\x80\x80</test>', encoding='iso8859-1')
168 self.assertRaises(etree.XMLSyntaxError, etree.fromstring, data)
169
171 data = _bytes('<test>\x80\x80\x80</test>', encoding='iso8859-1')
172 parser = etree.XMLParser(recover=True)
173 self.assertRaises(etree.XMLSyntaxError, etree.fromstring, data, parser)
174
# Parse a minimal document that is declared as ``xml_encoding_name`` (falling
# back to ``encoding``) and serialised with the ``encoding`` codec, then check
# the root tag and the encoding reported by the parsed document.
declared_encoding = xml_encoding_name or encoding
foo = """<?xml version='1.0' encoding='%s'?>\n<tag attrib='123'></tag>""" % (
    declared_encoding)
root = etree.fromstring(foo.encode(encoding))
self.assertEqual('tag', root.tag)

doc_encoding = root.getroottree().docinfo.encoding
# rstrip('lbe') removes trailing 'l'/'b'/'e' characters -- presumably so that
# "utf-16le"/"utf-16be" compare equal to "utf-16".
# This has to be assertEqual: assertTrue(a, b) takes ``b`` as the failure
# message and passes for any non-empty ``a``, so nothing would be compared.
self.assertEqual(
    doc_encoding.lower().rstrip('lbe'),
    declared_encoding.lower().rstrip('lbe'))
185
187 self._test_encoding('utf-8')
188
190 self._test_encoding('utf_8_sig', 'utf-8')
191
193 self._test_encoding('utf-16')
194
196 self._test_encoding('utf-16le', 'utf-16')
197
199 self._test_encoding('utf-16be', 'utf-16')
200
202 self._test_encoding('utf-32', 'utf-32')
203
205 self._test_encoding('utf-32le', 'utf-32')
206
208 self._test_encoding('utf-32be', 'utf-32')
209
210
212 suite = unittest.TestSuite()
213 suite.addTests([unittest.makeSuite(UnicodeTestCase)])
214 suite.addTests([unittest.makeSuite(EncodingsTestCase)])
215 return suite
216
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Fri Nov 24 17:06:57 2017 | http://epydoc.sourceforge.net |