| Home | Trees | Indices | Help | 
 | 
|---|
|  | 
  1  # -*- coding: utf-8 -*- 
  2  import unittest 
  3  import sys 
  4  import os.path 
  5   
  6  this_dir = os.path.dirname(__file__)  # directory that contains this test module
  7  if this_dir not in sys.path:  # avoid inserting the same entry twice
  8      sys.path.insert(0, this_dir)  # needed for Py3 (lets the `common_imports` import below resolve)
  9   
 10  from common_imports import StringIO, etree, SillyFileLike, HelperTestCase 
 11  from common_imports import _str, _bytes, _chr 
 12   
 13  try:  # probe whether the name `unicode` exists in this interpreter
 14      unicode 
 15  except NameError:  # name is undefined: alias it to `str` so later code can call unicode(...)
 16      unicode = str 
 17   
 18  ascii_uni = _bytes('a').decode('utf8')  # the single letter "a", decoded from UTF-8
 19   
 20  klingon = _bytes("\\uF8D2").decode("unicode_escape") # escape sequence for U+F8D2; not valid for XML names 
 21   
 22  invalid_tag = _bytes("test").decode('utf8') + klingon  # "test" followed by the invalid-name character above
 23   
 24  uni = _bytes('\\xc3\\u0680\\u3120').decode("unicode_escape") # some non-ASCII characters 
 25   
 26  uxml = _bytes("<test><title>test \\xc3\\xa1\\u3120</title><h1>page \\xc3\\xa1\\u3120 title</h1></test>" 
 27                ).decode("unicode_escape")  # XML snippet with non-ASCII characters in <title> and <h1>
 28   
 29   
 32          # test the testing framework, namely _str from common_imports 
 33          self.assertEqual(_str('\x10'), _str('\u0010')) 
 34          self.assertEqual(_str('\x10'), _str('\U00000010')) 
 35          self.assertEqual(_str('\u1234'), _str('\U00001234')) 
 36   
 40   
 42          if sys.maxunicode < 1114111: 
 43              return  # skip test 
 44          tree = etree.XML(_bytes('<p>\\U00026007</p>').decode('unicode_escape')) 
 45          self.assertEqual(1, len(tree.text)) 
 46          self.assertEqual(_bytes('\\U00026007').decode('unicode_escape'), 
 47                           tree.text) 
 48   
 50          uxml = ('<?xml version="1.0" encoding="UTF-8"?>' + 
 51                  '<p>%s</p>' % uni) 
 52          self.assertRaises(ValueError, etree.XML, uxml) 
 53   
 57   
 61   
 66   
 68          # namespace URIs must conform to RFC 3986 
 69          tag = "{http://%s/}abc" % uni 
 70          self.assertRaises(ValueError, etree.Element, tag) 
 71   
 73          # sadly, Klingon is not well-formed 
 74          tag = "{http://abc/}%s" % invalid_tag 
 75          self.assertRaises(ValueError, etree.Element, tag) 
 76   
 78          qname = etree.QName(uni, uni) 
 79          tag = "{%s}%s" % (uni, uni) 
 80          self.assertEqual(qname.text, tag) 
 81          self.assertEqual(unicode(qname), tag) 
 82   
 85   
 89   
 93   
 98   
102   
106   
110   
116   
117          self.assertRaises(ValueError, settext, _str('ab\ufffe')) 
118          self.assertRaises(ValueError, settext, _str('ö\ffff')) 
119          self.assertRaises(ValueError, settext, _str('\u0123\ud800')) 
120          self.assertRaises(ValueError, settext, _str('x\ud8ff')) 
121          self.assertRaises(ValueError, settext, _str('\U00010000\udfff')) 
122          self.assertRaises(ValueError, settext, _str('abd\x00def')) 
123          # the following call must not raise
124          settext(_str('\ud7ff\ue000\U00010000\U0010FFFFäöas')) 
125   
126          for char_val in range(0xD800, 0xDFFF+1): 
127              self.assertRaises(ValueError, settext, 'abc' + _chr(char_val)) 
128              self.assertRaises(ValueError, settext, _chr(char_val)) 
129              self.assertRaises(ValueError, settext, _chr(char_val) + 'abc') 
130   
131          self.assertRaises(ValueError, settext, _bytes('\xe4')) 
132          self.assertRaises(ValueError, settext, _bytes('\x80')) 
133          self.assertRaises(ValueError, settext, _bytes('\xff')) 
134          self.assertRaises(ValueError, settext, _bytes('\x08')) 
135          self.assertRaises(ValueError, settext, _bytes('\x19')) 
136          self.assertRaises(ValueError, settext, _bytes('\x20\x00')) 
137          # the following call must not raise
138          settext(_bytes('\x09\x0A\x0D\x20\x60\x7f')) 
139   
144   
145          self.assertRaises(ValueError, el, ':') 
146          self.assertRaises(ValueError, el, '0a') 
147          self.assertRaises(ValueError, el, _str('\u203f')) 
148          # the following call must not raise
149          el(_str('\u0132')) 
150   
151   
152   
156   
157  ##     def test_parse_fileobject_unicode(self): 
158  ##         # parse unicode from unnamed file object (not supported by ElementTree)
159  ##         f = SillyFileLike(uxml) 
160  ##         root = etree.parse(f).getroot() 
161  ##         self.assertEqual(unicode(etree.tostring(root, 'UTF-8'), 'UTF-8'), 
162  ##                           uxml) 
163   
164   
167          data = _bytes('<test>\x80\x80\x80</test>', encoding='iso8859-1') 
168          self.assertRaises(etree.XMLSyntaxError, etree.fromstring, data) 
169   
171          data = _bytes('<test>\x80\x80\x80</test>', encoding='iso8859-1') 
172          parser = etree.XMLParser(recover=True) 
173          self.assertRaises(etree.XMLSyntaxError, etree.fromstring, data, parser) 
174   
176          foo = """<?xml version='1.0' encoding='%s'?>\n<tag attrib='123'></tag>""" % ( 
177              xml_encoding_name or encoding) 
178          root = etree.fromstring(foo.encode(encoding)) 
179          self.assertEqual('tag', root.tag) 
180   
181          doc_encoding = root.getroottree().docinfo.encoding 
182          self.assertTrue( 
183              doc_encoding.lower().rstrip('lbe'), 
184              (xml_encoding_name or encoding).lower().rstrip('lbe')) 
185   
187          self._test_encoding('utf-8') 
188   
190          self._test_encoding('utf_8_sig', 'utf-8') 
191   
193          self._test_encoding('utf-16') 
194   
196          self._test_encoding('utf-16le', 'utf-16') 
197   
199          self._test_encoding('utf-16be', 'utf-16') 
200   
202          self._test_encoding('utf-32', 'utf-32') 
203   
205          self._test_encoding('utf-32le', 'utf-32') 
206   
208          self._test_encoding('utf-32be', 'utf-32') 
209   
210   
212      suite = unittest.TestSuite() 
213      suite.addTests([unittest.makeSuite(UnicodeTestCase)]) 
214      suite.addTests([unittest.makeSuite(EncodingsTestCase)]) 
215      return suite 
216   
| Home | Trees | Indices | Help | 
 | 
|---|
| Generated by Epydoc 3.0.1 on Fri Nov 24 17:06:57 2017 | http://epydoc.sourceforge.net |