1   
  2  import unittest 
  3  import sys 
  4  import os.path 
  5   
  6  this_dir = os.path.dirname(__file__)  # directory part of this module's path
  7  if this_dir not in sys.path:  # only add it when missing, so no duplicate entry is created
  8      sys.path.insert(0, this_dir)  # placed first; presumably so the sibling common_imports module resolves from here - confirm
  9   
 10  from common_imports import StringIO, etree, SillyFileLike, HelperTestCase 
 11  from common_imports import _str, _bytes, _chr 
 12   
 13  try:  # compatibility shim: probe whether the name `unicode` exists
 14      unicode 
 15  except NameError:  # the name is undefined (as on Python 3, which has no `unicode` builtin)
 16      unicode = str  # fall back to str so later code can keep using `unicode`
 17   
 18  ascii_uni = _bytes('a').decode('utf8')  # the letter "a", decoded from the value returned by _bytes
 19   
 20  klingon = _bytes("\\uF8D2").decode("unicode_escape")  # the single code point U+F8D2, built from its escape text; NOTE(review): a Private Use Area character, presumably chosen because it is not allowed in XML names - confirm
 21   
 22  invalid_tag = _bytes("test").decode('utf8') + klingon  # "test" followed by the klingon character
 23   
 24  uni = _bytes('\\xc3\\u0680\\u3120').decode("unicode_escape")  # three non-ASCII code points: U+00C3, U+0680, U+3120
 25   
 26  uxml = _bytes("<test><title>test \\xc3\\xa1\\u3120</title><h1>page \\xc3\\xa1\\u3120 title</h1></test>" 
 27                ).decode("unicode_escape")  # XML document text with non-ASCII characters inside <title> and <h1>
 28   
 29   
 32           
 33          self.assertEqual(_str('\x10'), _str('\u0010'))  # \x and \u spellings of code point 0x10 must compare equal after _str
 34          self.assertEqual(_str('\x10'), _str('\U00000010'))  # same check for the 8-digit \U spelling
 35          self.assertEqual(_str('\u1234'), _str('\U00001234'))  # \u and \U spellings of U+1234 must compare equal after _str
  36   
 40   
 42          if sys.maxunicode < 1114111:  # 1114111 == 0x10FFFF; a smaller value means strings cannot hold every code point as one item
 43              return  # in that case leave without running the checks below
 44          tree = etree.XML(_bytes('<p>\\U00026007</p>').decode('unicode_escape'))  # parse a <p> whose text is the single character U+26007 (outside the BMP)
 45          self.assertEqual(1, len(tree.text))  # the parsed text must be exactly one character long
 46          self.assertEqual(_bytes('\\U00026007').decode('unicode_escape'), 
 47                           tree.text)  # and it must equal that same character
  48   
 50          uxml = ('<?xml version="1.0" encoding="UTF-8"?>' +  # XML declaration that names an encoding ...
 51                  '<p>%s</p>' % uni)  # ... followed by a <p> wrapping the module-level `uni` text; this name reuses the module-level `uxml` identifier
 52          self.assertRaises(ValueError, etree.XML, uxml)  # etree.XML is expected to reject this input with ValueError
  53   
 57   
 61   
 66   
 71   
 76   
 82   
 85   
 89   
 93   
 98   
102   
104          x = etree.ProcessingInstruction(_str('Å'), _str('\u0131'))  # processing instruction built from two non-ASCII string arguments
105          repr(x)  # result is discarded; the line only checks that repr() completes without raising
 106   
110   
112          e = etree.Element('e') 
113   
114          def settext(text): 
115              e.text = text 
 116   
117          self.assertRaises(ValueError, settext, _str('ab\ufffe')) 
118          self.assertRaises(ValueError, settext, _str('ö\ffff')) 
119          self.assertRaises(ValueError, settext, _str('\u0123\ud800')) 
120          self.assertRaises(ValueError, settext, _str('x\ud8ff')) 
121          self.assertRaises(ValueError, settext, _str('\U00010000\udfff')) 
122          self.assertRaises(ValueError, settext, _str('abd\x00def')) 
123           
124          settext(_str('\ud7ff\ue000\U00010000\U0010FFFFäöas')) 
125   
126          for char_val in range(0xD800, 0xDFFF+1): 
127              self.assertRaises(ValueError, settext, 'abc' + _chr(char_val)) 
128              self.assertRaises(ValueError, settext, _chr(char_val)) 
129              self.assertRaises(ValueError, settext, _chr(char_val) + 'abc') 
130   
131          self.assertRaises(ValueError, settext, _bytes('\xe4')) 
132          self.assertRaises(ValueError, settext, _bytes('\x80')) 
133          self.assertRaises(ValueError, settext, _bytes('\xff')) 
134          self.assertRaises(ValueError, settext, _bytes('\x08')) 
135          self.assertRaises(ValueError, settext, _bytes('\x19')) 
136          self.assertRaises(ValueError, settext, _bytes('\x20\x00')) 
137           
138          settext(_bytes('\x09\x0A\x0D\x20\x60\x7f')) 
177      suite = unittest.TestSuite() 
178      suite.addTests([unittest.makeSuite(UnicodeTestCase)]) 
179      suite.addTests([unittest.makeSuite(EncodingsTestCase)]) 
180      return suite 
 181