| Home | Trees | Indices | Help | 
 | 
|---|
|  | 
  1  # 
  2  # ElementTree 
  3  # $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $ 
  4  # 
  5  # limited xinclude support for element trees 
  6  # 
  7  # history: 
  8  # 2003-08-15 fl   created 
  9  # 2003-11-14 fl   fixed default loader 
 10  # 
 11  # Copyright (c) 2003-2004 by Fredrik Lundh.  All rights reserved. 
 12  # 
 13  # fredrik@pythonware.com 
 14  # http://www.pythonware.com 
 15  # 
 16  # -------------------------------------------------------------------- 
 17  # The ElementTree toolkit is 
 18  # 
 19  # Copyright (c) 1999-2004 by Fredrik Lundh 
 20  # 
 21  # By obtaining, using, and/or copying this software and/or its 
 22  # associated documentation, you agree that you have read, understood, 
 23  # and will comply with the following terms and conditions: 
 24  # 
 25  # Permission to use, copy, modify, and distribute this software and 
 26  # its associated documentation for any purpose and without fee is 
 27  # hereby granted, provided that the above copyright notice appears in 
 28  # all copies, and that both that copyright notice and this permission 
 29  # notice appear in supporting documentation, and that the name of 
 30  # Secret Labs AB or the author not be used in advertising or publicity 
 31  # pertaining to distribution of the software without specific, written 
 32  # prior permission. 
 33  # 
 34  # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 
 35  # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 
 36  # ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 
 37  # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 
 38  # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 
 39  # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 
 40  # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
 41  # OF THIS SOFTWARE. 
 42  # -------------------------------------------------------------------- 
 43   
 44  """ 
 45  Limited XInclude support for the ElementTree package. 
 46   
 47  While lxml.etree has full support for XInclude (see 
 48  `etree.ElementTree.xinclude()`), this module provides a simpler, pure 
 49  Python, ElementTree compatible implementation that supports a simple 
 50  form of custom URL resolvers. 
 51  """ 
 52   
 53  from lxml import etree 
 54  try: 
 55      from urlparse import urljoin 
 56      from urllib2 import urlopen 
 57  except ImportError: 
 58      # Python 3 
 59      from urllib.parse import urljoin 
 60      from urllib.request import urlopen 
 61   
 62  XINCLUDE = "{http://www.w3.org/2001/XInclude}" 
 63   
 64  XINCLUDE_INCLUDE = XINCLUDE + "include" 
 65  XINCLUDE_FALLBACK = XINCLUDE + "fallback" 
 66  XINCLUDE_ITER_TAG = XINCLUDE + "*" 
 67   
 68  # For security reasons, the inclusion depth is limited to this read-only value by default. 
 69  DEFAULT_MAX_INCLUSION_DEPTH = 6 
 70   
 71   
 72  ## 
 73  # Fatal include error. 
 74   
 77   
 78   
 81   
 82   
 83  ## 
 84  # ET compatible default loader. 
 85  # This loader reads an included resource from disk. 
 86  # 
 87  # @param href Resource reference. 
 88  # @param parse Parse mode.  Either "xml" or "text". 
 89  # @param encoding Optional text encoding. 
 90  # @return The expanded resource.  If the parse mode is "xml", this 
 91  #    is an ElementTree instance.  If the parse mode is "text", this 
 92  #    is a Unicode string.  If the loader fails, it can return None 
 93  #    or raise an IOError exception. 
 94  # @throws IOError If the loader fails to load the resource. 
 95   
 97      file = open(href, 'rb') 
 98      if parse == "xml": 
 99          data = etree.parse(file).getroot() 
100      else: 
101          data = file.read() 
102          if not encoding: 
103              encoding = 'utf-8' 
104          data = data.decode(encoding) 
105      file.close() 
106      return data 
107   
108   
109  ## 
110  # Default loader used by lxml.etree - handles custom resolvers properly 
111  #  
112   
114      if parse == "xml": 
115          data = etree.parse(href, parser).getroot() 
116      else: 
117          if "://" in href: 
118              f = urlopen(href) 
119          else: 
120              f = open(href, 'rb') 
121          data = f.read() 
122          f.close() 
123          if not encoding: 
124              encoding = 'utf-8' 
125          data = data.decode(encoding) 
126      return data 
127   
128   
129  ## 
130  # Wrapper for ET compatibility - drops the parser 
131   
135      return load 
136   
137   
138  ## 
139  # Expand XInclude directives. 
140  # 
141  # @param elem Root element. 
142  # @param loader Optional resource loader.  If omitted, it defaults 
143  #     to {@link default_loader}.  If given, it should be a callable 
144  #     that implements the same interface as <b>default_loader</b>. 
145  # @param base_url The base URL of the original file, to resolve 
146  #     relative include file references. 
147  # @param max_depth The maximum number of recursive inclusions. 
148  #     Limited to reduce the risk of malicious content explosion. 
149  #     Pass None to disable the limitation. 
150  # @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded. 
151  # @throws FatalIncludeError If the function fails to include a given 
152  #     resource, or if the tree contains malformed XInclude elements. 
153  # @throws IOError If the function fails to load a given resource. 
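 154  # @returns None.  The tree is expanded in place; the value returned by 
      #     the internal helper (the node or its replacement if it was an 
      #     XInclude node) is not passed on to the caller. 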
155   
158      if max_depth is None: 
159          max_depth = -1 
160      elif max_depth < 0: 
161          raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth) 
162   
163      if base_url is None: 
164          if hasattr(elem, 'getroot'): 
165              tree = elem 
166              elem = elem.getroot() 
167          else: 
168              tree = elem.getroottree() 
169          if hasattr(tree, 'docinfo'): 
170              base_url = tree.docinfo.URL 
171      elif hasattr(elem, 'getroot'): 
172          elem = elem.getroot() 
173      _include(elem, loader, base_url, max_depth) 
174   
175   
def _include(elem, loader=None, base_url=None,
             max_depth=DEFAULT_MAX_INCLUSION_DEPTH, _parent_hrefs=None):
    """Expand the XInclude directives found by iterating over *elem*.

    Every element in the XInclude namespace that ``elem.iter()`` yields is
    processed.  ``xi:include`` elements with ``parse="xml"`` are replaced by
    the (recursively expanded) loaded element; those with ``parse="text"``
    are replaced by the loaded text, merged into the surrounding text/tail.

    @param elem Element to process.
    @param loader Optional ET-style loader.  If given, it is wrapped with
        _wrap_et_loader(); otherwise _lxml_default_loader is used.
    @param base_url URL against which each ``href`` is resolved.
    @param max_depth Remaining number of nested XML inclusions.  An XML
        include attempted while this is 0 raises an error; the value is
        decremented on every recursion, so a negative value never hits 0.
    @param _parent_hrefs Internal.  Set of hrefs currently being included
        further up the recursion, used to detect include cycles.
    @return *elem*, or the loaded replacement (element or text) when the
        processed ``xi:include`` element has no parent and therefore
        cannot be replaced inside a tree.
    @throws LimitedRecursiveIncludeError If *max_depth* is exhausted.
    @throws FatalIncludeError On a recursive include, when the loader
        returns None, on an unknown ``parse`` value, on a misplaced
        ``xi:fallback`` or on any other element in the XInclude namespace.
    """
    if loader is not None:
        load_include = _wrap_et_loader(loader)
    else:
        load_include = _lxml_default_loader

    if _parent_hrefs is None:
        _parent_hrefs = set()

    parser = elem.getroottree().parser

    # Materialise the matches up front: the loop below replaces and removes
    # elements, which must not happen while the iterator is still live.
    include_elements = list(elem.iter(XINCLUDE_ITER_TAG))

    for e in include_elements:
        if e.tag == XINCLUDE_INCLUDE:
            # process xinclude directive
            href = urljoin(base_url, e.get("href"))
            parse = e.get("parse", "xml")
            parent = e.getparent()
            if parse == "xml":
                if href in _parent_hrefs:
                    raise FatalIncludeError(
                        "recursive include of %r detected" % href
                        )
                if max_depth == 0:
                    raise LimitedRecursiveIncludeError(
                        "maximum xinclude depth reached when including file %s" % href)
                node = load_include(href, parse, parser=parser)
                if node is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                # Expand the included document itself, remembering the
                # chain of hrefs that led here for cycle detection.
                node = _include(node, loader, href, max_depth - 1, {href} | _parent_hrefs)
                if e.tail:
                    node.tail = (node.tail or "") + e.tail
                if parent is None:
                    return node  # replaced the root node!
                parent.replace(e, node)
            elif parse == "text":
                text = load_include(href, parse, encoding=e.get("encoding"))
                if text is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                # Check for a parentless include first: it may still have a
                # preceding sibling, and parent.remove() below would fail.
                if parent is None:
                    return text  # replaced the root node!
                # remove() takes the element's tail out of the tree together
                # with the element, so the tail has to be carried over
                # explicitly in both of the cases below.
                text += e.tail or ""
                predecessor = e.getprevious()
                if predecessor is not None:
                    predecessor.tail = (predecessor.tail or "") + text
                else:
                    parent.text = (parent.text or "") + text
                parent.remove(e)
            else:
                raise FatalIncludeError(
                    "unknown parse type in xi:include tag (%r)" % parse
                )
        elif e.tag == XINCLUDE_FALLBACK:
            # Fallbacks are only validated for placement, never expanded.
            parent = e.getparent()
            if parent is not None and parent.tag != XINCLUDE_INCLUDE:
                raise FatalIncludeError(
                    "xi:fallback tag must be child of xi:include (%r)" % e.tag
                    )
        else:
            raise FatalIncludeError(
                "Invalid element found in XInclude namespace (%r)" % e.tag
                )
    return elem
245   
| Home | Trees | Indices | Help | 
 | 
|---|
| Generated by Epydoc 3.0.1 on Wed Nov 27 06:31:25 2019 | http://epydoc.sourceforge.net |