| Home | Trees | Indices | Help |
|
|---|
|
|
1 #
2 # ElementTree
3 # $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $
4 #
5 # light-weight XML support for Python 2.3 and later.
6 #
7 # history (since 1.2.6):
8 # 2005-11-12 fl added tostringlist/fromstringlist helpers
9 # 2006-07-05 fl merged in selected changes from the 1.3 sandbox
10 # 2006-07-05 fl removed support for 2.1 and earlier
11 # 2007-06-21 fl added deprecation/future warnings
12 # 2007-08-25 fl added doctype hook, added parser version attribute etc
13 # 2007-08-26 fl added new serializer code (better namespace handling, etc)
14 # 2007-08-27 fl warn for broken /tag searches on tree level
15 # 2007-09-02 fl added html/text methods to serializer (experimental)
16 # 2007-09-05 fl added method argument to tostring/tostringlist
17 # 2007-09-06 fl improved error handling
18 # 2007-09-13 fl added itertext, iterfind; assorted cleanups
19 # 2007-12-15 fl added C14N hooks, copy method (experimental)
20 #
21 # Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved.
22 #
23 # fredrik@pythonware.com
24 # http://www.pythonware.com
25 #
26 # --------------------------------------------------------------------
27 # The ElementTree toolkit is
28 #
29 # Copyright (c) 1999-2008 by Fredrik Lundh
30 #
31 # By obtaining, using, and/or copying this software and/or its
32 # associated documentation, you agree that you have read, understood,
33 # and will comply with the following terms and conditions:
34 #
35 # Permission to use, copy, modify, and distribute this software and
36 # its associated documentation for any purpose and without fee is
37 # hereby granted, provided that the above copyright notice appears in
38 # all copies, and that both that copyright notice and this permission
39 # notice appear in supporting documentation, and that the name of
40 # Secret Labs AB or the author not be used in advertising or publicity
41 # pertaining to distribution of the software without specific, written
42 # prior permission.
43 #
44 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
45 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
46 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
47 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
48 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
49 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
50 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
51 # OF THIS SOFTWARE.
52 # --------------------------------------------------------------------
53
54 # Licensed to PSF under a Contributor Agreement.
55 # See http://www.python.org/psf/license for licensing details.
56
57 __all__ = [
58 # public symbols
59 "Comment",
60 "dump",
61 "Element", "ElementTree",
62 "fromstring", "fromstringlist",
63 "iselement", "iterparse",
64 "parse", "ParseError",
65 "PI", "ProcessingInstruction",
66 "QName",
67 "SubElement",
68 "tostring", "tostringlist",
69 "TreeBuilder",
70 "VERSION",
71 "XML",
72 "XMLParser", "XMLTreeBuilder",
73 ]
74
75 VERSION = "1.3.0"
76
77 ##
78 # The <b>Element</b> type is a flexible container object, designed to
79 # store hierarchical data structures in memory. The type can be
80 # described as a cross between a list and a dictionary.
81 # <p>
82 # Each element has a number of properties associated with it:
83 # <ul>
84 # <li>a <i>tag</i>. This is a string identifying what kind of data
85 # this element represents (the element type, in other words).</li>
86 # <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
87 # <li>a <i>text</i> string.</li>
88 # <li>an optional <i>tail</i> string.</li>
89 # <li>a number of <i>child elements</i>, stored in a Python sequence</li>
90 # </ul>
91 #
92 # To create an element instance, use the {@link #Element} constructor
93 # or the {@link #SubElement} factory function.
94 # <p>
95 # The {@link #ElementTree} class can be used to wrap an element
96 # structure, and convert it from and to XML.
97 ##
98
99 import sys
100 import re
101 import warnings
102
103
105 # emulate pre-1.2 find/findtext/findall behaviour
112 elem = self.find(element, tag)
113 if elem is None:
114 return default
115 return elem.text or ""
117 if tag[:3] == ".//":
118 for elem in element.iter(tag[3:]):
119 yield elem
120 for elem in element:
121 if elem.tag == tag:
122 yield elem
125
126 try:
127 from . import ElementPath
128 except ImportError:
129 ElementPath = _SimpleElementPath()
130
131 ##
132 # Parser error. This is a subclass of <b>SyntaxError</b>.
133 # <p>
134 # In addition to the exception value, an exception instance contains a
135 # specific exception code in the <b>code</b> attribute, and the line and
136 # column of the error in the <b>position</b> attribute.
137
140
141 # --------------------------------------------------------------------
142
143 ##
144 # Checks if an object appears to be a valid element object.
145 #
146 # @param element An element instance.
147 # @return A true value if this is an element object.
148 # @defreturn flag
149
151 # FIXME: not sure about this; might be a better idea to look
152 # for tag/attrib/text attributes
153 return isinstance(element, Element) or hasattr(element, "tag")
154
155 ##
156 # Element class. This class defines the Element interface, and
157 # provides a reference implementation of this interface.
158 # <p>
159 # The element name, attribute names, and attribute values can be
160 # either ASCII strings (ordinary Python strings containing only 7-bit
161 # ASCII characters) or Unicode strings.
162 #
163 # @param tag The element name.
164 # @param attrib An optional dictionary, containing element attributes.
165 # @param **extra Additional attributes, given as keyword arguments.
166 # @see Element
167 # @see SubElement
168 # @see Comment
169 # @see ProcessingInstruction
170
172 # <tag attrib>text<child/>...</tag>tail
173
174 ##
175 # (Attribute) Element tag.
176
177 tag = None
178
179 ##
180 # (Attribute) Element attribute dictionary. Where possible, use
181 # {@link #Element.get},
182 # {@link #Element.set},
183 # {@link #Element.keys}, and
184 # {@link #Element.items} to access
185 # element attributes.
186
187 attrib = None
188
189 ##
190 # (Attribute) Text before first subelement. This is either a
191 # string or the value None. Note that if there was no text, this
192 # attribute may be either None or an empty string, depending on
193 # the parser.
194
195 text = None
196
197 ##
198 # (Attribute) Text after this element's end tag, but before the
199 # next sibling element's start tag. This is either a string or
200 # the value None. Note that if there was no text, this attribute
201 # may be either None or an empty string, depending on the parser.
202
203 tail = None # text after end tag, if any
204
205 # constructor
206
208 attrib = attrib.copy()
209 attrib.update(extra)
210 self.tag = tag
211 self.attrib = attrib
212 self._children = []
213
215 return "<Element %s at 0x%x>" % (repr(self.tag), id(self))
216
217 ##
218 # Creates a new element object of the same type as this element.
219 #
220 # @param tag Element tag.
221 # @param attrib Element attributes, given as a dictionary.
222 # @return A new element instance.
223
226
227 ##
228 # (Experimental) Copies the current element. This creates a
229 # shallow copy; subelements will be shared with the original tree.
230 #
231 # @return A new element instance.
232
234 elem = self.makeelement(self.tag, self.attrib)
235 elem.text = self.text
236 elem.tail = self.tail
237 elem[:] = self
238 return elem
239
240 ##
241 # Returns the number of subelements. Note that this only counts
242 # full elements; to check if there's any content in an element, you
243 # have to check both the length and the <b>text</b> attribute.
244 #
245 # @return The number of subelements.
246
249
251 warnings.warn(
252 "The behavior of this method will change in future versions. "
253 "Use specific 'len(elem)' or 'elem is not None' test instead.",
254 FutureWarning, stacklevel=2
255 )
256 return len(self._children) != 0 # emulate old behaviour, for now
257
258 ##
259 # Returns the given subelement, by index.
260 #
261 # @param index What subelement to return.
262 # @return The given subelement.
263 # @exception IndexError If the given element does not exist.
264
266 return self._children[index]
267
268 ##
269 # Replaces the given subelement, by index.
270 #
271 # @param index What subelement to replace.
272 # @param element The new element value.
273 # @exception IndexError If the given element does not exist.
274
276 # if isinstance(index, slice):
277 # for elt in element:
278 # assert iselement(elt)
279 # else:
280 # assert iselement(element)
281 self._children[index] = element
282
283 ##
284 # Deletes the given subelement, by index.
285 #
286 # @param index What subelement to delete.
287 # @exception IndexError If the given element does not exist.
288
290 del self._children[index]
291
292 ##
293 # Adds a subelement to the end of this element. In document order,
294 # the new element will appear after the last existing subelement (or
295 # directly after the text, if it's the first subelement), but before
296 # the end tag for this element.
297 #
298 # @param element The element to add.
299
303
304 ##
305 # Appends subelements from a sequence.
306 #
307 # @param elements A sequence object with zero or more elements.
308 # @since 1.3
309
314
315 ##
316 # Inserts a subelement at the given position in this element.
317 #
318 # @param index Where to insert the new subelement.
319
323
324 ##
325 # Removes a matching subelement. Unlike the <b>find</b> methods,
326 # this method compares elements based on identity, not on tag
327 # value or contents. To remove subelements by other means, the
328 # easiest way is often to use a list comprehension to select what
329 # elements to keep, and use slice assignment to update the parent
330 # element.
331 #
332 # @param element What element to remove.
333 # @exception ValueError If a matching element could not be found.
334
338
339 ##
340 # (Deprecated) Returns all subelements. The elements are returned
341 # in document order.
342 #
343 # @return A list of subelements.
344 # @defreturn list of Element instances
345
347 warnings.warn(
348 "This method will be removed in future versions. "
349 "Use 'list(elem)' or iteration over elem instead.",
350 DeprecationWarning, stacklevel=2
351 )
352 return self._children
353
354 ##
355 # Finds the first matching subelement, by tag name or path.
356 #
357 # @param path What element to look for.
358 # @keyparam namespaces Optional namespace prefix map.
359 # @return The first matching element, or None if no element was found.
360 # @defreturn Element or None
361
364
365 ##
366 # Finds text for the first matching subelement, by tag name or path.
367 #
368 # @param path What element to look for.
369 # @param default What to return if the element was not found.
370 # @keyparam namespaces Optional namespace prefix map.
371 # @return The text content of the first matching element, or the
372 # default value if no element was found. Note that if the element
373 # is found, but has no text content, this method returns an
374 # empty string.
375 # @defreturn string
376
379
380 ##
381 # Finds all matching subelements, by tag name or path.
382 #
383 # @param path What element to look for.
384 # @keyparam namespaces Optional namespace prefix map.
385 # @return A list or other sequence containing all matching elements,
386 # in document order.
387 # @defreturn list of Element instances
388
391
392 ##
393 # Finds all matching subelements, by tag name or path.
394 #
395 # @param path What element to look for.
396 # @keyparam namespaces Optional namespace prefix map.
397 # @return An iterator or sequence containing all matching elements,
398 # in document order.
399 # @defreturn a generated sequence of Element instances
400
403
404 ##
405 # Resets an element. This function removes all subelements, clears
406 # all attributes, and sets the <b>text</b> and <b>tail</b> attributes
407 # to None.
408
413
414 ##
415 # Gets an element attribute. Equivalent to <b>attrib.get</b>, but
416 # some implementations may handle this a bit more efficiently.
417 #
418 # @param key What attribute to look for.
419 # @param default What to return if the attribute was not found.
420 # @return The attribute value, or the default value, if the
421 # attribute was not found.
422 # @defreturn string or None
423
426
427 ##
428 # Sets an element attribute. Equivalent to <b>attrib[key] = value</b>,
429 # but some implementations may handle this a bit more efficiently.
430 #
431 # @param key What attribute to set.
432 # @param value The attribute value.
433
436
437 ##
438 # Gets a list of attribute names. The names are returned in an
439 # arbitrary order (just like for an ordinary Python dictionary).
440 # Equivalent to <b>attrib.keys()</b>.
441 #
442 # @return A list of element attribute names.
443 # @defreturn list of strings
444
447
448 ##
449 # Gets element attributes, as a sequence. The attributes are
450 # returned in an arbitrary order. Equivalent to <b>attrib.items()</b>.
451 #
452 # @return A list of (name, value) tuples for all attributes.
453 # @defreturn list of (string, string) tuples
454
457
458 ##
459 # Creates a tree iterator. The iterator loops over this element
460 # and all subelements, in document order, and returns all elements
461 # with a matching tag.
462 # <p>
463 # If the tree structure is modified during iteration, new or removed
464 # elements may or may not be included. To get a stable set, use the
465 # list() function on the iterator, and loop over the resulting list.
466 #
467 # @param tag What tags to look for (default is to return all elements).
468 # @return An iterator containing all the matching elements.
469 # @defreturn iterator
470
472 if tag == "*":
473 tag = None
474 if tag is None or self.tag == tag:
475 yield self
476 for e in self._children:
477 for e in e.iter(tag):
478 yield e
479
480 # compatibility
482 # Change for a DeprecationWarning in 1.4
483 warnings.warn(
484 "This method will be removed in future versions. "
485 "Use 'elem.iter()' or 'list(elem.iter())' instead.",
486 PendingDeprecationWarning, stacklevel=2
487 )
488 return list(self.iter(tag))
489
490 ##
491 # Creates a text iterator. The iterator loops over this element
492 # and all subelements, in document order, and returns all inner
493 # text.
494 #
495 # @return An iterator containing all inner text.
496 # @defreturn iterator
497
509
510 # compatibility
511 _Element = _ElementInterface = Element
512
513 ##
514 # Subelement factory. This function creates an element instance, and
515 # appends it to an existing element.
516 # <p>
517 # The element name, attribute names, and attribute values can be
518 # either 8-bit ASCII strings or Unicode strings.
519 #
520 # @param parent The parent element.
521 # @param tag The subelement name.
522 # @param attrib An optional dictionary, containing element attributes.
523 # @param **extra Additional attributes, given as keyword arguments.
524 # @return An element instance.
525 # @defreturn Element
526
528 attrib = attrib.copy()
529 attrib.update(extra)
530 element = parent.makeelement(tag, attrib)
531 parent.append(element)
532 return element
533
534 ##
535 # Comment element factory. This factory function creates a special
536 # element that will be serialized as an XML comment by the standard
537 # serializer.
538 # <p>
539 # The comment string can be either an 8-bit ASCII string or a Unicode
540 # string.
541 #
542 # @param text A string containing the comment string.
543 # @return An element instance, representing a comment.
544 # @defreturn Element
545
550
551 ##
552 # PI element factory. This factory function creates a special element
553 # that will be serialized as an XML processing instruction by the standard
554 # serializer.
555 #
556 # @param target A string containing the PI target.
557 # @param text A string containing the PI contents, if any.
558 # @return An element instance, representing a PI.
559 # @defreturn Element
560
562 element = Element(ProcessingInstruction)
563 element.text = target
564 if text:
565 element.text = element.text + " " + text
566 return element
567
568 PI = ProcessingInstruction
569
570 ##
571 # QName wrapper. This can be used to wrap a QName attribute value, in
572 # order to get proper namespace handling on output.
573 #
574 # @param text A string containing the QName value, in the form {uri}local,
575 # or, if the tag argument is given, the URI part of a QName.
576 # @param tag Optional tag. If given, the first argument is interpreted as
577 # a URI, and this argument is interpreted as a local name.
578 # @return An opaque object, representing the QName.
579
593
594 # --------------------------------------------------------------------
595
596 ##
597 # ElementTree wrapper class. This class represents an entire element
598 # hierarchy, and adds some extra support for serialization to and from
599 # standard XML.
600 #
601 # @param element Optional root element.
602 # @keyparam file Optional file handle or file name. If given, the
603 # tree is initialized with the contents of this XML file.
604
606
608 # assert element is None or iselement(element)
609 self._root = element # first node
610 if file:
611 self.parse(file)
612
613 ##
614 # Gets the root element for this tree.
615 #
616 # @return An element instance.
617 # @defreturn Element
618
621
622 ##
623 # Replaces the root element for this tree. This discards the
624 # current contents of the tree, and replaces it with the given
625 # element. Use with care.
626 #
627 # @param element An element instance.
628
632
633 ##
634 # Loads an external XML document into this element tree.
635 #
636 # @param source A file name or file object. If a file object is
637 # given, it only has to implement a <b>read(n)</b> method.
638 # @keyparam parser An optional parser instance. If not given, the
639 # standard {@link XMLParser} parser is used.
640 # @return The document root element.
641 # @defreturn Element
642 # @exception ParseError If the parser fails to parse the document.
643
645 close_source = False
646 if not hasattr(source, "read"):
647 source = open(source, "rb")
648 close_source = True
649 try:
650 if not parser:
651 parser = XMLParser(target=TreeBuilder())
652 while 1:
653 data = source.read(65536)
654 if not data:
655 break
656 parser.feed(data)
657 self._root = parser.close()
658 return self._root
659 finally:
660 if close_source:
661 source.close()
662
663 ##
664 # Creates a tree iterator for the root element. The iterator loops
665 # over all elements in this tree, in document order.
666 #
667 # @param tag What tags to look for (default is to return all elements)
668 # @return An iterator.
669 # @defreturn iterator
670
674
675 # compatibility
677 # Change for a DeprecationWarning in 1.4
678 warnings.warn(
679 "This method will be removed in future versions. "
680 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
681 PendingDeprecationWarning, stacklevel=2
682 )
683 return list(self.iter(tag))
684
685 ##
686 # Same as getroot().find(path), starting at the root of the
687 # tree.
688 #
689 # @param path What element to look for.
690 # @keyparam namespaces Optional namespace prefix map.
691 # @return The first matching element, or None if no element was found.
692 # @defreturn Element or None
693
695 # assert self._root is not None
696 if path[:1] == "/":
697 path = "." + path
698 warnings.warn(
699 "This search is broken in 1.3 and earlier, and will be "
700 "fixed in a future version. If you rely on the current "
701 "behaviour, change it to %r" % path,
702 FutureWarning, stacklevel=2
703 )
704 return self._root.find(path, namespaces)
705
706 ##
707 # Same as getroot().findtext(path), starting at the root of the tree.
708 #
709 # @param path What element to look for.
710 # @param default What to return if the element was not found.
711 # @keyparam namespaces Optional namespace prefix map.
712 # @return The text content of the first matching element, or the
713 # default value if no element was found. Note that if the element
714 # is found, but has no text content, this method returns an
715 # empty string.
716 # @defreturn string
717
719 # assert self._root is not None
720 if path[:1] == "/":
721 path = "." + path
722 warnings.warn(
723 "This search is broken in 1.3 and earlier, and will be "
724 "fixed in a future version. If you rely on the current "
725 "behaviour, change it to %r" % path,
726 FutureWarning, stacklevel=2
727 )
728 return self._root.findtext(path, default, namespaces)
729
730 ##
731 # Same as getroot().findall(path), starting at the root of the tree.
732 #
733 # @param path What element to look for.
734 # @keyparam namespaces Optional namespace prefix map.
735 # @return A list or iterator containing all matching elements,
736 # in document order.
737 # @defreturn list of Element instances
738
740 # assert self._root is not None
741 if path[:1] == "/":
742 path = "." + path
743 warnings.warn(
744 "This search is broken in 1.3 and earlier, and will be "
745 "fixed in a future version. If you rely on the current "
746 "behaviour, change it to %r" % path,
747 FutureWarning, stacklevel=2
748 )
749 return self._root.findall(path, namespaces)
750
751 ##
752 # Finds all matching subelements, by tag name or path.
753 # Same as getroot().iterfind(path).
754 #
755 # @param path What element to look for.
756 # @keyparam namespaces Optional namespace prefix map.
757 # @return An iterator or sequence containing all matching elements,
758 # in document order.
759 # @defreturn a generated sequence of Element instances
760
762 # assert self._root is not None
763 if path[:1] == "/":
764 path = "." + path
765 warnings.warn(
766 "This search is broken in 1.3 and earlier, and will be "
767 "fixed in a future version. If you rely on the current "
768 "behaviour, change it to %r" % path,
769 FutureWarning, stacklevel=2
770 )
771 return self._root.iterfind(path, namespaces)
772
773 ##
774 # Writes the element tree to a file, as XML.
775 #
776 # @def write(file, **options)
777 # @param file A file name, or a file object opened for writing.
778 # @param **options Options, given as keyword arguments.
779 # @keyparam encoding Optional output encoding (default is US-ASCII).
780 # @keyparam xml_declaration Controls if an XML declaration should
781 # be added to the file. Use False for never, True for always,
782 # None for only if not US-ASCII or UTF-8. None is default.
783 # @keyparam default_namespace Sets the default XML namespace (for "xmlns").
784 # @keyparam method Optional output method ("xml", "html", "text" or
785 # "c14n"; default is "xml").
786
def write(self, file_or_filename,
          # keyword arguments
          encoding=None,
          xml_declaration=None,
          default_namespace=None,
          method=None):
    # Serializes the tree rooted at self._root.
    #
    # file_or_filename   Object with a "write" method, or a file name that
    #                    is opened in binary mode and closed again here.
    # encoding           Output encoding; defaults to "utf-8" for the
    #                    "c14n" method and to "us-ascii" otherwise.
    # xml_declaration    True: always emit a declaration; False: never;
    #                    None: only if the encoding is neither "utf-8"
    #                    nor "us-ascii".  Only the "xml" method emits one.
    # default_namespace  Passed through to _namespaces().
    # method             Key into the _serialize table; defaults to "xml".
    #
    # Raises ValueError if the method is not a known serializer.

    # assert self._root is not None
    if not method:
        method = "xml"
    elif method not in _serialize:
        # FIXME: raise an ImportError for c14n if ElementC14N is missing?
        raise ValueError("unknown method %r" % method)
    if not encoding:
        if method == "c14n":
            encoding = "utf-8"
        else:
            encoding = "us-ascii"
    # Decide on the declaration independently of whether the encoding was
    # defaulted above, so that xml_declaration=True is always honoured.
    # With a defaulted encoding and xml_declaration=None the result is
    # unchanged (no declaration).
    want_declaration = method == "xml" and (
        xml_declaration or (
            xml_declaration is None and
            encoding not in ("utf-8", "us-ascii")
        )
    )
    if hasattr(file_or_filename, "write"):
        stream = file_or_filename
    else:
        stream = open(file_or_filename, "wb")
    try:
        write = stream.write
        if want_declaration:
            write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
        if method == "text":
            _serialize_text(write, self._root, encoding)
        else:
            qnames, namespaces = _namespaces(
                self._root, encoding, default_namespace
            )
            serialize = _serialize[method]
            serialize(write, self._root, encoding, qnames, namespaces)
    finally:
        # Only close what was opened here, and do so even when
        # serialization fails part-way through.
        if file_or_filename is not stream:
            stream.close()
823
827
828 # --------------------------------------------------------------------
829 # serialization support
830
832 # identify namespaces used in this tree
833
834 # maps qnames to *encoded* prefix:local names
835 qnames = {None: None}
836
837 # maps uri:s to prefixes
838 namespaces = {}
839 if default_namespace:
840 namespaces[default_namespace] = ""
841
842 def encode(text):
843 return text.encode(encoding)
844
845 def add_qname(qname):
846 # calculate serialized qname representation
847 try:
848 if qname[:1] == "{":
849 uri, tag = qname[1:].rsplit("}", 1)
850 prefix = namespaces.get(uri)
851 if prefix is None:
852 prefix = _namespace_map.get(uri)
853 if prefix is None:
854 prefix = "ns%d" % len(namespaces)
855 if prefix != "xml":
856 namespaces[uri] = prefix
857 if prefix:
858 qnames[qname] = encode("%s:%s" % (prefix, tag))
859 else:
860 qnames[qname] = encode(tag) # default element
861 else:
862 if default_namespace:
863 # FIXME: can this be handled in XML 1.0?
864 raise ValueError(
865 "cannot use non-qualified names with "
866 "default_namespace option"
867 )
868 qnames[qname] = encode(qname)
869 except TypeError:
870 _raise_serialization_error(qname)
871
872 # populate qname and namespaces table
873 try:
874 iterate = elem.iter
875 except AttributeError:
876 iterate = elem.getiterator # cET compatibility
877 for elem in iterate():
878 tag = elem.tag
879 if isinstance(tag, QName):
880 if tag.text not in qnames:
881 add_qname(tag.text)
882 elif isinstance(tag, basestring):
883 if tag not in qnames:
884 add_qname(tag)
885 elif tag is not None and tag is not Comment and tag is not PI:
886 _raise_serialization_error(tag)
887 for key, value in elem.items():
888 if isinstance(key, QName):
889 key = key.text
890 if key not in qnames:
891 add_qname(key)
892 if isinstance(value, QName) and value.text not in qnames:
893 add_qname(value.text)
894 text = elem.text
895 if isinstance(text, QName) and text.text not in qnames:
896 add_qname(text.text)
897 return qnames, namespaces
898
900 tag = elem.tag
901 text = elem.text
902 if tag is Comment:
903 write("<!--%s-->" % _encode(text, encoding))
904 elif tag is ProcessingInstruction:
905 write("<?%s?>" % _encode(text, encoding))
906 else:
907 tag = qnames[tag]
908 if tag is None:
909 if text:
910 write(_escape_cdata(text, encoding))
911 for e in elem:
912 _serialize_xml(write, e, encoding, qnames, None)
913 else:
914 write("<" + tag)
915 items = elem.items()
916 if items or namespaces:
917 if namespaces:
918 for v, k in sorted(namespaces.items(),
919 key=lambda x: x[1]): # sort on prefix
920 if k:
921 k = ":" + k
922 write(" xmlns%s=\"%s\"" % (
923 k.encode(encoding),
924 _escape_attrib(v, encoding)
925 ))
926 for k, v in sorted(items): # lexical order
927 if isinstance(k, QName):
928 k = k.text
929 if isinstance(v, QName):
930 v = qnames[v.text]
931 else:
932 v = _escape_attrib(v, encoding)
933 write(" %s=\"%s\"" % (qnames[k], v))
934 if text or len(elem):
935 write(">")
936 if text:
937 write(_escape_cdata(text, encoding))
938 for e in elem:
939 _serialize_xml(write, e, encoding, qnames, None)
940 write("</" + tag + ">")
941 else:
942 write(" />")
943 if elem.tail:
944 write(_escape_cdata(elem.tail, encoding))
945
946 HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
947 "img", "input", "isindex", "link", "meta", "param")
948
949 try:
950 HTML_EMPTY = set(HTML_EMPTY)
951 except NameError:
952 pass
953
955 tag = elem.tag
956 text = elem.text
957 if tag is Comment:
958 write("<!--%s-->" % _escape_cdata(text, encoding))
959 elif tag is ProcessingInstruction:
960 write("<?%s?>" % _escape_cdata(text, encoding))
961 else:
962 tag = qnames[tag]
963 if tag is None:
964 if text:
965 write(_escape_cdata(text, encoding))
966 for e in elem:
967 _serialize_html(write, e, encoding, qnames, None)
968 else:
969 write("<" + tag)
970 items = elem.items()
971 if items or namespaces:
972 if namespaces:
973 for v, k in sorted(namespaces.items(),
974 key=lambda x: x[1]): # sort on prefix
975 if k:
976 k = ":" + k
977 write(" xmlns%s=\"%s\"" % (
978 k.encode(encoding),
979 _escape_attrib(v, encoding)
980 ))
981 for k, v in sorted(items): # lexical order
982 if isinstance(k, QName):
983 k = k.text
984 if isinstance(v, QName):
985 v = qnames[v.text]
986 else:
987 v = _escape_attrib_html(v, encoding)
988 # FIXME: handle boolean attributes
989 write(" %s=\"%s\"" % (qnames[k], v))
990 write(">")
991 ltag = tag.lower()
992 if text:
993 if ltag == "script" or ltag == "style":
994 write(_encode(text, encoding))
995 else:
996 write(_escape_cdata(text, encoding))
997 for e in elem:
998 _serialize_html(write, e, encoding, qnames, None)
999 if ltag not in HTML_EMPTY:
1000 write("</" + tag + ">")
1001 if elem.tail:
1002 write(_escape_cdata(elem.tail, encoding))
1003
1005 for part in elem.itertext():
1006 write(part.encode(encoding))
1007 if elem.tail:
1008 write(elem.tail.encode(encoding))
1009
1010 _serialize = {
1011 "xml": _serialize_xml,
1012 "html": _serialize_html,
1013 "text": _serialize_text,
1014 # this optional method is imported at the end of the module
1015 # "c14n": _serialize_c14n,
1016 }
1017
1018 ##
1019 # Registers a namespace prefix. The registry is global, and any
1020 # existing mapping for either the given prefix or the namespace URI
1021 # will be removed.
1022 #
1023 # @param prefix Namespace prefix.
1024 # @param uri Namespace uri. Tags and attributes in this namespace
1025 # will be serialized with the given prefix, if at all possible.
1026 # @exception ValueError If the prefix is reserved, or is otherwise
1027 # invalid.
1028
1030 if re.match("ns\d+$", prefix):
1031 raise ValueError("Prefix format reserved for internal use")
1032 for k, v in _namespace_map.items():
1033 if k == uri or v == prefix:
1034 del _namespace_map[k]
1035 _namespace_map[uri] = prefix
1036
1037 _namespace_map = {
1038 # "well-known" namespace prefixes
1039 "http://www.w3.org/XML/1998/namespace": "xml",
1040 "http://www.w3.org/1999/xhtml": "html",
1041 "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
1042 "http://schemas.xmlsoap.org/wsdl/": "wsdl",
1043 # xml schema
1044 "http://www.w3.org/2001/XMLSchema": "xs",
1045 "http://www.w3.org/2001/XMLSchema-instance": "xsi",
1046 # dublin core
1047 "http://purl.org/dc/elements/1.1/": "dc",
1048 }
1049
1054
1056 try:
1057 return text.encode(encoding, "xmlcharrefreplace")
1058 except (TypeError, AttributeError):
1059 _raise_serialization_error(text)
1060
1062 # escape character data
1063 try:
1064 # it's worth avoiding do-nothing calls for strings that are
1065 # shorter than 500 character, or so. assume that's, by far,
1066 # the most common case in most applications.
1067 if "&" in text:
1068 text = text.replace("&", "&")
1069 if "<" in text:
1070 text = text.replace("<", "<")
1071 if ">" in text:
1072 text = text.replace(">", ">")
1073 return text.encode(encoding, "xmlcharrefreplace")
1074 except (TypeError, AttributeError):
1075 _raise_serialization_error(text)
1076
1078 # escape attribute value
1079 try:
1080 if "&" in text:
1081 text = text.replace("&", "&")
1082 if "<" in text:
1083 text = text.replace("<", "<")
1084 if ">" in text:
1085 text = text.replace(">", ">")
1086 if "\"" in text:
1087 text = text.replace("\"", """)
1088 if "\n" in text:
1089 text = text.replace("\n", " ")
1090 return text.encode(encoding, "xmlcharrefreplace")
1091 except (TypeError, AttributeError):
1092 _raise_serialization_error(text)
1093
1095 # escape attribute value
1096 try:
1097 if "&" in text:
1098 text = text.replace("&", "&")
1099 if ">" in text:
1100 text = text.replace(">", ">")
1101 if "\"" in text:
1102 text = text.replace("\"", """)
1103 return text.encode(encoding, "xmlcharrefreplace")
1104 except (TypeError, AttributeError):
1105 _raise_serialization_error(text)
1106
1107 # --------------------------------------------------------------------
1108
1109 ##
1110 # Generates a string representation of an XML element, including all
1111 # subelements.
1112 #
1113 # @param element An Element instance.
1114 # @keyparam encoding Optional output encoding (default is US-ASCII).
1115 # @keyparam method Optional output method ("xml", "html", "text" or
1116 # "c14n"; default is "xml").
1117 # @return An encoded string containing the XML data.
1118 # @defreturn string
1119
def tostring(element, encoding=None, method=None):
    """Serialize *element* and its subelements to a single string.

    The element is wrapped in an ElementTree and written to a minimal
    file-like object whose ``write`` simply collects the fragments; the
    fragments are then joined.
    """
    class dummy:
        # bare attribute holder; only ``write`` is ever set on it
        pass
    data = []
    file = dummy()
    file.write = data.append
    ElementTree(element).write(file, encoding, method=method)
    return "".join(data)
1128
1129 ##
1130 # Generates a string representation of an XML element, including all
1131 # subelements. The string is returned as a sequence of string fragments.
1132 #
1133 # @param element An Element instance.
1134 # @keyparam encoding Optional output encoding (default is US-ASCII).
1135 # @keyparam method Optional output method ("xml", "html", "text" or
1136 # "c14n"; default is "xml").
1137 # @return A sequence object containing the XML data.
1138 # @defreturn sequence
1139 # @since 1.3
1140
def tostringlist(element, encoding=None, method=None):
    """Serialize *element* and its subelements to a list of fragments.

    The element is wrapped in an ElementTree and written to a minimal
    file-like object whose ``write`` appends each fragment to the list
    that is returned.
    """
    class dummy:
        # bare attribute holder; only ``write`` is ever set on it
        pass
    data = []
    file = dummy()
    file.write = data.append
    ElementTree(element).write(file, encoding, method=method)
    # FIXME: merge small fragments into larger parts
    return data
1150
1151 ##
1152 # Writes an element tree or element structure to sys.stdout. This
1153 # function should be used for debugging only.
1154 # <p>
1155 # The exact output format is implementation dependent. In this
1156 # version, it's written as an ordinary XML file.
1157 #
1158 # @param elem An element tree or an individual element.
1159
def dump(elem):
    """Write an element tree or a single element to sys.stdout.

    Intended for debugging.  A bare element is wrapped in an ElementTree
    first.  A newline is added unless the root element's tail already
    ends with one.
    """
    # debugging
    if not isinstance(elem, ElementTree):
        elem = ElementTree(elem)
    elem.write(sys.stdout)
    tail = elem.getroot().tail
    if not tail or tail[-1] != "\n":
        sys.stdout.write("\n")
1168
1169 # --------------------------------------------------------------------
1170 # parsing
1171
1172 ##
1173 # Parses an XML document into an element tree.
1174 #
1175 # @param source A filename or file object containing XML data.
1176 # @param parser An optional parser instance. If not given, the
1177 # standard {@link XMLParser} parser is used.
1178 # @return An ElementTree instance
1179
1184
1185 ##
1186 # Parses an XML document into an element tree incrementally, and reports
1187 # what's going on to the user.
1188 #
1189 # @param source A filename or file object containing XML data.
1190 # @param events A list of events to report back. If omitted, only "end"
1191 # events are reported.
1192 # @param parser An optional parser instance. If not given, the
1193 # standard {@link XMLParser} parser is used.
1194 # @return A (event, elem) iterator.
1195
def iterparse(source, events=None, parser=None):
    """Parse *source* incrementally and return an (event, elem) iterator.

    *source* may be a filename or an object with a ``read`` method.  A
    file opened here is owned by the returned iterator, which is told to
    close it; a file object passed in by the caller is left open.
    """
    close_source = False
    if not hasattr(source, "read"):
        source = open(source, "rb")
        close_source = True
    try:
        if not parser:
            parser = XMLParser(target=TreeBuilder())
        return _IterParseIterator(source, events, parser, close_source)
    except:
        # cleanup only: whatever went wrong, do not leak the file we
        # opened ourselves, then let the original exception propagate
        if close_source:
            source.close()
        raise
1209
1211
1213 self._file = source
1214 self._close_file = close_source
1215 self._events = []
1216 self._index = 0
1217 self._error = None
1218 self.root = self._root = None
1219 self._parser = parser
1220 # wire up the parser for event reporting
1221 parser = self._parser._parser
1222 append = self._events.append
1223 if events is None:
1224 events = ["end"]
1225 for event in events:
1226 if event == "start":
1227 try:
1228 parser.ordered_attributes = 1
1229 parser.specified_attributes = 1
1230 def handler(tag, attrib_in, event=event, append=append,
1231 start=self._parser._start_list):
1232 append((event, start(tag, attrib_in)))
1233 parser.StartElementHandler = handler
1234 except AttributeError:
1235 def handler(tag, attrib_in, event=event, append=append,
1236 start=self._parser._start):
1237 append((event, start(tag, attrib_in)))
1238 parser.StartElementHandler = handler
1239 elif event == "end":
1240 def handler(tag, event=event, append=append,
1241 end=self._parser._end):
1242 append((event, end(tag)))
1243 parser.EndElementHandler = handler
1244 elif event == "start-ns":
1245 def handler(prefix, uri, event=event, append=append):
1246 try:
1247 uri = (uri or "").encode("ascii")
1248 except UnicodeError:
1249 pass
1250 append((event, (prefix or "", uri or "")))
1251 parser.StartNamespaceDeclHandler = handler
1252 elif event == "end-ns":
1253 def handler(prefix, event=event, append=append):
1254 append((event, None))
1255 parser.EndNamespaceDeclHandler = handler
1256 else:
1257 raise ValueError("unknown event %r" % event)
1258
1260 try:
1261 while 1:
1262 try:
1263 item = self._events[self._index]
1264 self._index += 1
1265 return item
1266 except IndexError:
1267 pass
1268 if self._error:
1269 e = self._error
1270 self._error = None
1271 raise e
1272 if self._parser is None:
1273 self.root = self._root
1274 break
1275 # load event buffer
1276 del self._events[:]
1277 self._index = 0
1278 data = self._file.read(16384)
1279 if data:
1280 try:
1281 self._parser.feed(data)
1282 except SyntaxError as exc:
1283 self._error = exc
1284 else:
1285 self._root = self._parser.close()
1286 self._parser = None
1287 except:
1288 if self._close_file:
1289 self._file.close()
1290 raise
1291 if self._close_file:
1292 self._file.close()
1293 raise StopIteration
1294
1297
1298 ##
1299 # Parses an XML document from a string constant. This function can
1300 # be used to embed "XML literals" in Python code.
1301 #
1302 # @param text A string containing XML data.
1303 # @param parser An optional parser instance. If not given, the
1304 # standard {@link XMLParser} parser is used.
1305 # @return An Element instance.
1306 # @defreturn Element
1307
def XML(text, parser=None):
    """Parse an XML document held in the string *text*.

    Uses *parser* if given, otherwise a new XMLParser feeding a
    TreeBuilder, and returns whatever the parser's ``close`` returns.
    """
    if not parser:
        parser = XMLParser(target=TreeBuilder())
    parser.feed(text)
    return parser.close()
1313
1314 ##
1315 # Parses an XML document from a string constant, and also returns
1316 # a dictionary which maps from element id:s to elements.
1317 #
1318 # @param text A string containing XML data.
1319 # @param parser An optional parser instance. If not given, the
1320 # standard {@link XMLParser} parser is used.
1321 # @return A tuple containing an Element instance and a dictionary.
1322 # @defreturn (Element, dictionary)
1323
def XMLID(text, parser=None):
    """Parse an XML document held in *text* and index elements by id.

    Returns a ``(tree, ids)`` tuple, where *ids* maps every non-empty
    "id" attribute value found while iterating the tree to its element.
    If the same id occurs more than once, the last element wins.
    """
    if not parser:
        parser = XMLParser(target=TreeBuilder())
    parser.feed(text)
    tree = parser.close()
    ids = {}
    for elem in tree.iter():
        elem_id = elem.get("id")
        if elem_id:
            ids[elem_id] = elem
    return tree, ids
1335
1336 ##
1337 # Parses an XML document from a string constant. Same as {@link #XML}.
1338 #
1339 # @def fromstring(text)
1340 # @param text A string containing XML data.
1341 # @return An Element instance.
1342 # @defreturn Element
1343
# fromstring is an alias: both names refer to the same function object
fromstring = XML
1345
1346 ##
1347 # Parses an XML document from a sequence of string fragments.
1348 #
1349 # @param sequence A list or other sequence containing XML data fragments.
1350 # @param parser An optional parser instance. If not given, the
1351 # standard {@link XMLParser} parser is used.
1352 # @return An Element instance.
1353 # @defreturn Element
1354 # @since 1.3
1355
def fromstringlist(sequence, parser=None):
    """Parse an XML document from a sequence of string fragments.

    Each fragment is fed to the parser in order.  Uses *parser* if
    given, otherwise a new XMLParser feeding a TreeBuilder, and returns
    whatever the parser's ``close`` returns.
    """
    if not parser:
        parser = XMLParser(target=TreeBuilder())
    for text in sequence:
        parser.feed(text)
    return parser.close()
1362
1363 # --------------------------------------------------------------------
1364
1365 ##
1366 # Generic element structure builder. This builder converts a sequence
1367 # of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
1368 # #TreeBuilder.end} method calls to a well-formed element structure.
1369 # <p>
1370 # You can use this class to build an element structure using a custom XML
1371 # parser, or a parser for some other XML-like format.
1372 #
1373 # @param element_factory Optional element factory. This factory
1374 # is called to create new Element instances, as necessary.
1375
1377
class TreeBuilder(object):
    """Build an element structure from start/data/end method calls.

    *element_factory*, if given, is called as ``factory(tag, attrs)`` to
    create each new element; otherwise Element is used.
    """

    def __init__(self, element_factory=None):
        self._data = []  # data collector
        self._elem = []  # element stack
        self._last = None  # last element
        self._tail = None  # true if we're after an end tag
        if element_factory is None:
            element_factory = Element
        self._factory = element_factory

    ##
    # Flushes the builder buffers, and returns the toplevel document
    # element.
    #
    # @return An Element instance.
    # @defreturn Element

    def close(self):
        assert len(self._elem) == 0, "missing end tags"
        assert self._last is not None, "missing toplevel element"
        return self._last

    def _flush(self):
        # Attach the collected character data to the last element: as
        # its tail if that element has already been closed, otherwise as
        # its text.  Data seen before any element exists is dropped.
        if self._data:
            if self._last is not None:
                text = "".join(self._data)
                if self._tail:
                    assert self._last.tail is None, "internal error (tail)"
                    self._last.tail = text
                else:
                    assert self._last.text is None, "internal error (text)"
                    self._last.text = text
            self._data = []

    ##
    # Adds text to the current element.
    #
    # @param data A string.  This should be either an 8-bit string
    #    containing ASCII text, or a Unicode string.

    def data(self, data):
        self._data.append(data)

    ##
    # Opens a new element.
    #
    # @param tag The element name.
    # @param attrs A dictionary containing element attributes.
    # @return The opened element.
    # @defreturn Element

    def start(self, tag, attrs):
        self._flush()
        self._last = elem = self._factory(tag, attrs)
        if self._elem:
            # nested element: attach it to the innermost open element
            self._elem[-1].append(elem)
        self._elem.append(elem)
        self._tail = 0
        return elem

    ##
    # Closes the current element.
    #
    # @param tag The element name.
    # @return The closed element.
    # @defreturn Element

    def end(self, tag):
        self._flush()
        self._last = self._elem.pop()
        assert self._last.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   self._last.tag, tag)
        self._tail = 1
        return self._last
1452
1453 ##
1454 # Element structure builder for XML source data, based on the
1455 # <b>expat</b> parser.
1456 #
1457 # @keyparam target Target object. If omitted, the builder uses an
1458 # instance of the standard {@link #TreeBuilder} class.
1459 # @keyparam html Predefine HTML entities. This flag is not supported
1460 # by the current implementation.
1461 # @keyparam encoding Optional encoding. If given, the value overrides
1462 # the encoding specified in the XML file.
1463 # @see #ElementTree
1464 # @see #TreeBuilder
1465
1467
class XMLParser(object):
    """Element structure builder for XML source data, based on expat.

    Parser events are translated into start/end/data (and, when the
    target defines them, comment/pi/doctype) calls on *target*; a
    TreeBuilder is used when no target is given.  *encoding* is passed
    on to expat's ParserCreate.  *html* is accepted but not used.
    """

    def __init__(self, html=0, target=None, encoding=None):
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" separates namespace URI and local name in reported names;
        # _fixname turns "uri}local" into "{uri}local"
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        self._error = expat.error
        self._names = {}  # name memo cache
        # callbacks
        parser.DefaultHandlerExpand = self._default
        parser.StartElementHandler = self._start
        parser.EndElementHandler = self._end
        parser.CharacterDataHandler = self._data
        # optional callbacks
        parser.CommentHandler = self._comment
        parser.ProcessingInstructionHandler = self._pi
        # let expat do the buffering, if supported
        try:
            self._parser.buffer_text = 1
        except AttributeError:
            pass
        # use new-style attribute handling, if supported
        try:
            self._parser.ordered_attributes = 1
            self._parser.specified_attributes = 1
            parser.StartElementHandler = self._start_list
        except AttributeError:
            pass
        self._doctype = None  # list of doctype tokens while inside one
        self.entity = {}
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass  # unknown

    def _raiseerror(self, value):
        # re-raise an expat error as ParseError, keeping code and position
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    def _fixtext(self, text):
        # convert text string to ascii, if possible
        try:
            return text.encode("ascii")
        except UnicodeError:
            return text

    def _fixname(self, key):
        # expand qname, and convert name string to ascii, if possible
        try:
            name = self._names[key]
        except KeyError:
            name = key
            if "}" in name:
                name = "{" + name
            self._names[key] = name = self._fixtext(name)
        return name

    def _start(self, tag, attrib_in):
        # start handler for attributes delivered as a dictionary
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        for key, value in attrib_in.items():
            attrib[fixname(key)] = fixtext(value)
        return self.target.start(tag, attrib)

    def _start_list(self, tag, attrib_in):
        # start handler for attributes delivered as a flat
        # [name, value, name, value, ...] list
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        if attrib_in:
            for i in range(0, len(attrib_in), 2):
                attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1])
        return self.target.start(tag, attrib)

    def _data(self, text):
        return self.target.data(self._fixtext(text))

    def _end(self, tag):
        return self.target.end(self._fixname(tag))

    def _comment(self, data):
        # comments are forwarded only if the target has a comment method
        try:
            comment = self.target.comment
        except AttributeError:
            pass
        else:
            return comment(self._fixtext(data))

    def _pi(self, target, data):
        # processing instructions are forwarded only if the target has
        # a pi method
        try:
            pi = self.target.pi
        except AttributeError:
            pass
        else:
            return pi(self._fixtext(target), self._fixtext(data))

    def _default(self, text):
        # catch-all handler: resolves entities through self.entity and
        # picks doctype declarations apart token by token
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                self.target.data(self.entity[text[1:-1]])
            except KeyError:
                # use the error class of the expat module selected in
                # __init__ (which may be the pyexpat fallback)
                err = self._error(
                    "undefined entity %s: line %d, column %d" %
                    (text, self._parser.ErrorLineNumber,
                    self._parser.ErrorColumnNumber)
                    )
                err.code = 11  # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = self._parser.ErrorLineNumber
                err.offset = self._parser.ErrorColumnNumber
                raise err
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = []  # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                kind = self._doctype[1]
                if kind == "PUBLIC" and n == 4:
                    name, kind, pubid, system = self._doctype
                elif kind == "SYSTEM" and n == 3:
                    name, kind, system = self._doctype
                    pubid = None
                else:
                    return
                if pubid:
                    pubid = pubid[1:-1]  # strip the surrounding quotes
                if hasattr(self.target, "doctype"):
                    self.target.doctype(name, pubid, system[1:-1])
                elif self.doctype != self._XMLParser__doctype:
                    # doctype was overridden in a subclass.  bound
                    # methods must be compared with != here: every
                    # attribute access builds a new bound-method object,
                    # so an identity test would always report a mismatch
                    # warn about deprecated call
                    self._XMLParser__doctype(name, pubid, system[1:-1])
                    self.doctype(name, pubid, system[1:-1])
                self._doctype = None

    ##
    # (Deprecated) Handles a doctype declaration.
    #
    # @param name Doctype name.
    # @param pubid Public identifier.
    # @param system System identifier.

    def doctype(self, name, pubid, system):
        """This method of XMLParser is deprecated."""
        warnings.warn(
            "This method of XMLParser is deprecated.  Define doctype() "
            "method on the TreeBuilder target.",
            DeprecationWarning,
            )

    # sentinel, if doctype is redefined in a subclass
    __doctype = doctype

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.

    def feed(self, data):
        try:
            self._parser.Parse(data, 0)
        except self._error as v:
            self._raiseerror(v)

    ##
    # Finishes feeding data to the parser.
    #
    # @return An element structure.
    # @defreturn Element

    def close(self):
        try:
            self._parser.Parse("", 1)  # end of data
        except self._error as v:
            self._raiseerror(v)
        tree = self.target.close()
        del self.target, self._parser  # get rid of circular references
        return tree
1669
# compatibility: the parser class stays reachable under this second name
XMLTreeBuilder = XMLParser
1672
# workaround circular import.
# the "c14n" output method is optional: it is registered only when the
# ElementC14N module can be imported, and silently left out otherwise.
try:
    from ElementC14N import _serialize_c14n
except ImportError:
    pass
else:
    _serialize["c14n"] = _serialize_c14n
1679
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Fri Dec 23 19:00:53 2016 | http://epydoc.sourceforge.net |