View Javadoc
1   /*
2    * Copyright (c) 2002-2017 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * http://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package com.gargoylesoftware.htmlunit.javascript.host.xml;
16  
17  import static com.gargoylesoftware.htmlunit.BrowserVersionFeatures.JS_XML_SERIALIZER_BLANK_BEFORE_SELF_CLOSING;
18  import static com.gargoylesoftware.htmlunit.BrowserVersionFeatures.JS_XML_SERIALIZER_HTML_DOCUMENT_FRAGMENT_ALWAYS_EMPTY;
19  import static com.gargoylesoftware.htmlunit.BrowserVersionFeatures.JS_XML_SERIALIZER_ROOT_CDATA_AS_ESCAPED_TEXT;
20  
21  import java.util.Arrays;
22  import java.util.HashSet;
23  import java.util.Locale;
24  import java.util.Set;
25  
26  import org.w3c.dom.NamedNodeMap;
27  
28  import com.gargoylesoftware.htmlunit.SgmlPage;
29  import com.gargoylesoftware.htmlunit.html.*;
30  import com.gargoylesoftware.htmlunit.javascript.SimpleScriptable;
31  import com.gargoylesoftware.htmlunit.javascript.configuration.JsxClass;
32  import com.gargoylesoftware.htmlunit.javascript.configuration.JsxConstructor;
33  import com.gargoylesoftware.htmlunit.javascript.configuration.JsxFunction;
34  import com.gargoylesoftware.htmlunit.javascript.host.Element;
35  import com.gargoylesoftware.htmlunit.javascript.host.dom.CDATASection;
36  import com.gargoylesoftware.htmlunit.javascript.host.dom.Document;
37  import com.gargoylesoftware.htmlunit.javascript.host.dom.DocumentFragment;
38  import com.gargoylesoftware.htmlunit.javascript.host.dom.Node;
39  import com.gargoylesoftware.htmlunit.javascript.host.html.HTMLDocument;
40  import com.gargoylesoftware.htmlunit.util.StringUtils;
41  
42  /**
43   * A JavaScript object for {@code XMLSerializer}.
44   *
45   * @author Ahmed Ashour
46   * @author Darrell DeBoer
47   * @author Ronald Brill
48   * @author Frank Danek
49   */
50  @JsxClass
51  public class XMLSerializer extends SimpleScriptable {
52  
53      // this is a bit strange but it is the way FF works
54      // output of empty tags are not allowed for these HTML tags
55      private static final Set<String> NON_EMPTY_TAGS = new HashSet<>(Arrays.asList(
56              HtmlAbbreviated.TAG_NAME, HtmlAcronym.TAG_NAME,
57              HtmlAnchor.TAG_NAME, HtmlApplet.TAG_NAME, HtmlAddress.TAG_NAME, HtmlAudio.TAG_NAME,
58              HtmlBackgroundSound.TAG_NAME,
59              HtmlBidirectionalOverride.TAG_NAME, HtmlBig.TAG_NAME, HtmlBlink.TAG_NAME,
60              HtmlBlockQuote.TAG_NAME, HtmlBody.TAG_NAME, HtmlBold.TAG_NAME,
61              HtmlButton.TAG_NAME, HtmlCanvas.TAG_NAME, HtmlCaption.TAG_NAME,
62              HtmlCenter.TAG_NAME, HtmlCitation.TAG_NAME, HtmlCode.TAG_NAME,
63              HtmlDefinition.TAG_NAME, HtmlDefinitionDescription.TAG_NAME,
64              HtmlDeletedText.TAG_NAME, HtmlDirectory.TAG_NAME,
65              HtmlDivision.TAG_NAME,
66              HtmlDefinitionList.TAG_NAME,
67              HtmlDefinitionTerm.TAG_NAME, HtmlEmbed.TAG_NAME,
68              HtmlEmphasis.TAG_NAME, HtmlFieldSet.TAG_NAME,
69              HtmlFont.TAG_NAME, HtmlForm.TAG_NAME,
70              HtmlFrame.TAG_NAME, HtmlFrameSet.TAG_NAME, HtmlHeading1.TAG_NAME,
71              HtmlHeading2.TAG_NAME, HtmlHeading3.TAG_NAME,
72              HtmlHeading4.TAG_NAME, HtmlHeading5.TAG_NAME,
73              HtmlHeading6.TAG_NAME, HtmlHead.TAG_NAME,
74              HtmlHtml.TAG_NAME, HtmlInlineFrame.TAG_NAME,
75              HtmlInsertedText.TAG_NAME, HtmlIsIndex.TAG_NAME,
76              HtmlItalic.TAG_NAME, HtmlKeyboard.TAG_NAME, HtmlLabel.TAG_NAME,
77              HtmlLegend.TAG_NAME, HtmlListing.TAG_NAME, HtmlListItem.TAG_NAME,
78              HtmlMap.TAG_NAME, HtmlMarquee.TAG_NAME,
79              HtmlMenu.TAG_NAME, HtmlMultiColumn.TAG_NAME,
80              HtmlNoBreak.TAG_NAME, HtmlNoEmbed.TAG_NAME, HtmlNoFrames.TAG_NAME,
81              HtmlNoScript.TAG_NAME, HtmlObject.TAG_NAME, HtmlOrderedList.TAG_NAME,
82              HtmlOptionGroup.TAG_NAME, HtmlOption.TAG_NAME, HtmlParagraph.TAG_NAME,
83              HtmlPlainText.TAG_NAME, HtmlPreformattedText.TAG_NAME,
84              HtmlInlineQuotation.TAG_NAME, HtmlS.TAG_NAME, HtmlSample.TAG_NAME,
85              HtmlScript.TAG_NAME, HtmlSelect.TAG_NAME, HtmlSmall.TAG_NAME,
86              HtmlSource.TAG_NAME, HtmlSpan.TAG_NAME,
87              HtmlStrike.TAG_NAME, HtmlStrong.TAG_NAME, HtmlStyle.TAG_NAME,
88              HtmlSubscript.TAG_NAME, HtmlSuperscript.TAG_NAME, HtmlTitle.TAG_NAME,
89              HtmlTable.TAG_NAME, HtmlTableColumn.TAG_NAME, HtmlTableColumnGroup.TAG_NAME,
90              HtmlTableBody.TAG_NAME, HtmlTableDataCell.TAG_NAME, HtmlTableHeaderCell.TAG_NAME,
91              HtmlTableRow.TAG_NAME, HtmlTextArea.TAG_NAME, HtmlTableFooter.TAG_NAME,
92              HtmlTableHeader.TAG_NAME, HtmlTeletype.TAG_NAME, HtmlUnderlined.TAG_NAME,
93              HtmlUnorderedList.TAG_NAME, HtmlVariable.TAG_NAME, HtmlVideo.TAG_NAME,
94              HtmlWordBreak.TAG_NAME, HtmlExample.TAG_NAME
95      ));
96  
97      /**
98       * Default constructor.
99       */
100     @JsxConstructor
101     public XMLSerializer() {
102         // Empty.
103     }
104 
105     /**
106      * The subtree rooted by the specified element is serialized to a string.
107      * @param root the root of the subtree to be serialized (this may be any node, even a document)
108      * @return the serialized string
109      */
110     @JsxFunction
111     public String serializeToString(Node root) {
112         if (root == null) {
113             return "";
114         }
115         if (root instanceof Document) {
116             root = ((Document) root).getDocumentElement();
117         }
118         else if (root instanceof DocumentFragment) {
119             if (root.getOwnerDocument() instanceof HTMLDocument
120                 && getBrowserVersion().hasFeature(JS_XML_SERIALIZER_HTML_DOCUMENT_FRAGMENT_ALWAYS_EMPTY)) {
121                 return "";
122             }
123             root = root.getFirstChild();
124         }
125         if (root instanceof Element) {
126             final StringBuilder builder = new StringBuilder();
127             final DomNode node = root.getDomNodeOrDie();
128             final SgmlPage page = node.getPage();
129             final boolean isHtmlPage = page != null && page.isHtmlPage();
130 
131             String forcedNamespace = null;
132             if (isHtmlPage) {
133                 forcedNamespace = "http://www.w3.org/1999/xhtml";
134             }
135             toXml(1, node, builder, forcedNamespace);
136 
137             return builder.toString();
138         }
139         if (root instanceof CDATASection
140             && getBrowserVersion().hasFeature(JS_XML_SERIALIZER_ROOT_CDATA_AS_ESCAPED_TEXT)) {
141             final DomCDataSection domCData = (DomCDataSection) root.getDomNodeOrDie();
142             final String data = domCData.getData();
143             if (org.apache.commons.lang3.StringUtils.isNotBlank(data)) {
144                 return StringUtils.escapeXmlChars(data);
145             }
146         }
147         return root.getDomNodeOrDie().asXml();
148     }
149 
150     private void toXml(final int indent,
151             final DomNode node, final StringBuilder builder, final String foredNamespace) {
152         final String nodeName = node.getNodeName();
153         builder.append('<').append(nodeName);
154 
155         String optionalPrefix = "";
156         final String namespaceURI = node.getNamespaceURI();
157         final String prefix = node.getPrefix();
158         if (namespaceURI != null && prefix != null) {
159             boolean sameNamespace = false;
160             for (DomNode parentNode = node.getParentNode(); parentNode instanceof DomElement;
161                     parentNode = parentNode.getParentNode()) {
162                 if (namespaceURI.equals(parentNode.getNamespaceURI())) {
163                     sameNamespace = true;
164                 }
165             }
166             if (node.getParentNode() == null || !sameNamespace) {
167                 ((DomElement) node).setAttribute("xmlns:" + prefix, namespaceURI);
168             }
169         }
170         else if (foredNamespace != null) {
171             builder.append(" xmlns=\"").append(foredNamespace).append('"');
172             optionalPrefix = " ";
173         }
174 
175         final NamedNodeMap attributesMap = node.getAttributes();
176         for (int i = 0; i < attributesMap.getLength(); i++) {
177             final DomAttr attrib = (DomAttr) attributesMap.item(i);
178             builder.append(' ').append(attrib.getQualifiedName()).append('=')
179                 .append('"').append(attrib.getValue()).append('"');
180         }
181         boolean startTagClosed = false;
182         for (final DomNode child : node.getChildren()) {
183             if (!startTagClosed) {
184                 builder.append(optionalPrefix).append('>');
185                 startTagClosed = true;
186             }
187             switch (child.getNodeType()) {
188                 case Node.ELEMENT_NODE:
189                     toXml(indent + 1, child, builder, null);
190                     break;
191 
192                 case Node.TEXT_NODE:
193                     String value = child.getNodeValue();
194                     value = StringUtils.escapeXmlChars(value);
195                     builder.append(value);
196                     break;
197 
198                 case Node.CDATA_SECTION_NODE:
199                 case Node.COMMENT_NODE:
200                     builder.append(child.asXml());
201                     break;
202 
203                 default:
204 
205             }
206         }
207         if (!startTagClosed) {
208             final String tagName = nodeName.toLowerCase(Locale.ROOT);
209             if (NON_EMPTY_TAGS.contains(tagName)) {
210                 builder.append('>');
211                 builder.append("</").append(nodeName).append('>');
212             }
213             else {
214                 builder.append(optionalPrefix);
215                 if (builder.charAt(builder.length() - 1) != ' '
216                     && getBrowserVersion().hasFeature(JS_XML_SERIALIZER_BLANK_BEFORE_SELF_CLOSING)) {
217                     builder.append(" ");
218                 }
219                 builder.append("/>");
220             }
221         }
222         else {
223             builder.append('<').append('/').append(nodeName).append('>');
224         }
225     }
226 
227 }