View Javadoc
1   /*
2    * Copyright (c) 2002-2017 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * http://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package com.gargoylesoftware.htmlunit;
16  
17  import java.io.IOException;
18  import java.net.URL;
19  import java.nio.charset.Charset;
20  import java.util.Comparator;
21  import java.util.LinkedList;
22  import java.util.List;
23  
24  import org.w3c.dom.CDATASection;
25  import org.w3c.dom.Comment;
26  import org.w3c.dom.DOMException;
27  import org.w3c.dom.Document;
28  import org.w3c.dom.DocumentType;
29  import org.w3c.dom.Element;
30  import org.w3c.dom.Node;
31  import org.w3c.dom.Text;
32  import org.w3c.dom.traversal.DocumentTraversal;
33  import org.w3c.dom.traversal.NodeFilter;
34  
35  import com.gargoylesoftware.htmlunit.html.AbstractDomNodeList;
36  import com.gargoylesoftware.htmlunit.html.DomAttr;
37  import com.gargoylesoftware.htmlunit.html.DomCDataSection;
38  import com.gargoylesoftware.htmlunit.html.DomComment;
39  import com.gargoylesoftware.htmlunit.html.DomDocumentFragment;
40  import com.gargoylesoftware.htmlunit.html.DomElement;
41  import com.gargoylesoftware.htmlunit.html.DomNode;
42  import com.gargoylesoftware.htmlunit.html.DomNodeIterator;
43  import com.gargoylesoftware.htmlunit.html.DomNodeList;
44  import com.gargoylesoftware.htmlunit.html.DomText;
45  import com.gargoylesoftware.htmlunit.html.DomTreeWalker;
46  
47  /**
48   * A basic class of Standard Generalized Markup Language (SGML), e.g. HTML and XML.
49   *
50   * @author Ahmed Ashour
51   * @author Ronald Brill
52   */
53  public abstract class SgmlPage extends DomNode implements Page, Document, DocumentTraversal {
54  
55      private DocumentType documentType_;
56      private final WebResponse webResponse_;
57      private WebWindow enclosingWindow_;
58      private final WebClient webClient_;
59  
60      /**
61       * Creates an instance of SgmlPage.
62       *
63       * @param webResponse the web response that was used to create this page
64       * @param webWindow the window that this page is being loaded into
65       */
66      public SgmlPage(final WebResponse webResponse, final WebWindow webWindow) {
67          super(null);
68          webResponse_ = webResponse;
69          enclosingWindow_ = webWindow;
70          webClient_ = webWindow.getWebClient();
71      }
72  
73      /**
74       * {@inheritDoc}
75       */
76      @Override
77      public void cleanUp() {
78          if (getWebClient().getCache().getCachedResponse(webResponse_.getWebRequest()) == null) {
79              webResponse_.cleanUp();
80          }
81      }
82  
83      /**
84       * {@inheritDoc}
85       */
86      @Override
87      public WebResponse getWebResponse() {
88          return webResponse_;
89      }
90  
91      /**
92       * {@inheritDoc}
93       */
94      @Override
95      public void initialize() throws IOException {
96          // nothing to do here
97      }
98  
99      /**
100      * Gets the name for the current node.
101      * @return the node name
102      */
103     @Override
104     public String getNodeName() {
105         return "#document";
106     }
107 
108     /**
109      * Gets the type of the current node.
110      * @return the node type
111      */
112     @Override
113     public short getNodeType() {
114         return org.w3c.dom.Node.DOCUMENT_NODE;
115     }
116 
117     /**
118      * Returns the window that this page is sitting inside.
119      *
120      * @return the enclosing frame or null if this page isn't inside a frame
121      */
122     @Override
123     public WebWindow getEnclosingWindow() {
124         return enclosingWindow_;
125     }
126 
127     /**
128      * Sets the window that contains this page.
129      *
130      * @param window the new frame or null if this page is being removed from a frame
131      */
132     public void setEnclosingWindow(final WebWindow window) {
133         enclosingWindow_ = window;
134     }
135 
136     /**
137      * Returns the WebClient that originally loaded this page.
138      *
139      * @return the WebClient that originally loaded this page
140      */
141     public WebClient getWebClient() {
142         return webClient_;
143     }
144 
145     /**
146      * Creates an empty {@link DomDocumentFragment} object.
147      * @return a newly created {@link DomDocumentFragment}
148      */
149     @Override
150     public DomDocumentFragment createDocumentFragment() {
151         return new DomDocumentFragment(this);
152     }
153 
154     /**
155      * Returns the document type.
156      * @return the document type
157      */
158     @Override
159     public final DocumentType getDoctype() {
160         return documentType_;
161     }
162 
163     /**
164      * Sets the document type.
165      * @param type the document type
166      */
167     protected void setDocumentType(final DocumentType type) {
168         documentType_ = type;
169     }
170 
171     /**
172      * {@inheritDoc}
173      */
174     @Override
175     public SgmlPage getPage() {
176         return this;
177     }
178 
179     /**
180      * Creates an element, the type of which depends on the specified tag name.
181      * @param tagName the tag name which determines the type of element to be created
182      * @return an element, the type of which depends on the specified tag name
183      */
184     @Override
185     public abstract Element createElement(String tagName);
186 
187     /**
188      * Create a new Element with the given namespace and qualified name.
189      * @param namespaceURI the URI that identifies an XML namespace
190      * @param qualifiedName the qualified name of the element type to instantiate
191      * @return the new element
192      */
193     @Override
194     public abstract Element createElementNS(String namespaceURI, String qualifiedName);
195 
196     /**
197      * Returns the encoding.
198      * @return the encoding
199      */
200     public abstract Charset getCharset();
201 
202     /**
203      * Returns the document element.
204      * @return the document element
205      */
206     @Override
207     public DomElement getDocumentElement() {
208         DomNode childNode = getFirstChild();
209         while (childNode != null && !(childNode instanceof DomElement)) {
210             childNode = childNode.getNextSibling();
211         }
212         return (DomElement) childNode;
213     }
214 
215     /**
216      * Creates a clone of this instance.
217      * @return a clone of this instance
218      */
219     @Override
220     protected SgmlPage clone() {
221         try {
222             final SgmlPage result = (SgmlPage) super.clone();
223             return result;
224         }
225         catch (final CloneNotSupportedException e) {
226             throw new IllegalStateException("Clone not supported");
227         }
228     }
229 
230     /**
231      * {@inheritDoc}
232      */
233     @Override
234     public String asXml() {
235         final DomElement documentElement = getDocumentElement();
236         if (documentElement == null) {
237             return "";
238         }
239         return documentElement.asXml();
240     }
241 
242     /**
243      * Returns {@code true} if this page has case-sensitive tag names, {@code false} otherwise. In general,
244      * XML has case-sensitive tag names, and HTML doesn't. This is especially important during XPath matching.
245      * @return {@code true} if this page has case-sensitive tag names, {@code false} otherwise
246      */
247     public abstract boolean hasCaseSensitiveTagNames();
248 
249     /**
250      * {@inheritDoc}
251      * The current implementation just {@link DomNode#normalize()}s the document element.
252      */
253     @Override
254     public void normalizeDocument() {
255         getDocumentElement().normalize();
256     }
257 
258     /**
259      * {@inheritDoc}
260      */
261     @Override
262     public String getCanonicalXPath() {
263         return "/";
264     }
265 
266     /**
267      * {@inheritDoc}
268      */
269     @Override
270     public DomAttr createAttribute(final String name) {
271         return new DomAttr(getPage(), null, name, "", false);
272     }
273 
274     /**
275      * Returns the URL of this page.
276      * @return the URL of this page
277      */
278     @Override
279     public URL getUrl() {
280         return getWebResponse().getWebRequest().getUrl();
281     }
282 
283     @Override
284     public boolean isHtmlPage() {
285         return false;
286     }
287 
288     /**
289      * {@inheritDoc}
290      */
291     @Override
292     public DomNodeList<DomElement> getElementsByTagName(final String tagName) {
293         return new AbstractDomNodeList<DomElement>(this) {
294             @Override
295             protected List<DomElement> provideElements() {
296                 final List<DomElement> res = new LinkedList<>();
297                 final boolean caseSensitive = hasCaseSensitiveTagNames();
298                 for (final DomElement elem : getDomElementDescendants()) {
299                     final String localName = elem.getLocalName();
300                     if ("*".equals(tagName) || localName.equals(tagName)
301                             || (!caseSensitive && localName.equalsIgnoreCase(tagName))) {
302                         res.add(elem);
303                     }
304                 }
305                 return res;
306             }
307         };
308     }
309 
310     /**
311      * {@inheritDoc}
312      */
313     @Override
314     public DomNodeList<DomElement> getElementsByTagNameNS(final String namespaceURI, final String localName) {
315         return new AbstractDomNodeList<DomElement>(this) {
316             @Override
317             protected List<DomElement> provideElements() {
318                 final List<DomElement> res = new LinkedList<>();
319                 final Comparator<String> comparator;
320 
321                 if (hasCaseSensitiveTagNames()) {
322                     comparator = Comparator.nullsFirst(String::compareTo);
323                 }
324                 else {
325                     comparator = Comparator.nullsFirst(String::compareToIgnoreCase);
326                 }
327 
328                 for (final DomElement elem : getDomElementDescendants()) {
329                     final String locName = elem.getLocalName();
330 
331                     if (("*".equals(namespaceURI) || comparator.compare(namespaceURI, elem.getNamespaceURI()) == 0)
332                             && ("*".equals(locName) || comparator.compare(locName, elem.getLocalName()) == 0)) {
333                         res.add(elem);
334                     }
335                 }
336                 return res;
337             }
338         };
339     }
340 
341     /**
342      * {@inheritDoc}
343      */
344     @Override
345     public CDATASection createCDATASection(final String data) {
346         return new DomCDataSection(this, data);
347     }
348 
349     /**
350      * {@inheritDoc}
351      */
352     @Override
353     public Text createTextNode(final String data) {
354         return new DomText(this, data);
355     }
356 
357     /**
358      * {@inheritDoc}
359      */
360     @Override
361     public Comment createComment(final String data) {
362         return new DomComment(this, data);
363     }
364 
365     /**
366      * {@inheritDoc}
367      */
368     @Override
369     public DomTreeWalker createTreeWalker(final Node root, final int whatToShow, final NodeFilter filter,
370             final boolean entityReferenceExpansion) throws DOMException {
371         return new DomTreeWalker((DomNode) root, whatToShow, filter, entityReferenceExpansion);
372     }
373 
374     /**
375      * {@inheritDoc}
376      */
377     @Override
378     public DomNodeIterator createNodeIterator(final Node root, final int whatToShow, final NodeFilter filter,
379             final boolean entityReferenceExpansion) throws DOMException {
380         return new DomNodeIterator((DomNode) root, whatToShow, filter, entityReferenceExpansion);
381     }
382 
383     /**
384      * Returns the content type of this page.
385      * @return the content type of this page
386      */
387     public abstract String getContentType();
388 }