View Javadoc
1   /*
2    * Copyright (c) 2002-2017 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * http://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package com.gargoylesoftware.htmlunit;
16  
17  import java.io.IOException;
18  import java.net.URL;
19  import java.nio.charset.Charset;
20  import java.util.Comparator;
21  import java.util.LinkedList;
22  import java.util.List;
23  
24  import org.w3c.dom.CDATASection;
25  import org.w3c.dom.Comment;
26  import org.w3c.dom.DOMException;
27  import org.w3c.dom.Document;
28  import org.w3c.dom.DocumentType;
29  import org.w3c.dom.Element;
30  import org.w3c.dom.Node;
31  import org.w3c.dom.Text;
32  import org.w3c.dom.traversal.DocumentTraversal;
33  import org.w3c.dom.traversal.NodeFilter;
34  
35  import com.gargoylesoftware.htmlunit.html.AbstractDomNodeList;
36  import com.gargoylesoftware.htmlunit.html.DomAttr;
37  import com.gargoylesoftware.htmlunit.html.DomCDataSection;
38  import com.gargoylesoftware.htmlunit.html.DomComment;
39  import com.gargoylesoftware.htmlunit.html.DomDocumentFragment;
40  import com.gargoylesoftware.htmlunit.html.DomElement;
41  import com.gargoylesoftware.htmlunit.html.DomNode;
42  import com.gargoylesoftware.htmlunit.html.DomNodeIterator;
43  import com.gargoylesoftware.htmlunit.html.DomNodeList;
44  import com.gargoylesoftware.htmlunit.html.DomText;
45  import com.gargoylesoftware.htmlunit.html.DomTreeWalker;
46  
47  /**
48   * A basic class of Standard Generalized Markup Language (SGML), e.g. HTML and XML.
49   *
50   * @author Ahmed Ashour
51   * @author Ronald Brill
52   */
53  public abstract class SgmlPage extends DomNode implements Page, Document, DocumentTraversal {
54  
55      private DocumentType documentType_;
56      private final WebResponse webResponse_;
57      private WebWindow enclosingWindow_;
58      private final WebClient webClient_;
59  
60      /**
61       * Creates an instance of SgmlPage.
62       *
63       * @param webResponse the web response that was used to create this page
64       * @param webWindow the window that this page is being loaded into
65       */
66      public SgmlPage(final WebResponse webResponse, final WebWindow webWindow) {
67          super(null);
68          webResponse_ = webResponse;
69          enclosingWindow_ = webWindow;
70          webClient_ = webWindow.getWebClient();
71      }
72  
73      /**
74       * {@inheritDoc}
75       */
76      @Override
77      public void cleanUp() {
78          if (getWebClient().getCache().getCachedResponse(webResponse_.getWebRequest()) == null) {
79              webResponse_.cleanUp();
80          }
81      }
82  
83      /**
84       * {@inheritDoc}
85       */
86      @Override
87      public WebResponse getWebResponse() {
88          return webResponse_;
89      }
90  
91      /**
92       * {@inheritDoc}
93       */
94      @Override
95      public void initialize() throws IOException {
96          // nothing to do here
97      }
98  
99      /**
100      * Gets the name for the current node.
101      * @return the node name
102      */
103     @Override
104     public String getNodeName() {
105         return "#document";
106     }
107 
108     /**
109      * Gets the type of the current node.
110      * @return the node type
111      */
112     @Override
113     public short getNodeType() {
114         return org.w3c.dom.Node.DOCUMENT_NODE;
115     }
116 
117     /**
118      * Returns the window that this page is sitting inside.
119      *
120      * @return the enclosing frame or null if this page isn't inside a frame
121      */
122     @Override
123     public WebWindow getEnclosingWindow() {
124         return enclosingWindow_;
125     }
126 
127     /**
128      * Sets the window that contains this page.
129      *
130      * @param window the new frame or null if this page is being removed from a frame
131      */
132     public void setEnclosingWindow(final WebWindow window) {
133         enclosingWindow_ = window;
134     }
135 
136     /**
137      * Returns the WebClient that originally loaded this page.
138      *
139      * @return the WebClient that originally loaded this page
140      */
141     public WebClient getWebClient() {
142         return webClient_;
143     }
144 
145     /**
146      * Creates an empty {@link DomDocumentFragment} object.
147      * @return a newly created {@link DomDocumentFragment}
148      */
149     @Override
150     public DomDocumentFragment createDocumentFragment() {
151         return new DomDocumentFragment(this);
152     }
153 
154     /**
155      * Returns the document type.
156      * @return the document type
157      */
158     @Override
159     public final DocumentType getDoctype() {
160         return documentType_;
161     }
162 
163     /**
164      * Sets the document type.
165      * @param type the document type
166      */
167     protected void setDocumentType(final DocumentType type) {
168         documentType_ = type;
169     }
170 
171     /**
172      * {@inheritDoc}
173      */
174     @Override
175     public SgmlPage getPage() {
176         return this;
177     }
178 
179     /**
180      * Creates an element, the type of which depends on the specified tag name.
181      * @param tagName the tag name which determines the type of element to be created
182      * @return an element, the type of which depends on the specified tag name
183      */
184     @Override
185     public abstract Element createElement(String tagName);
186 
187     /**
188      * Create a new Element with the given namespace and qualified name.
189      * @param namespaceURI the URI that identifies an XML namespace
190      * @param qualifiedName the qualified name of the element type to instantiate
191      * @return the new element
192      */
193     @Override
194     public abstract Element createElementNS(String namespaceURI, String qualifiedName);
195 
196     /**
197      * Returns the page encoding.
198      * @return the page encoding
199      * @deprecated as of 2.25, please use {@link #getCharset()} instead
200      */
201     @Deprecated
202     public String getPageEncoding() {
203         final Charset charset = getCharset();
204         return charset == null ? null : charset.name();
205     }
206 
207     /**
208      * Returns the encoding.
209      * @return the encoding
210      */
211     public abstract Charset getCharset();
212 
213     /**
214      * Returns the document element.
215      * @return the document element
216      */
217     @Override
218     public DomElement getDocumentElement() {
219         DomNode childNode = getFirstChild();
220         while (childNode != null && !(childNode instanceof DomElement)) {
221             childNode = childNode.getNextSibling();
222         }
223         return (DomElement) childNode;
224     }
225 
226     /**
227      * Creates a clone of this instance.
228      * @return a clone of this instance
229      */
230     @Override
231     protected SgmlPage clone() {
232         try {
233             final SgmlPage result = (SgmlPage) super.clone();
234             return result;
235         }
236         catch (final CloneNotSupportedException e) {
237             throw new IllegalStateException("Clone not supported");
238         }
239     }
240 
241     /**
242      * {@inheritDoc}
243      */
244     @Override
245     public String asXml() {
246         final DomElement documentElement = getDocumentElement();
247         if (documentElement == null) {
248             return "";
249         }
250         return documentElement.asXml();
251     }
252 
253     /**
254      * Returns {@code true} if this page has case-sensitive tag names, {@code false} otherwise. In general,
255      * XML has case-sensitive tag names, and HTML doesn't. This is especially important during XPath matching.
256      * @return {@code true} if this page has case-sensitive tag names, {@code false} otherwise
257      */
258     public abstract boolean hasCaseSensitiveTagNames();
259 
260     /**
261      * {@inheritDoc}
262      * The current implementation just {@link DomNode#normalize()}s the document element.
263      */
264     @Override
265     public void normalizeDocument() {
266         getDocumentElement().normalize();
267     }
268 
269     /**
270      * {@inheritDoc}
271      */
272     @Override
273     public String getCanonicalXPath() {
274         return "/";
275     }
276 
277     /**
278      * {@inheritDoc}
279      */
280     @Override
281     public DomAttr createAttribute(final String name) {
282         return new DomAttr(getPage(), null, name, "", false);
283     }
284 
285     /**
286      * Returns the URL of this page.
287      * @return the URL of this page
288      */
289     @Override
290     public URL getUrl() {
291         return getWebResponse().getWebRequest().getUrl();
292     }
293 
294     @Override
295     public boolean isHtmlPage() {
296         return false;
297     }
298 
299     /**
300      * {@inheritDoc}
301      */
302     @Override
303     public DomNodeList<DomElement> getElementsByTagName(final String tagName) {
304         return new AbstractDomNodeList<DomElement>(this) {
305             @Override
306             protected List<DomElement> provideElements() {
307                 final List<DomElement> res = new LinkedList<>();
308                 final boolean caseSensitive = hasCaseSensitiveTagNames();
309                 for (final DomElement elem : getDomElementDescendants()) {
310                     final String localName = elem.getLocalName();
311                     if ("*".equals(tagName) || localName.equals(tagName)
312                             || (!caseSensitive && localName.equalsIgnoreCase(tagName))) {
313                         res.add(elem);
314                     }
315                 }
316                 return res;
317             }
318         };
319     }
320 
321     /**
322      * {@inheritDoc}
323      */
324     @Override
325     public DomNodeList<DomElement> getElementsByTagNameNS(final String namespaceURI, final String localName) {
326         return new AbstractDomNodeList<DomElement>(this) {
327             @Override
328             protected List<DomElement> provideElements() {
329                 final List<DomElement> res = new LinkedList<>();
330                 final Comparator<String> comparator;
331 
332                 if (hasCaseSensitiveTagNames()) {
333                     comparator = Comparator.nullsFirst(String::compareTo);
334                 }
335                 else {
336                     comparator = Comparator.nullsFirst(String::compareToIgnoreCase);
337                 }
338 
339                 for (final DomElement elem : getDomElementDescendants()) {
340                     final String locName = elem.getLocalName();
341 
342                     if (("*".equals(namespaceURI) || comparator.compare(namespaceURI, elem.getNamespaceURI()) == 0)
343                             && ("*".equals(locName) || comparator.compare(locName, elem.getLocalName()) == 0)) {
344                         res.add(elem);
345                     }
346                 }
347                 return res;
348             }
349         };
350     }
351 
352     /**
353      * {@inheritDoc}
354      */
355     @Override
356     public CDATASection createCDATASection(final String data) {
357         return new DomCDataSection(this, data);
358     }
359 
360     /**
361      * {@inheritDoc}
362      */
363     @Override
364     public Text createTextNode(final String data) {
365         return new DomText(this, data);
366     }
367 
368     /**
369      * {@inheritDoc}
370      */
371     @Override
372     public Comment createComment(final String data) {
373         return new DomComment(this, data);
374     }
375 
376     /**
377      * {@inheritDoc}
378      */
379     @Override
380     public DomTreeWalker createTreeWalker(final Node root, final int whatToShow, final NodeFilter filter,
381             final boolean entityReferenceExpansion) throws DOMException {
382         return new DomTreeWalker((DomNode) root, whatToShow, filter, entityReferenceExpansion);
383     }
384 
385     /**
386      * {@inheritDoc}
387      */
388     @Override
389     public DomNodeIterator createNodeIterator(final Node root, final int whatToShow, final NodeFilter filter,
390             final boolean entityReferenceExpansion) throws DOMException {
391         return new DomNodeIterator((DomNode) root, whatToShow, filter, entityReferenceExpansion);
392     }
393 
394     /**
395      * Returns the content type of this page.
396      * @return the content type of this page
397      */
398     public abstract String getContentType();
399 }