View Javadoc
1   /*
2    * Copyright (c) 2002-2017 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * http://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package com.gargoylesoftware.htmlunit.html;
16  
17  import static com.gargoylesoftware.htmlunit.BrowserVersionFeatures.EVENT_FOCUS_FOCUS_IN_BLUR_OUT;
18  import static com.gargoylesoftware.htmlunit.BrowserVersionFeatures.EVENT_FOCUS_IN_FOCUS_OUT_BLUR;
19  import static com.gargoylesoftware.htmlunit.BrowserVersionFeatures.FOCUS_BODY_ELEMENT_AT_START;
20  import static com.gargoylesoftware.htmlunit.BrowserVersionFeatures.JS_CALL_RESULT_IS_LAST_RETURN_VALUE;
21  import static com.gargoylesoftware.htmlunit.BrowserVersionFeatures.JS_DEFERRED;
22  import static com.gargoylesoftware.htmlunit.BrowserVersionFeatures.JS_IGNORES_UTF8_BOM_SOMETIMES;
23  import static com.gargoylesoftware.htmlunit.BrowserVersionFeatures.PAGE_SELECTION_RANGE_FROM_SELECTABLE_TEXT_INPUT;
24  import static com.gargoylesoftware.htmlunit.BrowserVersionFeatures.URL_MISSING_SLASHES;
25  import static java.nio.charset.StandardCharsets.ISO_8859_1;
26  
27  import java.io.File;
28  import java.io.IOException;
29  import java.io.ObjectInputStream;
30  import java.io.ObjectOutputStream;
31  import java.io.Serializable;
32  import java.net.MalformedURLException;
33  import java.net.URL;
34  import java.nio.charset.Charset;
35  import java.util.ArrayList;
36  import java.util.Arrays;
37  import java.util.Collection;
38  import java.util.Collections;
39  import java.util.Comparator;
40  import java.util.HashMap;
41  import java.util.LinkedHashSet;
42  import java.util.List;
43  import java.util.Locale;
44  import java.util.Map;
45  import java.util.SortedSet;
46  import java.util.TreeSet;
47  
48  import org.apache.commons.lang3.StringUtils;
49  import org.apache.commons.logging.Log;
50  import org.apache.commons.logging.LogFactory;
51  import org.apache.http.HttpStatus;
52  import org.w3c.dom.Attr;
53  import org.w3c.dom.Comment;
54  import org.w3c.dom.DOMConfiguration;
55  import org.w3c.dom.DOMException;
56  import org.w3c.dom.DOMImplementation;
57  import org.w3c.dom.Document;
58  import org.w3c.dom.DocumentType;
59  import org.w3c.dom.Element;
60  import org.w3c.dom.EntityReference;
61  import org.w3c.dom.ProcessingInstruction;
62  import org.w3c.dom.ranges.Range;
63  
64  import com.gargoylesoftware.htmlunit.BrowserVersion;
65  import com.gargoylesoftware.htmlunit.Cache;
66  import com.gargoylesoftware.htmlunit.ElementNotFoundException;
67  import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
68  import com.gargoylesoftware.htmlunit.History;
69  import com.gargoylesoftware.htmlunit.HttpHeader;
70  import com.gargoylesoftware.htmlunit.OnbeforeunloadHandler;
71  import com.gargoylesoftware.htmlunit.Page;
72  import com.gargoylesoftware.htmlunit.ScriptResult;
73  import com.gargoylesoftware.htmlunit.SgmlPage;
74  import com.gargoylesoftware.htmlunit.TopLevelWindow;
75  import com.gargoylesoftware.htmlunit.WebAssert;
76  import com.gargoylesoftware.htmlunit.WebClient;
77  import com.gargoylesoftware.htmlunit.WebRequest;
78  import com.gargoylesoftware.htmlunit.WebResponse;
79  import com.gargoylesoftware.htmlunit.WebWindow;
80  import com.gargoylesoftware.htmlunit.html.HTMLParser.HtmlUnitDOMBuilder;
81  import com.gargoylesoftware.htmlunit.html.impl.SelectableTextInput;
82  import com.gargoylesoftware.htmlunit.html.impl.SimpleRange;
83  import com.gargoylesoftware.htmlunit.javascript.AbstractJavaScriptEngine;
84  import com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine;
85  import com.gargoylesoftware.htmlunit.javascript.PostponedAction;
86  import com.gargoylesoftware.htmlunit.javascript.SimpleScriptable;
87  import com.gargoylesoftware.htmlunit.javascript.host.Window;
88  import com.gargoylesoftware.htmlunit.javascript.host.dom.Node;
89  import com.gargoylesoftware.htmlunit.javascript.host.event.BeforeUnloadEvent;
90  import com.gargoylesoftware.htmlunit.javascript.host.event.Event;
91  import com.gargoylesoftware.htmlunit.javascript.host.html.HTMLDocument;
92  import com.gargoylesoftware.htmlunit.protocol.javascript.JavaScriptURLConnection;
93  import com.gargoylesoftware.htmlunit.util.EncodingSniffer;
94  import com.gargoylesoftware.htmlunit.util.UrlUtils;
95  
96  import net.sourceforge.htmlunit.corejs.javascript.Context;
97  import net.sourceforge.htmlunit.corejs.javascript.Function;
98  import net.sourceforge.htmlunit.corejs.javascript.Script;
99  import net.sourceforge.htmlunit.corejs.javascript.Scriptable;
100 import net.sourceforge.htmlunit.corejs.javascript.ScriptableObject;
101 import net.sourceforge.htmlunit.corejs.javascript.Undefined;
102 
103 /**
104  * A representation of an HTML page returned from a server.
105  * <p>
106  * This class provides different methods to access the page's content like
107  * {@link #getForms()}, {@link #getAnchors()}, {@link #getElementById(String)}, ... as well as the
108  * very powerful inherited methods {@link #getByXPath(String)} and {@link #getFirstByXPath(String)}
109  * for fine grained user specific access to child nodes.
110  * </p>
111  * <p>
112  * Child elements allowing user interaction provide methods for this purpose like {@link HtmlAnchor#click()},
113  * {@link HtmlInput#type(String)}, {@link HtmlOption#setSelected(boolean)}, ...
114  * </p>
115  * <p>
116  * HtmlPage instances should not be instantiated directly. They will be returned by {@link WebClient#getPage(String)}
117  * when the content type of the server's response is <code>text/html</code> (or one of its variations).<br>
118  * <br>
119  * <b>Example:</b><br>
120  * <br>
121  * <code>
122  * final HtmlPage page = webClient.{@link WebClient#getPage(String) getPage}("http://mywebsite/some/page.html");
123  * </code>
124  * </p>
125  *
126  * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
127  * @author Alex Nikiforoff
128  * @author Noboru Sinohara
129  * @author David K. Taylor
130  * @author Andreas Hangler
131  * @author <a href="mailto:cse@dynabean.de">Christian Sell</a>
132  * @author Chris Erskine
133  * @author Marc Guillemot
134  * @author Ahmed Ashour
135  * @author Daniel Gredler
136  * @author Dmitri Zoubkov
137  * @author Sudhan Moghe
138  * @author Ethan Glasser-Camp
139  * @author <a href="mailto:tom.anderson@univ.oxon.org">Tom Anderson</a>
140  * @author Ronald Brill
141  * @author Frank Danek
142  * @author Joerg Werner
143  */
144 public class HtmlPage extends SgmlPage {
145 
    /** Logger of this class. */
    private static final Log LOG = LogFactory.getLog(HtmlPage.class);

    /** Shared comparator ordering elements by document position; used for {@link #frameElements_}. */
    private static final Comparator<DomElement> documentPositionComparator = new DocumentPositionComparator();

    private HtmlUnitDOMBuilder builder_;
    /** Lazily filled by {@link #getCharset()} from the web response; not serialized. */
    private transient Charset originalCharset_;

    /** Elements indexed by id; {@link #getElementById(String)} returns the first entry of the matching set. */
    private Map<String, SortedSet<DomElement>> idMap_
            = Collections.synchronizedMap(new HashMap<String, SortedSet<DomElement>>());
    /** Same structure as {@link #idMap_}; presumably keyed by the name attribute - TODO confirm. */
    private Map<String, SortedSet<DomElement>> nameMap_
            = Collections.synchronizedMap(new HashMap<String, SortedSet<DomElement>>());

    /** Frame elements of this page, kept sorted by document position. */
    private SortedSet<BaseFrameElement> frameElements_ = new TreeSet<>(documentPositionComparator);
    private int parserCount_;
    private int snippetParserCount_;
    private int inlineSnippetParserCount_;
    private Collection<HtmlAttributeChangeListener> attributeListeners_;
    private final Object lock_ = new String(); // used for synchronization
    /** Actions queued by {@link #addAfterLoadAction(PostponedAction)} and drained at the end of {@link #initialize()}. */
    private List<PostponedAction> afterLoadActions_ = Collections.synchronizedList(new ArrayList<PostponedAction>());
    /** Re-entrancy guard of {@link #cleanUp()}. */
    private boolean cleaning_;
    /** Consulted by {@link #getResolvedTarget(String)} for the default target; {@code null} if not set. */
    private HtmlBase base_;
    /** Set by {@link #initialize()} to the opener's URL when an about:blank page is loaded into a top level window. */
    private URL baseUrl_;
    /** Resources closed by {@link #cleanUp()}; may be {@code null}. */
    private List<AutoCloseable> autoCloseableList_;
    private ElementFromPointHandler elementFromPointHandler_;
    private DomElement elementWithFocus_;
    private List<Range> selectionRanges_ = new ArrayList<>(3);

    /** Tag names considered by {@link #getTabbableElements()}. */
    private static final List<String> TABBABLE_TAGS = Arrays.asList(HtmlAnchor.TAG_NAME, HtmlArea.TAG_NAME,
            HtmlButton.TAG_NAME, HtmlInput.TAG_NAME, HtmlObject.TAG_NAME, HtmlSelect.TAG_NAME, HtmlTextArea.TAG_NAME);
    /** NOTE(review): looks like the tags that may define an accesskey (A, AREA, BUTTON, INPUT, LABEL, LEGEND, TEXTAREA) - confirm. */
    private static final List<String> ACCEPTABLE_TAG_NAMES = Arrays.asList(HtmlAnchor.TAG_NAME, HtmlArea.TAG_NAME,
            HtmlButton.TAG_NAME, HtmlInput.TAG_NAME, HtmlLabel.TAG_NAME, HtmlLegend.TAG_NAME, HtmlTextArea.TAG_NAME);
177 
178     static class DocumentPositionComparator implements Comparator<DomElement>, Serializable {
179         @Override
180         public int compare(final DomElement elt1, final DomElement elt2) {
181             final short relation = elt1.compareDocumentPosition(elt2);
182             if (relation == 0) {
183                 return 0; // same node
184             }
185             if ((relation & DOCUMENT_POSITION_CONTAINS) != 0 || (relation & DOCUMENT_POSITION_PRECEDING) != 0) {
186                 return 1;
187             }
188 
189             return -1;
190         }
191     }
192 
193     /**
194      * Creates an instance of HtmlPage.
195      * An HtmlPage instance is normally retrieved with {@link WebClient#getPage(String)}.
196      *
197      * @param webResponse the web response that was used to create this page
198      * @param webWindow the window that this page is being loaded into
199      */
200     public HtmlPage(final WebResponse webResponse, final WebWindow webWindow) {
201         super(webResponse, webWindow);
202     }
203 
204     /**
205      * {@inheritDoc}
206      */
207     @Override
208     public HtmlPage getPage() {
209         return this;
210     }
211 
212     /**
213      * {@inheritDoc}
214      */
215     @Override
216     public boolean hasCaseSensitiveTagNames() {
217         return false;
218     }
219 
220     /**
221      * Initialize this page.
222      * @throws IOException if an IO problem occurs
223      * @throws FailingHttpStatusCodeException if the server returns a failing status code AND the property
224      * {@link com.gargoylesoftware.htmlunit.WebClientOptions#setThrowExceptionOnFailingStatusCode(boolean)} is set
225      * to true.
226      */
227     @Override
228     public void initialize() throws IOException, FailingHttpStatusCodeException {
229         final WebWindow enclosingWindow = getEnclosingWindow();
230         final boolean isAboutBlank = getUrl() == WebClient.URL_ABOUT_BLANK;
231         if (isAboutBlank) {
232             // a frame contains first a faked "about:blank" before its real content specified by src gets loaded
233             if (enclosingWindow instanceof FrameWindow
234                     && !((FrameWindow) enclosingWindow).getFrameElement().isContentLoaded()) {
235                 return;
236             }
237 
238             // save the URL that should be used to resolve relative URLs in this page
239             if (enclosingWindow instanceof TopLevelWindow) {
240                 final TopLevelWindow topWindow = (TopLevelWindow) enclosingWindow;
241                 final WebWindow openerWindow = topWindow.getOpener();
242                 if (openerWindow != null && openerWindow.getEnclosedPage() != null) {
243                     baseUrl_ = openerWindow.getEnclosedPage().getWebResponse().getWebRequest().getUrl();
244                 }
245             }
246         }
247         loadFrames();
248 
249         // don't set the ready state if we really load the blank page into the window
250         // see Node.initInlineFrameIfNeeded()
251         if (!isAboutBlank) {
252             if (hasFeature(FOCUS_BODY_ELEMENT_AT_START)) {
253                 setElementWithFocus(getBody());
254             }
255             setReadyState(READY_STATE_COMPLETE);
256             getDocumentElement().setReadyState(READY_STATE_COMPLETE);
257         }
258 
259         executeEventHandlersIfNeeded(Event.TYPE_DOM_DOCUMENT_LOADED);
260         executeDeferredScriptsIfNeeded();
261         setReadyStateOnDeferredScriptsIfNeeded();
262 
263         // frame initialization has a different order
264         boolean isFrameWindow = enclosingWindow instanceof FrameWindow;
265         boolean isFirstPageInFrameWindow = false;
266         if (isFrameWindow) {
267             isFrameWindow = ((FrameWindow) enclosingWindow).getFrameElement() instanceof HtmlFrame;
268 
269             final History hist = enclosingWindow.getHistory();
270             if (hist.getLength() > 0 && WebClient.URL_ABOUT_BLANK == hist.getUrl(0)) {
271                 isFirstPageInFrameWindow = hist.getLength() <= 2;
272             }
273             else {
274                 isFirstPageInFrameWindow = enclosingWindow.getHistory().getLength() < 2;
275             }
276         }
277 
278         if (isFrameWindow && !isFirstPageInFrameWindow) {
279             executeEventHandlersIfNeeded(Event.TYPE_LOAD);
280         }
281 
282         for (final FrameWindow frameWindow : getFrames()) {
283             if (frameWindow.getFrameElement() instanceof HtmlFrame) {
284                 final Page page = frameWindow.getEnclosedPage();
285                 if (page != null && page.isHtmlPage()) {
286                     ((HtmlPage) page).executeEventHandlersIfNeeded(Event.TYPE_LOAD);
287                 }
288             }
289         }
290 
291         if (!isFrameWindow) {
292             executeEventHandlersIfNeeded(Event.TYPE_LOAD);
293         }
294 
295         try {
296             while (!afterLoadActions_.isEmpty()) {
297                 final PostponedAction action = afterLoadActions_.remove(0);
298                 action.execute();
299             }
300         }
301         catch (final IOException e) {
302             throw e;
303         }
304         catch (final Exception e) {
305             throw new RuntimeException(e);
306         }
307         executeRefreshIfNeeded();
308     }
309 
310     /**
311      * Adds an action that should be executed once the page has been loaded.
312      * @param action the action
313      */
314     void addAfterLoadAction(final PostponedAction action) {
315         afterLoadActions_.add(action);
316     }
317 
318     /**
319      * Clean up this page.
320      */
321     @Override
322     public void cleanUp() {
323         //To avoid endless recursion caused by window.close() in onUnload
324         if (cleaning_) {
325             return;
326         }
327         cleaning_ = true;
328         super.cleanUp();
329         executeEventHandlersIfNeeded(Event.TYPE_UNLOAD);
330         deregisterFramesIfNeeded();
331         cleaning_ = false;
332         if (autoCloseableList_ != null) {
333             for (final AutoCloseable closeable : new ArrayList<>(autoCloseableList_)) {
334                 try {
335                     closeable.close();
336                 }
337                 catch (final Exception e) {
338                     throw new RuntimeException(e);
339                 }
340             }
341         }
342     }
343 
344     /**
345      * {@inheritDoc}
346      */
347     @Override
348     public HtmlElement getDocumentElement() {
349         return (HtmlElement) super.getDocumentElement();
350     }
351 
352     /**
353      * Returns the <tt>body</tt> element (or <tt>frameset</tt> element), or {@code null} if it does not yet exist.
354      * @return the <tt>body</tt> element (or <tt>frameset</tt> element), or {@code null} if it does not yet exist
355      */
356     public HtmlElement getBody() {
357         final DomElement doc = getDocumentElement();
358         if (doc != null) {
359             for (final DomNode node : doc.getChildren()) {
360                 if (node instanceof HtmlBody || node instanceof HtmlFrameSet) {
361                     return (HtmlElement) node;
362                 }
363             }
364         }
365         return null;
366     }
367 
368     /**
369      * Returns the head element.
370      * @return the head element
371      */
372     public HtmlElement getHead() {
373         final DomElement doc = getDocumentElement();
374         if (doc != null) {
375             for (final DomNode node : doc.getChildren()) {
376                 if (node instanceof HtmlHead) {
377                     return (HtmlElement) node;
378                 }
379             }
380         }
381         return null;
382     }
383 
384     /**
385      * {@inheritDoc}
386      */
387     @Override
388     public Document getOwnerDocument() {
389         return null;
390     }
391 
392     /**
393      * {@inheritDoc}
394      * Not yet implemented.
395      */
396     @Override
397     public org.w3c.dom.Node importNode(final org.w3c.dom.Node importedNode, final boolean deep) {
398         throw new UnsupportedOperationException("HtmlPage.importNode is not yet implemented.");
399     }
400 
401     /**
402      * {@inheritDoc}
403      * Not yet implemented.
404      */
405     @Override
406     public String getInputEncoding() {
407         throw new UnsupportedOperationException("HtmlPage.getInputEncoding is not yet implemented.");
408     }
409 
410     /**
411      * {@inheritDoc}
412      */
413     @Override
414     public String getXmlEncoding() {
415         return null;
416     }
417 
418     /**
419      * {@inheritDoc}
420      */
421     @Override
422     public boolean getXmlStandalone() {
423         return false;
424     }
425 
426     /**
427      * {@inheritDoc}
428      * Not yet implemented.
429      */
430     @Override
431     public void setXmlStandalone(final boolean xmlStandalone) throws DOMException {
432         throw new UnsupportedOperationException("HtmlPage.setXmlStandalone is not yet implemented.");
433     }
434 
435     /**
436      * {@inheritDoc}
437      */
438     @Override
439     public String getXmlVersion() {
440         return null;
441     }
442 
443     /**
444      * {@inheritDoc}
445      * Not yet implemented.
446      */
447     @Override
448     public void setXmlVersion(final String xmlVersion) throws DOMException {
449         throw new UnsupportedOperationException("HtmlPage.setXmlVersion is not yet implemented.");
450     }
451 
452     /**
453      * {@inheritDoc}
454      * Not yet implemented.
455      */
456     @Override
457     public boolean getStrictErrorChecking() {
458         throw new UnsupportedOperationException("HtmlPage.getStrictErrorChecking is not yet implemented.");
459     }
460 
461     /**
462      * {@inheritDoc}
463      * Not yet implemented.
464      */
465     @Override
466     public void setStrictErrorChecking(final boolean strictErrorChecking) {
467         throw new UnsupportedOperationException("HtmlPage.setStrictErrorChecking is not yet implemented.");
468     }
469 
470     /**
471      * {@inheritDoc}
472      * Not yet implemented.
473      */
474     @Override
475     public String getDocumentURI() {
476         throw new UnsupportedOperationException("HtmlPage.getDocumentURI is not yet implemented.");
477     }
478 
479     /**
480      * {@inheritDoc}
481      * Not yet implemented.
482      */
483     @Override
484     public void setDocumentURI(final String documentURI) {
485         throw new UnsupportedOperationException("HtmlPage.setDocumentURI is not yet implemented.");
486     }
487 
488     /**
489      * {@inheritDoc}
490      * Not yet implemented.
491      */
492     @Override
493     public org.w3c.dom.Node adoptNode(final org.w3c.dom.Node source) throws DOMException {
494         throw new UnsupportedOperationException("HtmlPage.adoptNode is not yet implemented.");
495     }
496 
497     /**
498      * {@inheritDoc}
499      * Not yet implemented.
500      */
501     @Override
502     public DOMConfiguration getDomConfig() {
503         throw new UnsupportedOperationException("HtmlPage.getDomConfig is not yet implemented.");
504     }
505 
506     /**
507      * {@inheritDoc}
508      * Not yet implemented.
509      */
510     @Override
511     public org.w3c.dom.Node renameNode(final org.w3c.dom.Node newNode, final String namespaceURI,
512         final String qualifiedName) throws DOMException {
513         throw new UnsupportedOperationException("HtmlPage.renameNode is not yet implemented.");
514     }
515 
516     /**
517      * {@inheritDoc}
518      */
519     @Override
520     public Charset getCharset() {
521         if (originalCharset_ == null) {
522             originalCharset_ = getWebResponse().getContentCharset();
523         }
524         return originalCharset_;
525     }
526 
527     /**
528      * {@inheritDoc}
529      */
530     @Override
531     public String getContentType() {
532         return getWebResponse().getContentType();
533     }
534 
535     /**
536      * {@inheritDoc}
537      * Not yet implemented.
538      */
539     @Override
540     public DOMImplementation getImplementation() {
541         throw new UnsupportedOperationException("HtmlPage.getImplementation is not yet implemented.");
542     }
543 
544     /**
545      * {@inheritDoc}
546      * @param tagName the tag name, preferably in lowercase
547      */
548     @Override
549     public DomElement createElement(String tagName) {
550         if (tagName.indexOf(':') == -1) {
551             tagName = tagName.toLowerCase(Locale.ROOT);
552         }
553         return HTMLParser.getFactory(tagName).createElementNS(this, null, tagName, null, true);
554     }
555 
556     /**
557      * {@inheritDoc}
558      */
559     @Override
560     public DomElement createElementNS(final String namespaceURI, final String qualifiedName) {
561         return HTMLParser.getElementFactory(this, namespaceURI, qualifiedName, false, true)
562             .createElementNS(this, namespaceURI, qualifiedName, null, true);
563     }
564 
565     /**
566      * {@inheritDoc}
567      * Not yet implemented.
568      */
569     @Override
570     public Attr createAttributeNS(final String namespaceURI, final String qualifiedName) {
571         throw new UnsupportedOperationException("HtmlPage.createAttributeNS is not yet implemented.");
572     }
573 
574     /**
575      * {@inheritDoc}
576      * Not yet implemented.
577      */
578     @Override
579     public EntityReference createEntityReference(final String id) {
580         throw new UnsupportedOperationException("HtmlPage.createEntityReference is not yet implemented.");
581     }
582 
583     /**
584      * {@inheritDoc}
585      * Not yet implemented.
586      */
587     @Override
588     public ProcessingInstruction createProcessingInstruction(final String namespaceURI, final String qualifiedName) {
589         throw new UnsupportedOperationException("HtmlPage.createProcessingInstruction is not yet implemented.");
590     }
591 
592     /**
593      * {@inheritDoc}
594      */
595     @Override
596     public DomElement getElementById(final String elementId) {
597         final SortedSet<DomElement> elements = idMap_.get(elementId);
598         if (elements != null) {
599             return elements.first();
600         }
601         return null;
602     }
603 
604     /**
605      * Returns the {@link HtmlAnchor} with the specified name.
606      *
607      * @param name the name to search by
608      * @return the {@link HtmlAnchor} with the specified name
609      * @throws ElementNotFoundException if the anchor could not be found
610      */
611     public HtmlAnchor getAnchorByName(final String name) throws ElementNotFoundException {
612         return getDocumentElement().getOneHtmlElementByAttribute("a", "name", name);
613     }
614 
615     /**
616      * Returns the {@link HtmlAnchor} with the specified href.
617      *
618      * @param href the string to search by
619      * @return the HtmlAnchor
620      * @throws ElementNotFoundException if the anchor could not be found
621      */
622     public HtmlAnchor getAnchorByHref(final String href) throws ElementNotFoundException {
623         return getDocumentElement().getOneHtmlElementByAttribute("a", "href", href);
624     }
625 
626     /**
627      * Returns a list of all anchors contained in this page.
628      * @return the list of {@link HtmlAnchor} in this page
629      */
630     public List<HtmlAnchor> getAnchors() {
631         return getDocumentElement().getElementsByTagNameImpl("a");
632     }
633 
634     /**
635      * Returns the first anchor with the specified text.
636      * @param text the text to search for
637      * @return the first anchor that was found
638      * @throws ElementNotFoundException if no anchors are found with the specified text
639      */
640     public HtmlAnchor getAnchorByText(final String text) throws ElementNotFoundException {
641         WebAssert.notNull("text", text);
642 
643         for (final HtmlAnchor anchor : getAnchors()) {
644             if (text.equals(anchor.asText())) {
645                 return anchor;
646             }
647         }
648         throw new ElementNotFoundException("a", "<text>", text);
649     }
650 
651     /**
652      * Returns the first form that matches the specified name.
653      * @param name the name to search for
654      * @return the first form
655      * @exception ElementNotFoundException If no forms match the specified result.
656      */
657     public HtmlForm getFormByName(final String name) throws ElementNotFoundException {
658         final List<HtmlForm> forms = getDocumentElement().getElementsByAttribute("form", "name", name);
659         if (forms.isEmpty()) {
660             throw new ElementNotFoundException("form", "name", name);
661         }
662         return forms.get(0);
663     }
664 
665     /**
666      * Returns a list of all the forms in this page.
667      * @return all the forms in this page
668      */
669     public List<HtmlForm> getForms() {
670         return getDocumentElement().getElementsByTagNameImpl("form");
671     }
672 
673     /**
674      * Given a relative URL (ie <tt>/foo</tt>), returns a fully-qualified URL based on
675      * the URL that was used to load this page.
676      *
677      * @param relativeUrl the relative URL
678      * @return the fully-qualified URL for the specified relative URL
679      * @exception MalformedURLException if an error occurred when creating a URL object
680      */
681     public URL getFullyQualifiedUrl(String relativeUrl) throws MalformedURLException {
682         final URL baseUrl = getBaseURL();
683 
684         // to handle http: and http:/ in FF (Bug #474)
685         if (hasFeature(URL_MISSING_SLASHES)) {
686             boolean incorrectnessNotified = false;
687             while (relativeUrl.startsWith("http:") && !relativeUrl.startsWith("http://")) {
688                 if (!incorrectnessNotified) {
689                     notifyIncorrectness("Incorrect URL \"" + relativeUrl + "\" has been corrected");
690                     incorrectnessNotified = true;
691                 }
692                 relativeUrl = "http:/" + relativeUrl.substring(5);
693             }
694         }
695 
696         return WebClient.expandUrl(baseUrl, relativeUrl);
697     }
698 
699     /**
700      * Given a target attribute value, resolve the target using a base target for the page.
701      *
702      * @param elementTarget the target specified as an attribute of the element
703      * @return the resolved target to use for the element
704      */
705     public String getResolvedTarget(final String elementTarget) {
706         final String resolvedTarget;
707         if (base_ == null) {
708             resolvedTarget = elementTarget;
709         }
710         else if (elementTarget != null && !elementTarget.isEmpty()) {
711             resolvedTarget = elementTarget;
712         }
713         else {
714             resolvedTarget = base_.getTargetAttribute();
715         }
716         return resolvedTarget;
717     }
718 
719     /**
720      * Returns a list of ids (strings) that correspond to the tabbable elements
721      * in this page. Return them in the same order specified in {@link #getTabbableElements}
722      *
723      * @return the list of id's
724      */
725     public List<String> getTabbableElementIds() {
726         final List<String> list = new ArrayList<>();
727 
728         for (final HtmlElement element : getTabbableElements()) {
729             list.add(element.getId());
730         }
731 
732         return Collections.unmodifiableList(list);
733     }
734 
735     /**
736      * Returns a list of all elements that are tabbable in the order that will
737      * be used for tabbing.<p>
738      *
739      * The rules for determining tab order are as follows:
740      * <ol>
741      *   <li>Those elements that support the tabindex attribute and assign a
742      *   positive value to it are navigated first. Navigation proceeds from the
743      *   element with the lowest tabindex value to the element with the highest
744      *   value. Values need not be sequential nor must they begin with any
745      *   particular value. Elements that have identical tabindex values should
746      *   be navigated in the order they appear in the character stream.
747      *   <li>Those elements that do not support the tabindex attribute or
748      *   support it and assign it a value of "0" are navigated next. These
749      *   elements are navigated in the order they appear in the character
750      *   stream.
751      *   <li>Elements that are disabled do not participate in the tabbing
752      *   order.
753      * </ol>
754      * Additionally, the value of tabindex must be within 0 and 32767. Any
755      * values outside this range will be ignored.<p>
756      *
757      * The following elements support the <tt>tabindex</tt> attribute: A, AREA, BUTTON,
758      * INPUT, OBJECT, SELECT, and TEXTAREA.<p>
759      *
760      * @return all the tabbable elements in proper tab order
761      */
762     public List<HtmlElement> getTabbableElements() {
763         final List<HtmlElement> tabbableElements = new ArrayList<>();
764         for (final HtmlElement element : getHtmlElementDescendants()) {
765             final String tagName = element.getTagName();
766             if (TABBABLE_TAGS.contains(tagName)) {
767                 final boolean disabled = element.hasAttribute("disabled");
768                 if (!disabled && element.getTabIndex() != HtmlElement.TAB_INDEX_OUT_OF_BOUNDS) {
769                     tabbableElements.add(element);
770                 }
771             }
772         }
773         Collections.sort(tabbableElements, createTabOrderComparator());
774         return Collections.unmodifiableList(tabbableElements);
775     }
776 
777     private static Comparator<HtmlElement> createTabOrderComparator() {
778         return new Comparator<HtmlElement>() {
779             @Override
780             public int compare(final HtmlElement element1, final HtmlElement element2) {
781                 final Short i1 = element1.getTabIndex();
782                 final Short i2 = element2.getTabIndex();
783 
784                 final short index1;
785                 if (i1 != null) {
786                     index1 = i1.shortValue();
787                 }
788                 else {
789                     index1 = -1;
790                 }
791 
792                 final short index2;
793                 if (i2 != null) {
794                     index2 = i2.shortValue();
795                 }
796                 else {
797                     index2 = -1;
798                 }
799 
800                 final int result;
801                 if (index1 > 0 && index2 > 0) {
802                     result = index1 - index2;
803                 }
804                 else if (index1 > 0) {
805                     result = -1;
806                 }
807                 else if (index2 > 0) {
808                     result = +1;
809                 }
810                 else if (index1 == index2) {
811                     result = 0;
812                 }
813                 else {
814                     result = index2 - index1;
815                 }
816 
817                 return result;
818             }
819         };
820     }
821 
822     /**
823      * Returns the HTML element that is assigned to the specified access key. An
824      * access key (aka mnemonic key) is used for keyboard navigation of the
825      * page.<p>
826      *
827      * Only the following HTML elements may have <tt>accesskey</tt>s defined: A, AREA,
828      * BUTTON, INPUT, LABEL, LEGEND, and TEXTAREA.
829      *
830      * @param accessKey the key to look for
831      * @return the HTML element that is assigned to the specified key or null
832      *      if no elements can be found that match the specified key.
833      */
834     public HtmlElement getHtmlElementByAccessKey(final char accessKey) {
835         final List<HtmlElement> elements = getHtmlElementsByAccessKey(accessKey);
836         if (elements.isEmpty()) {
837             return null;
838         }
839         return elements.get(0);
840     }
841 
842     /**
843      * Returns all the HTML elements that are assigned to the specified access key. An
844      * access key (aka mnemonic key) is used for keyboard navigation of the
845      * page.<p>
846      *
847      * The HTML specification seems to indicate that one accesskey cannot be used
848      * for multiple elements however Internet Explorer does seem to support this.
849      * It's worth noting that Firefox does not support multiple elements with one
850      * access key so you are making your HTML browser specific if you rely on this
851      * feature.<p>
852      *
853      * Only the following HTML elements may have <tt>accesskey</tt>s defined: A, AREA,
854      * BUTTON, INPUT, LABEL, LEGEND, and TEXTAREA.
855      *
856      * @param accessKey the key to look for
857      * @return the elements that are assigned to the specified accesskey
858      */
859     public List<HtmlElement> getHtmlElementsByAccessKey(final char accessKey) {
860         final List<HtmlElement> elements = new ArrayList<>();
861 
862         final String searchString = Character.toString(accessKey).toLowerCase(Locale.ROOT);
863         for (final HtmlElement element : getHtmlElementDescendants()) {
864             if (ACCEPTABLE_TAG_NAMES.contains(element.getTagName())) {
865                 final String accessKeyAttribute = element.getAttribute("accesskey");
866                 if (searchString.equalsIgnoreCase(accessKeyAttribute)) {
867                     elements.add(element);
868                 }
869             }
870         }
871 
872         return elements;
873     }
874 
875     /**
876      * <p>Executes the specified JavaScript code within the page. The usage would be similar to what can
877      * be achieved to execute JavaScript in the current page by entering "javascript:...some JS code..."
878      * in the URL field of a native browser.</p>
879      * <p><b>Note:</b> the provided code won't be executed if JavaScript has been disabled on the WebClient
880      * (see {@link com.gargoylesoftware.htmlunit.WebClientOptions#isJavaScriptEnabled()}.</p>
881      * @param sourceCode the JavaScript code to execute
882      * @return a ScriptResult which will contain both the current page (which may be different than
883      * the previous page) and a JavaScript result object
884      */
885     public ScriptResult executeJavaScript(final String sourceCode) {
886         return executeJavaScript(sourceCode, "injected script", 1);
887     }
888 
889     /**
890      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
891      * <p>
892      * Execute the specified JavaScript if a JavaScript engine was successfully
893      * instantiated. If this JavaScript causes the current page to be reloaded
894      * (through location="" or form.submit()) then return the new page. Otherwise
895      * return the current page.
896      * </p>
897      * <p><b>Please note:</b> Although this method is public, it is not intended for
898      * general execution of JavaScript. Users of HtmlUnit should interact with the pages
899      * as a user would by clicking on buttons or links and having the JavaScript event
900      * handlers execute as needed..
901      * </p>
902      *
903      * @param sourceCode the JavaScript code to execute
904      * @param sourceName the name for this chunk of code (will be displayed in error messages)
905      * @param startLine the line at which the script source starts
906      * @return a ScriptResult which will contain both the current page (which may be different than
907      * the previous page and a JavaScript result object.
908      */
909     public ScriptResult executeJavaScript(String sourceCode, final String sourceName, final int startLine) {
910         if (!getWebClient().getOptions().isJavaScriptEnabled()) {
911             return new ScriptResult(null, this);
912         }
913 
914         if (StringUtils.startsWithIgnoreCase(sourceCode, JavaScriptURLConnection.JAVASCRIPT_PREFIX)) {
915             sourceCode = sourceCode.substring(JavaScriptURLConnection.JAVASCRIPT_PREFIX.length()).trim();
916             if (sourceCode.startsWith("return ")) {
917                 sourceCode = sourceCode.substring("return ".length());
918             }
919         }
920 
921         final Object result = getWebClient().getJavaScriptEngine().execute(this, sourceCode, sourceName, startLine);
922         return new ScriptResult(result, getWebClient().getCurrentWindow().getEnclosedPage());
923     }
924 
925     /** Various possible external JavaScript file loading results. */
926     enum JavaScriptLoadResult {
927         /** The load was aborted and nothing was done. */
928         NOOP,
929         /** The external JavaScript file was downloaded and compiled successfully. */
930         SUCCESS,
931         /** The external JavaScript file was not downloaded successfully. */
932         DOWNLOAD_ERROR,
933         /** The external JavaScript file was downloaded but was not compiled successfully. */
934         COMPILATION_ERROR
935     }
936 
937     /**
938      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
939      *
940      * @param srcAttribute the source attribute from the script tag
941      * @param scriptCharset the charset from the script tag
942      * @return the result of loading the specified external JavaScript file
943      * @throws FailingHttpStatusCodeException if the request's status code indicates a request
944      *         failure and the {@link WebClient} was configured to throw exceptions on failing
945      *         HTTP status codes
946      */
947     JavaScriptLoadResult loadExternalJavaScriptFile(final String srcAttribute, final Charset scriptCharset)
948         throws FailingHttpStatusCodeException {
949 
950         final WebClient client = getWebClient();
951         if (StringUtils.isBlank(srcAttribute) || !client.getOptions().isJavaScriptEnabled()) {
952             return JavaScriptLoadResult.NOOP;
953         }
954 
955         final URL scriptURL;
956         try {
957             scriptURL = getFullyQualifiedUrl(srcAttribute);
958             final String protocol = scriptURL.getProtocol();
959             if ("javascript".equals(protocol)) {
960                 LOG.info("Ignoring script src [" + srcAttribute + "]");
961                 return JavaScriptLoadResult.NOOP;
962             }
963             if (!"http".equals(protocol) && !"https".equals(protocol)
964                     && !"data".equals(protocol) && !"file".equals(protocol)) {
965                 client.getJavaScriptErrorListener().malformedScriptURL(this, srcAttribute,
966                         new MalformedURLException("unknown protocol: '" + protocol + "'"));
967                 return JavaScriptLoadResult.NOOP;
968             }
969         }
970         catch (final MalformedURLException e) {
971             client.getJavaScriptErrorListener().malformedScriptURL(this, srcAttribute, e);
972             return JavaScriptLoadResult.NOOP;
973         }
974 
975         final Object script;
976         try {
977             script = loadJavaScriptFromUrl(scriptURL, scriptCharset);
978         }
979         catch (final IOException e) {
980             client.getJavaScriptErrorListener().loadScriptError(this, scriptURL, e);
981             return JavaScriptLoadResult.DOWNLOAD_ERROR;
982         }
983         catch (final FailingHttpStatusCodeException e) {
984             client.getJavaScriptErrorListener().loadScriptError(this, scriptURL, e);
985             throw e;
986         }
987 
988         if (script == null) {
989             return JavaScriptLoadResult.COMPILATION_ERROR;
990         }
991 
992         @SuppressWarnings("unchecked")
993         final AbstractJavaScriptEngine<Object> engine = (AbstractJavaScriptEngine<Object>) client.getJavaScriptEngine();
994         engine.execute(this, script);
995         return JavaScriptLoadResult.SUCCESS;
996     }
997 
998     /**
999      * Loads JavaScript from the specified URL. This method may return {@code null} if
1000      * there is a problem loading the code from the specified URL.
1001      *
1002      * @param url the URL of the script
1003      * @param scriptCharset the charset from the script tag
1004      * @return the content of the file, or {@code null} if we ran into a compile error
1005      * @throws IOException if there is a problem downloading the JavaScript file
1006      * @throws FailingHttpStatusCodeException if the request's status code indicates a request
1007      *         failure and the {@link WebClient} was configured to throw exceptions on failing
1008      *         HTTP status codes
1009      */
1010     private Object loadJavaScriptFromUrl(final URL url, final Charset scriptCharset) throws IOException,
1011         FailingHttpStatusCodeException {
1012 
1013         final WebRequest referringRequest = getWebResponse().getWebRequest();
1014 
1015         final WebClient client = getWebClient();
1016         final BrowserVersion browser = client.getBrowserVersion();
1017         final WebRequest request = new WebRequest(url, browser.getScriptAcceptHeader());
1018         request.setAdditionalHeaders(new HashMap<>(referringRequest.getAdditionalHeaders()));
1019         request.setAdditionalHeader(HttpHeader.REFERER, referringRequest.getUrl().toString());
1020         request.setAdditionalHeader(HttpHeader.ACCEPT, client.getBrowserVersion().getScriptAcceptHeader());
1021 
1022         // our cache is a bit strange;
1023         // loadWebResponse check the cache for the web response
1024         // AND also fixes the request url for the following cache lookups
1025         final WebResponse response = client.loadWebResponse(request);
1026 
1027         // now we can look into the cache with the fixed request for
1028         // a cached script
1029         final Cache cache = client.getCache();
1030         final Object cachedScript = cache.getCachedObject(request);
1031         if (cachedScript instanceof Script) {
1032             return cachedScript;
1033         }
1034 
1035         client.printContentIfNecessary(response);
1036         client.throwFailingHttpStatusCodeExceptionIfNecessary(response);
1037 
1038         final int statusCode = response.getStatusCode();
1039         final boolean successful = statusCode >= HttpStatus.SC_OK && statusCode < HttpStatus.SC_MULTIPLE_CHOICES;
1040         final boolean noContent = statusCode == HttpStatus.SC_NO_CONTENT;
1041         if (!successful || noContent) {
1042             throw new IOException("Unable to download JavaScript from '" + url + "' (status " + statusCode + ").");
1043         }
1044 
1045         //http://www.ietf.org/rfc/rfc4329.txt
1046         final String contentType = response.getContentType();
1047         if (!"application/javascript".equalsIgnoreCase(contentType)
1048             && !"application/ecmascript".equalsIgnoreCase(contentType)) {
1049             // warn about obsolete or not supported content types
1050             if ("text/javascript".equals(contentType)
1051                     || "text/ecmascript".equals(contentType)
1052                     || "application/x-javascript".equalsIgnoreCase(contentType)) {
1053                 getWebClient().getIncorrectnessListener().notify(
1054                     "Obsolete content type encountered: '" + contentType + "'.", this);
1055             }
1056             else {
1057                 getWebClient().getIncorrectnessListener().notify(
1058                         "Expected content type of 'application/javascript' or 'application/ecmascript' for "
1059                         + "remotely loaded JavaScript element at '" + url + "', "
1060                         + "but got '" + contentType + "'.", this);
1061             }
1062         }
1063 
1064         Charset scriptEncoding = Charset.forName("windows-1252");
1065         boolean ignoreBom = false;
1066         final Charset contentCharset = EncodingSniffer.sniffEncodingFromHttpHeaders(response.getResponseHeaders());
1067         if (contentCharset == null) {
1068             // use info from script tag or fall back to utf-8
1069             if (scriptCharset != null && ISO_8859_1 != scriptCharset) {
1070                 ignoreBom = true;
1071                 scriptEncoding = scriptCharset;
1072             }
1073             else {
1074                 ignoreBom = ISO_8859_1 != scriptCharset;
1075             }
1076         }
1077         else if (ISO_8859_1 != contentCharset) {
1078             ignoreBom = true;
1079             scriptEncoding = contentCharset;
1080         }
1081         else {
1082             ignoreBom = true;
1083         }
1084 
1085         final String scriptCode = response.getContentAsString(scriptEncoding,
1086                                 ignoreBom
1087                                 && getWebClient().getBrowserVersion().hasFeature(JS_IGNORES_UTF8_BOM_SOMETIMES));
1088         if (null != scriptCode) {
1089             final AbstractJavaScriptEngine<?> javaScriptEngine = client.getJavaScriptEngine();
1090             final Object script = javaScriptEngine.compile(this, scriptCode, url.toExternalForm(), 1);
1091             if (script != null && cache.cacheIfPossible(request, response, script)) {
1092                 // no cleanup if the response is stored inside the cache
1093                 return script;
1094             }
1095 
1096             response.cleanUp();
1097             return script;
1098         }
1099 
1100         response.cleanUp();
1101         return null;
1102     }
1103 
1104     /**
1105      * Returns the title of this page or an empty string if the title wasn't specified.
1106      *
1107      * @return the title of this page or an empty string if the title wasn't specified
1108      */
1109     public String getTitleText() {
1110         final HtmlTitle titleElement = getTitleElement();
1111         if (titleElement != null) {
1112             return titleElement.asText();
1113         }
1114         return "";
1115     }
1116 
1117     /**
1118      * Sets the text for the title of this page. If there is not a title element
1119      * on this page, then one has to be generated.
1120      * @param message the new text
1121      */
1122     public void setTitleText(final String message) {
1123         HtmlTitle titleElement = getTitleElement();
1124         if (titleElement == null) {
1125             if (LOG.isDebugEnabled()) {
1126                 LOG.debug("No title element, creating one");
1127             }
1128             final HtmlHead head = (HtmlHead) getFirstChildElement(getDocumentElement(), HtmlHead.class);
1129             if (head == null) {
1130                 // perhaps should we create head too?
1131                 throw new IllegalStateException("Headelement was not defined for this page");
1132             }
1133             final Map<String, DomAttr> emptyMap = Collections.emptyMap();
1134             titleElement = new HtmlTitle(HtmlTitle.TAG_NAME, this, emptyMap);
1135             if (head.getFirstChild() != null) {
1136                 head.getFirstChild().insertBefore(titleElement);
1137             }
1138             else {
1139                 head.appendChild(titleElement);
1140             }
1141         }
1142 
1143         titleElement.setNodeValue(message);
1144     }
1145 
1146     /**
1147      * Gets the first child of startElement that is an instance of the given class.
1148      * @param startElement the parent element
1149      * @param clazz the class to search for
1150      * @return {@code null} if no child found
1151      */
1152     private static DomElement getFirstChildElement(final DomElement startElement, final Class<?> clazz) {
1153         if (startElement == null) {
1154             return null;
1155         }
1156         for (final DomElement element : startElement.getChildElements()) {
1157             if (clazz.isInstance(element)) {
1158                 return element;
1159             }
1160         }
1161 
1162         return null;
1163     }
1164 
1165     /**
1166      * Gets the first child of startElement or it's children that is an instance of the given class.
1167      * @param startElement the parent element
1168      * @param clazz the class to search for
1169      * @return {@code null} if no child found
1170      */
1171     private DomElement getFirstChildElementRecursive(final DomElement startElement, final Class<?> clazz) {
1172         if (startElement == null) {
1173             return null;
1174         }
1175         for (final DomElement element : startElement.getChildElements()) {
1176             if (clazz.isInstance(element)) {
1177                 return element;
1178             }
1179             final DomElement childFound = getFirstChildElementRecursive(element, clazz);
1180             if (childFound != null) {
1181                 return childFound;
1182             }
1183         }
1184 
1185         return null;
1186     }
1187 
1188     /**
1189      * Gets the title element for this page. Returns null if one is not found.
1190      *
1191      * @return the title element for this page or null if this is not one
1192      */
1193     private HtmlTitle getTitleElement() {
1194         return (HtmlTitle) getFirstChildElementRecursive(getDocumentElement(), HtmlTitle.class);
1195     }
1196 
1197     /**
1198      * Looks for and executes any appropriate event handlers. Looks for body and frame tags.
1199      * @param eventType either {@link Event#TYPE_LOAD}, {@link Event#TYPE_UNLOAD}, or {@link Event#TYPE_BEFORE_UNLOAD}
1200      * @return {@code true} if user accepted <tt>onbeforeunload</tt> (not relevant to other events)
1201      */
1202     private boolean executeEventHandlersIfNeeded(final String eventType) {
1203         // If JavaScript isn't enabled, there's nothing for us to do.
1204         if (!getWebClient().getOptions().isJavaScriptEnabled()) {
1205             return true;
1206         }
1207 
1208         // Execute the specified event on the document element.
1209         final WebWindow window = getEnclosingWindow();
1210         if (window.getScriptableObject() instanceof Window) {
1211             final DomElement element = getDocumentElement();
1212             if (element == null) { // happens for instance if document.documentElement has been removed from parent
1213                 return true;
1214             }
1215             final Event event;
1216             if (eventType.equals(Event.TYPE_BEFORE_UNLOAD)) {
1217                 event = new BeforeUnloadEvent(element, eventType);
1218             }
1219             else {
1220                 event = new Event(element, eventType);
1221             }
1222             final ScriptResult result = element.fireEvent(event);
1223             if (!isOnbeforeunloadAccepted(this, event, result)) {
1224                 return false;
1225             }
1226         }
1227 
1228         // If this page was loaded in a frame, execute the version of the event specified on the frame tag.
1229         if (window instanceof FrameWindow) {
1230             final FrameWindow fw = (FrameWindow) window;
1231             final BaseFrameElement frame = fw.getFrameElement();
1232 
1233             // if part of a document fragment, then the load event is not triggered
1234             if (Event.TYPE_LOAD.equals(eventType) && frame.getParentNode() instanceof DomDocumentFragment) {
1235                 return true;
1236             }
1237 
1238             if (frame.hasEventHandlers("on" + eventType)) {
1239                 if (LOG.isDebugEnabled()) {
1240                     LOG.debug("Executing on" + eventType + " handler for " + frame);
1241                 }
1242                 if (window.getScriptableObject() instanceof Window) {
1243                     final Event event;
1244                     if (eventType.equals(Event.TYPE_BEFORE_UNLOAD)) {
1245                         event = new BeforeUnloadEvent(frame, eventType);
1246                     }
1247                     else {
1248                         event = new Event(frame, eventType);
1249                     }
1250                     final ScriptResult result = ((Node) frame.getScriptableObject()).executeEventLocally(event);
1251                     if (!isOnbeforeunloadAccepted((HtmlPage) frame.getPage(), event, result)) {
1252                         return false;
1253                     }
1254                 }
1255             }
1256         }
1257 
1258         return true;
1259     }
1260 
1261     /**
1262      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
1263      *
1264      * @return true if the OnbeforeunloadHandler has accepted to change the page
1265      */
1266     public boolean isOnbeforeunloadAccepted() {
1267         return executeEventHandlersIfNeeded(Event.TYPE_BEFORE_UNLOAD);
1268     }
1269 
1270     private boolean isOnbeforeunloadAccepted(final HtmlPage page, final Event event, final ScriptResult result) {
1271         if (event.getType().equals(Event.TYPE_BEFORE_UNLOAD)) {
1272             final boolean ie = hasFeature(JS_CALL_RESULT_IS_LAST_RETURN_VALUE);
1273             final String message = getBeforeUnloadMessage(event, result, ie);
1274             if (message != null) {
1275                 final OnbeforeunloadHandler handler = getWebClient().getOnbeforeunloadHandler();
1276                 if (handler == null) {
1277                     LOG.warn("document.onbeforeunload() returned a string in event.returnValue,"
1278                             + " but no onbeforeunload handler installed.");
1279                 }
1280                 else {
1281                     return handler.handleEvent(page, message);
1282                 }
1283             }
1284         }
1285         return true;
1286     }
1287 
1288     private static String getBeforeUnloadMessage(final Event event, final ScriptResult result, final boolean ie) {
1289         String message = null;
1290         if (event.getReturnValue() != Undefined.instance) {
1291             if (!ie || event.getReturnValue() != null || result == null || result.getJavaScriptResult() == null
1292                     || result.getJavaScriptResult() == Undefined.instance) {
1293                 message = Context.toString(event.getReturnValue());
1294             }
1295         }
1296         else {
1297             if (result != null) {
1298                 if (ie) {
1299                     if (result.getJavaScriptResult() != Undefined.instance) {
1300                         message = Context.toString(result.getJavaScriptResult());
1301                     }
1302                 }
1303                 else if (result.getJavaScriptResult() != null
1304                         && result.getJavaScriptResult() != Undefined.instance) {
1305                     message = Context.toString(result.getJavaScriptResult());
1306                 }
1307             }
1308         }
1309         return message;
1310     }
1311 
1312     /**
1313      * If a refresh has been specified either through a meta tag or an HTTP
1314      * response header, then perform that refresh.
1315      * @throws IOException if an IO problem occurs
1316      */
1317     private void executeRefreshIfNeeded() throws IOException {
1318         // If this page is not in a frame then a refresh has already happened,
1319         // most likely through the JavaScript onload handler, so we don't do a
1320         // second refresh.
1321         final WebWindow window = getEnclosingWindow();
1322         if (window == null) {
1323             return;
1324         }
1325 
1326         final String refreshString = getRefreshStringOrNull();
1327         if (refreshString == null || refreshString.isEmpty()) {
1328             return;
1329         }
1330 
1331         final double time;
1332         final URL url;
1333 
1334         int index = StringUtils.indexOfAnyBut(refreshString, "0123456789");
1335         final boolean timeOnly = index == -1;
1336 
1337         if (timeOnly) {
1338             // Format: <meta http-equiv='refresh' content='10'>
1339             try {
1340                 time = Double.parseDouble(refreshString);
1341             }
1342             catch (final NumberFormatException e) {
1343                 LOG.error("Malformed refresh string (no ';' but not a number): " + refreshString, e);
1344                 return;
1345             }
1346             url = getUrl();
1347         }
1348         else {
1349             // Format: <meta http-equiv='refresh' content='10;url=http://www.blah.com'>
1350             try {
1351                 time = Double.parseDouble(refreshString.substring(0, index).trim());
1352             }
1353             catch (final NumberFormatException e) {
1354                 LOG.error("Malformed refresh string (no valid number before ';') " + refreshString, e);
1355                 return;
1356             }
1357             index = refreshString.toLowerCase(Locale.ROOT).indexOf("url=", index);
1358             if (index == -1) {
1359                 LOG.error("Malformed refresh string (found ';' but no 'url='): " + refreshString);
1360                 return;
1361             }
1362             final StringBuilder builder = new StringBuilder(refreshString.substring(index + 4));
1363             if (StringUtils.isBlank(builder.toString())) {
1364                 //content='10; URL=' is treated as content='10'
1365                 url = getUrl();
1366             }
1367             else {
1368                 if (builder.charAt(0) == '"' || builder.charAt(0) == 0x27) {
1369                     builder.deleteCharAt(0);
1370                 }
1371                 if (builder.charAt(builder.length() - 1) == '"' || builder.charAt(builder.length() - 1) == 0x27) {
1372                     builder.deleteCharAt(builder.length() - 1);
1373                 }
1374                 final String urlString = builder.toString();
1375                 try {
1376                     url = getFullyQualifiedUrl(urlString);
1377                 }
1378                 catch (final MalformedURLException e) {
1379                     LOG.error("Malformed URL in refresh string: " + refreshString, e);
1380                     throw e;
1381                 }
1382             }
1383         }
1384 
1385         final int timeRounded = (int) time;
1386         checkRecursion();
1387         getWebClient().getRefreshHandler().handleRefresh(this, url, timeRounded);
1388     }
1389 
1390     private void checkRecursion() {
1391         final StackTraceElement[] elements = new Exception().getStackTrace();
1392         if (elements.length > 500) {
1393             for (int i = 0; i < 500; i++) {
1394                 if (!elements[i].getClassName().startsWith("com.gargoylesoftware.htmlunit.")) {
1395                     return;
1396                 }
1397             }
1398             final WebResponse webResponse = getWebResponse();
1399             throw new FailingHttpStatusCodeException("Too much redirect for "
1400                     + webResponse.getWebRequest().getUrl(), webResponse);
1401         }
1402     }
1403 
1404     /**
1405      * Returns an auto-refresh string if specified. This will look in both the meta
1406      * tags and inside the HTTP response headers.
1407      * @return the auto-refresh string
1408      */
1409     private String getRefreshStringOrNull() {
1410         final List<HtmlMeta> metaTags = getMetaTags("refresh");
1411         if (!metaTags.isEmpty()) {
1412             return metaTags.get(0).getContentAttribute().trim();
1413         }
1414         return getWebResponse().getResponseHeaderValue("Refresh");
1415     }
1416 
1417     /**
1418      * Executes any deferred scripts, if necessary.
1419      */
1420     private void executeDeferredScriptsIfNeeded() {
1421         if (!getWebClient().getOptions().isJavaScriptEnabled()) {
1422             return;
1423         }
1424         if (hasFeature(JS_DEFERRED)) {
1425             final DomElement doc = getDocumentElement();
1426             final List<HtmlElement> elements = doc.getElementsByTagName("script");
1427             for (final HtmlElement e : elements) {
1428                 if (e instanceof HtmlScript) {
1429                     final HtmlScript script = (HtmlScript) e;
1430                     if (script.isDeferred()) {
1431                         script.executeScriptIfNeeded();
1432                     }
1433                 }
1434             }
1435         }
1436     }
1437 
1438     /**
1439      * Sets the ready state on any deferred scripts, if necessary.
1440      */
1441     private void setReadyStateOnDeferredScriptsIfNeeded() {
1442         if (getWebClient().getOptions().isJavaScriptEnabled() && hasFeature(JS_DEFERRED)) {
1443             final List<HtmlElement> elements = getDocumentElement().getElementsByTagName("script");
1444             for (final HtmlElement e : elements) {
1445                 if (e instanceof HtmlScript) {
1446                     final HtmlScript script = (HtmlScript) e;
1447                     if (script.isDeferred()) {
1448                         script.setAndExecuteReadyState(READY_STATE_COMPLETE);
1449                     }
1450                 }
1451             }
1452         }
1453     }
1454 
1455     /**
1456      * Deregister frames that are no longer in use.
1457      */
1458     public void deregisterFramesIfNeeded() {
1459         for (final WebWindow window : getFrames()) {
1460             getWebClient().deregisterWebWindow(window);
1461             final Page page = window.getEnclosedPage();
1462             if (page != null && page.isHtmlPage()) {
1463                 // seems quite silly, but for instance if the src attribute of an iframe is not
1464                 // set, the error only occurs when leaving the page
1465                 ((HtmlPage) page).deregisterFramesIfNeeded();
1466             }
1467         }
1468     }
1469 
1470     /**
1471      * Collects the enclosed window of every frame element (frame and iframe tags) of this page.
1472      * @return a newly created list of {@link FrameWindow}s
1473      */
1474     public List<FrameWindow> getFrames() {
1475         final List<FrameWindow> windows = new ArrayList<>(frameElements_.size());
1476         for (final BaseFrameElement element : frameElements_) {
1477             windows.add(element.getEnclosedWindow());
1478         }
1479         return windows;
1480     }
1481 
1482     /**
1483      * Looks up the first frame of this page whose name equals the given one.
1484      * @param name the frame name to look for
1485      * @return the first matching frame
1486      * @exception ElementNotFoundException if this page contains no frame with the given name
1487      */
1488     public FrameWindow getFrameByName(final String name) throws ElementNotFoundException {
1489         for (final FrameWindow candidate : getFrames()) {
1490             final boolean sameName = candidate.getName().equals(name);
1491             if (sameName) {
1492                 return candidate;
1493             }
1494         }
1495         throw new ElementNotFoundException("frame or iframe", "name", name);
1496     }
1497 
1498     /**
1499      * Simulates pressing an access key. The element bound to the key receives the focus; anchors, areas,
1500      * buttons, inputs, labels, legends and text areas are clicked as well, which may invoke JavaScript.
1501      *
1502      * @param accessKey the key that will be pressed
1503      * @return the element that has the focus after pressing this access key or null if no element
1504      * has the focus.
1505      * @throws IOException if an IO error occurs during the processing of this access key (this
1506      *         would only happen if the access key triggered a button which in turn caused a page load)
1507      */
1508     public DomElement pressAccessKey(final char accessKey) throws IOException {
1509         final HtmlElement element = getHtmlElementByAccessKey(accessKey);
1510         if (element != null) {
1511             element.focus();
1512             final Page newPage;
1513             if (element instanceof HtmlAnchor || element instanceof HtmlArea || element instanceof HtmlButton
1514                     || element instanceof HtmlInput || element instanceof HtmlLabel || element instanceof HtmlLegend
1515                     || element instanceof HtmlTextArea) {
1516                 newPage = element.click();
1517             }
1518             else {
1519                 newPage = this;
1520             }
1521 
1522             if (newPage != this && getFocusedElement() == element) {
1523                 // The page was reloaded therefore no element on this page will have the focus.
1524                 getFocusedElement().blur();
1525             }
1526         }
1527 
1528         return getFocusedElement();
1529     }
1530 
1531     /**
1532      * Move the focus to the next element in the tab order. To determine the specified tab
1533      * order, refer to {@link HtmlPage#getTabbableElements()}
1534      *
1535      * @return the element that has focus after calling this method
1536      */
1537     public HtmlElement tabToNextElement() {
1538         final List<HtmlElement> elements = getTabbableElements();
1539         if (elements.isEmpty()) {
1540             setFocusedElement(null);
1541             return null;
1542         }
1543 
1544         final HtmlElement elementToGiveFocus;
1545         final DomElement elementWithFocus = getFocusedElement();
1546         if (elementWithFocus == null) {
1547             elementToGiveFocus = elements.get(0);
1548         }
1549         else {
1550             final int index = elements.indexOf(elementWithFocus);
1551             if (index == -1) {
1552                 // The element with focus isn't on this page
1553                 elementToGiveFocus = elements.get(0);
1554             }
1555             else {
1556                 if (index == elements.size() - 1) {
1557                     elementToGiveFocus = elements.get(0);
1558                 }
1559                 else {
1560                     elementToGiveFocus = elements.get(index + 1);
1561                 }
1562             }
1563         }
1564 
1565         setFocusedElement(elementToGiveFocus);
1566         return elementToGiveFocus;
1567     }
1568 
1569     /**
1570      * Move the focus to the previous element in the tab order. To determine the specified tab
1571      * order, refer to {@link HtmlPage#getTabbableElements()}
1572      *
1573      * @return the element that has focus after calling this method
1574      */
1575     public HtmlElement tabToPreviousElement() {
1576         final List<HtmlElement> elements = getTabbableElements();
1577         if (elements.isEmpty()) {
1578             setFocusedElement(null);
1579             return null;
1580         }
1581 
1582         final HtmlElement elementToGiveFocus;
1583         final DomElement elementWithFocus = getFocusedElement();
1584         if (elementWithFocus == null) {
1585             elementToGiveFocus = elements.get(elements.size() - 1);
1586         }
1587         else {
1588             final int index = elements.indexOf(elementWithFocus);
1589             if (index == -1) {
1590                 // The element with focus isn't on this page
1591                 elementToGiveFocus = elements.get(elements.size() - 1);
1592             }
1593             else {
1594                 if (index == 0) {
1595                     elementToGiveFocus = elements.get(elements.size() - 1);
1596                 }
1597                 else {
1598                     elementToGiveFocus = elements.get(index - 1);
1599                 }
1600             }
1601         }
1602 
1603         setFocusedElement(elementToGiveFocus);
1604         return elementToGiveFocus;
1605     }
1606 
1607     /**
1608      * Returns the HTML element with the given ID, as found by {@code getElementById(elementId)}.
1609      * The result is cast to the element type expected by the caller without any check.
1610      *
1611      * @param elementId the ID value to search for
1612      * @param <E> the element type
1613      * @return the HTML element with the specified ID
1614      * @throws ElementNotFoundException if no element was found matching the specified ID
1615      */
1616     @SuppressWarnings("unchecked")
1617     public <E extends HtmlElement> E getHtmlElementById(final String elementId) throws ElementNotFoundException {
1618         final DomElement found = getElementById(elementId);
1619         if (found != null) {
1620             return (E) found;
1621         }
1622         throw new ElementNotFoundException("*", "id", elementId);
1623     }
1625 
1626     /**
1627      * Returns the elements with the specified ID. If there are no elements with the specified
1628      * ID, an empty immutable list is returned; otherwise the result is a new list holding a
1629      * copy of the currently mapped elements.
1630      *
1631      * @param elementId the ID value to search for
1632      * @return the elements with the specified ID
1633      */
1634     public List<DomElement> getElementsById(final String elementId) {
1635         final SortedSet<DomElement> elements = idMap_.get(elementId);
1636         if (elements == null) {
1637             return Collections.emptyList();
1638         }
1639         return new ArrayList<>(elements);
1640     }
1641 
1642     /**
1643      * Returns the first of the elements mapped under the given name.
1644      *
1645      * @param name the name value to search for
1646      * @param <E> the element type
1647      * @return the element with the specified name
1648      * @throws ElementNotFoundException if no element was found matching the specified name
1649      */
1650     @SuppressWarnings("unchecked")
1651     public <E extends DomElement> E getElementByName(final String name) throws ElementNotFoundException {
1652         final SortedSet<DomElement> matches = nameMap_.get(name);
1653         if (matches == null) {
1654             throw new ElementNotFoundException("*", "name", name);
1655         }
1656         return (E) matches.first();
1657     }
1659 
1660     /**
1661      * Returns the elements with the specified name attribute. If there are no elements with the
1662      * specified name, an empty immutable list is returned; otherwise the result is a new list
1663      * holding a copy of the currently mapped elements.
1664      *
1665      * @param name the name value to search for
1666      * @return the elements with the specified name attribute
1667      */
1668     public List<DomElement> getElementsByName(final String name) {
1669         final SortedSet<DomElement> elements = nameMap_.get(name);
1670         if (elements == null) {
1671             return Collections.emptyList();
1672         }
1673         return new ArrayList<>(elements);
1674     }
1675 
1676     /**
1677      * Returns the elements whose ID or name equals the given string: first the elements found
1678      * by ID, then those found by name that are not already part of the result.
1679      * @param idAndOrName the value to search for
1680      * @return a new list of the matching elements, empty if there is none
1681      */
1682     public List<DomElement> getElementsByIdAndOrName(final String idAndOrName) {
1683         final Collection<DomElement> byId = idMap_.get(idAndOrName);
1684         final Collection<DomElement> byName = nameMap_.get(idAndOrName);
1685         final List<DomElement> result = new ArrayList<>();
1686         if (byId != null) {
1687             result.addAll(byId);
1688         }
1689         if (byName == null) {
1690             return result;
1691         }
1692         for (final DomElement candidate : byName) {
1693             if (!result.contains(candidate)) {
1694                 result.add(candidate);
1695             }
1696         }
1697         return result;
1698     }
1699 
1700     /**
1701      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
1702      * Updates the ID/name maps, the frame set and the base element, then calls {@code onAddedToPage()}.
1703      * @param node the node that has just been added to the document
1704      */
1705     void notifyNodeAdded(final DomNode node) {
1706         if (node instanceof DomElement) {
1707             addMappedElement((DomElement) node, true);
1708             // remember frames: the node itself as well as any frame among its HTML descendants
1709             if (node instanceof BaseFrameElement) {
1710                 frameElements_.add((BaseFrameElement) node);
1711             }
1712             for (final HtmlElement child : node.getHtmlElementDescendants()) {
1713                 if (child instanceof BaseFrameElement) {
1714                     frameElements_.add((BaseFrameElement) child);
1715                 }
1716             }
1717             // a new base element requires the base of this page to be determined again
1718             if ("base".equals(node.getNodeName())) {
1719                 calculateBase();
1720             }
1721         }
1722         node.onAddedToPage();
1723     }
1724 
1725     /**
1726      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
1727      * Removes the node and its descendants from the ID/name maps and the frame set, and updates the base.
1728      * @param node the node that has just been removed from the tree
1729      */
1730     void notifyNodeRemoved(final DomNode node) {
1731         if (node instanceof HtmlElement) {
1732             removeMappedElement((HtmlElement) node, true, true);
1733             // NOTE(review): notifyNodeAdded handles any DomElement, this only HtmlElements - confirm intent
1734             if (node instanceof BaseFrameElement) {
1735                 frameElements_.remove(node);
1736             }
1737             for (final HtmlElement child : node.getHtmlElementDescendants()) {
1738                 if (child instanceof BaseFrameElement) {
1739                     frameElements_.remove(child);
1740                 }
1741             }
1742             // a removed base element requires the base of this page to be determined again
1743             if ("base".equals(node.getNodeName())) {
1744                 calculateBase();
1745             }
1746         }
1747     }
1748 
1749     /**
1750      * Adds a single element, without its children, to the ID and name maps, if necessary.
1751      * @param element the element to be added to the ID and name maps
1752      */
1753     void addMappedElement(final DomElement element) {
1754         addMappedElement(element, false);
1755     }
1756 
1757     /**
1758      * Adds an element to the ID and name maps, but only if {@code isAncestorOf(element)} holds.
1759      * @param element the element to be added to the ID and name maps
1760      * @param recurse indicates if children must be added too
1761      */
1762     void addMappedElement(final DomElement element, final boolean recurse) {
1763         if (isAncestorOf(element)) {
1764             addElement(idMap_, element, "id", recurse);
1765             addElement(nameMap_, element, "name", recurse);
1766         }
1767     }
1768 
1769     /** Maps the element (and, on request, its child elements) under its value of the given attribute. */
1770     private void addElement(final Map<String, SortedSet<DomElement>> map, final DomElement element,
1771             final String attribute, final boolean recurse) {
1772         final String value = getAttributeValue(element, attribute);
1773         if (DomElement.ATTRIBUTE_NOT_DEFINED != value) {
1774             final SortedSet<DomElement> existing = map.get(value);
1775             if (existing != null) {
1776                 existing.add(element); // a set ignores an element it already contains
1777             }
1778             else {
1779                 final SortedSet<DomElement> created = new TreeSet<>(documentPositionComparator);
1780                 created.add(element);
1781                 map.put(value, created);
1782             }
1783         }
1784         if (recurse) {
1785             for (final DomElement child : element.getChildElements()) {
1786                 addElement(map, child, attribute, true);
1787             }
1788         }
1789     }
1790 
1791     private static String getAttributeValue(final DomElement element, final String attribute) {
1792         // resolves the value an element is mapped under; the real DOM attribute is tried first
1793         String value = element.getAttribute(attribute);
1794         // ATTRIBUTE_NOT_DEFINED is compared by identity here
1795         if (DomElement.ATTRIBUTE_NOT_DEFINED == value
1796                 && !(element instanceof HtmlApplet)
1797                 && !(element instanceof HtmlObject)) {
1798             // second try: a string property of that name on the element's JavaScript object
1799             // ...but applets/objects are a bit special so ignore them
1800             final Object o = element.getScriptableObject();
1801             if (o instanceof ScriptableObject) {
1802                 final ScriptableObject scriptObject = (ScriptableObject) o;
1803                 // we have to make sure the scriptObject has a slot for the given attribute.
1804                 // just using get() may use e.g. getWithPreemption().
1805                 if (scriptObject.has(attribute, scriptObject)) {
1806                     final Object jsValue = scriptObject.get(attribute, scriptObject);
1807                     if (jsValue != Scriptable.NOT_FOUND && jsValue instanceof String) {
1808                         value = (String) jsValue;
1809                     }
1810                 }
1811             }
1812         }
1813         return value;
1814     }
1815 
1816     /**
1817      * Removes a single element, without its children, from the ID and name maps, if necessary.
1818      * @param element the element to be removed from the ID and name maps
1819      */
1820     void removeMappedElement(final HtmlElement element) {
1821         removeMappedElement(element, false, false);
1822     }
1823 
1824     /**
1825      * Removes an element and optionally its children from the ID and name maps, if necessary.
1826      * @param element the element to be removed from the ID and name maps
1827      * @param recurse indicates if children must be removed too
1828      * @param descendant indicates if the element was a descendant of this HtmlPage, but now its parent might be null
1829      */
1830     void removeMappedElement(final DomElement element, final boolean recurse, final boolean descendant) {
1831         if (descendant || isAncestorOf(element)) {
1832             removeElement(idMap_, element, "id", recurse);
1833             removeElement(nameMap_, element, "name", recurse);
1834         }
1835     }
1836 
1837     private void removeElement(final Map<String, SortedSet<DomElement>> map, final DomElement element,
1838             final String attribute, final boolean recurse) {
1839         final String value = getAttributeValue(element, attribute);
1840         if (DomElement.ATTRIBUTE_NOT_DEFINED != value) {
1841             final SortedSet<DomElement> mapped = map.remove(value); // put back below unless it only held the element
1842             final boolean onlyThisElement = mapped != null && mapped.size() == 1 && mapped.contains(element);
1843             if (mapped != null && !onlyThisElement) {
1844                 mapped.remove(element);
1845                 map.put(value, mapped);
1846             }
1847         }
1848         if (recurse) {
1849             for (final DomElement child : element.getChildElements()) {
1850                 removeElement(map, child, attribute, true);
1851             }
1852         }
1853     }
1854 
1855     /**
1856      * Indicates if the attribute name indicates that the owning element is mapped.
1857      * @param document the owning document
1858      * @param attributeName the name of the attribute to consider
1859      * @return {@code true} if the owning element should be mapped in its owning page
1860      */
1861     static boolean isMappedElement(final Document document, final String attributeName) {
1862         return document instanceof HtmlPage
1863             && ("name".equals(attributeName) || "id".equals(attributeName));
1864     }
1865 
1866     /**
1867      * Determines the base element of this page: the first 'base' element found below the
1868      * document element, or {@code null} if there is none.
1869      */
1870     private void calculateBase() {
1871         final List<HtmlElement> bases = getDocumentElement().getElementsByTagName("base");
1872         final int baseCount = bases.size();
1873         if (baseCount == 0) {
1874             base_ = null;
1875             return;
1876         }
1877         base_ = (HtmlBase) bases.get(0);
1878         if (baseCount > 1) {
1879             notifyIncorrectness("Multiple 'base' detected, only the first is used.");
1880         }
1881     }
1882 
1883     /**
1884      * Loads the content of the contained frames. This is done after the page is completely loaded, to allow script
1885      * contained in the frames to reference elements from the page located after the closing &lt;/frame&gt; tag.
1886      * @throws FailingHttpStatusCodeException if the server returns a failing status code AND the property
1887      *         {@link WebClient#setThrowExceptionOnFailingStatusCode(boolean)} is set to {@code true}
1888      */
1889     void loadFrames() throws FailingHttpStatusCodeException {
1890         for (final FrameWindow w : getFrames()) {
1891             final BaseFrameElement frame = w.getFrameElement();
1892             // test if the frame should really be loaded:
1893             // if a script has already changed its content, it should be skipped
1894             // use == and not equals(...) to identify initial content (versus URL set to "about:blank")
1895             if (frame.getEnclosedWindow() != null
1896                     && WebClient.URL_ABOUT_BLANK == frame.getEnclosedPage().getUrl()
1897                     && !frame.isContentLoaded()) {
1898                 frame.loadInnerPage();
1899             }
1900         }
1901     }
1902 
1903     /**
1904      * Builds a short description of this page (URL and hash code) for debugging purposes.
1905      * @return a basic representation
1906      */
1907     @Override
1908     public String toString() {
1909         return "HtmlPage(" + getUrl() + ")@" + hashCode();
1910     }
1916 
1917     /**
1918      * Collects the meta tags whose {@code http-equiv} attribute matches the given value, ignoring case.
1919      * @param httpEquiv the {@code http-equiv} value
1920      * @return a list of {@link HtmlMeta}, empty if nothing matches or the page has no document element
1921      */
1922     protected List<HtmlMeta> getMetaTags(final String httpEquiv) {
1923         if (getDocumentElement() == null) {
1924             return Collections.emptyList(); // weird case, for instance if document.documentElement has been removed
1925         }
1926         final String wanted = httpEquiv.toLowerCase(Locale.ROOT);
1927         final List<HtmlMeta> metas = getDocumentElement().getElementsByTagNameImpl("meta");
1928         final List<HtmlMeta> matches = new ArrayList<>();
1929         for (final HtmlMeta meta : metas) {
1930             if (meta.getHttpEquivAttribute().toLowerCase(Locale.ROOT).equals(wanted)) {
1931                 matches.add(meta);
1932             }
1933         }
1934         return matches;
1935     }
1936 
1937     /**
1938      * Clones this instance; the focused element and the ID/name maps are reset on the clone so
1939      * that this cached state is not shared with the original.
1940      *
1941      * @return a clone of this instance
1942      */
1943     @Override
1944     protected HtmlPage clone() {
1945         final HtmlPage copy = (HtmlPage) super.clone();
1946         copy.elementWithFocus_ = null;
1947         copy.idMap_ = Collections.synchronizedMap(new HashMap<String, SortedSet<DomElement>>());
1948         copy.nameMap_ = Collections.synchronizedMap(new HashMap<String, SortedSet<DomElement>>());
1949 
1950         return copy;
1951     }
1952 
1953     /**
1954      * {@inheritDoc} A deep clone gets fresh listener, selection, after-load and frame state plus cloned children.
1955      */
1956     @Override
1957     public HtmlPage cloneNode(final boolean deep) {
1958         // the script object has to be cloned and bound to the new page before any child is cloned
1959         final HtmlPage result = (HtmlPage) super.cloneNode(false);
1960         final SimpleScriptable jsObjClone = ((SimpleScriptable) getScriptableObject()).clone();
1961         jsObjClone.setDomNode(result);
1962 
1963         // deep clone: reset the per-page state of the clone, then clone and append every child
1964         if (deep) {
1965             synchronized (lock_) {
1966                 result.attributeListeners_ = null;
1967             }
1968             result.selectionRanges_ = new ArrayList<>(3);
1969             result.afterLoadActions_ = new ArrayList<>();
1970             result.frameElements_ = new TreeSet<>(documentPositionComparator);
1971             for (DomNode child = getFirstChild(); child != null; child = child.getNextSibling()) {
1972                 result.appendChild(child.cloneNode(true));
1973             }
1974         }
1975         return result;
1976     }
1977 
1978     /**
1979      * Adds an HtmlAttributeChangeListener to the listener list; the list is created on first use.
1980      * The listener is registered for all attributes of all HtmlElements contained in this page.
1981      *
1982      * @param listener the attribute change listener to be added, checked with {@code WebAssert.notNull}
1983      * @see #removeHtmlAttributeChangeListener(HtmlAttributeChangeListener)
1984      */
1985     public void addHtmlAttributeChangeListener(final HtmlAttributeChangeListener listener) {
1986         WebAssert.notNull("listener", listener);
1987         synchronized (lock_) {
1988             if (attributeListeners_ == null) {
1989                 attributeListeners_ = new LinkedHashSet<>();
1990             }
1991             attributeListeners_.add(listener);
1992         }
1993     }
1994 
1995     /**
1996      * Removes an HtmlAttributeChangeListener from the listener list; a no-op if no list exists.
1997      * This method should be used to remove HtmlAttributeChangeListeners that were registered
1998      * for all attributes of all HtmlElements contained in this page.
1999      *
2000      * @param listener the attribute change listener to be removed, checked with {@code WebAssert.notNull}
2001      * @see #addHtmlAttributeChangeListener(HtmlAttributeChangeListener)
2002      */
2003     public void removeHtmlAttributeChangeListener(final HtmlAttributeChangeListener listener) {
2004         WebAssert.notNull("listener", listener);
2005         synchronized (lock_) {
2006             if (attributeListeners_ != null) {
2007                 attributeListeners_.remove(listener);
2008             }
2009         }
2010     }
2011 
2012     /**
2013      * Informs every registered attribute listener that an attribute has been added.
2014      * @param event the event describing the added attribute
2015      */
2016     void fireHtmlAttributeAdded(final HtmlAttributeChangeEvent event) {
2017         final List<HtmlAttributeChangeListener> snapshot = safeGetAttributeListeners();
2018         if (snapshot != null) {
2019             for (final HtmlAttributeChangeListener registered : snapshot) {
2020                 registered.attributeAdded(event);
2021             }
2022         }
2023     }
2024 
2025     /**
2026      * Informs every registered attribute listener that an attribute has been replaced.
2027      * @param event the event describing the replaced attribute
2028      */
2029     void fireHtmlAttributeReplaced(final HtmlAttributeChangeEvent event) {
2030         final List<HtmlAttributeChangeListener> snapshot = safeGetAttributeListeners();
2031         if (snapshot != null) {
2032             for (final HtmlAttributeChangeListener registered : snapshot) {
2033                 registered.attributeReplaced(event);
2034             }
2035         }
2036     }
2037 
2038     /**
2039      * Informs every registered attribute listener that an attribute has been removed.
2040      * @param event the event describing the removed attribute
2041      */
2042     void fireHtmlAttributeRemoved(final HtmlAttributeChangeEvent event) {
2043         final List<HtmlAttributeChangeListener> snapshot = safeGetAttributeListeners();
2044         if (snapshot != null) {
2045             for (final HtmlAttributeChangeListener registered : snapshot) {
2046                 registered.attributeRemoved(event);
2047             }
2048         }
2049     }
2050 
2051     /**
2052      * Returns a copy of the registered attribute listeners taken under the lock, or {@code null} if none exist.
2053      */
2054     private List<HtmlAttributeChangeListener> safeGetAttributeListeners() {
2055         synchronized (lock_) {
2056             return attributeListeners_ == null ? null : new ArrayList<HtmlAttributeChangeListener>(attributeListeners_);
2057         }
2058     }
2059 
2060     /**
2061      * {@inheritDoc}
2062      */
2063     @Override
2064     protected void checkChildHierarchy(final org.w3c.dom.Node newChild) throws DOMException {
2065         if (newChild instanceof Element) {
2066             if (getDocumentElement() != null) {
2067                 throw new DOMException(DOMException.HIERARCHY_REQUEST_ERR,
2068                     "The Document may only have a single child Element.");
2069             }
2070         }
2071         else if (newChild instanceof DocumentType) {
2072             if (getDoctype() != null) {
2073                 throw new DOMException(DOMException.HIERARCHY_REQUEST_ERR,
2074                     "The Document may only have a single child DocumentType.");
2075             }
2076         }
2077         else if (!(newChild instanceof Comment || newChild instanceof ProcessingInstruction)) {
2078             throw new DOMException(DOMException.HIERARCHY_REQUEST_ERR,
2079                 "The Document may not have a child of this type: " + newChild.getNodeType());
2080         }
2081         super.checkChildHierarchy(newChild);
2082     }
2083 
2084     /**
2085      * Returns {@code true} while the count of HTML parsers adding content to this page is greater than zero.
2086      * @return {@code true} if an HTML parser is operating on this page, adding content to it
2087      */
2088     public boolean isBeingParsed() {
2089         return parserCount_ > 0;
2090     }
2091 
2092     /**
2093      * Called by the HTML parser when it starts parsing content for this page; increments the parser count.
2094      */
2095     void registerParsingStart() {
2096         parserCount_++;
2097     }
2098 
2099     /**
2100      * Called by the HTML parser when it has finished parsing content for this page; decrements the parser count.
2101      */
2102     void registerParsingEnd() {
2103         parserCount_--;
2104     }
2105 
2106     /**
2107      * Returns {@code true} if an HTML parser is parsing a non-inline HTML snippet to add content
2108      * to this page. Non-inline content is content that is parsed for the page, but not in the
2109      * same stream as the page itself -- basically anything other than {@code document.write()}
2110      * or {@code document.writeln()}: {@code innerHTML}, {@code outerHTML},
2111      * {@code document.createElement()}, etc.
2112      *
2113      * @return {@code true} if an HTML parser is parsing a non-inline HTML snippet to add content
2114      *         to this page, i.e. while the snippet parser count is greater than zero
2115      */
2116     boolean isParsingHtmlSnippet() {
2117         return snippetParserCount_ > 0;
2118     }
2119 
2120     /**
2121      * Called by the HTML parser when it starts parsing a non-inline HTML snippet; increments the snippet count.
2122      */
2123     void registerSnippetParsingStart() {
2124         snippetParserCount_++;
2125     }
2126 
2127     /**
2128      * Called by the HTML parser when it has finished a non-inline HTML snippet; decrements the snippet count.
2129      */
2130     void registerSnippetParsingEnd() {
2131         snippetParserCount_--;
2132     }
2133 
2134     /**
2135      * Returns {@code true} if an HTML parser is parsing an inline HTML snippet to add content
2136      * to this page. Inline content is content inserted into the parser stream dynamically
2137      * while the page is being parsed (i.e. {@code document.write()} or {@code document.writeln()}).
2138      *
2139      * @return {@code true} if an HTML parser is parsing an inline HTML snippet to add content
2140      *         to this page, i.e. while the inline snippet parser count is greater than zero
2141      */
2142     boolean isParsingInlineHtmlSnippet() {
2143         return inlineSnippetParserCount_ > 0;
2144     }
2145 
2146     /**
2147      * Called by the HTML parser when it starts parsing an inline HTML snippet; increments the inline count.
2148      */
2149     void registerInlineSnippetParsingStart() {
2150         inlineSnippetParserCount_++;
2151     }
2152 
2153     /**
2154      * Called by the HTML parser when it has finished an inline HTML snippet; decrements the inline count.
2155      */
2156     void registerInlineSnippetParsingEnd() {
2157         inlineSnippetParserCount_--;
2158     }
2159 
2160     /**
2161      * Refreshes the page by asking the web client for the page of this page's original web request.
2162      * @return the newly loaded page.
2163      * @throws IOException if an IO problem occurs
2164      */
2165     public Page refresh() throws IOException {
2166         return getWebClient().getPage(getWebResponse().getWebRequest());
2167     }
2168 
2169     /**
2170      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2171      * <p>
2172      * Parses the given string as if it belonged to the content being parsed
2173      * at the current parsing position; the string is pushed to the current builder.
2174      * </p>
2175      * @param string the HTML code to write in place
2176      */
2177     public void writeInParsedStream(final String string) {
2178         builder_.pushInputString(string);
2179     }
2180 
2181     /**
2182      * Sets the builder that receives the content of document.write(ln) calls made for this page.
2183      * @param htmlUnitDOMBuilder the builder
2184      */
2185     void setBuilder(final HtmlUnitDOMBuilder htmlUnitDOMBuilder) {
2186         builder_ = htmlUnitDOMBuilder;
2187     }
2188 
2189     /**
2190      * Returns the builder most recently passed to {@code setBuilder}.
2191      * @return the current builder
2192      */
2193     HtmlUnitDOMBuilder getBuilder() {
2194         return builder_;
2195     }
2196 
2197     /**
2198      * <p>Returns the namespaces declared on the root element of this page, keyed by prefix. Every
2199      * attribute whose name starts with {@code xmlns} is taken into account.</p>
2200      * <p>The default namespace has a key of an empty string.</p>
2201      * @return all namespaces defined in the root element of this page
2202      */
2203     public Map<String, String> getNamespaces() {
2204         final org.w3c.dom.NamedNodeMap rootAttributes = getDocumentElement().getAttributes();
2205         final Map<String, String> result = new HashMap<>();
2206         for (int index = 0; index < rootAttributes.getLength(); index++) {
2207             final Attr attribute = (Attr) rootAttributes.item(index);
2208             final String attributeName = attribute.getName();
2209             if (!attributeName.startsWith("xmlns")) {
2210                 continue;
2211             }
2212             // skip the 'xmlns' marker and, if present, the colon in front of the prefix
2213             final int prefixStart = attributeName.startsWith("xmlns:") ? 6 : 5;
2214             result.put(attributeName.substring(prefixStart), attribute.getValue());
2215         }
2216         return result;
2217     }
2219 
2220     /**
2221      * {@inheritDoc}
2222      */
2223     @Override
2224     protected void setDocumentType(final DocumentType type) {
2225         super.setDocumentType(type);
2226     }
2227 
2228     /**
2229      * Saves the current page, with all images, to the specified location.
2230      * The default behavior removes all script elements.
2231      *
2232      * @param file file to write this page into
2233      * @throws IOException If an error occurs
2234      */
2235     public void save(final File file) throws IOException {
2236         new XmlSerializer().save(this, file);
2237     }
2238 
2239     /**
2240      * Returns whether the current page mode is in {@code quirks mode} or in {@code standards mode}.
2241      * @return true for {@code quirks mode}, false for {@code standards mode}
2242      */
2243     public boolean isQuirksMode() {
2244         return "BackCompat".equals(((HTMLDocument) getScriptableObject()).getCompatMode());
2245     }
2246 
2247     /**
2248      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2249      * {@inheritDoc}
2250      */
2251     @Override
2252     public boolean isAttachedToPage() {
2253         return true;
2254     }
2255 
2256     /**
2257      * {@inheritDoc}
2258      */
2259     @Override
2260     public boolean isHtmlPage() {
2261         return true;
2262     }
2263 
2264     /**
2265      * The base URL used to resolve relative URLs.
2266      * @return the base URL
2267      */
2268     public URL getBaseURL() {
2269         URL baseUrl;
2270         if (base_ == null) {
2271             baseUrl = getUrl();
2272             final WebWindow window = getEnclosingWindow();
2273             final boolean frame = window != window.getTopWindow();
2274             if (frame) {
2275                 final boolean frameSrcIsNotSet = baseUrl == WebClient.URL_ABOUT_BLANK;
2276                 final boolean frameSrcIsJs = "javascript".equals(baseUrl.getProtocol());
2277                 if (frameSrcIsNotSet || frameSrcIsJs) {
2278                     baseUrl = ((HtmlPage) window.getTopWindow().getEnclosedPage()).getWebResponse()
2279                         .getWebRequest().getUrl();
2280                 }
2281             }
2282             else if (baseUrl_ != null) {
2283                 baseUrl = baseUrl_;
2284             }
2285         }
2286         else {
2287             final String href = base_.getHrefAttribute().trim();
2288             if (StringUtils.isEmpty(href)) {
2289                 baseUrl = getUrl();
2290             }
2291             else {
2292                 final URL url = getUrl();
2293                 try {
2294                     if (href.startsWith("http://") || href.startsWith("https://")) {
2295                         baseUrl = new URL(href);
2296                     }
2297                     else if (href.startsWith("//")) {
2298                         baseUrl = new URL(String.format("%s:%s", url.getProtocol(), href));
2299                     }
2300                     else if (href.startsWith("/")) {
2301                         final int port = Window.getPort(url);
2302                         baseUrl = new URL(String.format("%s://%s:%d%s", url.getProtocol(), url.getHost(), port, href));
2303                     }
2304                     else if (url.toString().endsWith("/")) {
2305                         baseUrl = new URL(String.format("%s%s", url.toString(), href));
2306                     }
2307                     else {
2308                         baseUrl = new URL(UrlUtils.resolveUrl(url, href));
2309                     }
2310                 }
2311                 catch (final MalformedURLException e) {
2312                     notifyIncorrectness("Invalid base url: \"" + href + "\", ignoring it");
2313                     baseUrl = url;
2314                 }
2315             }
2316         }
2317 
2318         return baseUrl;
2319     }
2320 
2321     /**
2322      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2323      *
2324      * Adds an {@link AutoCloseable}, which would be closed during the {@link #cleanUp()}.
2325      * @param autoCloseable the autoclosable
2326      */
2327     public void addAutoCloseable(final AutoCloseable autoCloseable) {
2328         if (autoCloseableList_ == null) {
2329             autoCloseableList_ = new ArrayList<>();
2330         }
2331         autoCloseableList_.add(autoCloseable);
2332     }
2333 
2334     /**
2335      * {@inheritDoc}
2336      */
2337     @Override
2338     public boolean handles(final Event event) {
2339         if (Event.TYPE_BLUR.equals(event.getType()) || Event.TYPE_FOCUS.equals(event.getType())) {
2340             return true;
2341         }
2342         return super.handles(event);
2343     }
2344 
2345     /**
2346      * Sets the {@link ElementFromPointHandler}.
2347      * @param elementFromPointHandler the handler
2348      */
2349     public void setElementFromPointHandler(final ElementFromPointHandler elementFromPointHandler) {
2350         elementFromPointHandler_ = elementFromPointHandler;
2351     }
2352 
2353     /**
2354      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2355      *
2356      * Returns the element for the specified x coordinate and the specified y coordinate.
2357      *
2358      * @param x the x offset, in pixels
2359      * @param y the y offset, in pixels
2360      * @return the element for the specified x coordinate and the specified y coordinate
2361      */
2362     public HtmlElement getElementFromPoint(final int x, final int y) {
2363         if (elementFromPointHandler_ == null) {
2364             LOG.warn("ElementFromPointHandler was not specicifed for " + this);
2365             if (x <= 0 || y <= 0) {
2366                 return null;
2367             }
2368             return getBody();
2369         }
2370         return elementFromPointHandler_.getElementFromPoint(this, x, y);
2371     }
2372 
2373     /**
2374      * Moves the focus to the specified element. This will trigger any relevant JavaScript
2375      * event handlers.
2376      *
2377      * @param newElement the element that will receive the focus, use {@code null} to remove focus from any element
2378      * @return true if the specified element now has the focus
2379      * @see #getFocusedElement()
2380      */
2381     public boolean setFocusedElement(final DomElement newElement) {
2382         return setFocusedElement(newElement, false);
2383     }
2384 
2385     /**
2386      * Moves the focus to the specified element. This will trigger any relevant JavaScript
2387      * event handlers.
2388      *
2389      * @param newElement the element that will receive the focus, use {@code null} to remove focus from any element
2390      * @param windowActivated - whether the enclosing window got focus resulting in specified element getting focus
2391      * @return true if the specified element now has the focus
2392      * @see #getFocusedElement()
2393      */
2394     public boolean setFocusedElement(final DomElement newElement, final boolean windowActivated) {
2395         if (elementWithFocus_ == newElement && !windowActivated) {
2396             // nothing to do
2397             return true;
2398         }
2399 
2400         final DomElement oldFocusedElement = elementWithFocus_;
2401         elementWithFocus_ = null;
2402 
2403         if (!windowActivated) {
2404             if (hasFeature(EVENT_FOCUS_IN_FOCUS_OUT_BLUR)) {
2405                 if (oldFocusedElement != null) {
2406                     oldFocusedElement.fireEvent(Event.TYPE_FOCUS_OUT);
2407                 }
2408 
2409                 if (newElement != null) {
2410                     newElement.fireEvent(Event.TYPE_FOCUS_IN);
2411                 }
2412             }
2413 
2414             if (oldFocusedElement != null) {
2415                 oldFocusedElement.removeFocus();
2416                 oldFocusedElement.fireEvent(Event.TYPE_BLUR);
2417             }
2418         }
2419 
2420         elementWithFocus_ = newElement;
2421 
2422         if (elementWithFocus_ instanceof SelectableTextInput
2423                 && hasFeature(PAGE_SELECTION_RANGE_FROM_SELECTABLE_TEXT_INPUT)) {
2424             final SelectableTextInput sti = (SelectableTextInput) elementWithFocus_;
2425             setSelectionRange(new SimpleRange(sti, sti.getSelectionStart(), sti, sti.getSelectionEnd()));
2426         }
2427 
2428         if (elementWithFocus_ != null) {
2429             elementWithFocus_.focus();
2430             elementWithFocus_.fireEvent(Event.TYPE_FOCUS);
2431         }
2432 
2433         if (hasFeature(EVENT_FOCUS_FOCUS_IN_BLUR_OUT)) {
2434             if (oldFocusedElement != null) {
2435                 oldFocusedElement.fireEvent(Event.TYPE_FOCUS_OUT);
2436             }
2437 
2438             if (newElement != null) {
2439                 newElement.fireEvent(Event.TYPE_FOCUS_IN);
2440             }
2441         }
2442 
2443         // If a page reload happened as a result of the focus change then obviously this
2444         // element will not have the focus because its page has gone away.
2445         return this == getEnclosingWindow().getEnclosedPage();
2446     }
2447 
2448     /**
2449      * Returns the element with the focus or null if no element has the focus.
2450      * @return the element with focus or null
2451      * @see #setFocusedElement(DomElement)
2452      */
2453     public DomElement getFocusedElement() {
2454         return elementWithFocus_;
2455     }
2456 
2457     /**
2458      * <p><span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span></p>
2459      *
2460      * Sets the element with focus.
2461      * @param elementWithFocus the element with focus
2462      */
2463     public void setElementWithFocus(final DomElement elementWithFocus) {
2464         elementWithFocus_ = elementWithFocus;
2465     }
2466 
2467     /**
2468      * <p><span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span></p>
2469      *
2470      * <p>Returns the page's current selection ranges. Note that some browsers, like IE, only allow
2471      * a single selection at a time.</p>
2472      *
2473      * @return the page's current selection ranges
2474      */
2475     public List<Range> getSelectionRanges() {
2476         return selectionRanges_;
2477     }
2478 
2479     /**
2480      * <p><span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span></p>
2481      *
2482      * <p>Makes the specified selection range the *only* selection range on this page.</p>
2483      *
2484      * @param selectionRange the selection range
2485      */
2486     public void setSelectionRange(final Range selectionRange) {
2487         selectionRanges_.clear();
2488         selectionRanges_.add(selectionRange);
2489     }
2490 
2491     /**
2492      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2493      *
2494      * Execute a Function in the given context.
2495      *
2496      * @param function the JavaScript Function to call
2497      * @param thisObject the "this" object to be used during invocation
2498      * @param args the arguments to pass into the call
2499      * @param htmlElementScope the HTML element for which this script is being executed
2500      *        This element will be the context during the JavaScript execution. If null,
2501      *        the context will default to the page.
2502      * @return a ScriptResult which will contain both the current page (which may be different than
2503      *        the previous page and a JavaScript result object.
2504      */
2505     public ScriptResult executeJavaScriptFunction(final Object function, final Object thisObject,
2506             final Object[] args, final DomNode htmlElementScope) {
2507         if (!getWebClient().getOptions().isJavaScriptEnabled()) {
2508             return new ScriptResult(null, this);
2509         }
2510 
2511         return executeJavaScriptFunction((Function) function, (Scriptable) thisObject, args, htmlElementScope);
2512     }
2513 
2514     private ScriptResult executeJavaScriptFunction(final Function function, final Scriptable thisObject,
2515             final Object[] args, final DomNode htmlElementScope) {
2516 
2517         final JavaScriptEngine engine = (JavaScriptEngine) getWebClient().getJavaScriptEngine();
2518         final Object result = engine.callFunction(this, function, thisObject, args, htmlElementScope);
2519 
2520         return new ScriptResult(result, getWebClient().getCurrentWindow().getEnclosedPage());
2521     }
2522 
2523     private void writeObject(final ObjectOutputStream oos) throws IOException {
2524         oos.defaultWriteObject();
2525         oos.writeObject(originalCharset_ == null ? null : originalCharset_.name());
2526     }
2527 
2528     private void readObject(final ObjectInputStream ois) throws ClassNotFoundException, IOException {
2529         ois.defaultReadObject();
2530         final String charsetName = (String) ois.readObject();
2531         if (charsetName != null) {
2532             originalCharset_ = Charset.forName(charsetName);
2533         }
2534     }
2535 }