1 /*
   2  * Copyright (c) 2014 Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 package org.openjdk.bench.javax.xml;
  24 
  25 import org.openjdk.jmh.annotations.Benchmark;
  26 import org.w3c.dom.Document;
  27 import org.w3c.dom.Element;
  28 import org.w3c.dom.Node;
  29 import org.xml.sax.InputSource;
  30 import org.xml.sax.SAXException;
  31 
  32 import javax.xml.parsers.DocumentBuilder;
  33 import javax.xml.parsers.DocumentBuilderFactory;
  34 import javax.xml.parsers.ParserConfigurationException;
  35 import java.io.ByteArrayInputStream;
  36 import java.io.IOException;
  37 
  38 public class DOM extends AbstractXMLMicro {
  39 
  40     @Benchmark
  41     public Document testBuild() throws Exception {
  42         DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
  43         byte[] bytes = getFileBytesFromResource(doc);
  44         InputSource source = new InputSource();
  45         ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
  46 
  47         source.setByteStream(bais);
  48         return buildDocument(dbf, source);
  49     }
  50 
  51     @Benchmark
  52     public Document testModify() throws Exception {
  53         DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
  54         byte[] bytes = getFileBytesFromResource(doc);
  55         ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
  56         InputSource source = new InputSource(bais);
  57         Document doc1 = buildDocument(dbf, source);
  58 
  59         modifyElementRecursive(doc1.getDocumentElement());
  60         return doc1;
  61     }
  62 
  63     @Benchmark
  64     public Document testWalk() throws Exception {
  65         DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
  66         byte[] bytes = getFileBytesFromResource(doc);
  67         ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
  68         InputSource source = new InputSource(bais);
  69         Document doc1 = buildDocument(dbf, source);
  70 
  71         walkElementRecursive(doc1.getDocumentElement());
  72         return doc1;
  73     }
  74 
  75     private Document buildDocument(DocumentBuilderFactory dbf, InputSource source)
  76             throws ParserConfigurationException, SAXException, IOException {
  77         dbf.setValidating(false);
  78         dbf.setNamespaceAware(true);
  79         DocumentBuilder docBuilder = dbf.newDocumentBuilder();
  80         return docBuilder.parse(source);
  81     }
  82 
  83     // TODO Fix so it isn't recursive?
  84     private static void walkElementRecursive(Element element) {
  85         // loop through children
  86         if (element.hasChildNodes()) {
  87             Node child = element.getFirstChild();
  88             while (child != null) {
  89 
  90                 // handle child by type
  91                 int type = child.getNodeType();
  92                 if (type == Node.ELEMENT_NODE) {
  93                     walkElementRecursive((Element) child);
  94                 }
  95                 child = child.getNextSibling();
  96             }
  97         }
  98     }
  99 
 100     // TODO Fix so it isn't recursive?
 101     private void modifyElementRecursive(Element element) {
 102 
 103         // check for children present
 104         if (element.hasChildNodes()) {
 105 
 106             // loop through child nodes
 107             boolean content = false;
 108 
 109             // Should not be null since we already have made a .hasChildNodes()
 110             // check.
 111             Node child = element.getFirstChild();
 112 
 113             do {
 114                 // Handle child by node type.
 115                 if (child.getNodeType() == Node.TEXT_NODE) {
 116                     String trimmed = child.getNodeValue().trim();
 117                     if (trimmed.length() == 0) {
 118                         // delete child if nothing but whitespace
 119                         element.removeChild(child);
 120                     } else {
 121                         // make sure we have the parent element information
 122                         content = true;
 123                         Document doc = element.getOwnerDocument();
 124                         String uri = element.getNamespaceURI();
 125                         String prefix = element.getPrefix();
 126                         content = true;
 127 
 128                         // Create a "text" element matching parent namespace.
 129                         Element text = (uri == null) ? doc.createElement("text") : doc.createElementNS(uri, prefix
 130                                 + ":text");
 131 
 132                         // wrap the trimmed content with new element
 133                         text.appendChild(doc.createTextNode(trimmed));
 134                         element.replaceChild(text, child);
 135 
 136                     }
 137                 } else if (child.getNodeType() == Node.ELEMENT_NODE) {
 138                     modifyElementRecursive((Element) child);
 139                 }
 140 
 141             } while ((child = child.getNextSibling()) != null);
 142 
 143             // check if we've seen any non-whitespace content for element
 144             if (content) {
 145                 String prefix = element.getPrefix();
 146                 String uri = element.getNamespaceURI();
 147                 // add attribute flagging content found
 148                 if (prefix == null || prefix.length() == 0) {
 149                     element.setAttribute("text", "true");
 150                 } else {
 151                     element.setAttributeNS(uri, prefix + ":text", "true");
 152                 }
 153 
 154             }
 155         }
 156     }
 157 
 158 }