1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xalan.internal.lib;
  22 
  23 import java.util.StringTokenizer;
  24 import com.sun.org.apache.xpath.internal.NodeSet;
  25 import jdk.xml.internal.JdkXmlUtils;
  26 
  27 import org.w3c.dom.Document;
  28 import org.w3c.dom.Element;
  29 import org.w3c.dom.Node;
  30 import org.w3c.dom.NodeList;
  31 import org.w3c.dom.Text;
  32 
  33 /**
  34  * This class contains EXSLT strings extension functions.
  35  *
  36  * It is accessed by specifying a namespace URI as follows:
  37  * <pre>
  38  *    xmlns:str="http://exslt.org/strings"
  39  * </pre>
  40  * The documentation for each function has been copied from the relevant
  41  * EXSLT Implementer page.
  42  *
  43  * @see <a href="http://www.exslt.org/">EXSLT</a>
  44 
  45  * @xsl.usage general
  46  */
  47 public class ExsltStrings extends ExsltBase
  48 {
  49 
  50   /**
  51    * The str:align function aligns a string within another string.
  52    * <p>
  53    * The first argument gives the target string to be aligned. The second argument gives
  54    * the padding string within which it is to be aligned.
  55    * <p>
  56    * If the target string is shorter than the padding string then a range of characters
  57    * in the padding string are repaced with those in the target string. Which characters
  58    * are replaced depends on the value of the third argument, which gives the type of
  59    * alignment. It can be one of 'left', 'right' or 'center'. If no third argument is
  60    * given or if it is not one of these values, then it defaults to left alignment.
  61    * <p>
  62    * With left alignment, the range of characters replaced by the target string begins
  63    * with the first character in the padding string. With right alignment, the range of
  64    * characters replaced by the target string ends with the last character in the padding
  65    * string. With center alignment, the range of characters replaced by the target string
  66    * is in the middle of the padding string, such that either the number of unreplaced
  67    * characters on either side of the range is the same or there is one less on the left
  68    * than there is on the right.
  69    * <p>
  70    * If the target string is longer than the padding string, then it is truncated to be
  71    * the same length as the padding string and returned.
  72    *
  73    * @param targetStr The target string
  74    * @param paddingStr The padding string
  75    * @param type The type of alignment
  76    *
  77    * @return The string after alignment
  78    */
  79   public static String align(String targetStr, String paddingStr, String type)
  80   {
  81     if (targetStr.length() >= paddingStr.length())
  82       return targetStr.substring(0, paddingStr.length());
  83 
  84     if (type.equals("right"))
  85     {
  86       return paddingStr.substring(0, paddingStr.length() - targetStr.length()) + targetStr;
  87     }
  88     else if (type.equals("center"))
  89     {
  90       int startIndex = (paddingStr.length() - targetStr.length()) / 2;
  91       return paddingStr.substring(0, startIndex) + targetStr + paddingStr.substring(startIndex + targetStr.length());
  92     }
  93     // Default is left
  94     else
  95     {
  96       return targetStr + paddingStr.substring(targetStr.length());
  97     }
  98   }
  99 
 100   /**
 101    * See above
 102    */
 103   public static String align(String targetStr, String paddingStr)
 104   {
 105     return align(targetStr, paddingStr, "left");
 106   }
 107 
 108   /**
 109    * The str:concat function takes a node set and returns the concatenation of the
 110    * string values of the nodes in that node set. If the node set is empty, it returns
 111    * an empty string.
 112    *
 113    * @param nl A node set
 114    * @return The concatenation of the string values of the nodes in that node set
 115    */
 116   public static String concat(NodeList nl)
 117   {
 118     StringBuffer sb = new StringBuffer();
 119     for (int i = 0; i < nl.getLength(); i++)
 120     {
 121       Node node = nl.item(i);
 122       String value = toString(node);
 123 
 124       if (value != null && value.length() > 0)
 125         sb.append(value);
 126     }
 127 
 128     return sb.toString();
 129   }
 130 
 131   /**
 132    * The str:padding function creates a padding string of a certain length.
 133    * The first argument gives the length of the padding string to be created.
 134    * The second argument gives a string to be used to create the padding. This
 135    * string is repeated as many times as is necessary to create a string of the
 136    * length specified by the first argument; if the string is more than a character
 137    * long, it may have to be truncated to produce the required length. If no second
 138    * argument is specified, it defaults to a space (' '). If the second argument is
 139    * an empty string, str:padding returns an empty string.
 140    *
 141    * @param length The length of the padding string to be created
 142    * @param pattern The string to be used as pattern
 143    *
 144    * @return A padding string of the given length
 145    */
 146   public static String padding(double length, String pattern)
 147   {
 148     if (pattern == null || pattern.length() == 0)
 149       return "";
 150 
 151     StringBuffer sb = new StringBuffer();
 152     int len = (int)length;
 153     int numAdded = 0;
 154     int index = 0;
 155     while (numAdded < len)
 156     {
 157       if (index == pattern.length())
 158         index = 0;
 159 
 160       sb.append(pattern.charAt(index));
 161       index++;
 162       numAdded++;
 163     }
 164 
 165     return sb.toString();
 166   }
 167 
 168   /**
 169    * See above
 170    */
 171   public static String padding(double length)
 172   {
 173     return padding(length, " ");
 174   }
 175 
 176   /**
 177    * The str:split function splits up a string and returns a node set of token
 178    * elements, each containing one token from the string.
 179    * <p>
 180    * The first argument is the string to be split. The second argument is a pattern
 181    * string. The string given by the first argument is split at any occurrence of
 182    * this pattern. For example:
 183    * <pre>
 184    * str:split('a, simple, list', ', ') gives the node set consisting of:
 185    *
 186    * <token>a</token>
 187    * <token>simple</token>
 188    * <token>list</token>
 189    * </pre>
 190    * If the second argument is omitted, the default is the string ' ' (i.e. a space).
 191    *
 192    * @param str The string to be split
 193    * @param pattern The pattern
 194    *
 195    * @return A node set of split tokens
 196    */
 197   public static NodeList split(String str, String pattern)
 198   {
 199 
 200 
 201     NodeSet resultSet = new NodeSet();
 202     resultSet.setShouldCacheNodes(true);
 203 
 204     boolean done = false;
 205     int fromIndex = 0;
 206     int matchIndex = 0;
 207     String token = null;
 208 
 209     while (!done && fromIndex < str.length())
 210     {
 211       matchIndex = str.indexOf(pattern, fromIndex);
 212       if (matchIndex >= 0)
 213       {
 214         token = str.substring(fromIndex, matchIndex);
 215         fromIndex = matchIndex + pattern.length();
 216       }
 217       else
 218       {
 219         done = true;
 220         token = str.substring(fromIndex);
 221       }
 222 
 223       Document doc = JdkXmlUtils.getDOMDocument();
 224       synchronized (doc)
 225       {
 226         Element element = doc.createElement("token");
 227         Text text = doc.createTextNode(token);
 228         element.appendChild(text);
 229         resultSet.addNode(element);
 230       }
 231     }
 232 
 233     return resultSet;
 234   }
 235 
 236   /**
 237    * See above
 238    */
 239   public static NodeList split(String str)
 240   {
 241     return split(str, " ");
 242   }
 243 
 244   /**
 245    * The str:tokenize function splits up a string and returns a node set of token
 246    * elements, each containing one token from the string.
 247    * <p>
 248    * The first argument is the string to be tokenized. The second argument is a
 249    * string consisting of a number of characters. Each character in this string is
 250    * taken as a delimiting character. The string given by the first argument is split
 251    * at any occurrence of any of these characters. For example:
 252    * <pre>
 253    * str:tokenize('2001-06-03T11:40:23', '-T:') gives the node set consisting of:
 254    *
 255    * <token>2001</token>
 256    * <token>06</token>
 257    * <token>03</token>
 258    * <token>11</token>
 259    * <token>40</token>
 260    * <token>23</token>
 261    * </pre>
 262    * If the second argument is omitted, the default is the string '&#x9;&#xA;&#xD; '
 263    * (i.e. whitespace characters).
 264    * <p>
 265    * If the second argument is an empty string, the function returns a set of token
 266    * elements, each of which holds a single character.
 267    * <p>
 268    * Note: This one is different from the tokenize extension function in the Xalan
 269    * namespace. The one in Xalan returns a set of Text nodes, while this one wraps
 270    * the Text nodes inside the token Element nodes.
 271    *
 272    * @param toTokenize The string to be tokenized
 273    * @param delims The delimiter string
 274    *
 275    * @return A node set of split token elements
 276    */
 277   public static NodeList tokenize(String toTokenize, String delims)
 278   {
 279 
 280 
 281     NodeSet resultSet = new NodeSet();
 282 
 283     if (delims != null && delims.length() > 0)
 284     {
 285       StringTokenizer lTokenizer = new StringTokenizer(toTokenize, delims);
 286 
 287       Document doc = JdkXmlUtils.getDOMDocument();
 288       synchronized (doc)
 289       {
 290         while (lTokenizer.hasMoreTokens())
 291         {
 292           Element element = doc.createElement("token");
 293           element.appendChild(doc.createTextNode(lTokenizer.nextToken()));
 294           resultSet.addNode(element);
 295         }
 296       }
 297     }
 298     // If the delimiter is an empty string, create one token Element for
 299     // every single character.
 300     else
 301     {
 302 
 303       Document doc = JdkXmlUtils.getDOMDocument();
 304       synchronized (doc)
 305       {
 306         for (int i = 0; i < toTokenize.length(); i++)
 307         {
 308           Element element = doc.createElement("token");
 309           element.appendChild(doc.createTextNode(toTokenize.substring(i, i+1)));
 310           resultSet.addNode(element);
 311         }
 312       }
 313     }
 314 
 315     return resultSet;
 316   }
 317 
 318   /**
 319    * See above
 320    */
 321   public static NodeList tokenize(String toTokenize)
 322   {
 323     return tokenize(toTokenize, " \t\n\r");
 324   }
 325 }