1 /*
   2  * Copyright (c) 2000, 2003, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package javax.print;
  27 
  28 import java.io.Serializable;
  29 
  30 import java.util.AbstractMap;
  31 import java.util.AbstractSet;
  32 import java.util.Iterator;
  33 import java.util.Map;
  34 import java.util.NoSuchElementException;
  35 import java.util.Set;
  36 import java.util.Vector;
  37 
  38 /**
  39  * Class MimeType encapsulates a Multipurpose Internet Mail Extensions (MIME)
  40  * media type as defined in <A HREF="http://www.ietf.org/rfc/rfc2045.txt">RFC
  41  * 2045</A> and <A HREF="http://www.ietf.org/rfc/rfc2046.txt">RFC 2046</A>. A
  42  * MIME type object is part of a {@link DocFlavor DocFlavor} object and
  43  * specifies the format of the print data.
  44  * <P>
  45  * Class MimeType is similar to the like-named
  46  * class in package {@link java.awt.datatransfer java.awt.datatransfer}. Class
  47  * java.awt.datatransfer.MimeType is not used in the Jini Print Service API
  48  * for two reasons:
  49  * <OL TYPE=1>
  50  * <LI>
  51  * Since not all Java profiles include the AWT, the Jini Print Service should
  52  * not depend on an AWT class.
  53  * <P>
  54  * <LI>
  55  * The implementation of class java.awt.datatransfer.MimeType does not
  56  * guarantee
  57  * that equivalent MIME types will have the same serialized representation.
  58  * Thus, since the Jini Lookup Service (JLUS) matches service attributes based
  59  * on equality of serialized representations, JLUS searches involving MIME
  60  * types encapsulated in class java.awt.datatransfer.MimeType may incorrectly
  61  * fail to match.
  62  * </OL>
  63  * <P>
  64  * Class MimeType's serialized representation is based on the following
  65  * canonical form of a MIME type string. Thus, two MIME types that are not
  66  * identical but that are equivalent (that have the same canonical form) will
  67  * be considered equal by the JLUS's matching algorithm.
  68  * <UL>
  69  * <LI> The media type, media subtype, and parameters are retained, but all
  70  *      comments and whitespace characters are discarded.
  71  * <LI> The media type, media subtype, and parameter names are converted to
  72  *      lowercase.
  73  * <LI> The parameter values retain their original case, except a charset
  74  *      parameter value for a text media type is converted to lowercase.
  75  * <LI> Quote characters surrounding parameter values are removed.
  76  * <LI> Quoting backslash characters inside parameter values are removed.
  77  * <LI> The parameters are arranged in ascending order of parameter name.
  78  * </UL>
  79  * <P>
  80  *
  81  * @author  Alan Kaminsky
  82  */
  83 class MimeType implements Serializable, Cloneable {
  84 
  85     private static final long serialVersionUID = -2785720609362367683L;
  86 
  87     /**
  88      * Array of strings that hold pieces of this MIME type's canonical form.
  89      * If the MIME type has <I>n</I> parameters, <I>n</I> &gt;= 0, then the
  90      * strings in the array are:
  91      * <BR>Index 0 -- Media type.
  92      * <BR>Index 1 -- Media subtype.
  93      * <BR>Index 2<I>i</I>+2 -- Name of parameter <I>i</I>,
  94      * <I>i</I>=0,1,...,<I>n</I>-1.
  95      * <BR>Index 2<I>i</I>+3 -- Value of parameter <I>i</I>,
  96      * <I>i</I>=0,1,...,<I>n</I>-1.
  97      * <BR>Parameters are arranged in ascending order of parameter name.
  98      * @serial
  99      */
 100     private String[] myPieces;
 101 
 102     /**
 103      * String value for this MIME type. Computed when needed and cached.
 104      */
 105     private transient String myStringValue = null;
 106 
 107     /**
 108      * Parameter map entry set. Computed when needed and cached.
 109      */
 110     private transient ParameterMapEntrySet myEntrySet = null;
 111 
 112     /**
 113      * Parameter map. Computed when needed and cached.
 114      */
 115     private transient ParameterMap myParameterMap = null;
 116 
 117     /**
 118      * Parameter map entry.
 119      */
 120     private class ParameterMapEntry implements Map.Entry {
 121         private int myIndex;
 122         public ParameterMapEntry(int theIndex) {
 123             myIndex = theIndex;
 124         }
 125         public Object getKey(){
 126             return myPieces[myIndex];
 127         }
 128         public Object getValue(){
 129             return myPieces[myIndex+1];
 130         }
 131         public Object setValue (Object value) {
 132             throw new UnsupportedOperationException();
 133         }
 134         public boolean equals(Object o) {
 135             return (o != null &&
 136                     o instanceof Map.Entry &&
 137                     getKey().equals (((Map.Entry) o).getKey()) &&
 138                     getValue().equals(((Map.Entry) o).getValue()));
 139         }
 140         public int hashCode() {
 141             return getKey().hashCode() ^ getValue().hashCode();
 142         }
 143     }
 144 
 145     /**
 146      * Parameter map entry set iterator.
 147      */
 148     private class ParameterMapEntrySetIterator implements Iterator {
 149         private int myIndex = 2;
 150         public boolean hasNext() {
 151             return myIndex < myPieces.length;
 152         }
 153         public Object next() {
 154             if (hasNext()) {
 155                 ParameterMapEntry result = new ParameterMapEntry (myIndex);
 156                 myIndex += 2;
 157                 return result;
 158             } else {
 159                 throw new NoSuchElementException();
 160             }
 161         }
 162         public void remove() {
 163             throw new UnsupportedOperationException();
 164         }
 165     }
 166 
 167     /**
 168      * Parameter map entry set.
 169      */
 170     private class ParameterMapEntrySet extends AbstractSet {
 171         public Iterator iterator() {
 172             return new ParameterMapEntrySetIterator();
 173         }
 174         public int size() {
 175             return (myPieces.length - 2) / 2;
 176         }
 177     }
 178 
 179     /**
 180      * Parameter map.
 181      */
 182     private class ParameterMap extends AbstractMap {
 183         public Set entrySet() {
 184             if (myEntrySet == null) {
 185                 myEntrySet = new ParameterMapEntrySet();
 186             }
 187             return myEntrySet;
 188         }
 189     }
 190 
 191     /**
 192      * Construct a new MIME type object from the given string. The given
 193      * string is converted into canonical form and stored internally.
 194      *
 195      * @param  s  MIME media type string.
 196      *
 197      * @exception  NullPointerException
 198      *     (unchecked exception) Thrown if <CODE>s</CODE> is null.
 199      * @exception  IllegalArgumentException
 200      *     (unchecked exception) Thrown if <CODE>s</CODE> does not obey the
 201      *     syntax for a MIME media type string.
 202      */
 203     public MimeType(String s) {
 204         parse (s);
 205     }
 206 
 207     /**
 208      * Returns this MIME type object's MIME type string based on the canonical
 209      * form. Each parameter value is enclosed in quotes.
 210      */
 211     public String getMimeType() {
 212         return getStringValue();
 213     }
 214 
 215     /**
 216      * Returns this MIME type object's media type.
 217      */
 218     public String getMediaType() {
 219         return myPieces[0];
 220     }
 221 
 222     /**
 223      * Returns this MIME type object's media subtype.
 224      */
 225     public String getMediaSubtype() {
 226         return myPieces[1];
 227     }
 228 
 229     /**
 230      * Returns an unmodifiable map view of the parameters in this MIME type
 231      * object. Each entry in the parameter map view consists of a parameter
 232      * name String (key) mapping to a parameter value String. If this MIME
 233      * type object has no parameters, an empty map is returned.
 234      *
 235      * @return  Parameter map for this MIME type object.
 236      */
 237     public Map getParameterMap() {
 238         if (myParameterMap == null) {
 239             myParameterMap = new ParameterMap();
 240         }
 241         return myParameterMap;
 242     }
 243 
 244     /**
 245      * Converts this MIME type object to a string.
 246      *
 247      * @return  MIME type string based on the canonical form. Each parameter
 248      *          value is enclosed in quotes.
 249      */
 250     public String toString() {
 251         return getStringValue();
 252     }
 253 
 254     /**
 255      * Returns a hash code for this MIME type object.
 256      */
 257     public int hashCode() {
 258         return getStringValue().hashCode();
 259     }
 260 
 261     /**
 262      * Determine if this MIME type object is equal to the given object. The two
 263      * are equal if the given object is not null, is an instance of class
 264      * net.jini.print.data.MimeType, and has the same canonical form as this
 265      * MIME type object (that is, has the same type, subtype, and parameters).
 266      * Thus, if two MIME type objects are the same except for comments, they are
 267      * considered equal. However, "text/plain" and "text/plain;
 268      * charset=us-ascii" are not considered equal, even though they represent
 269      * the same media type (because the default character set for plain text is
 270      * US-ASCII).
 271      *
 272      * @param  obj  Object to test.
 273      *
 274      * @return  True if this MIME type object equals <CODE>obj</CODE>, false
 275      *          otherwise.
 276      */
 277     public boolean equals (Object obj) {
 278         return(obj != null &&
 279                obj instanceof MimeType &&
 280                getStringValue().equals(((MimeType) obj).getStringValue()));
 281     }
 282 
 283     /**
 284      * Returns this MIME type's string value in canonical form.
 285      */
 286     private String getStringValue() {
 287         if (myStringValue == null) {
 288             StringBuffer result = new StringBuffer();
 289             result.append (myPieces[0]);
 290             result.append ('/');
 291             result.append (myPieces[1]);
 292             int n = myPieces.length;
 293             for (int i = 2; i < n; i += 2) {
 294                 result.append(';');
 295                 result.append(' ');
 296                 result.append(myPieces[i]);
 297                 result.append('=');
 298                 result.append(addQuotes (myPieces[i+1]));
 299             }
 300             myStringValue = result.toString();
 301         }
 302         return myStringValue;
 303     }
 304 
 305 // Hidden classes, constants, and operations for parsing a MIME media type
 306 // string.
 307 
 308     // Lexeme types.
 309     private static final int TOKEN_LEXEME         = 0;
 310     private static final int QUOTED_STRING_LEXEME = 1;
 311     private static final int TSPECIAL_LEXEME      = 2;
 312     private static final int EOF_LEXEME           = 3;
 313     private static final int ILLEGAL_LEXEME       = 4;
 314 
 315     // Class for a lexical analyzer.
 316     private static class LexicalAnalyzer {
 317         protected String mySource;
 318         protected int mySourceLength;
 319         protected int myCurrentIndex;
 320         protected int myLexemeType;
 321         protected int myLexemeBeginIndex;
 322         protected int myLexemeEndIndex;
 323 
 324         public LexicalAnalyzer(String theSource) {
 325             mySource = theSource;
 326             mySourceLength = theSource.length();
 327             myCurrentIndex = 0;
 328             nextLexeme();
 329         }
 330 
 331         public int getLexemeType() {
 332             return myLexemeType;
 333         }
 334 
 335         public String getLexeme() {
 336             return(myLexemeBeginIndex >= mySourceLength ?
 337                    null :
 338                    mySource.substring(myLexemeBeginIndex, myLexemeEndIndex));
 339         }
 340 
 341         public char getLexemeFirstCharacter() {
 342             return(myLexemeBeginIndex >= mySourceLength ?
 343                    '\u0000' :
 344                    mySource.charAt(myLexemeBeginIndex));
 345         }
 346 
 347         public void nextLexeme() {
 348             int state = 0;
 349             int commentLevel = 0;
 350             char c;
 351             while (state >= 0) {
 352                 switch (state) {
 353                     // Looking for a token, quoted string, or tspecial
 354                 case 0:
 355                     if (myCurrentIndex >= mySourceLength) {
 356                         myLexemeType = EOF_LEXEME;
 357                         myLexemeBeginIndex = mySourceLength;
 358                         myLexemeEndIndex = mySourceLength;
 359                         state = -1;
 360                     } else if (Character.isWhitespace
 361                                (c = mySource.charAt (myCurrentIndex ++))) {
 362                         state = 0;
 363                     } else if (c == '\"') {
 364                         myLexemeType = QUOTED_STRING_LEXEME;
 365                         myLexemeBeginIndex = myCurrentIndex;
 366                         state = 1;
 367                     } else if (c == '(') {
 368                         ++ commentLevel;
 369                         state = 3;
 370                     } else if (c == '/'  || c == ';' || c == '=' ||
 371                                c == ')'  || c == '<' || c == '>' ||
 372                                c == '@'  || c == ',' || c == ':' ||
 373                                c == '\\' || c == '[' || c == ']' ||
 374                                c == '?') {
 375                         myLexemeType = TSPECIAL_LEXEME;
 376                         myLexemeBeginIndex = myCurrentIndex - 1;
 377                         myLexemeEndIndex = myCurrentIndex;
 378                         state = -1;
 379                     } else {
 380                         myLexemeType = TOKEN_LEXEME;
 381                         myLexemeBeginIndex = myCurrentIndex - 1;
 382                         state = 5;
 383                     }
 384                     break;
 385                     // In a quoted string
 386                 case 1:
 387                     if (myCurrentIndex >= mySourceLength) {
 388                         myLexemeType = ILLEGAL_LEXEME;
 389                         myLexemeBeginIndex = mySourceLength;
 390                         myLexemeEndIndex = mySourceLength;
 391                         state = -1;
 392                     } else if ((c = mySource.charAt (myCurrentIndex ++)) == '\"') {
 393                         myLexemeEndIndex = myCurrentIndex - 1;
 394                         state = -1;
 395                     } else if (c == '\\') {
 396                         state = 2;
 397                     } else {
 398                         state = 1;
 399                     }
 400                     break;
 401                     // In a quoted string, backslash seen
 402                 case 2:
 403                     if (myCurrentIndex >= mySourceLength) {
 404                         myLexemeType = ILLEGAL_LEXEME;
 405                         myLexemeBeginIndex = mySourceLength;
 406                         myLexemeEndIndex = mySourceLength;
 407                         state = -1;
 408                     } else {
 409                         ++ myCurrentIndex;
 410                         state = 1;
 411                     } break;
 412                     // In a comment
 413                 case 3: if (myCurrentIndex >= mySourceLength) {
 414                     myLexemeType = ILLEGAL_LEXEME;
 415                     myLexemeBeginIndex = mySourceLength;
 416                     myLexemeEndIndex = mySourceLength;
 417                     state = -1;
 418                 } else if ((c = mySource.charAt (myCurrentIndex ++)) == '(') {
 419                     ++ commentLevel;
 420                     state = 3;
 421                 } else if (c == ')') {
 422                     -- commentLevel;
 423                     state = commentLevel == 0 ? 0 : 3;
 424                 } else if (c == '\\') {
 425                     state = 4;
 426                 } else { state = 3;
 427                 }
 428                 break;
 429                 // In a comment, backslash seen
 430                 case 4:
 431                     if (myCurrentIndex >= mySourceLength) {
 432                         myLexemeType = ILLEGAL_LEXEME;
 433                         myLexemeBeginIndex = mySourceLength;
 434                         myLexemeEndIndex = mySourceLength;
 435                         state = -1;
 436                     } else {
 437                         ++ myCurrentIndex;
 438                         state = 3;
 439                     }
 440                     break;
 441                     // In a token
 442                 case 5:
 443                     if (myCurrentIndex >= mySourceLength) {
 444                         myLexemeEndIndex = myCurrentIndex;
 445                         state = -1;
 446                     } else if (Character.isWhitespace
 447                                (c = mySource.charAt (myCurrentIndex ++))) {
 448                         myLexemeEndIndex = myCurrentIndex - 1;
 449                         state = -1;
 450                     } else if (c == '\"' || c == '(' || c == '/' ||
 451                                c == ';'  || c == '=' || c == ')' ||
 452                                c == '<' || c == '>'  || c == '@' ||
 453                                c == ',' || c == ':' || c == '\\' ||
 454                                c == '[' || c == ']' || c == '?') {
 455                         -- myCurrentIndex;
 456                         myLexemeEndIndex = myCurrentIndex;
 457                         state = -1;
 458                     } else {
 459                         state = 5;
 460                     }
 461                     break;
 462                 }
 463             }
 464 
 465         }
 466 
 467     }
 468 
 469     /**
 470      * Returns a lowercase version of the given string. The lowercase version
 471      * is constructed by applying Character.toLowerCase() to each character of
 472      * the given string, which maps characters to lowercase using the rules of
 473      * Unicode. This mapping is the same regardless of locale, whereas the
 474      * mapping of String.toLowerCase() may be different depending on the
 475      * default locale.
 476      */
 477     private static String toUnicodeLowerCase(String s) {
 478         int n = s.length();
 479         char[] result = new char [n];
 480         for (int i = 0; i < n; ++ i) {
 481             result[i] = Character.toLowerCase (s.charAt (i));
 482         }
 483         return new String (result);
 484     }
 485 
 486     /**
 487      * Returns a version of the given string with backslashes removed.
 488      */
 489     private static String removeBackslashes(String s) {
 490         int n = s.length();
 491         char[] result = new char [n];
 492         int i;
 493         int j = 0;
 494         char c;
 495         for (i = 0; i < n; ++ i) {
 496             c = s.charAt (i);
 497             if (c == '\\') {
 498                 c = s.charAt (++ i);
 499             }
 500             result[j++] = c;
 501         }
 502         return new String (result, 0, j);
 503     }
 504 
 505     /**
 506      * Returns a version of the string surrounded by quotes and with interior
 507      * quotes preceded by a backslash.
 508      */
 509     private static String addQuotes(String s) {
 510         int n = s.length();
 511         int i;
 512         char c;
 513         StringBuffer result = new StringBuffer (n+2);
 514         result.append ('\"');
 515         for (i = 0; i < n; ++ i) {
 516             c = s.charAt (i);
 517             if (c == '\"') {
 518                 result.append ('\\');
 519             }
 520             result.append (c);
 521         }
 522         result.append ('\"');
 523         return result.toString();
 524     }
 525 
 526     /**
 527      * Parses the given string into canonical pieces and stores the pieces in
 528      * {@link #myPieces <CODE>myPieces</CODE>}.
 529      * <P>
 530      * Special rules applied:
 531      * <UL>
 532      * <LI> If the media type is text, the value of a charset parameter is
 533      *      converted to lowercase.
 534      * </UL>
 535      *
 536      * @param  s  MIME media type string.
 537      *
 538      * @exception  NullPointerException
 539      *     (unchecked exception) Thrown if <CODE>s</CODE> is null.
 540      * @exception  IllegalArgumentException
 541      *     (unchecked exception) Thrown if <CODE>s</CODE> does not obey the
 542      *     syntax for a MIME media type string.
 543      */
 544     private void parse(String s) {
 545         // Initialize.
 546         if (s == null) {
 547             throw new NullPointerException();
 548         }
 549         LexicalAnalyzer theLexer = new LexicalAnalyzer (s);
 550         int theLexemeType;
 551         Vector thePieces = new Vector();
 552         boolean mediaTypeIsText = false;
 553         boolean parameterNameIsCharset = false;
 554 
 555         // Parse media type.
 556         if (theLexer.getLexemeType() == TOKEN_LEXEME) {
 557             String mt = toUnicodeLowerCase (theLexer.getLexeme());
 558             thePieces.add (mt);
 559             theLexer.nextLexeme();
 560             mediaTypeIsText = mt.equals ("text");
 561         } else {
 562             throw new IllegalArgumentException();
 563         }
 564         // Parse slash.
 565         if (theLexer.getLexemeType() == TSPECIAL_LEXEME &&
 566               theLexer.getLexemeFirstCharacter() == '/') {
 567             theLexer.nextLexeme();
 568         } else {
 569             throw new IllegalArgumentException();
 570         }
 571         if (theLexer.getLexemeType() == TOKEN_LEXEME) {
 572             thePieces.add (toUnicodeLowerCase (theLexer.getLexeme()));
 573             theLexer.nextLexeme();
 574         } else {
 575             throw new IllegalArgumentException();
 576         }
 577         // Parse zero or more parameters.
 578         while (theLexer.getLexemeType() == TSPECIAL_LEXEME &&
 579                theLexer.getLexemeFirstCharacter() == ';') {
 580             // Parse semicolon.
 581             theLexer.nextLexeme();
 582 
 583             // Parse parameter name.
 584             if (theLexer.getLexemeType() == TOKEN_LEXEME) {
 585                 String pn = toUnicodeLowerCase (theLexer.getLexeme());
 586                 thePieces.add (pn);
 587                 theLexer.nextLexeme();
 588                 parameterNameIsCharset = pn.equals ("charset");
 589             } else {
 590                 throw new IllegalArgumentException();
 591             }
 592 
 593             // Parse equals.
 594             if (theLexer.getLexemeType() == TSPECIAL_LEXEME &&
 595                 theLexer.getLexemeFirstCharacter() == '=') {
 596                 theLexer.nextLexeme();
 597             } else {
 598                 throw new IllegalArgumentException();
 599             }
 600 
 601             // Parse parameter value.
 602             if (theLexer.getLexemeType() == TOKEN_LEXEME) {
 603                 String pv = theLexer.getLexeme();
 604                 thePieces.add(mediaTypeIsText && parameterNameIsCharset ?
 605                               toUnicodeLowerCase (pv) :
 606                               pv);
 607                 theLexer.nextLexeme();
 608             } else if (theLexer.getLexemeType() == QUOTED_STRING_LEXEME) {
 609                 String pv = removeBackslashes (theLexer.getLexeme());
 610                 thePieces.add(mediaTypeIsText && parameterNameIsCharset ?
 611                               toUnicodeLowerCase (pv) :
 612                               pv);
 613                 theLexer.nextLexeme();
 614             } else {
 615                 throw new IllegalArgumentException();
 616             }
 617         }
 618 
 619         // Make sure we've consumed everything.
 620         if (theLexer.getLexemeType() != EOF_LEXEME) {
 621             throw new IllegalArgumentException();
 622         }
 623 
 624         // Save the pieces. Parameters are not in ascending order yet.
 625         int n = thePieces.size();
 626         myPieces = (String[]) thePieces.toArray (new String [n]);
 627 
 628         // Sort the parameters into ascending order using an insertion sort.
 629         int i, j;
 630         String temp;
 631         for (i = 4; i < n; i += 2) {
 632             j = 2;
 633             while (j < i && myPieces[j].compareTo (myPieces[i]) <= 0) {
 634                 j += 2;
 635             }
 636             while (j < i) {
 637                 temp = myPieces[j];
 638                 myPieces[j] = myPieces[i];
 639                 myPieces[i] = temp;
 640                 temp = myPieces[j+1];
 641                 myPieces[j+1] = myPieces[i+1];
 642                 myPieces[i+1] = temp;
 643                 j += 2;
 644             }
 645         }
 646     }
 647 }