1 /*
   2  * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.xml.internal.bind.v2.util;
  27 
  28 import java.util.Collection;
  29 import java.util.Arrays;
  30 
  31 /**
  32  * Computes the string edit distance.
  33  *
  34  * <p>
  35  * Refer to a computer science text book for the definition
  36  * of the "string edit distance".
  37  *
  38  * @author
  39  *     Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com)
  40  */
  41 public class EditDistance {
  42 
  43     /**
  44      * Computes the edit distance between two strings.
  45      *
  46      * <p>
  47      * The complexity is O(nm) where n=a.length() and m=b.length().
  48      */
  49     public static int editDistance( String a, String b ) {
  50         return new EditDistance(a,b).calc();
  51     }
  52 
  53     /**
  54      * Finds the string in the <code>group</code> closest to
  55      * <code>key</code> and returns it.
  56      *
  57      * @return null if group.length==0.
  58      */
  59     public static String findNearest( String key, String[] group ) {
  60         return findNearest(key, Arrays.asList(group));
  61     }
  62 
  63     /**
  64      * Finds the string in the <code>group</code> closest to
  65      * <code>key</code> and returns it.
  66      *
  67      * @return null if group.length==0.
  68      */
  69     public static String findNearest( String key, Collection<String> group ) {
  70         int c = Integer.MAX_VALUE;
  71         String r = null;
  72 
  73         for (String s : group) {
  74             int ed = editDistance(key,s);
  75             if( c>ed ) {
  76                 c = ed;
  77                 r = s;
  78             }
  79         }
  80         return r;
  81     }
  82 
  83     /** cost vector. */
  84     private int[] cost;
  85     /** back buffer. */
  86     private int[] back;
  87 
  88     /** Two strings to be compared. */
  89     private final String a,b;
  90 
  91     private EditDistance( String a, String b ) {
  92         this.a=a;
  93         this.b=b;
  94         cost = new int[a.length()+1];
  95         back = new int[a.length()+1]; // back buffer
  96 
  97         for( int i=0; i<=a.length(); i++ )
  98             cost[i] = i;
  99     }
 100 
 101     /**
 102      * Swaps two buffers.
 103      */
 104     private void flip() {
 105         int[] t = cost;
 106         cost = back;
 107         back = t;
 108     }
 109 
 110     private int min(int a,int b,int c) {
 111         return Math.min(a,Math.min(b,c));
 112     }
 113 
 114     private int calc() {
 115         for( int j=0; j<b.length(); j++ ) {
 116             flip();
 117             cost[0] = j+1;
 118             for( int i=0; i<a.length(); i++ ) {
 119                 int match = (a.charAt(i)==b.charAt(j))?0:1;
 120                 cost[i+1] = min( back[i]+match, cost[i]+1, back[i+1]+1 );
 121             }
 122         }
 123         return cost[a.length()];
 124     }
 125 }