Changeset 1450


Ignore:
Timestamp:
May 11, 2012, 9:58:22 PM (6 years ago)
Author:
joerg
Message:

longest common subsequence was not a good measure for similarity, replaced it by longest common substring

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/jlatexeditor/bib/BibAssistant.java

    r1449 r1450  
    313313
    314314  /**
    315    * Longest common subsequence: http://introcs.cs.princeton.edu/java/96optimization/.
     315   * Longest common substring: http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Longest_common_substring
    316316   */
    317317  private int lcs(String x, String y) {
     318    if (x == null || y == null || x.length() == 0 || y.length() == 0) {
     319      return 0;
     320    }
     321
     322    int maxLen = 0;
    318323    int xlength = x.length();
    319324    int ylength = y.length();
    320 
    321     int[][] opt = new int[xlength+1][ylength+1];
    322 
    323     for (int i = xlength-1; i >= 0; i--) {
    324       for (int j = ylength-1; j >= 0; j--) {
    325         if (x.charAt(i) == y.charAt(j))
    326           opt[i][j] = opt[i+1][j+1] + 1;
    327         else
    328           opt[i][j] = Math.max(opt[i+1][j], opt[i][j+1]);
    329       }
    330     }
    331 
    332     int length = 0;
    333     int i = 0, j = 0;
    334     while(i < xlength && j < ylength) {
    335       if (x.charAt(i) == y.charAt(j)) {
    336         length++;
    337         i++;
    338         j++;
    339       }
    340       else if (opt[i+1][j] >= opt[i][j+1]) i++;
    341       else                                 j++;
    342     }
    343 
    344     return length;
     325    int[][] table = new int[xlength][ylength];
     326
     327    for (int i = 0; i < xlength; i++) {
     328      for (int j = 0; j < ylength; j++) {
     329        if (x.charAt(i) == y.charAt(j)) {
     330          if (i == 0 || j == 0) {
     331            table[i][j] = 1;
     332          }
     333          else {
     334            table[i][j] = table[i - 1][j - 1] + 1;
     335          }
     336          if (table[i][j] > maxLen) {
     337            maxLen = table[i][j];
     338          }
     339        }
     340      }
     341    }
     342
     343    return maxLen;
    345344  }
    346345
Note: See TracChangeset for help on using the changeset viewer.