time-to-botec

Benchmark sampling in different programming languages
Log | Files | Refs | README

suggestSimilar.js (2765B)


      1 const maxDistance = 3;
      2 
      3 function editDistance(a, b) {
      4   // https://en.wikipedia.org/wiki/Damerau–Levenshtein_distance
      5   // Calculating optimal string alignment distance, no substring is edited more than once.
      6   // (Simple implementation.)
      7 
      8   // Quick early exit, return worst case.
      9   if (Math.abs(a.length - b.length) > maxDistance) return Math.max(a.length, b.length);
     10 
     11   // distance between prefix substrings of a and b
     12   const d = [];
     13 
     14   // pure deletions turn a into empty string
     15   for (let i = 0; i <= a.length; i++) {
     16     d[i] = [i];
     17   }
     18   // pure insertions turn empty string into b
     19   for (let j = 0; j <= b.length; j++) {
     20     d[0][j] = j;
     21   }
     22 
     23   // fill matrix
     24   for (let j = 1; j <= b.length; j++) {
     25     for (let i = 1; i <= a.length; i++) {
     26       let cost = 1;
     27       if (a[i - 1] === b[j - 1]) {
     28         cost = 0;
     29       } else {
     30         cost = 1;
     31       }
     32       d[i][j] = Math.min(
     33         d[i - 1][j] + 1, // deletion
     34         d[i][j - 1] + 1, // insertion
     35         d[i - 1][j - 1] + cost // substitution
     36       );
     37       // transposition
     38       if (i > 1 && j > 1 && a[i - 1] === b[j - 2] && a[i - 2] === b[j - 1]) {
     39         d[i][j] = Math.min(d[i][j], d[i - 2][j - 2] + 1);
     40       }
     41     }
     42   }
     43 
     44   return d[a.length][b.length];
     45 }
     46 
     47 /**
     48  * Find close matches, restricted to same number of edits.
     49  *
     50  * @param {string} word
     51  * @param {string[]} candidates
     52  * @returns {string}
     53  */
     54 
     55 function suggestSimilar(word, candidates) {
     56   if (!candidates || candidates.length === 0) return '';
     57   // remove possible duplicates
     58   candidates = Array.from(new Set(candidates));
     59 
     60   const searchingOptions = word.startsWith('--');
     61   if (searchingOptions) {
     62     word = word.slice(2);
     63     candidates = candidates.map(candidate => candidate.slice(2));
     64   }
     65 
     66   let similar = [];
     67   let bestDistance = maxDistance;
     68   const minSimilarity = 0.4;
     69   candidates.forEach((candidate) => {
     70     if (candidate.length <= 1) return; // no one character guesses
     71 
     72     const distance = editDistance(word, candidate);
     73     const length = Math.max(word.length, candidate.length);
     74     const similarity = (length - distance) / length;
     75     if (similarity > minSimilarity) {
     76       if (distance < bestDistance) {
     77         // better edit distance, throw away previous worse matches
     78         bestDistance = distance;
     79         similar = [candidate];
     80       } else if (distance === bestDistance) {
     81         similar.push(candidate);
     82       }
     83     }
     84   });
     85 
     86   similar.sort((a, b) => a.localeCompare(b));
     87   if (searchingOptions) {
     88     similar = similar.map(candidate => `--${candidate}`);
     89   }
     90 
     91   if (similar.length > 1) {
     92     return `\n(Did you mean one of ${similar.join(', ')}?)`;
     93   }
     94   if (similar.length === 1) {
     95     return `\n(Did you mean ${similar[0]}?)`;
     96   }
     97   return '';
     98 }
     99 
    100 exports.suggestSimilar = suggestSimilar;