优化版编辑距离算法
java"> public static int minDistance(String str, String str1) {int len = str.length(), len1 = str1.length();// 预处理char[] arr1 = str.toCharArray();char[] arr2 = str1.toCharArray();// 动态规划数组int[] dp = new int[len1 + 1];for (int j = 0; j <= len1; j++) dp[j] = j;for (int i = 1; i <= len; i++) {int prevDiagonal = dp[0];dp[0] = i;char c1 = arr1[i-1];for (int j = 1; j <= len1; j++) {int cost = (c1 == arr2[j-1]) ? 0 : 1;int newVal = dp[j] + 1;newVal = newVal < dp[j-1] + 1 ? newVal : dp[j-1] + 1;newVal = newVal < prevDiagonal + cost ? newVal : prevDiagonal + cost;prevDiagonal = dp[j];dp[j] = newVal;}}return dp[len1];}
ES自定义评分脚本
## 库名称:zhCNenUS-基建化工-油气-木木有限公司23456crm-QM、zhCNenUS-汽车-汽车-木木有限公司23456crm-QM
POST /tm/_search
{
  "query": {
    "bool": {
      "filter": [
        { "terms": { "dbId": ["101476", "110316"] } },
        {
          "match": {
            "original": {
              "query": "姓名:上云测试用户01",
              "minimum_should_match": "69%"
            }
          }
        }
      ],
      "must": [
        {
          "function_score": {
            "functions": [
              {
                "script_score": {
                  "script": {
                    "source": """
                      // Similarity score between a stored value (str) and a request value (str1).
                      // Functions must be declared at the top of a Painless script.
                      //   10  - either side is null or empty
                      //   30  - the shorter length is below 70% of the longer one
                      //   100 - exact match
                      //   90  - match when ignoring case
                      //   otherwise 80 divided by the case-insensitive edit distance
                      double calculate(String str, String str1) {
                        // Fast exits
                        if (str == null || str1 == null || str.isEmpty() || str1.isEmpty()) {
                          return 10.0;
                        }
                        int len = str.length();
                        int len1 = str1.length();
                        int minLen = len < len1 ? len : len1;
                        int maxLen = len < len1 ? len1 : len;
                        double ratio = (double) minLen / maxLen;
                        if (ratio < 0.7) {
                          return 30.0;
                        }
                        if (str.equals(str1)) {
                          return 100.0;
                        }
                        String strLower = str.toLowerCase();
                        String str1Lower = str1.toLowerCase();
                        if (strLower.equals(str1Lower)) {
                          return 90.0;
                        }

                        // Pre-processing
                        char[] arr1 = strLower.toCharArray();
                        char[] arr2 = str1Lower.toCharArray();
                        // Lower-casing can change a string's length, so the loop
                        // bounds come from the arrays that are actually indexed.
                        int rows = arr1.length;
                        int cols = arr2.length;

                        // Rolling one-row dynamic-programming table
                        int[] dp = new int[cols + 1];
                        for (int j = 0; j <= cols; j++) {
                          dp[j] = j;
                        }
                        for (int i = 1; i <= rows; i++) {
                          int prevDiagonal = dp[0];
                          dp[0] = i;
                          char c1 = arr1[i - 1];
                          for (int j = 1; j <= cols; j++) {
                            int cost = (c1 == arr2[j - 1]) ? 0 : 1;
                            // Minimum of deletion, insertion and substitution
                            int newVal = dp[j] + 1;
                            newVal = newVal < dp[j - 1] + 1 ? newVal : dp[j - 1] + 1;
                            newVal = newVal < prevDiagonal + cost ? newVal : prevDiagonal + cost;
                            prevDiagonal = dp[j];
                            dp[j] = newVal;
                          }
                        }
                        // The distance is at least 1 here: equal strings returned above.
                        return 80.0 * (1.0 / dp[cols]);
                      }

                      // Reading .value from a field with no doc value throws and fails
                      // the whole request, so fall back to an empty string instead.
                      String es1 = "";
                      if (doc.containsKey('original.keyword') && doc['original.keyword'].size() != 0) {
                        es1 = doc['original.keyword'].value;
                      }
                      String es2 = "";
                      if (doc.containsKey('translation.keyword') && doc['translation.keyword'].size() != 0) {
                        es2 = doc['translation.keyword'].value;
                      }

                      String str1 = params.val1;
                      String str2 = params.val2;
                      // A missing val2 parameter is treated the same as "no tags".
                      if (str2 == null) {
                        str2 = "";
                      }

                      // Score for the plain text
                      double textRatio = calculate(es1, str1);
                      // Score for the tag format
                      double tagRatio = calculate(es2, str2);
                      // Penalise a hit whose stored value has tags when the request has none
                      if (str2.length() == 0 && es2.length() != 0) {
                        tagRatio = -tagRatio;
                      }
                      // Weighted combination of the two scores
                      return textRatio + 0.1 * tagRatio;
                    """,
                    "params": {
                      "val1": "姓名:上云测试用户01",
                      "val2": "<1>4<2/>4</1>"
                    }
                  }
                }
              }
            ],
            "boost_mode": "replace"
          }
        }
      ]
    }
  }
}