src/main/java/org/starrier/common/utils/SensitiveWordUtil.java
package org.starrier.common.utils; import java.util.HashMap;import java.util.HashSet;import java.util.Iterator;import java.util.Map;import java.util.Set;import java.util.stream.IntStream; import static org.starrier.common.constant.Constant.IS_END;import static org.starrier.common.constant.Constant.MAX_MATCH_TYPE;import static org.starrier.common.constant.Constant.MIN_MATCH_T_YPE;import static org.starrier.common.constant.Constant.ONE;import static org.starrier.common.constant.Constant.ZERO; /** * @author Starrier * @date 2019/4/10. * <p> * Description : */public class SensitiveWordUtil { /** * 敏感词集合 */ public static Map sensitiveWordMap; private SensitiveWordUtil() { init(null); } public static Map getSensitiveWordMap() { return sensitiveWordMap; } public static void setSensitiveWordMap(Map sensitiveWordMap) { SensitiveWordUtil.sensitiveWordMap = sensitiveWordMap; } /** * 初始化敏感词库,构建DFA算法模型 * * @param sensitiveWordSet 敏感词库 */ public static synchronized void init(Set<String> sensitiveWordSet) { sensitiveWordSet = new HashSet<>(7); sensitiveWordSet.add("太多"); sensitiveWordSet.add("爱恋"); sensitiveWordSet.add("静静"); sensitiveWordSet.add("哈哈"); sensitiveWordSet.add("啦啦"); sensitiveWordSet.add("感动"); sensitiveWordSet.add("发呆"); initSensitiveWordMap(sensitiveWordSet); } /** * 初始化敏感词库,构建 DFA 算法模型 * * @param sensitiveWordSet 敏感词库 */Method `initSensitiveWordMap` has a Cognitive Complexity of 10 (exceeds 5 allowed). Consider refactoring. @SuppressWarnings({"rawtypes", "unchecked"}) private static void initSensitiveWordMap(Set<String> sensitiveWordSet) { //初始化敏感词容器,减少扩容操作 sensitiveWordMap = new HashMap<>(sensitiveWordSet.size()); String key; Map nowMap; Map newWorMap; //迭代sensitiveWordSet for (String s : sensitiveWordSet) { //关键字 key = s; nowMap = sensitiveWordMap; for (int i = 0; i < key.length(); i++) { //转换成char型 char keyChar = key.charAt(i); //库中获取关键字 Object wordMap = nowMap.get(keyChar); //如果存在该key,直接赋值,用于下一个循环获取 if (wordMap != null) { nowMap = (Map) wordMap; } else { //不存在则,则构建一个map,同时将isEnd设置为0,因为他不是最后一个 newWorMap = new HashMap<>(sensitiveWordSet.size()); //不是最后一个 newWorMap.put(IS_END, ZERO); nowMap.put(keyChar, newWorMap); nowMap = newWorMap; } if (i == key.length() - 1) { //最后一个 nowMap.put(IS_END, ONE); } } } } /** * 判断文字是否包含敏感字符 * * @param txt 文字 * @param matchType 匹配规则 1:最小匹配规则,2:最大匹配规则 * @return 若包含返回true,否则返回false */ private static boolean contains(String txt, int matchType) { boolean flag = false; for (int i = 0; i < txt.length(); i++) { if (checkSensitiveWord(txt, i, matchType) > 0) { flag = true; } } return flag; } /** * 判断文字是否包含敏感字符 * * @param txt 文字 * @return 若包含返回true,否则返回false */ public static boolean contains(String txt) { return contains(txt, MAX_MATCH_TYPE); } /** * 获取文字中的敏感词 * * @param txt 文字 * @param matchType 匹配规则 1:最小匹配规则,2:最大匹配规则 * @return Set<String> */ private static Set<String> getSensitiveWord(String txt, int matchType) { Set<String> sensitiveWordList = new HashSet<>(txt.length()); IntStream.range(0, txt.length()).forEach(i -> { int length = checkSensitiveWord(txt, i, matchType); //存在,加入list中 if (length > 0) { sensitiveWordList.add(txt.substring(i, i + length)); //减1的原因,是因为for会自增 i = i + length - 1; } }); return sensitiveWordList; } /** * 获取文字中的敏感词 * * @param txt content. * @return return. */ public static Set<String> getSensitiveWord(String txt) { return getSensitiveWord(txt, MAX_MATCH_TYPE); } /** * 替换敏感字字符 * * @param txt 文本 * @param replaceChar 替换的字符,匹配的敏感词以字符逐个替换, * 如 语句:我爱中国人 敏感词:中国人,替换字符:*, 替换结果:我爱*** * @param matchType 敏感词匹配规则 * @return return */ private static String replaceSensitiveWord(String txt, char replaceChar, int matchType) { String resultTxt = txt; //获取所有的敏感词 Set<String> set = getSensitiveWord(txt, matchType); Iterator<String> iterator = set.iterator(); String word; String replaceString; while (iterator.hasNext()) { word = iterator.next(); replaceString = getReplaceChars(replaceChar, word.length()); resultTxt = resultTxt.replaceAll(word, replaceString); } return resultTxt; } /** * 替换敏感字字符 * * @param txt 文本 * @param replaceChar 替换的字符,匹配的敏感词以字符逐个替换, * 如 语句:我爱中国人 敏感词:中国人,替换字符:*, 替换结果:我爱*** * @return result come from {@see SensitiveWordUtil#replaceSensitiveWord(String, char)} */ public static String replaceSensitiveWord(String txt, char replaceChar) { return replaceSensitiveWord(txt, replaceChar, MAX_MATCH_TYPE); } /** * 替换敏感字字符 * * @param txt 文本 * @param replaceStr 替换的字符串,匹配的敏感词以字符逐个替换, * 如 语句:我爱中国人 敏感词:中国人,替换字符串:[屏蔽],替换结果:我爱[屏蔽] * @param matchType 敏感词匹配规则 * @return return */ private static String replaceSensitiveWord(String txt, String replaceStr, int matchType) { String resultTxt = txt; //获取所有的敏感词 Set<String> set = getSensitiveWord(txt, matchType); Iterator<String> iterator = set.iterator(); String word; while (iterator.hasNext()) { word = iterator.next(); resultTxt = resultTxt.replaceAll(word, replaceStr); } return resultTxt; } /** * 替换敏感字字符 * * @param txt 文本 * @param replaceStr 替换的字符串,匹配的敏感词以字符逐个替换 * 如 语句:我爱中国人 敏感词:中国人,替换字符串:[屏蔽],替换结果:我爱[屏蔽] * @return return. */ public static String replaceSensitiveWord(String txt, String replaceStr) { return replaceSensitiveWord(txt, replaceStr, MAX_MATCH_TYPE); } /** * 获取替换字符串 * * @param replaceChar replace char. * @param length length. * @return return. */ private static String getReplaceChars(char replaceChar, int length) { //In JDK 11+ /*return String.valueOf(replaceChar) + String.valueOf( ).repeat(Math.max(0, length - 1));*/ /** * In JDK 1.8 */ return null; } /** * 检查文字中是否包含敏感字符,检查规则如下:<br> * * @param txt content. * @param beginIndex begin start index. * @param matchType match type. * @return 如果存在,则返回敏感词字符的长度,不存在返回0 */Method `checkSensitiveWord` has a Cognitive Complexity of 13 (exceeds 5 allowed). Consider refactoring. private static int checkSensitiveWord(String txt, int beginIndex, int matchType) { //敏感词结束标识位:用于敏感词只有1位的情况 boolean flag = false; //匹配标识数默认为0 int matchFlag = 0; char word; Map nowMap = sensitiveWordMap; for (int i = beginIndex; i < txt.length(); i++) { word = txt.charAt(i); //获取指定key nowMap = (Map) nowMap.get(word); //存在,则判断是否为最后一个 if (nowMap != null) { //找到相应key,匹配标识+1 matchFlag++; //如果为最后一个匹配规则,结束循环,返回匹配标识数 if ("1".equals(nowMap.get("isEnd"))) { //结束标志位为true flag = true; //最小规则,直接返回,最大规则还需继续查找 if (MIN_MATCH_T_YPE == matchType) { break; } } } else { //不存在,直接返回 break; } } //长度必须大于等于1,为词 if (matchFlag < 2 || !flag) { matchFlag = 0; } return matchFlag; }}