diff --git a/dl_admin/ruoyi-admin/pom.xml b/dl_admin/ruoyi-admin/pom.xml index cbfa14f..b4ab22d 100644 --- a/dl_admin/ruoyi-admin/pom.xml +++ b/dl_admin/ruoyi-admin/pom.xml @@ -139,6 +139,11 @@ ip2region 2.7.0 + + org.jsoup + jsoup + 1.11.3 + diff --git a/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/controller/BusiNewController.java b/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/controller/BusiNewController.java index ae7a4ef..c83dc4c 100644 --- a/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/controller/BusiNewController.java +++ b/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/controller/BusiNewController.java @@ -7,6 +7,8 @@ import com.ruoyi.busi.domain.BusiProdNew; import com.ruoyi.busi.domain.BusiProdRandom; import com.ruoyi.busi.service.IBusiProdNewService; import com.ruoyi.busi.service.IBusiProdRandomService; +import com.ruoyi.busi.utils.SimHash; +import com.ruoyi.busi.utils.TextPreprocessor; import com.ruoyi.busi.vo.ProdNewVO; import com.ruoyi.busi.vo.ProdRandomVO; import com.ruoyi.common.annotation.Log; @@ -110,6 +112,7 @@ public class BusiNewController extends BaseController @PostMapping public AjaxResult add(@RequestBody ProdNewVO prodNewVO){ prodNewVO.setDataType(DATA_TYPE_NEWS); + prodNewVO.setSimHash(SimHash.compute(TextPreprocessor.combineFields(prodNewVO.getTitle(),prodNewVO.getDescription(),prodNewVO.getContent()))); busiProdNewService.save(prodNewVO); if(null!=prodNewVO.getFileList() && !prodNewVO.getFileList().isEmpty()){ prodNewVO.getFileList().forEach(item->{ @@ -130,6 +133,7 @@ public class BusiNewController extends BaseController @PutMapping public AjaxResult edit(@RequestBody ProdNewVO prodNewVO){ prodNewVO.setDataType(DATA_TYPE_NEWS); + prodNewVO.setSimHash(SimHash.compute(TextPreprocessor.combineFields(prodNewVO.getTitle(),prodNewVO.getDescription(),prodNewVO.getContent()))); busiProdNewService.updateById(prodNewVO); if(null!=prodNewVO.getFileList() && !prodNewVO.getFileList().isEmpty()){ 
prodNewVO.getFileList().forEach(item->{ @@ -187,4 +191,13 @@ public class BusiNewController extends BaseController prodRandomService.delProdRandom(randomVO.getProdId(),randomVO.getRandomId(),randomVO.getTenantId()); return success(); } + + /** + * 相似度检测 + */ + @PostMapping("/checkContent") + public AjaxResult checkContent(@RequestBody ProdNewVO prodNewVO){ + prodNewVO.setDataType(DATA_TYPE_NEWS); + return success(busiProdNewService.checkContent(prodNewVO)); + } } diff --git a/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/controller/BusiProdController.java b/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/controller/BusiProdController.java index 1167ab3..46b6039 100644 --- a/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/controller/BusiProdController.java +++ b/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/controller/BusiProdController.java @@ -4,13 +4,18 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; import javax.servlet.http.HttpServletResponse; + +import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; import com.baomidou.mybatisplus.core.metadata.IPage; import com.baomidou.mybatisplus.extension.plugins.pagination.Page; import com.ruoyi.base.service.IBasePicsService; import com.ruoyi.busi.domain.BusiProdRandom; import com.ruoyi.busi.service.IBusiProdRandomService; +import com.ruoyi.busi.utils.SimHash; +import com.ruoyi.busi.utils.TextPreprocessor; import com.ruoyi.busi.vo.ProdNewVO; import com.ruoyi.busi.vo.ProdRandomVO; +import com.ruoyi.common.utils.StringUtils; import org.springframework.security.access.prepost.PreAuthorize; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.web.bind.annotation.*; @@ -118,6 +123,7 @@ public class BusiProdController extends BaseController @PostMapping public AjaxResult add(@RequestBody ProdNewVO prodNewVO){ prodNewVO.setDataType(DATA_TYPE_PRODUCT); + 
prodNewVO.setSimHash(SimHash.compute(TextPreprocessor.combineFields(prodNewVO.getTitle(),prodNewVO.getDescription(),prodNewVO.getContent()))); busiProdNewService.save(prodNewVO); if(null!=prodNewVO.getFileList() && !prodNewVO.getFileList().isEmpty()){ prodNewVO.getFileList().forEach(item->{ @@ -138,6 +144,7 @@ public class BusiProdController extends BaseController @PutMapping public AjaxResult edit(@RequestBody ProdNewVO prodNewVO){ prodNewVO.setDataType(DATA_TYPE_PRODUCT); + prodNewVO.setSimHash(SimHash.compute(TextPreprocessor.combineFields(prodNewVO.getTitle(),prodNewVO.getDescription(),prodNewVO.getContent()))); busiProdNewService.updateById(prodNewVO); if(null!=prodNewVO.getFileList() && !prodNewVO.getFileList().isEmpty()){ prodNewVO.getFileList().forEach(item->{ @@ -195,4 +202,13 @@ public class BusiProdController extends BaseController prodRandomService.delProdRandom(randomVO.getProdId(),randomVO.getRandomId(),randomVO.getTenantId()); return success(); } + + /** + * 相似度检测 + */ + @PostMapping("/checkContent") + public AjaxResult checkContent(@RequestBody ProdNewVO prodNewVO){ + prodNewVO.setDataType(DATA_TYPE_PRODUCT); + return success(busiProdNewService.checkContent(prodNewVO)); + } } diff --git a/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/domain/BusiProdNew.java b/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/domain/BusiProdNew.java index 2d625ca..fa75a05 100644 --- a/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/domain/BusiProdNew.java +++ b/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/domain/BusiProdNew.java @@ -97,6 +97,10 @@ public class BusiProdNew extends DlBaseEntity @ApiModelProperty("排序") private Long sort; + /** simHash值 */ + @ApiModelProperty("simHash值") + private Long simHash; + /** 是否首页推荐显示 */ @Excel(name = "是否首页推荐显示") @ApiModelProperty("是否首页推荐显示") diff --git a/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/mapper/BusiProdNewMapper.java b/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/mapper/BusiProdNewMapper.java 
/**
 * Similarity check: returns the stored products/news that are considered similar
 * to the submitted record.
 *
 * @author vinjor-M
 * @date 10:24 2025/7/18
 * @param prodNewVO the submitted product/news whose title, description and content are compared
 * @return java.util.List of the similar records
 **/
List checkContent(ProdNewVO prodNewVO);
com.baomidou.mybatisplus.extension.conditions.query.LambdaQueryChainWrapp import com.ruoyi.busi.domain.BusiProdRandom; import com.ruoyi.busi.mapper.BusiProdRandomMapper; import com.ruoyi.busi.service.IBusiCategoryService; +import com.ruoyi.busi.utils.SimHash; +import com.ruoyi.busi.utils.TextPreprocessor; import com.ruoyi.busi.vo.ProdNewVO; import com.ruoyi.busi.vo.ProdRandomVO; import com.ruoyi.busi.vo.WebDetailVO; import com.ruoyi.common.utils.DateUtils; import com.baomidou.mybatisplus.core.metadata.IPage; import com.baomidou.mybatisplus.extension.plugins.pagination.Page; +import com.ruoyi.common.utils.StringUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; @@ -26,6 +26,8 @@ import com.ruoyi.busi.service.IBusiProdNewService; import static com.ruoyi.constant.DictConstants.DATA_TYPE_NEWS; import static com.ruoyi.constant.DictConstants.DATA_TYPE_PRODUCT; +import static com.ruoyi.constant.StrConstants.HASH_BITS; +import static com.ruoyi.constant.StrConstants.MAX_HAMMING_DISTANCE; /** * 产品、文章Service业务层处理 @@ -203,4 +205,40 @@ public class BusiProdNewServiceImpl extends ServiceImpl + * @author vinjor-M + * @date 10:24 2025/7/18 + **/ + @Override + public List checkContent(ProdNewVO prodNewVO) { + //查出除了本产品/新闻以外的所有其他产品/新闻 + prodNewVO.setIfPublic(true); + if(StringUtils.isNotEmpty(prodNewVO.getId())){ + prodNewVO.setExcludeProdId(prodNewVO.getId()); + } + List list = busiProdNewMapper.selectCusList(prodNewVO); + //本产品/新闻的hash值 + long thisHash = SimHash.compute(TextPreprocessor.combineFields(prodNewVO.getTitle(),prodNewVO.getDescription(),prodNewVO.getContent())); + //相似的文章 + List similarArticles = new ArrayList<>(); + //遍历每一个产品/新闻 + for (ProdNewVO prodItem:list){ + if(null!=prodItem.getSimHash()){ + if (SimHash.isSimilar(thisHash, prodItem.getSimHash(), MAX_HAMMING_DISTANCE)) { + double similarity = 1 - (SimHash.hammingDistance(thisHash, 
prodItem.getSimHash()) / (double)HASH_BITS); + prodItem.setSimilarity(similarity); + prodItem.setSimilarityStr(String.format("%.2f%%", similarity * 100)); + similarArticles.add(prodItem); + } + } + } + // 按相似度降序排序 + similarArticles.sort((a, b) -> Double.compare(b.getSimilarity(), a.getSimilarity())); + return similarArticles; + } } diff --git a/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/utils/SimHash.java b/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/utils/SimHash.java new file mode 100644 index 0000000..24499a9 --- /dev/null +++ b/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/utils/SimHash.java @@ -0,0 +1,111 @@ +package com.ruoyi.busi.utils; +import java.util.*; + +import static com.ruoyi.constant.StrConstants.HASH_BITS; +import static com.ruoyi.constant.StrConstants.TOP_N; + +/** + * SimHash实现类 + * @author vinjor-M + * @date 17:25 2025/7/17 +**/ +public class SimHash { + + + /** + * 计算文本的SimHash值 + */ + public static long compute(String text) { + if (text == null || text.trim().isEmpty()) { + return 0L; + } + // 1. 分词并计算词频 + Map words = tokenize(text); + // 2. 初始化向量 + int[] v = new int[HASH_BITS]; + // 3. 计算每个词的哈希贡献 + for (Map.Entry entry : words.entrySet()) { + String word = entry.getKey(); + int weight = entry.getValue(); + // 计算词的哈希值 + long hash = hash(word); + // 更新向量 + for (int i = 0; i < HASH_BITS; i++) { + long mask = 1L << i; + if ((hash & mask) != 0) { + v[i] += weight; + } else { + v[i] -= weight; + } + } + } + // 4. 
生成最终的SimHash值 + long simHash = 0; + for (int i = 0; i < HASH_BITS; i++) { + if (v[i] > 0) { + simHash |= 1L << i; + } + } + return simHash; + } + + /** + * 计算两个SimHash值的海明距离 + */ + public static int hammingDistance(long hash1, long hash2) { + long diff = hash1 ^ hash2; + int distance = 0; + + while (diff != 0) { + distance += diff & 1; + diff >>>= 1; + } + + return distance; + } + + /** + * 判断是否相似(根据海明距离) + */ + public static boolean isSimilar(long hash1, long hash2, int maxDistance) { + return hammingDistance(hash1, hash2) <= maxDistance; + } + + // 辅助方法:分词并计算词频(简化版,实际项目中可以使用更专业的分词工具) + private static Map tokenize(String text) { + String[] words = text.split("\\s+"); + Map wordCounts = new HashMap<>(); + + for (String word : words) { + if (word.length() > 1) { + // 忽略单字 + wordCounts.put(word, wordCounts.getOrDefault(word, 0) + 1); + } + } + + // 按词频排序,取权重最高的前TOP_N个词 + List> entries = new ArrayList<>(wordCounts.entrySet()); + entries.sort((a, b) -> b.getValue().compareTo(a.getValue())); + + Map topWords = new HashMap<>(); + for (int i = 0; i < Math.min(TOP_N, entries.size()); i++) { + topWords.put(entries.get(i).getKey(), entries.get(i).getValue()); + } + + return topWords; + } + + // 辅助方法:计算字符串的哈希值 + private static long hash(String source) { + if (source == null || source.isEmpty()) { + return 0; + } + + long hash = 0; + for (char c : source.toCharArray()) { + hash = (hash << 5) - hash + c; + } + + return hash; + } +} diff --git a/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/utils/TextPreprocessor.java b/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/utils/TextPreprocessor.java new file mode 100644 index 0000000..442da82 --- /dev/null +++ b/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/utils/TextPreprocessor.java @@ -0,0 +1,45 @@ +package com.ruoyi.busi.utils; +import org.apache.commons.lang3.StringUtils; +import org.jsoup.Jsoup; +/** + * 文本预处理工具类 + * @author vinjor-M + * @date 17:23 2025/7/17 +**/ +public class TextPreprocessor { + /** + * 
预处理文本:清除HTML、转小写、去除非文字字符 + */ + public static String preprocess(String htmlContent) { + if (htmlContent == null || htmlContent.trim().isEmpty()) { + return ""; + } + // 1. 清除HTML标签 + String text = Jsoup.parse(htmlContent).text(); + // 2. 转换为小写 + text = text.toLowerCase(); + // 3. 移除标点符号和特殊字符(保留中文和英文) + text = text.replaceAll("[^a-zA-Z0-9\\u4e00-\\u9fa5]", " "); + // 4. 去除多余空格 + text = StringUtils.normalizeSpace(text); + return text; + } + + /** + * 合并多个字段的文本 + */ + public static String combineFields(String title, String summary, String content) { + StringBuilder sb = new StringBuilder(); + if (!StringUtils.isBlank(title)) { + sb.append(preprocess(title)).append(" "); + } + if (!StringUtils.isBlank(summary)) { + sb.append(preprocess(summary)).append(" "); + } + if (!StringUtils.isBlank(content)) { + // 内容字段已经包含HTML,需要特殊处理 + sb.append(preprocess(content)); + } + return sb.toString(); + } +} diff --git a/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/vo/ProdNewVO.java b/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/vo/ProdNewVO.java index 989f4b6..98fe363 100644 --- a/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/vo/ProdNewVO.java +++ b/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/busi/vo/ProdNewVO.java @@ -41,7 +41,10 @@ public class ProdNewVO extends BusiProdNew { /**同分类下产品数量*/ private Integer amount; - + /**文章相似度*/ + private Double similarity; + /**文章相似度*/ + private String similarityStr; /** * 批量传的图片 **/ diff --git a/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/constant/StrConstants.java b/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/constant/StrConstants.java index 7c37070..19f76d9 100644 --- a/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/constant/StrConstants.java +++ b/dl_admin/ruoyi-admin/src/main/java/com/ruoyi/constant/StrConstants.java @@ -14,4 +14,17 @@ public class StrConstants { * 结束时间 */ public static final String END_DATE =" 23:59:59"; + + /** + * 相似度阈值(海明距离),值越小越严格 + */ + public static final int MAX_HAMMING_DISTANCE = 3; + /** + * 哈希位数 + */ 
/**
 * Look up news articles similar to the submitted one.
 *
 * @param {Object} data request payload posted to the similarity-check endpoint
 * @returns the result of the shared `request` helper
 */
export function checkContent(data) {
  const options = {
    url: '/busi/new/checkContent',
    method: 'post',
    data
  }
  return request(options)
}
- +
添加产品分类
@@ -36,12 +39,12 @@ - + - + @@ -54,7 +57,7 @@ 图片库选择 - + @@ -102,36 +105,47 @@ 返 回 暂 存 发 布 + 相似度检测 - + +