From 5bb2a9a1ee5b53f7c6e71950c7db3d118f2177ea Mon Sep 17 00:00:00 2001 From: menft <17554333016@163.com> Date: Fri, 24 Oct 2025 02:31:17 +0800 Subject: [PATCH] =?UTF-8?q?feat(utils):=20=E6=96=B0=E5=A2=9EGoogleRankUtil?= =?UTF-8?q?=E5=B7=A5=E5=85=B7=E7=B1=BB=EF=BC=8C=E5=9F=BA=E4=BA=8Especulati?= =?UTF-8?q?onrules=E5=AE=9E=E7=8E=B0=E7=A8=B3=E5=AE=9A=E7=9A=84Google?= =?UTF-8?q?=E6=8E=92=E5=90=8D=E6=9F=A5=E8=AF=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ✨ 核心特性: - 基于Google speculationrules的JSON解析,不依赖DOM结构 - 支持多页搜索(前20名),自动翻页直到找到目标 - 精准的排名计算,baseRank + validRank机制 - 智能过滤Google自身域名,只计数真实搜索结果 - 会话复用机制,降低验证码触发率 - 丰富的配置选项(无头模式、请求间隔、重试策略等) 🔧 技术实现: - Jackson解析speculationrules脚本中的prefetch URLs - Selenium WebDriver自动化浏览器 - 人性化延时和滚动,模拟真实用户行为 - User-Agent随机化,增强反爬友好性 📊 测试结果: - "货车" → 维基百科排名第1名 ✓ - "大卡车" → 维基百科排名第12名(第2页第2个结果)✓ 优势:相比传统DOM解析方案更稳定、准确,不受页面结构变化影响 --- .../ruoyi/common/utils/GoogleRankUtil.java | 630 ++++++++++++++++++ 1 file changed, 630 insertions(+) create mode 100644 dl_admin/ruoyi-common/src/main/java/com/ruoyi/common/utils/GoogleRankUtil.java diff --git a/dl_admin/ruoyi-common/src/main/java/com/ruoyi/common/utils/GoogleRankUtil.java b/dl_admin/ruoyi-common/src/main/java/com/ruoyi/common/utils/GoogleRankUtil.java new file mode 100644 index 0000000..479f98b --- /dev/null +++ b/dl_admin/ruoyi-common/src/main/java/com/ruoyi/common/utils/GoogleRankUtil.java @@ -0,0 +1,630 @@ +package com.ruoyi.common.utils; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import io.github.bonigarcia.wdm.WebDriverManager; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.openqa.selenium.By; +import org.openqa.selenium.JavascriptExecutor; +import org.openqa.selenium.WebDriver; +import org.openqa.selenium.chrome.ChromeDriver; +import org.openqa.selenium.chrome.ChromeOptions; +import org.openqa.selenium.support.ui.ExpectedConditions; +import org.openqa.selenium.support.ui.WebDriverWait; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.URL; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.*; +import java.util.concurrent.atomic.AtomicLong; + +/** + * Google排名查询工具类(基于speculationrules解析) + * + * 本工具类采用全新的解析方案: + * 1. 不再解析复杂的DOM结构 + * 2. 直接提取Google返回的