import { chromium } from 'playwright'; import cron from 'node-cron'; import fs from 'node:fs'; import path from 'node:path'; import readline from 'node:readline'; import { config, datasets } from './config.js'; import { sendLoginAlert, sendRuntimeErrorAlert } from './notify.js'; import { closeDbPool, customerExists, getExistingMessageIds, getExistingMessageFingerprints, getLatestBillConsumptionTimeFromDb, getLatestMessageTimeFromDb, getLatestOrderTimeFromDb, hasDbConfig, upsertBills, upsertCustomerDetails, upsertCustomers, upsertMessages, upsertOrderDetails, upsertOrders, } from './db.js'; import { diffRecords, loadCurrentState, nowStamp, saveCheckpoint, saveDatasetRun, saveDelta, saveRunSummary, withHash, } from './storage.js'; const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); const scheduleEventFile = path.join(config.dataDir, 'runs', 'schedule-events.jsonl'); let _context = null; let _runtimeController = null; let _browser = null; let _isAttachedBrowser = false; const runningJobs = new Set(); function recordScheduleEvent(payload) { try { fs.mkdirSync(path.dirname(scheduleEventFile), { recursive: true }); fs.appendFileSync(scheduleEventFile, `${JSON.stringify({ at: new Date().toISOString(), ...payload })}\n`, 'utf8'); } catch (error) { console.warn(`[schedule-event] 写入失败: ${error.message}`); } } const AUTH_PAGE_KEYWORDS = [ 'RAM 用户登录', '主账号登录', '钉钉扫码登录', '用户名', '下一步', '登录并使用 RAM', ]; async function closeContextIfNeeded() { if (!_context) return; if (_isAttachedBrowser) { _context = null; return; } await _context.close(); _context = null; } function getRuntimeController() { if (_runtimeController) return _runtimeController; let paused = false; let terminated = false; let keypressBound = false; const onKeypress = (_str, key = {}) => { if (key.name === 'f7') { if (!paused) { paused = true; console.log('[控制] 已暂停(F7)。按 F8 继续,按 F9 终止。'); } return; } if (key.name === 'f8') { if (paused) { paused = false; console.log('[控制] 已继续(F8)。'); } return; } 
if (key.name === 'f9') { terminated = true; paused = false; console.log('[控制] 已请求终止(F9),将在安全检查点停止。'); } }; const bind = () => { if (keypressBound || !process.stdin.isTTY) return; readline.emitKeypressEvents(process.stdin); if (typeof process.stdin.setRawMode === 'function') { process.stdin.setRawMode(true); } process.stdin.resume(); process.stdin.on('keypress', onKeypress); keypressBound = true; console.log('[控制] 热键已启用:F7 暂停 / F8 继续 / F9 终止'); }; const unbind = () => { if (!keypressBound) return; process.stdin.off('keypress', onKeypress); if (process.stdin.isTTY && typeof process.stdin.setRawMode === 'function') { process.stdin.setRawMode(false); } keypressBound = false; }; const waitIfPaused = async (label = '任务') => { if (terminated) { throw new Error(`[控制] 已终止:${label}`); } while (paused) { await sleep(300); if (terminated) { throw new Error(`[控制] 已终止:${label}`); } } }; const throwIfTerminated = (label = '任务') => { if (terminated) { throw new Error(`[控制] 已终止:${label}`); } }; _runtimeController = { bind, unbind, waitIfPaused, throwIfTerminated, }; return _runtimeController; } async function runtimeCheckpoint(label) { const controller = getRuntimeController(); controller.throwIfTerminated(label); await controller.waitIfPaused(label); } function clearStaleBrowserProfileLocks() { const lockFiles = ['SingletonLock', 'SingletonCookie', 'SingletonSocket']; const now = Date.now(); const staleMs = 10 * 60 * 1000; for (const fileName of lockFiles) { const filePath = path.join(config.userDataDir, fileName); if (!fs.existsSync(filePath)) { continue; } try { const stat = fs.statSync(filePath); const ageMs = now - stat.mtimeMs; if (ageMs < staleMs) { console.log(`[浏览器锁] 检测到活跃锁文件,保留: ${fileName}`); continue; } fs.rmSync(filePath, { force: true }); console.log(`[浏览器锁] 已清理陈旧锁文件: ${fileName}`); } catch (error) { console.warn(`[浏览器锁] 清理 ${fileName} 失败: ${error.message}`); } } } async function getContext() { if (_context) return _context; if (config.browserMode === 'cdp') { try { 
_browser = await chromium.connectOverCDP(config.cdpUrl); _isAttachedBrowser = true; const contexts = _browser.contexts(); _context = contexts[0] || await _browser.newContext(); console.log(`[CDP] 已附着到手动浏览器: ${config.cdpUrl}`); return _context; } catch (error) { throw new Error(`无法通过 CDP 连接到手动浏览器(${config.cdpUrl})。请先手动启动 Chrome 并开启远程调试端口。原始错误: ${error.message}`); } } _isAttachedBrowser = false; clearStaleBrowserProfileLocks(); const launchOptions = { headless: config.headless, acceptDownloads: true, downloadsPath: config.downloadDir, }; if (config.browserChannel) { launchOptions.channel = config.browserChannel; } if (config.browserExecutablePath) { launchOptions.executablePath = config.browserExecutablePath; } try { _context = await chromium.launchPersistentContext(config.userDataDir, launchOptions); } catch (error) { const browserHint = config.browserExecutablePath ? `executablePath=${config.browserExecutablePath}` : config.browserChannel ? `channel=${config.browserChannel}` : 'bundled-chromium'; throw new Error(`浏览器启动失败(${browserHint})。请确认没有其他浏览器占用 .browser 目录,或删除 .browser 后重新执行 npm run login。原始错误: ${error.message}`); } await restoreStorageState(_context); return _context; } async function resolveActivePage(context, targetUrl = '') { const pages = context.pages(); let page = null; if (config.browserMode === 'cdp' && targetUrl) { page = pages.find((item) => item.url().includes(targetUrl)); } if (!page) { page = pages[0] || await context.newPage(); } if (config.browserMode === 'cdp') { const pageIndex = pages.indexOf(page); console.log(`[CDP] 使用 tab=${pageIndex >= 0 ? 
pageIndex : 'new'} url=${page.url() || '(blank)'}`); await page.bringToFront().catch(() => null); } return page; } async function restoreStorageState(context) { if (!fs.existsSync(config.storageStateFile)) { return; } try { const state = JSON.parse(fs.readFileSync(config.storageStateFile, 'utf-8')); if (Array.isArray(state.cookies) && state.cookies.length > 0) { await context.addCookies(state.cookies); console.log(`[storageState] 已恢复 ${state.cookies.length} 个 cookie`); } } catch (error) { console.warn(`[storageState] 恢复失败,继续使用 .browser profile: ${error.message}`); } } async function saveStorageState(context) { await context.storageState({ path: config.storageStateFile }); console.log(`[storageState] 已保存登录态快照: ${config.storageStateFile}`); } function loadLatestBillsCheckpoint() { const checkpointDir = path.join(config.dataDir, 'checkpoints', 'bills'); if (!fs.existsSync(checkpointDir)) { return null; } const candidates = fs.readdirSync(checkpointDir) .filter((fileName) => fileName.endsWith('.json')) .map((fileName) => { const filePath = path.join(checkpointDir, fileName); const stat = fs.statSync(filePath); return { fileName, filePath, mtimeMs: stat.mtimeMs }; }) .sort((a, b) => b.mtimeMs - a.mtimeMs); if (candidates.length === 0) { return null; } try { const latest = JSON.parse(fs.readFileSync(candidates[0].filePath, 'utf-8')); if (!latest || typeof latest !== 'object') { return null; } return latest; } catch (error) { console.warn(`[账单检查点] 读取失败,忽略断点续爬: ${error.message}`); return null; } } function loadLatestOrdersCheckpoint() { const checkpointDir = path.join(config.dataDir, 'checkpoints', 'orders'); if (!fs.existsSync(checkpointDir)) { return null; } const candidates = fs.readdirSync(checkpointDir) .filter((fileName) => fileName.endsWith('.json')) .map((fileName) => { const filePath = path.join(checkpointDir, fileName); const stat = fs.statSync(filePath); return { fileName, filePath, mtimeMs: stat.mtimeMs }; }) .sort((a, b) => b.mtimeMs - a.mtimeMs); if 
(candidates.length === 0) { return null; } try { const latest = JSON.parse(fs.readFileSync(candidates[0].filePath, 'utf-8')); if (!latest || typeof latest !== 'object') { return null; } return latest; } catch (error) { console.warn(`[订单检查点] 读取失败,忽略断点续爬: ${error.message}`); return null; } } function subtractDays(dateValue, days) { const next = new Date(dateValue); next.setDate(next.getDate() - days); return next; } function subtractMonths(dateValue, months) { const next = new Date(dateValue); next.setMonth(next.getMonth() - months); return next; } function randomIntBetween(min, max) { return Math.floor(Math.random() * (max - min + 1)) + min; } function parseDbDateTime(value) { const normalized = String(value || '').trim(); if (!normalized) { return null; } const parsed = new Date(normalized.replace(' ', 'T')); return Number.isNaN(parsed.getTime()) ? null : parsed; } function formatDateTime(date) { return `${formatDate(date)} ${String(date.getHours()).padStart(2, '0')}:${String(date.getMinutes()).padStart(2, '0')}:${String(date.getSeconds()).padStart(2, '0')}`; } function isSameDate(value, date) { const parsed = parseDbDateTime(value); if (!parsed) { return false; } return formatDate(parsed) === formatDate(date); } function addMinutes(date, minutes) { const next = new Date(date); next.setMinutes(next.getMinutes() + minutes); return next; } function buildOrderFingerprint(record) { return [ String(record.orderStatus || '').trim(), String(record.actualPaidCny || '').trim(), String(record.orderOriginalPriceCny || '').trim(), String(record.orderType || '').trim(), String(record.customerCategory || '').trim(), String(record.createdAt || '').trim(), ].join('|'); } function isFinalOrderStatus(status) { const normalized = String(status || '').trim(); if (!normalized) { return false; } return config.hotFinalStatuses.some((item) => item === normalized); } async function runLockedJob(jobName, job) { if (runningJobs.has(jobName)) { console.log(`[任务锁] ${jobName} 已在运行,跳过本次执行`); 
return { skipped: true, reason: 'already_running', jobName }; } runningJobs.add(jobName); try { return await job(); } finally { runningJobs.delete(jobName); } } function buildTodayOrderWindow() { const today = formatDate(new Date()); return buildSingleDateWindow(today, today); } function computeChangedOrderIds(previousRecords, nextRecords) { const previousMap = new Map(); for (const record of previousRecords || []) { const orderId = String(record.orderId || '').trim(); if (!orderId) { continue; } previousMap.set(orderId, record); } const changedOrderIds = []; for (const record of nextRecords || []) { const orderId = String(record.orderId || '').trim(); if (!orderId) { continue; } const previous = previousMap.get(orderId); if (!previous) { changedOrderIds.push(orderId); continue; } if (buildOrderFingerprint(previous) !== buildOrderFingerprint(record)) { changedOrderIds.push(orderId); } } return Array.from(new Set(changedOrderIds)); } function selectOrderDetailCandidates(orderRecords, changedOrderIds, detailRecords) { const changedSet = new Set((changedOrderIds || []).map((item) => String(item || '').trim()).filter(Boolean)); const detailMap = new Map(); for (const record of detailRecords || []) { const orderId = String(record.orderId || '').trim(); if (!orderId) { continue; } detailMap.set(orderId, record); } const now = new Date(); const refreshBefore = addMinutes(now, -config.hotOrderDetailRefreshMinutes); const candidateIds = []; for (const record of orderRecords || []) { const orderId = String(record.orderId || '').trim(); if (!orderId || !isValidOrderId(orderId)) { continue; } if (changedSet.has(orderId)) { candidateIds.push(orderId); continue; } const status = String(record.orderStatus || '').trim(); if (isFinalOrderStatus(status)) { continue; } const detail = detailMap.get(orderId); if (!detail) { candidateIds.push(orderId); continue; } const lastSyncedAt = parseDbDateTime(detail.detailSyncedAt || detail.__detailSyncedAt || ''); if (!lastSyncedAt || 
lastSyncedAt <= refreshBefore) { candidateIds.push(orderId); } } return Array.from(new Set(candidateIds)); } function summarizeHotPage(previousOrderMap, normalizedPageRows) { let stableCount = 0; let changedCount = 0; let newCount = 0; let todayRowCount = 0; for (const record of normalizedPageRows) { if (isSameDate(record.createdAt, new Date())) { todayRowCount += 1; } const orderId = String(record.orderId || '').trim(); const previous = previousOrderMap.get(orderId); if (!previous) { newCount += 1; continue; } if (buildOrderFingerprint(previous) === buildOrderFingerprint(record)) { stableCount += 1; } else { changedCount += 1; } } return { stableCount, changedCount, newCount, todayRowCount }; } function buildSingleDateWindow(startDate, endDate) { return [{ windowStart: startDate, windowEnd: endDate, start: startDate, end: endDate, }]; } async function captureErrorArtifacts(page, metadata = {}) { const stamp = nowStamp(); const artifactDir = path.join(config.errorDir, metadata.dataset || 'general'); fs.mkdirSync(artifactDir, { recursive: true }); const jsonPath = path.join(artifactDir, `${stamp}.json`); const screenshotPath = path.join(artifactDir, `${stamp}.png`); const payload = { ...metadata, capturedAt: new Date().toISOString(), pageUrl: page?.url?.() || '', stack: metadata.error?.stack || metadata.errorMessage || '', }; fs.writeFileSync(jsonPath, JSON.stringify(payload, null, 2)); let screenshotSaved = false; if (page && !page.isClosed?.()) { try { await page.screenshot({ path: screenshotPath, fullPage: true, timeout: 5000, animations: 'disabled' }); screenshotSaved = true; } catch (error) { console.error('[错误截图] 保存失败:', error.message); } } return { jsonPath, screenshotPath: screenshotSaved ? 
screenshotPath : '', }; } async function reportRuntimeError(error, page, metadata = {}) { const artifacts = await captureErrorArtifacts(page, { ...metadata, errorMessage: error.message, error, }); const subject = `[APS同步异常] ${metadata.label || metadata.dataset || 'sync'} failed`; const text = [ `时间: ${new Date().toISOString()}`, `任务: ${metadata.label || ''}`, `数据集: ${metadata.dataset || ''}`, `模式: ${metadata.mode || ''}`, `URL: ${page?.url?.() || ''}`, `错误: ${error.message}`, `JSON: ${artifacts.jsonPath}`, artifacts.screenshotPath ? `截图: ${artifacts.screenshotPath}` : '截图: 保存失败', ].join('\n'); const attachments = [{ filename: path.basename(artifacts.jsonPath), path: artifacts.jsonPath }]; if (artifacts.screenshotPath) { attachments.push({ filename: path.basename(artifacts.screenshotPath), path: artifacts.screenshotPath }); } await sendRuntimeErrorAlert({ subject, text, attachments }); } async function getPageBodyPreview(page) { return page .evaluate(() => document.body?.innerText?.substring(0, 500) || '(空)') .catch(() => '(无法获取)'); } function isAuthUrl(url) { return /account\.aliyun\.com|signin\.aliyun\.com/.test(url) || url.includes('login.htm') || url.includes('/#/signin'); } function hasAuthKeywords(text) { return AUTH_PAGE_KEYWORDS.some((keyword) => text.includes(keyword)); } async function detectAuthRedirect(page) { const currentUrl = page.url(); const bodyText = await getPageBodyPreview(page); return { currentUrl, bodyText, isAuthPage: isAuthUrl(currentUrl) || hasAuthKeywords(bodyText), }; } async function ensureDatasetAccessible(page, dataset, timeout = 120000, options = {}) { await page.goto(dataset.url, { waitUntil: 'domcontentloaded' }); await waitUntilReady(page, dataset.heading, timeout, options); } export async function login() { const runtimeController = getRuntimeController(); runtimeController.bind(); const context = await getContext(); const cleanupAndExit = async (signal) => { console.log(`[login] 收到 ${signal},正在保存登录态并关闭浏览器...`); await 
closeContextIfNeeded(); process.exit(130); }; const onSigint = () => { void cleanupAndExit('SIGINT'); }; const onSigterm = () => { void cleanupAndExit('SIGTERM'); }; process.once('SIGINT', onSigint); process.once('SIGTERM', onSigterm); try { const page = await resolveActivePage(context, '/detail/my_customer/~/customer/list'); await page.goto(datasets.customers.url, { waitUntil: 'domcontentloaded' }); console.log('请在打开的浏览器里完成阿里云伙伴中心登录。检测到进入“我的客户”和“账单查询”页面后,脚本会自动保存登录态并关闭浏览器。'); await waitUntilReady(page, datasets.customers.heading, 10 * 60 * 1000, { allowInteractiveAuth: true }); console.log('[login] 我的客户页验证通过,继续验证账单页登录态...'); await ensureDatasetAccessible(page, datasets.bills, 60 * 1000, { allowInteractiveAuth: true }); await sleep(1000); await saveStorageState(context); console.log('登录态已写入 .browser 目录,且已验证“我的客户”和“账单查询”页面可访问,后续可直接执行 npm run sync 或 npm run bills。'); } finally { process.off('SIGINT', onSigint); process.off('SIGTERM', onSigterm); await closeContextIfNeeded(); runtimeController.unbind(); } } export async function syncAll(options = {}) { const runtimeController = getRuntimeController(); runtimeController.bind(); const context = await getContext(); let page = null; const { resume = false } = options; try { const summary = { startedAt: new Date().toISOString(), datasets: {} }; page = await resolveActivePage(context, '/detail/my_customer/~/customer/list'); if (config.fullSync) { summary.datasets.customers = await syncCustomers(page, { resume }); summary.datasets.customerDetails = await syncCustomerDetails(page, { resume }); } summary.datasets.orders = await syncOrders(page, { incremental: !config.fullSync, resume }); // syncOrders 完成后,从最新的 orders.json 读取 orderId 列表 const latestOrders = loadCurrentState('orders', datasets.orders.uniqueKey); const orderIdsForDetail = collectValidOrderIds(latestOrders.records || []); summary.datasets.orderDetails = await syncOrderDetails(page, orderIdsForDetail, options); summary.datasets.bills = await syncBills(page, { 
incremental: !config.fullSync, resume }); summary.datasets.messages = await syncMessages(page, { incremental: !config.fullSync, resume }); summary.finishedAt = new Date().toISOString(); const stamp = nowStamp(); saveRunSummary(stamp, summary); return summary; } catch (error) { await reportRuntimeError(error, page, { label: 'syncAll', dataset: 'all', mode: config.fullSync ? 'full' : 'incremental' }); throw error; } finally { if (config.closeBrowser) { await closeContextIfNeeded(); } else { console.log('浏览器保持运行'); } await closeDbPool(); runtimeController.unbind(); } } export async function syncBillsOnly(options = {}) { const runtimeController = getRuntimeController(); runtimeController.bind(); const context = await getContext(); let page = null; try { const summary = { startedAt: new Date().toISOString(), datasets: {} }; page = await resolveActivePage(context, '/detail/bill/~/costCenter/bill'); summary.datasets.bills = await syncBills(page, options); summary.finishedAt = new Date().toISOString(); const stamp = nowStamp(); saveRunSummary(stamp, summary); return summary; } catch (error) { await reportRuntimeError(error, page, { label: 'syncBillsOnly', dataset: 'bills', mode: options.incremental ? 
'incremental' : 'full' }); throw error; } finally { if (options.keepBrowserOpen === true) { console.log('浏览器保持运行(schedule bills)'); } else if (config.closeBrowser) { await closeContextIfNeeded(); } else { console.log('浏览器保持运行'); } await closeDbPool(); runtimeController.unbind(); } } export async function syncOrdersOnly(options = {}) { const runtimeController = getRuntimeController(); runtimeController.bind(); const context = await getContext(); let page = null; try { const summary = { startedAt: new Date().toISOString(), datasets: {} }; page = await resolveActivePage(context, '/detail/order/~/costCenter/order'); const orderSyncResult = await syncOrders(page, options); summary.datasets.orders = orderSyncResult; const latestOrders = loadCurrentState('orders', datasets.orders.uniqueKey); const orderDetailsState = loadCurrentState('orderDetails', datasets.orderDetails.uniqueKey); const orderIdsForDetail = options.hot ? selectOrderDetailCandidates(latestOrders.records || [], orderSyncResult.changedOrderIds || [], orderDetailsState.records || []) : collectValidOrderIds(latestOrders.records || []); summary.datasets.orderDetails = await syncOrderDetails(page, orderIdsForDetail); summary.finishedAt = new Date().toISOString(); const stamp = nowStamp(); saveRunSummary(stamp, summary); return summary; } catch (error) { await reportRuntimeError(error, page, { label: 'syncOrdersOnly', dataset: 'orders', mode: options.incremental ? 
'incremental' : 'full' }); throw error; } finally { if (config.closeBrowser) { await closeContextIfNeeded(); } else { console.log('浏览器保持运行'); } await closeDbPool(); runtimeController.unbind(); } } export async function syncMessagesOnly(options = {}) { const runtimeController = getRuntimeController(); runtimeController.bind(); const context = await getContext(); let page = null; try { const summary = { startedAt: new Date().toISOString(), datasets: {} }; page = await resolveActivePage(context, '/message'); summary.datasets.messages = await syncMessages(page, options); summary.finishedAt = new Date().toISOString(); const stamp = nowStamp(); saveRunSummary(stamp, summary); return summary; } catch (error) { await reportRuntimeError(error, page, { label: 'syncMessagesOnly', dataset: 'messages', mode: options.incremental ? 'incremental' : 'full' }); throw error; } finally { if (config.closeBrowser) { await closeContextIfNeeded(); } else { console.log('浏览器保持运行'); } await closeDbPool(); runtimeController.unbind(); } } export async function scheduleSync() { console.log(`定时任务已启动: bills=${config.cron}, hot=${config.hotCron} (${config.timezone})`); setInterval(() => { console.log(`[${new Date().toISOString()}] 定时守护存活中: bills=${config.cron}, hot=${config.hotCron}, mode=${config.scheduleMode}`); }, 60 * 1000); cron.schedule( config.cron, async () => { if (config.scheduleMode === 'hot') { return runLockedJob('schedule-shared', async () => { try { recordScheduleEvent({ track: 'bills', status: 'started', mode: 'bills-incremental' }); console.log(`[${new Date().toISOString()}] 开始执行账单定时同步 mode=bills-incremental`); const summary = await syncBillsOnly({ incremental: true, keepBrowserOpen: true }); recordScheduleEvent({ track: 'bills', status: 'completed', mode: 'bills-incremental', summary }); console.log(`[${new Date().toISOString()}] 账单定时同步完成`, JSON.stringify(summary, null, 2)); } catch (error) { recordScheduleEvent({ track: 'bills', status: 'failed', mode: 'bills-incremental', 
error: error.message }); console.error(`[${new Date().toISOString()}] 账单定时同步失败`, error); } }); } try { console.log(`[${new Date().toISOString()}] 开始执行同步 mode=${config.scheduleMode}`); const summary = config.scheduleMode === 'full' ? await syncAll() : await syncAllIncremental(); console.log(`[${new Date().toISOString()}] 同步完成`, JSON.stringify(summary, null, 2)); } catch (error) { console.error(`[${new Date().toISOString()}] 同步失败`, error); } }, { timezone: config.timezone }, ); cron.schedule( config.hotCron, async () => { if (config.scheduleMode !== 'hot') { return; } return runLockedJob('schedule-shared', async () => { try { recordScheduleEvent({ track: 'hot', status: 'started', mode: 'hot' }); console.log(`[${new Date().toISOString()}] 开始执行高频同步 mode=hot`); const summary = await syncHot({ keepBrowserOpen: true }); if (summary?.skipped) { recordScheduleEvent({ track: 'hot', status: 'skipped', mode: 'hot', reason: summary.reason || 'already_running' }); } else { recordScheduleEvent({ track: 'hot', status: 'completed', mode: 'hot', summary }); } console.log(`[${new Date().toISOString()}] 高频同步完成`, JSON.stringify(summary, null, 2)); } catch (error) { recordScheduleEvent({ track: 'hot', status: 'failed', mode: 'hot', error: error.message }); console.error(`[${new Date().toISOString()}] 高频同步失败`, error); } }); }, { timezone: config.timezone }, ); } export async function syncHot(options = {}) { return runLockedJob('hot-sync', async () => { const runtimeController = getRuntimeController(); runtimeController.bind(); const context = await getContext(); let page = null; try { const summary = { startedAt: new Date().toISOString(), mode: 'hot', datasets: {} }; page = await resolveActivePage(context, '/detail/order/~/costCenter/order'); const orderSyncResult = await syncOrders(page, { ...options, hot: true, incremental: true, resume: options.resume === true }); summary.datasets.orders = orderSyncResult; const latestOrders = loadCurrentState('orders', datasets.orders.uniqueKey); 
const orderDetailsState = loadCurrentState('orderDetails', datasets.orderDetails.uniqueKey); const orderIdsForDetail = selectOrderDetailCandidates( latestOrders.records || [], orderSyncResult.changedOrderIds || [], orderDetailsState.records || [], ); summary.datasets.orderDetails = await syncOrderDetails(page, orderIdsForDetail, { resume: options.resume === true }); page = await resolveActivePage(context, '/message'); summary.datasets.messages = await syncMessages(page, { incremental: true, resume: options.resume === true, hot: true }); page = await resolveActivePage(context, '/detail/my_customer/~/customer/list'); summary.datasets.customerHot = await syncOneCustomerHot(page, { resume: options.resume === true }); summary.finishedAt = new Date().toISOString(); const stamp = nowStamp(); saveRunSummary(stamp, summary); return summary; } catch (error) { await reportRuntimeError(error, page, { label: 'syncHot', dataset: 'hot', mode: 'hot' }); throw error; } finally { if (options.keepBrowserOpen === true) { console.log('浏览器保持运行(schedule hot)'); } else if (config.closeBrowser) { await closeContextIfNeeded(); } else { console.log('浏览器保持运行'); } await closeDbPool(); runtimeController.unbind(); } }); } async function syncOneCustomerHot(page, options = {}) { await runtimeCheckpoint('高频同步客户'); const dataset = datasets.customers; await page.goto(dataset.url, { waitUntil: 'domcontentloaded' }); await waitUntilReady(page, dataset.heading); await trySetPageSize(page, dataset.pageSize); const pageData = await extractTable(page); const normalizedRows = normalizeDatasetRecords(dataset, pageData.rows || [], { pageNum: 1 }); const target = normalizedRows.find((record) => String(record.accountId || '').trim()); if (!target) { return { skipped: true, reason: 'no_customer_found' }; } if (await customerExists(target.accountId)) { console.log(`[客户高频] accountId=${target.accountId} 已存在,停止本轮客户抓取`); return { skipped: true, reason: 'customer_exists', accountId: target.accountId }; } await 
upsertCustomers([target]); const clicked = await clickCustomerDetailFromListWithRetry(page, target); if (!clicked) { return { skipped: false, inserted: true, accountId: target.accountId, detail: 'click_failed' }; } try { await page.waitForFunction( (text) => document.body && document.body.innerText.includes(text), '详情', { timeout: 15000 }, ); await sleep(1000); const detail = await extractCustomerDetail(page); const normalizedDetail = normalizeDatasetRecords(datasets.customerDetails, [{ ...detail, accountId: target.accountId, loginName: target.loginName }], { accountId: target.accountId }); await upsertCustomerDetails(normalizedDetail); return { skipped: false, inserted: true, accountId: target.accountId, detail: 'ok' }; } catch (error) { return { skipped: false, inserted: true, accountId: target.accountId, detail: `extract_failed:${error.message}` }; } } export async function syncAllIncremental() { const runtimeController = getRuntimeController(); runtimeController.bind(); const context = await getContext(); let page = null; try { const summary = { startedAt: new Date().toISOString(), mode: 'incremental', datasets: {} }; page = await resolveActivePage(context, '/detail/order/~/costCenter/order'); const orderSyncResult = await syncOrders(page, { incremental: true, resume: true }); summary.datasets.orders = orderSyncResult; const latestOrders = loadCurrentState('orders', datasets.orders.uniqueKey); const orderIdsForDetail = collectValidOrderIds(latestOrders.records || []); summary.datasets.orderDetails = await syncOrderDetails(page, orderIdsForDetail, { resume: true }); summary.datasets.bills = await syncBills(page, { incremental: true, resume: true }); summary.datasets.messages = await syncMessages(page, { incremental: true, resume: true }); summary.finishedAt = new Date().toISOString(); const stamp = nowStamp(); saveRunSummary(stamp, summary); return summary; } catch (error) { await reportRuntimeError(error, page, { label: 'syncAllIncremental', dataset: 
'incremental', mode: 'incremental' }); throw error; } finally { if (config.closeBrowser) { await closeContextIfNeeded(); } else { console.log('浏览器保持运行'); } await closeDbPool(); runtimeController.unbind(); } } async function syncCustomers(page, options = {}) { await runtimeCheckpoint('同步客户'); const dataset = datasets.customers; const { resume = false } = options; await page.goto(dataset.url, { waitUntil: 'domcontentloaded' }); await waitUntilReady(page, dataset.heading); await trySetPageSize(page, dataset.pageSize); const resumeCheckpoint = resume ? loadLatestCustomersCheckpoint() : null; let resumeFromPage = Number.parseInt(String(resumeCheckpoint?.pageNum || 0), 10) || 0; let shouldContinueScrape = true; let allNormalizedRecords = Array.isArray(resumeCheckpoint?.records) ? resumeCheckpoint.records : []; if (resumeFromPage > 0) { console.log(`[客户续爬] 从 checkpoint 恢复: page=${resumeFromPage}, records=${allNormalizedRecords.length}`); const moved = await moveCustomersToResumeStart(page, resumeFromPage); if (!moved) { console.log('[客户续爬] checkpoint 已在最后一页,无需继续抓取'); shouldContinueScrape = false; } } let records = []; if (shouldContinueScrape) { records = await scrapePagedTable(page, dataset, {}, { onPage: async ({ pageNum, pageRows }) => { const normalizedPageRows = normalizeDatasetRecords(dataset, pageRows, {}); allNormalizedRecords.push(...normalizedPageRows); if (hasDbConfig()) { await upsertCustomers(normalizedPageRows); } await saveCustomersCheckpoint(dataset, pageNum, allNormalizedRecords); }, skipInitialPage: resumeFromPage > 0, }); } if (resumeFromPage === 0) { allNormalizedRecords = normalizeDatasetRecords(dataset, records, {}); if (hasDbConfig()) { await upsertCustomers(dedupeByHash(allNormalizedRecords)); } } return persistDataset(dataset, dedupeByHash(allNormalizedRecords), {}); } async function syncCustomerDetails(page, options = {}) { await runtimeCheckpoint('同步客户详情'); const dataset = datasets.customerDetails; const customersState = 
loadCurrentState('customers', datasets.customers.uniqueKey); const customerTargets = collectCustomerDetailTargets(customersState.records || []); const resumeCheckpoint = options.resume ? loadLatestCustomerDetailsCheckpoint() : null; if (customerTargets.length === 0) { console.log('[客户详情] 本地无有效客户定位信息,跳过'); return persistDataset(dataset, [], {}); } console.log(`[客户详情] 共 ${customerTargets.length} 个客户需要获取详情`); const allDetails = Array.isArray(resumeCheckpoint?.records) ? resumeCheckpoint.records : []; let currentListPage = 0; let currentGroupKey = ''; let startIndex = Number.parseInt(String(resumeCheckpoint?.currentIndex || 0), 10) || 0; if (startIndex > 0) { console.log(`[客户详情续爬] 从 checkpoint 恢复: index=${startIndex}, records=${allDetails.length}`); } await page.goto(datasets.customers.url, { waitUntil: 'domcontentloaded' }); await waitUntilReady(page, datasets.customers.heading); await trySetPageSize(page, datasets.customers.pageSize); for (let index = startIndex; index < customerTargets.length; index += 1) { await runtimeCheckpoint(`客户详情 ${index + 1}/${customerTargets.length}`); const target = customerTargets[index]; console.log(`[客户详情] ${index + 1}/${customerTargets.length} accountId=${target.accountId} page=${target.pageNum}`); const pauseMs = randomIntBetween(1000, 3000); console.log(`[客户详情] 随机等待 ${pauseMs}ms 后继续`); await sleep(pauseMs); if (target.pageNum > 0 && currentListPage !== target.pageNum) { const reached = await jumpToCustomerPage(page, target.pageNum); if (!reached) { console.warn(`[客户详情] 无法跳到第 ${target.pageNum} 页,跳过 ${target.accountId}`); continue; } currentListPage = target.pageNum; } const nextGroupKey = `${target.pageNum}`; if (currentGroupKey !== nextGroupKey) { currentGroupKey = nextGroupKey; } const clicked = await clickCustomerDetailFromListWithRetry(page, target); if (!clicked) { console.warn(`[客户详情] 列表中未找到 accountId=${target.accountId},跳过`); continue; } try { await page.waitForFunction( (text) => document.body && 
document.body.innerText.includes(text), '详情', { timeout: 15000 }, ); await sleep(1000); } catch { console.warn(`[客户详情] ${target.accountId} 详情页加载超时,跳过`); await page.goBack({ waitUntil: 'domcontentloaded' }).catch(() => null); await waitForStableCustomerList(page).catch(() => null); continue; } const detail = await extractCustomerDetail(page); allDetails.push({ ...detail, __context: { accountId: target.accountId } }); await saveCustomerDetailsCheckpoint(dataset, index + 1, allDetails); if (hasDbConfig()) { const normalizedDetail = normalizeDatasetRecords(dataset, [{ ...detail, __context: { accountId: target.accountId } }], {}); await upsertCustomerDetails(normalizedDetail); } await page.goBack({ waitUntil: 'domcontentloaded' }).catch(() => null); await waitForStableCustomerList(page).catch(() => null); currentListPage = target.pageNum; } return persistDataset(dataset, dedupeByHash(allDetails), {}); } async function syncOrders(page, options = {}) { await runtimeCheckpoint('同步订单'); const dataset = datasets.orders; const { incremental = false, resume = false, hot = false } = options; let windows; if (hot) { windows = buildTodayOrderWindow(); } else if (!incremental) { windows = buildMonthlyDateWindows(config.orderStartDate); } else { windows = await buildIncrementalOrderWindows(); } const resumeCheckpoint = resume ? 
loadLatestOrdersCheckpoint() : null; if (resumeCheckpoint?.windowStart) { const resumeIndex = windows.findIndex((window) => window.start === resumeCheckpoint.windowStart && window.end === resumeCheckpoint.windowEnd); if (resumeIndex >= 0) { windows = windows.slice(resumeIndex); console.log(`[订单续爬] 从 checkpoint 恢复: ${resumeCheckpoint.windowStart} ~ ${resumeCheckpoint.windowEnd}, page=${resumeCheckpoint.pageNum || 1}, records=${(resumeCheckpoint.records || []).length}`); } } const previousState = loadCurrentState(dataset.name, dataset.uniqueKey); const previousRecords = previousState.records || []; const previousOrderMap = new Map(previousRecords.map((record) => [String(record.orderId || '').trim(), record])); const allNormalizedRecords = []; const hotStats = { pagesScanned: 0, stableRows: 0, newRows: 0, changedRows: 0, stoppedEarly: false, stopReason: '', }; for (const window of windows) { await runtimeCheckpoint(`订单窗口 ${window.start} ~ ${window.end}`); await page.goto(dataset.url, { waitUntil: 'domcontentloaded' }); await waitUntilReady(page, dataset.heading); await setDateRange(page, window.start, window.end); await clickQuery(page); await trySetPageSize(page, dataset.pageSize); let windowNormalizedRecords = []; let resumeFromPage = 0; let shouldContinueScrape = true; if (resumeCheckpoint?.windowStart === window.start && resumeCheckpoint?.windowEnd === window.end) { windowNormalizedRecords = Array.isArray(resumeCheckpoint.records) ? 
resumeCheckpoint.records : []; resumeFromPage = Number.parseInt(String(resumeCheckpoint.pageNum || 0), 10) || 0; if (resumeFromPage > 0) { const moved = await moveOrdersToResumeStart(page, resumeFromPage); if (!moved) { console.log(`[订单续爬] checkpoint 已在最后一页,无需继续抓取 window=${window.start}~${window.end}`); shouldContinueScrape = false; } } } let records = []; let stableRowsInARow = 0; let stablePagesInARow = 0; if (shouldContinueScrape) { records = await scrapePagedTable(page, dataset, window, { onPage: async ({ pageNum, pageRows }) => { const normalizedPageRows = normalizeDatasetRecords(dataset, pageRows, window); windowNormalizedRecords.push(...normalizedPageRows); if (hasDbConfig()) { await upsertOrders(normalizedPageRows); } await saveOrdersCheckpoint(dataset, window, pageNum, windowNormalizedRecords); if (hot) { hotStats.pagesScanned += 1; const pageSummary = summarizeHotPage(previousOrderMap, normalizedPageRows); hotStats.stableRows += pageSummary.stableCount; hotStats.newRows += pageSummary.newCount; hotStats.changedRows += pageSummary.changedCount; if (pageSummary.changedCount === 0 && pageSummary.newCount === 0) { stablePagesInARow += 1; } else { stablePagesInARow = 0; } if (pageSummary.stableCount === normalizedPageRows.length && normalizedPageRows.length > 0) { stableRowsInARow += normalizedPageRows.length; } else { stableRowsInARow = 0; } } }, shouldStop: hot ? 
async ({ pageNum }) => { if (pageNum >= config.hotOrderMaxPagesPerRun) { hotStats.stoppedEarly = true; hotStats.stopReason = `max_pages:${config.hotOrderMaxPagesPerRun}`; return true; } if (stableRowsInARow >= config.hotOrderStableThreshold) { hotStats.stoppedEarly = true; hotStats.stopReason = `stable_rows:${stableRowsInARow}`; return true; } if (stablePagesInARow >= config.hotOrderStablePageThreshold) { hotStats.stoppedEarly = true; hotStats.stopReason = `stable_pages:${stablePagesInARow}`; return true; } return false; } : undefined, }); } if (resumeFromPage === 0) { windowNormalizedRecords = normalizeDatasetRecords(dataset, records, window); if (hasDbConfig()) { await upsertOrders(dedupeByHash(windowNormalizedRecords)); } } allNormalizedRecords.push(...windowNormalizedRecords); } const normalizedRecords = dedupeByHash(allNormalizedRecords); const changedOrderIds = computeChangedOrderIds(previousRecords, normalizedRecords); const persisted = persistNormalizedDataset(dataset, normalizedRecords); return { ...persisted, changedOrderIds, hot: hot ? 
/**
 * Builds the date windows used by an incremental order sync.
 *
 * Resolution order:
 *   1. `config.incrementalOrderStartDate` is set -> monthly windows starting at that date;
 *   2. no database configured -> a single window covering yesterday only;
 *   3. database configured but no parseable order watermark -> a single window covering today only;
 *   4. otherwise -> a single window from (watermark - `config.orderIncrementalOverlapDays`) to today.
 *
 * @returns {Promise<*>} the value produced by `buildMonthlyDateWindows` or `buildSingleDateWindow`.
 * @throws {Error} when the configured start date is malformed (see `normalizeConfiguredDate`).
 */
async function buildIncrementalOrderWindows() {
  const configuredStartDate = normalizeConfiguredDate(config.incrementalOrderStartDate);
  if (configuredStartDate) {
    const windows = buildMonthlyDateWindows(configuredStartDate);
    console.log(`[增量模式] 订单从指定日期开始查询: ${configuredStartDate}`);
    return windows;
  }

  if (!hasDbConfig()) {
    // Without a database there is no watermark to read, so only yesterday is queried.
    const yesterday = new Date();
    yesterday.setDate(yesterday.getDate() - 1);
    const dateStr = formatDate(yesterday);
    console.log(`[增量模式] 未配置数据库,订单仅查询: ${dateStr}`);
    return buildSingleDateWindow(dateStr, dateStr);
  }

  const latestOrderTime = await getLatestOrderTimeFromDb();
  const runAt = new Date();
  const parsed = parseDbDateTime(latestOrderTime);
  if (!parsed) {
    const dateStr = formatDate(runAt);
    console.log(`[增量模式] 数据库无订单水位,订单仅查询当天: ${dateStr}`);
    return buildSingleDateWindow(dateStr, dateStr);
  }

  // Re-read a few days before the watermark so late updates to recent orders are picked up.
  const startDate = formatDate(subtractDays(parsed, config.orderIncrementalOverlapDays));
  const endDate = formatDate(runAt);
  console.log(`[增量模式] 订单窗口: ${startDate} ~ ${endDate} (db_last=${latestOrderTime}, overlap=${config.orderIncrementalOverlapDays}d)`);
  return buildSingleDateWindow(startDate, endDate);
}

/**
 * Validates the configured incremental start date.
 *
 * @param {*} value - raw configuration value; nullish/blank means "not configured".
 * @returns {string} the trimmed `YYYY-MM-DD` string, or `''` when nothing is configured.
 * @throws {Error} when the value is not shaped like `YYYY-MM-DD`, or when it names a day that
 *   does not exist on the calendar (e.g. `2024-02-31`, `2023-02-29`, `2024-13-01`).
 */
function normalizeConfiguredDate(value) {
  const normalized = String(value || '').trim();
  if (!normalized) {
    return '';
  }

  const match = /^(\d{4})-(\d{2})-(\d{2})$/.exec(normalized);
  if (!match) {
    throw new Error(`ALIYUN_APS_INCREMENTAL_ORDER_START_DATE 格式无效: ${normalized},期望 YYYY-MM-DD`);
  }

  // `new Date('2024-02-31T…')` is not reliably rejected by the engine (it can roll over into
  // March), so the date is rebuilt from its components and checked for a round trip instead.
  const [year, month, day] = match.slice(1).map((part) => Number.parseInt(part, 10));
  const probe = new Date(0);
  probe.setUTCFullYear(year, month - 1, day);
  const isRealCalendarDate = probe.getUTCFullYear() === year
    && probe.getUTCMonth() === month - 1
    && probe.getUTCDate() === day;
  if (!isRealCalendarDate) {
    throw new Error(`ALIYUN_APS_INCREMENTAL_ORDER_START_DATE 不是有效日期: ${normalized}`);
  }

  return normalized;
}
latestConsumptionDate ? latestConsumptionDate.slice(0, 10) : formatDate(new Date()); const endDate = formatDate(new Date()); const startMonth = startDate.slice(0, 7); const endMonth = endDate.slice(0, 7); months = buildMonthList(startMonth).filter((month) => month <= endMonth); console.log(`[增量模式] 账单窗口: ${startDate} ~ ${endDate}${latestConsumptionDate ? `, 数据库最新消费时间: ${latestConsumptionDate}` : ''}`); } const resumeCheckpoint = resume ? loadLatestBillsCheckpoint() : null; if (resumeCheckpoint?.month) { const resumeIndex = months.indexOf(resumeCheckpoint.month); if (resumeIndex >= 0) { months = months.slice(resumeIndex); console.log(`[账单续爬] 从 checkpoint 恢复: month=${resumeCheckpoint.month}, page=${resumeCheckpoint.pageNum || 1}, records=${(resumeCheckpoint.records || []).length}`); } } const allNormalizedRecords = []; for (const month of months) { await runtimeCheckpoint(`账单月份 ${month}`); await page.goto(dataset.url, { waitUntil: 'domcontentloaded' }); await waitUntilReady(page, dataset.heading); await setMonthValue(page, month); await clickQuery(page); await trySetPageSize(page, dataset.pageSize); let monthNormalizedRecords = []; let resumeFromPage = 0; let shouldContinueScrape = true; if (resumeCheckpoint?.month === month) { monthNormalizedRecords = Array.isArray(resumeCheckpoint.records) ? 
resumeCheckpoint.records : []; resumeFromPage = Number.parseInt(String(resumeCheckpoint.pageNum || 0), 10) || 0; if (resumeFromPage > 0) { const moved = await moveBillsToResumeStart(page, resumeFromPage); if (!moved) { console.log(`[账单续爬] checkpoint 已在最后一页,无需继续抓取 month=${month}`); shouldContinueScrape = false; } } } let rawRecords = []; if (shouldContinueScrape) { rawRecords = await scrapePagedTable(page, dataset, { month }, { onPage: async ({ pageNum, pageRows }) => { const normalizedPageRows = normalizeDatasetRecords(dataset, pageRows, { month }); monthNormalizedRecords.push(...normalizedPageRows); if (hasDbConfig()) { await upsertBills(normalizedPageRows); } let checkpointRecords = monthNormalizedRecords; if (latestConsumptionDate) { checkpointRecords = monthNormalizedRecords.filter((record) => isAfterLatestConsumptionDate(record, latestConsumptionDate)); } await saveBillsCheckpoint(dataset, month, pageNum, checkpointRecords); }, }); } if (resumeFromPage === 0) { monthNormalizedRecords = normalizeDatasetRecords(dataset, rawRecords, { month }); } if (latestConsumptionDate) { const before = monthNormalizedRecords.length; monthNormalizedRecords = monthNormalizedRecords.filter((record) => isAfterLatestConsumptionDate(record, latestConsumptionDate)); console.log(`[增量模式] 账单按消费时间过滤: ${before} -> ${monthNormalizedRecords.length}`); } allNormalizedRecords.push(...monthNormalizedRecords); } return persistNormalizedDataset(dataset, dedupeByHash(allNormalizedRecords)); } async function syncMessages(page, options = {}) { await runtimeCheckpoint('同步消息'); const dataset = datasets.messages; const { incremental = false, resume = false, hot = false } = options; const fullSyncWatermark = !incremental && !hot ? subtractMonths(new Date(), 3) : null; await page.goto(dataset.url, { waitUntil: 'domcontentloaded' }); await waitUntilReady(page, dataset.heading); await trySetPageSize(page, dataset.pageSize); const resumeCheckpoint = resume ? 
loadLatestMessagesCheckpoint() : null; let resumeFromPage = Number.parseInt(String(resumeCheckpoint?.pageNum || 0), 10) || 0; let shouldContinueScrape = true; let allNormalizedRecords = Array.isArray(resumeCheckpoint?.records) ? resumeCheckpoint.records : []; const shouldStopForFullSyncPage = (pageRows) => { if (!fullSyncWatermark) { return false; } const normalizedPageRows = normalizeDatasetRecords(dataset, pageRows, {}); const pageTimeStats = getMessagePageTimeStats(normalizedPageRows); console.log(`[全量模式] 当前页时间范围: parsed=${pageTimeStats.parsed}/${pageTimeStats.total}, earliest=${pageTimeStats.earliest || 'N/A'}, latest=${pageTimeStats.latest || 'N/A'}, watermark=${formatDateTime(fullSyncWatermark)}`); return normalizedPageRows.length > 0 && normalizedPageRows.every((record) => !isAfterLatestMessageTime(record, fullSyncWatermark)); }; if (resumeFromPage > 0) { console.log(`[消息续爬] 从 checkpoint 恢复: page=${resumeFromPage}, records=${allNormalizedRecords.length}`); const moved = await moveMessagesToResumeStart(page, resumeFromPage); if (!moved) { console.log('[消息续爬] checkpoint 已在最后一页,无需继续抓取'); shouldContinueScrape = false; } else if (fullSyncWatermark) { await waitForTableRows(page); const resumedPageData = await extractTable(page); if (shouldStopForFullSyncPage(resumedPageData.rows)) { console.log(`[全量模式] 当前续爬页已超出近三个月范围,停止继续抓取: page=${resumeFromPage + 1}, watermark=${formatDateTime(fullSyncWatermark)}`); shouldContinueScrape = false; } } } let records = []; let hotWatermark = null; let stopByExistingPage = false; if (hot && hasDbConfig()) { const latestMessageTime = await getLatestMessageTimeFromDb(); const latest = parseDbDateTime(latestMessageTime); if (latest) { hotWatermark = addMinutes(latest, -config.hotMessageOverlapMinutes); } } if (shouldContinueScrape) { records = await scrapePagedTable(page, dataset, {}, { onPage: async ({ pageNum, pageRows }) => { const detailedPageRows = await enrichMessageRowsWithDetails(page, pageRows, pageNum); const 
normalizedPageRows = normalizeDatasetRecords(dataset, detailedPageRows, {}); const filteredPageRows = fullSyncWatermark ? normalizedPageRows.filter((record) => isAfterLatestMessageTime(record, fullSyncWatermark)) : normalizedPageRows; let pageRowsToPersist = filteredPageRows; if (hasDbConfig() && filteredPageRows.length > 0) { const pageMsgIds = filteredPageRows.map((record) => record.msgId).filter(Boolean); const existingIds = await getExistingMessageIds(pageMsgIds); const fingerprintCandidates = filteredPageRows .map((record) => String(record.receivedAt || record.gmtModified || record.gmtCreated || '').trim()) .filter(Boolean); const existingFingerprintRows = await getExistingMessageFingerprints(fingerprintCandidates); const existingFingerprints = new Set( existingFingerprintRows.map((row) => buildMessageFingerprint({ title: row.title, receivedAt: row.received_at, orderNo: row.order_no })), ); stopByExistingPage = filteredPageRows.length > 0 && filteredPageRows.every((record) => { if (record.msgId) { return existingIds.has(record.msgId); } return existingFingerprints.has(buildMessageFingerprint(record)); }); pageRowsToPersist = filteredPageRows.filter((record) => { if (record.msgId) { return !existingIds.has(record.msgId); } return !existingFingerprints.has(buildMessageFingerprint(record)); }); if (pageRowsToPersist.length > 0) { await upsertMessages(pageRowsToPersist); } } allNormalizedRecords.push(...pageRowsToPersist); await saveMessagesCheckpoint(dataset, pageNum, allNormalizedRecords); }, skipInitialPage: resumeFromPage > 0, shouldStop: hot ? 
async ({ pageNum, pageRows }) => { if (pageNum >= config.hotMessageMaxPagesPerRun) { return true; } if (stopByExistingPage) { return true; } if (!hotWatermark) { return false; } const detailedPageRows = await enrichMessageRowsWithDetails(page, pageRows, pageNum); const normalizedPageRows = normalizeDatasetRecords(dataset, detailedPageRows, {}); return normalizedPageRows.length > 0 && normalizedPageRows.every((record) => !isAfterLatestMessageTime(record, hotWatermark)); } : fullSyncWatermark ? async ({ pageNum, pageRows }) => { const detailedPageRows = await enrichMessageRowsWithDetails(page, pageRows, pageNum); return stopByExistingPage || shouldStopForFullSyncPage(detailedPageRows); } : undefined, }); } if (resumeFromPage === 0) { allNormalizedRecords = normalizeDatasetRecords(dataset, records, {}); } if (fullSyncWatermark) { const before = allNormalizedRecords.length; allNormalizedRecords = allNormalizedRecords.filter((record) => isAfterLatestMessageTime(record, fullSyncWatermark)); console.log(`[全量模式] 消息仅保留近三个月: ${before} -> ${allNormalizedRecords.length} (watermark=${formatDateTime(fullSyncWatermark)})`); } if ((incremental || hot) && hasDbConfig()) { try { const latestMessageTime = await getLatestMessageTimeFromDb(); if (latestMessageTime) { const latest = parseDbDateTime(latestMessageTime); if (latest) { const watermark = hot ? addMinutes(latest, -config.hotMessageOverlapMinutes) : subtractDays(latest, config.messageIncrementalOverlapDays); const before = allNormalizedRecords.length; allNormalizedRecords = allNormalizedRecords.filter((record) => isAfterLatestMessageTime(record, watermark)); console.log(`[${hot ? '高频模式' : '增量模式'}] 消息按时间过滤: ${before} -> ${allNormalizedRecords.length} (db_last=${latestMessageTime}, overlap=${hot ? `${config.hotMessageOverlapMinutes}m` : `${config.messageIncrementalOverlapDays}d`})`); } } } catch (error) { console.error(`[${hot ? 
/**
 * Writes a message-sync checkpoint named `page-<pageNum>`.
 *
 * @param {{name: string}} dataset - dataset descriptor; only `name` is read here.
 * @param {number} pageNum - list page that has just been processed.
 * @param {Array} normalizedRecords - records collected so far; de-duplicated before saving.
 */
async function saveMessagesCheckpoint(dataset, pageNum, normalizedRecords) {
  const uniqueRecords = dedupeByHash(normalizedRecords);
  const payload = {
    pageNum,
    savedAt: new Date().toISOString(),
    stats: { total: uniqueRecords.length },
    records: uniqueRecords,
  };
  saveCheckpoint(dataset.name, `page-${pageNum}`, payload);
  console.log(`[消息检查点] 已落盘: page=${pageNum}, records=${uniqueRecords.length}`);
}

/**
 * Loads the most recently modified `*.json` file under `<dataDir>/checkpoints/messages`.
 *
 * @returns {object|null} the parsed checkpoint, or `null` when the directory is missing, holds no
 *   JSON file, or the newest file cannot be read/parsed as an object.
 */
function loadLatestMessagesCheckpoint() {
  const checkpointDir = path.join(config.dataDir, 'checkpoints', 'messages');
  if (!fs.existsSync(checkpointDir)) {
    return null;
  }

  // Keep the first file with the greatest mtime (ties resolve to directory order).
  let newest = null;
  for (const fileName of fs.readdirSync(checkpointDir)) {
    if (!fileName.endsWith('.json')) {
      continue;
    }
    const filePath = path.join(checkpointDir, fileName);
    const { mtimeMs } = fs.statSync(filePath);
    if (newest === null || mtimeMs > newest.mtimeMs) {
      newest = { filePath, mtimeMs };
    }
  }
  if (newest === null) {
    return null;
  }

  try {
    const parsed = JSON.parse(fs.readFileSync(newest.filePath, 'utf-8'));
    return parsed && typeof parsed === 'object' ? parsed : null;
  } catch (error) {
    console.warn(`[消息检查点] 读取失败,忽略断点续爬: ${error.message}`);
    return null;
  }
}

/**
 * Positions the message list on the first page that has NOT been checkpointed yet.
 *
 * @param {object} page - Playwright page.
 * @param {number} resumeFromPage - last page stored in the checkpoint (<= 0 means "no resume").
 * @returns {Promise<boolean>} `true` when scraping should continue; `false` when the checkpointed
 *   page was already the last one.
 * @throws {Error} when the checkpointed page cannot be reached.
 */
async function moveMessagesToResumeStart(page, resumeFromPage) {
  if (resumeFromPage <= 0) {
    return true;
  }
  const reached = await jumpToPage(page, resumeFromPage);
  if (!reached) {
    throw new Error(`消息续爬失败:无法定位到 checkpoint 页码 ${resumeFromPage}`);
  }
  // gotoNextPage resolves to `{ moved, reason }`; returning that object as-is is always truthy,
  // which made the "already on the last page" case undetectable for callers.
  const { moved } = await gotoNextPage(page);
  return Boolean(moved);
}

/**
 * Writes a bill-sync checkpoint named `<month>-latest`.
 *
 * @param {{name: string}} dataset - dataset descriptor; only `name` is read here.
 * @param {string} month - month currently being scraped.
 * @param {number} pageNum - list page that has just been processed.
 * @param {Array} normalizedRecords - records collected so far; de-duplicated before saving.
 */
async function saveBillsCheckpoint(dataset, month, pageNum, normalizedRecords) {
  const uniqueRecords = dedupeByHash(normalizedRecords);
  const payload = {
    month,
    pageNum,
    savedAt: new Date().toISOString(),
    stats: { total: uniqueRecords.length },
    records: uniqueRecords,
  };
  saveCheckpoint(dataset.name, `${month}-latest`, payload);
  console.log(`[账单检查点] 已落盘: month=${month}, page=${pageNum}, records=${uniqueRecords.length}`);
}

/**
 * Writes an order-sync checkpoint named after the date window (`<start>_<end>`).
 *
 * @param {{name: string}} dataset - dataset descriptor; only `name` is read here.
 * @param {{start: string, end: string}} window - date window currently being scraped.
 * @param {number} pageNum - list page that has just been processed.
 * @param {Array} normalizedRecords - records collected so far; de-duplicated before saving.
 */
async function saveOrdersCheckpoint(dataset, window, pageNum, normalizedRecords) {
  const uniqueRecords = dedupeByHash(normalizedRecords);
  // Anything other than digits, "_" and "-" is replaced so the name is safe as a file name.
  const checkpointName = `${window.start}_${window.end}`.replace(/[^0-9_-]/g, '-');
  const payload = {
    windowStart: window.start,
    windowEnd: window.end,
    pageNum,
    savedAt: new Date().toISOString(),
    stats: { total: uniqueRecords.length },
    records: uniqueRecords,
  };
  saveCheckpoint(dataset.name, checkpointName, payload);
  console.log(`[订单检查点] 已落盘: ${window.start} ~ ${window.end}, page=${pageNum}, records=${uniqueRecords.length}`);
}

/**
 * Writes a customer-sync checkpoint named `page-<pageNum>`.
 *
 * @param {{name: string}} dataset - dataset descriptor; only `name` is read here.
 * @param {number} pageNum - list page that has just been processed.
 * @param {Array} normalizedRecords - records collected so far; de-duplicated before saving.
 */
async function saveCustomersCheckpoint(dataset, pageNum, normalizedRecords) {
  const uniqueRecords = dedupeByHash(normalizedRecords);
  const payload = {
    pageNum,
    savedAt: new Date().toISOString(),
    stats: { total: uniqueRecords.length },
    records: uniqueRecords,
  };
  saveCheckpoint(dataset.name, `page-${pageNum}`, payload);
  console.log(`[客户检查点] 已落盘: page=${pageNum}, records=${uniqueRecords.length}`);
}

/**
 * Loads the most recently modified `*.json` file under `<dataDir>/checkpoints/customers`.
 *
 * @returns {object|null} the parsed checkpoint, or `null` when the directory is missing, holds no
 *   JSON file, or the newest file cannot be read/parsed as an object.
 */
function loadLatestCustomersCheckpoint() {
  const checkpointDir = path.join(config.dataDir, 'checkpoints', 'customers');
  if (!fs.existsSync(checkpointDir)) {
    return null;
  }

  // Keep the first file with the greatest mtime (ties resolve to directory order).
  let newest = null;
  for (const fileName of fs.readdirSync(checkpointDir)) {
    if (!fileName.endsWith('.json')) {
      continue;
    }
    const filePath = path.join(checkpointDir, fileName);
    const { mtimeMs } = fs.statSync(filePath);
    if (newest === null || mtimeMs > newest.mtimeMs) {
      newest = { filePath, mtimeMs };
    }
  }
  if (newest === null) {
    return null;
  }

  try {
    const parsed = JSON.parse(fs.readFileSync(newest.filePath, 'utf-8'));
    return parsed && typeof parsed === 'object' ? parsed : null;
  } catch (error) {
    console.warn(`[客户检查点] 读取失败,忽略断点续爬: ${error.message}`);
    return null;
  }
}
/**
 * Shared reader for the "detail" checkpoint directories.
 *
 * Prefers `<dataDir>/checkpoints/<datasetDirName>/latest.json`. When that file is absent, the
 * most recently modified `*.json` in the directory is used instead. Any read/parse failure is
 * logged and reported as "no checkpoint" so a damaged file can never abort a run.
 *
 * @param {string} datasetDirName - sub-directory under `checkpoints`.
 * @param {string} logLabel - label used in the warning message.
 * @returns {*} parsed JSON content, or `null` when nothing usable exists.
 */
function readNewestDetailCheckpoint(datasetDirName, logLabel) {
  const checkpointDir = path.join(config.dataDir, 'checkpoints', datasetDirName);
  if (!fs.existsSync(checkpointDir)) {
    return null;
  }

  try {
    let targetFile = path.join(checkpointDir, 'latest.json');
    if (!fs.existsSync(targetFile)) {
      // Fallback: pick the newest JSON by mtime rather than whatever the directory lists first.
      let newest = null;
      for (const fileName of fs.readdirSync(checkpointDir)) {
        if (!fileName.endsWith('.json')) {
          continue;
        }
        const filePath = path.join(checkpointDir, fileName);
        const { mtimeMs } = fs.statSync(filePath);
        if (newest === null || mtimeMs > newest.mtimeMs) {
          newest = { filePath, mtimeMs };
        }
      }
      if (newest === null) {
        return null;
      }
      targetFile = newest.filePath;
    }
    return JSON.parse(fs.readFileSync(targetFile, 'utf-8'));
  } catch (error) {
    console.warn(`[${logLabel}] 读取失败: ${error.message}`);
    return null;
  }
}

/**
 * Loads the customer-detail checkpoint.
 *
 * @returns {*} parsed checkpoint, or `null` when none exists or it cannot be read.
 */
function loadLatestCustomerDetailsCheckpoint() {
  return readNewestDetailCheckpoint('customerDetails', '客户详情检查点');
}

/**
 * Writes the order-detail checkpoint (always under the name `latest`).
 *
 * @param {{name: string}} dataset - dataset descriptor; only `name` is read here.
 * @param {number} currentIndex - index of the next target to process on resume.
 * @param {Array} records - detail records collected so far; de-duplicated before saving.
 */
async function saveOrderDetailsCheckpoint(dataset, currentIndex, records) {
  const uniqueRecords = dedupeByHash(records);
  const payload = {
    currentIndex,
    savedAt: new Date().toISOString(),
    stats: { total: uniqueRecords.length },
    records: uniqueRecords,
  };
  saveCheckpoint(dataset.name, 'latest', payload);
  console.log(`[订单详情检查点] 已落盘: index=${currentIndex}, records=${uniqueRecords.length}`);
}

/**
 * Loads the order-detail checkpoint.
 *
 * @returns {*} parsed checkpoint, or `null` when none exists or it cannot be read.
 */
function loadLatestOrderDetailsCheckpoint() {
  return readNewestDetailCheckpoint('orderDetails', '订单详情检查点');
}

/**
 * Runs every raw record through `dataset.normalize` and `withHash`.
 *
 * @param {{normalize: Function}} dataset - dataset descriptor providing `normalize(record, ctx)`.
 * @param {Array<object>} records - raw records; a record's own `__context` wins over `context`.
 * @param {*} context - fallback context for records without `__context`.
 * @returns {Array} normalized records in input order.
 */
function normalizeDatasetRecords(dataset, records, context) {
  return records.map((record) => {
    const effectiveContext = record.__context || context;
    return withHash(dataset.normalize(record, effectiveContext));
  });
}

/**
 * Positions the bill list on the first page that has NOT been checkpointed yet.
 *
 * @param {object} page - Playwright page.
 * @param {number} resumeFromPage - last page stored in the checkpoint (<= 0 means "no resume").
 * @returns {Promise<boolean>} `false` when the checkpointed page was already the last page.
 * @throws {Error} when the checkpointed page cannot be reached.
 */
async function moveBillsToResumeStart(page, resumeFromPage) {
  if (resumeFromPage <= 0) {
    return true;
  }
  const reached = await jumpToPage(page, resumeFromPage);
  if (!reached) {
    throw new Error(`账单续爬失败:无法定位到 checkpoint 页码 ${resumeFromPage}`);
  }
  // gotoNextPage resolves to `{ moved, reason }`; unwrap it so callers get a real boolean.
  const { moved } = await gotoNextPage(page);
  return Boolean(moved);
}

/**
 * Positions the order list on the first page that has NOT been checkpointed yet.
 *
 * @param {object} page - Playwright page.
 * @param {number} resumeFromPage - last page stored in the checkpoint (<= 0 means "no resume").
 * @returns {Promise<boolean>} `false` when the checkpointed page was already the last page.
 * @throws {Error} when the checkpointed page cannot be reached.
 */
async function moveOrdersToResumeStart(page, resumeFromPage) {
  if (resumeFromPage <= 0) {
    return true;
  }
  const reached = await jumpToPage(page, resumeFromPage);
  if (!reached) {
    throw new Error(`订单续爬失败:无法定位到 checkpoint 页码 ${resumeFromPage}`);
  }
  // gotoNextPage resolves to `{ moved, reason }`; unwrap it so callers get a real boolean.
  const { moved } = await gotoNextPage(page);
  return Boolean(moved);
}

/**
 * Positions the customer list on the first page that has NOT been checkpointed yet.
 *
 * @param {object} page - Playwright page.
 * @param {number} resumeFromPage - last page stored in the checkpoint (<= 0 means "no resume").
 * @returns {Promise<boolean>} `false` when the checkpointed page was already the last page.
 * @throws {Error} when the checkpointed page cannot be reached.
 */
async function moveCustomersToResumeStart(page, resumeFromPage) {
  if (resumeFromPage <= 0) {
    return true;
  }
  const reached = await jumpToPage(page, resumeFromPage);
  if (!reached) {
    throw new Error(`客户续爬失败:无法定位到 checkpoint 页码 ${resumeFromPage}`);
  }
  // gotoNextPage resolves to `{ moved, reason }`; unwrap it so callers get a real boolean.
  const { moved } = await gotoNextPage(page);
  return Boolean(moved);
}

/**
 * Reads the bill watermark (latest consumption time) from the database and moves it back by
 * `config.billIncrementalOverlapDays`.
 *
 * @returns {Promise<string|null>} a `YYYY-MM-DD` date, or `null` when no database is configured,
 *   no usable watermark exists, or the query fails.
 */
async function getLatestBillConsumptionDate() {
  if (!hasDbConfig()) {
    console.warn('[增量模式] 未配置数据库连接,无法读取账单水位,回退到当前日期');
    return null;
  }

  try {
    const latest = await getLatestBillConsumptionTimeFromDb();
    const looksLikeDate = Boolean(latest) && /^\d{4}-\d{2}-\d{2}/.test(latest);
    if (!looksLikeDate) {
      return null;
    }
    const parsed = parseDbDateTime(latest);
    if (!parsed) {
      // Unparseable time part: fall back to the raw date prefix without applying the overlap.
      return latest.slice(0, 10);
    }
    return formatDate(subtractDays(parsed, config.billIncrementalOverlapDays));
  } catch (error) {
    console.error('[增量模式] 查询数据库最新账单消费时间失败:', error.message);
    return null;
  }
}

/**
 * Tells whether a bill record's consumption date is strictly later than the watermark date.
 *
 * @param {object} record - bill record; reads `消费时间` first, then `consumeDate`.
 * @param {string} latestConsumptionDate - watermark as `YYYY-MM-DD`.
 * @returns {boolean} `false` when the record has no well-formed date or is not after the watermark.
 */
function isAfterLatestConsumptionDate(record, latestConsumptionDate) {
  const rawValue = record['消费时间'] || record.consumeDate || '';
  const consumeDate = String(rawValue).trim().slice(0, 10);
  if (!/^\d{4}-\d{2}-\d{2}$/.test(consumeDate)) {
    return false;
  }
  // ISO dates compare correctly as plain strings.
  return consumeDate > latestConsumptionDate;
}
/**
 * Picks a message record's timestamp and parses it.
 *
 * The first non-empty field wins, in this order: `消息修改时间`, `修改时间`, `gmtModified`,
 * `消息创建时间`, `创建时间`, `gmtCreated`.
 *
 * @param {object} record - message record (raw or normalized).
 * @returns {*} the result of `parseDbDateTime`, or `null` when no time field is present.
 */
function extractMessageTime(record) {
  const timeFields = ['消息修改时间', '修改时间', 'gmtModified', '消息创建时间', '创建时间', 'gmtCreated'];
  let rawValue = '';
  for (const field of timeFields) {
    if (record[field]) {
      rawValue = record[field];
      break;
    }
  }
  const text = String(rawValue).trim();
  return text ? parseDbDateTime(text) : null;
}

/**
 * Summarises the time range covered by one page of message records.
 *
 * @param {Array<object>} records - records of a single page.
 * @returns {{total: number, parsed: number, earliest: string, latest: string}} counts plus the
 *   formatted earliest/latest timestamps (empty strings when nothing could be parsed).
 */
function getMessagePageTimeStats(records) {
  const timestamps = [];
  for (const record of records) {
    const parsedTime = extractMessageTime(record);
    if (parsedTime) {
      timestamps.push(parsedTime);
    }
  }
  timestamps.sort((left, right) => left.getTime() - right.getTime());

  const hasTimes = timestamps.length > 0;
  return {
    total: records.length,
    parsed: timestamps.length,
    earliest: hasTimes ? formatDateTime(timestamps[0]) : '',
    latest: hasTimes ? formatDateTime(timestamps[timestamps.length - 1]) : '',
  };
}

/**
 * Builds the `title__receivedAt__orderNo` key used to recognise messages without a `msgId`.
 *
 * @param {object} record - message record; several alternative field names are accepted per part.
 * @returns {string} the fingerprint string.
 */
function buildMessageFingerprint(record) {
  const clean = (value) => String(value).trim();
  const parts = [
    clean(record.title || record.detailTitle || record.column_1 || ''),
    clean(record.receivedAt || record.gmtModified || record.gmtCreated || record.column_2 || ''),
    clean(record.orderNo || record.refundOrderNo || ''),
  ];
  return parts.join('__');
}
resumeCheckpoint.records : []; const startIndex = Number.parseInt(String(resumeCheckpoint?.currentIndex || 0), 10) || 0; if (startIndex > 0) { console.log(`[订单详情续爬] 从 checkpoint 恢复: index=${startIndex}, records=${allDetails.length}`); } let currentListPage = 0; let currentWindowKey = ''; let currentGroupKey = ''; await page.goto(datasets.orders.url, { waitUntil: 'domcontentloaded' }); await waitUntilReady(page, datasets.orders.heading); await trySetPageSize(page, datasets.orders.pageSize); for (let index = startIndex; index < orderTargets.length; index += 1) { await runtimeCheckpoint(`订单详情 ${index + 1}/${orderTargets.length}`); const target = orderTargets[index]; console.log(`[订单详情] ${index + 1}/${orderTargets.length} orderId=${target.orderId} page=${target.pageNum} window=${target.windowStart}~${target.windowEnd}`); const nextWindowKey = `${target.windowStart}|${target.windowEnd}`; if (target.windowStart && target.windowEnd && currentWindowKey !== nextWindowKey) { await restoreOrderWindow(page, target.windowStart, target.windowEnd); currentWindowKey = nextWindowKey; currentListPage = 1; } const nextGroupKey = `${nextWindowKey}|${target.pageNum}`; if (target.pageNum > 0 && (currentListPage !== target.pageNum || currentGroupKey !== nextGroupKey)) { const reached = await jumpToOrderPage(page, target.pageNum); if (!reached) { console.warn(`[订单详情] 无法跳到第 ${target.pageNum} 页,跳过 ${target.orderId}`); continue; } currentListPage = target.pageNum; } currentGroupKey = nextGroupKey; const clicked = await clickOrderDetailFromListWithRetry(page, target); if (!clicked) { console.warn(`[订单详情] 列表中未找到 orderId=${target.orderId},跳过`); continue; } try { await page.waitForFunction( (text) => document.body && document.body.innerText.includes(text), '订单详情', { timeout: 15000 }, ); await sleep(1000); } catch { console.warn(`[订单详情] ${target.orderId} 详情页加载超时,跳过`); await page.goBack({ waitUntil: 'domcontentloaded' }).catch(() => null); await waitForStableOrderList(page).catch(() => null); 
/**
 * Normalizes raw records and persists them as a new run of the dataset.
 *
 * @param {object} dataset - dataset descriptor.
 * @param {Array<object>} records - raw records.
 * @param {*} context - fallback normalization context.
 * @returns {{stamp: *, stats: *}} result of `persistNormalizedDataset`.
 */
function persistDataset(dataset, records, context) {
  return persistNormalizedDataset(dataset, normalizeDatasetRecords(dataset, records, context));
}

/**
 * Diffs already-normalized records against the stored state, then saves the run and its delta.
 *
 * @param {{name: string, uniqueKey: *}} dataset - dataset descriptor.
 * @param {Array<object>} normalizedRecords - records that have already been normalized.
 * @returns {{stamp: *, stats: *}} the stamp returned by `saveDatasetRun` and the diff statistics.
 */
function persistNormalizedDataset(dataset, normalizedRecords) {
  const { name, uniqueKey } = dataset;
  const storedState = loadCurrentState(name, uniqueKey);
  const { delta, stats, ...rest } = { ...diffRecords(storedState, normalizedRecords, uniqueKey) };
  const mergedState = { ...rest, delta, stats };
  const stamp = saveDatasetRun(name, mergedState);
  saveDelta(name, stamp, delta);
  return { stamp, stats };
}
console.error('[通知] 发送登录提醒失败:', notifyErr.message); } } if (!allowInteractiveAuth) { throw new Error(`当前页面仍处于登录/鉴权页,无法进入「${heading}」。请重新执行 npm run login,并确认该账号对该页面有访问权限。`); } console.log(`[waitUntilReady] 允许交互式登录,等待用户完成认证后进入「${heading}」...`); } try { await page.waitForFunction( (text) => document.body && document.body.innerText.includes(text), heading, { timeout }, ); } catch (err) { // 超时时打印诊断信息 const { currentUrl, bodyText, isAuthPage } = await detectAuthRedirect(page); console.error(`[waitUntilReady] 超时!当前URL: ${currentUrl}`); console.error(`[waitUntilReady] 页面内容前500字: ${bodyText}`); if (isAuthPage && !allowInteractiveAuth) { try { await sendLoginAlert(currentUrl); } catch (notifyErr) { console.error('[通知] 发送登录提醒失败:', notifyErr.message); } throw new Error(`当前页面停留在登录/鉴权页,未能进入「${heading}」。请重新执行 npm run login,并确认该账号对该页面有访问权限。`); } if (isAuthPage && allowInteractiveAuth) { throw new Error(`交互式登录超时,仍未进入「${heading}」。请确认已在浏览器中完成 RAM/阿里云登录,并且当前账号有访问该页面的权限。`); } throw err; } const finalState = await detectAuthRedirect(page); if (finalState.isAuthPage && !allowInteractiveAuth) { throw new Error(`当前页面仍处于登录/鉴权页,未成功进入「${heading}」。请重新执行 npm run login,并确认该账号对该页面有访问权限。`); } await sleep(1500); } async function scrapePagedTable(page, dataset, context, options = {}) { const { onPage, skipInitialPage = false, shouldStop } = options; const pages = []; const visited = new Set(); let shouldSkipCurrentPage = skipInitialPage; const describeStopReason = (reason) => { if (!reason) { return 'unknown'; } const details = []; if (reason.beforePage != null) { details.push(`before=${reason.beforePage}`); } if (reason.afterPage != null) { details.push(`after=${reason.afterPage}`); } return details.length > 0 ? 
`${reason.code} (${details.join(', ')})` : reason.code; }; while (true) { await runtimeCheckpoint(`抓取 ${dataset.name} 分页`); await waitForTableRows(page); const pageData = await extractTable(page); const pageNum = await currentPageNumber(page); const pageKey = `${pageNum}-${pageData.rows.length}`; console.log(`[抓取] 第${pageNum}页, ${pageData.rows.length}行, key="${pageKey}"`); if (shouldSkipCurrentPage) { console.log(`[抓取] 跳过 checkpoint 已保存页: ${pageNum}`); shouldSkipCurrentPage = false; const { moved, reason } = await gotoNextPage(page); if (!moved) { console.log(`[抓取] checkpoint 已停止续爬: ${describeStopReason(reason)}`); break; } continue; } if (visited.has(pageKey)) { console.log(`[抓取] 重复页面key,停止翻页`); break; } visited.add(pageKey); const pageRows = pageData.rows.map((row) => ({ ...row, __context: { ...context, pageNum } })); pages.push(...pageRows); if (onPage) { await onPage({ pageData, pageNum, pageRows }); } if (shouldStop && await shouldStop({ pageData, pageNum, pageRows, pages })) { console.log(`[抓取] 满足停止条件,在第${pageNum}页提前停止`); break; } const { moved, reason } = await gotoNextPage(page); if (!moved) { console.log(`[抓取] 停止翻页: ${describeStopReason(reason)}`); break; } } console.log(`[抓取] 共采集 ${pages.length} 条记录`); return pages; } async function raiseIfSessionExpired(page, label) { const { currentUrl, bodyText, isAuthPage } = await detectAuthRedirect(page); if (!isAuthPage) { return; } console.error(`[鉴权] ${label} 时检测到登录页/鉴权页: ${currentUrl}`); console.error(`[鉴权] 页面内容前500字: ${bodyText}`); try { await sendLoginAlert(currentUrl); } catch (notifyErr) { console.error('[通知] 发送登录提醒失败:', notifyErr.message); } throw new Error(`运行过程中登录态失效(${label})。请重新执行 npm run login 后再继续同步。`); } async function extractTable(page) { return page.evaluate(() => { const normalize = (value) => String(value || '') .replace(/\u00a0/g, ' ') .replace(/\s+\n/g, '\n') .replace(/\n\s+/g, '\n') .trim(); const toRecords = (headers, rows) => rows .map((cells) => cells.map((cell) => normalize(cell))) 
.filter((cells) => cells.some(Boolean)) .map((cells) => { const record = {}; const keys = headers.length ? headers : cells.map((_, index) => `column_${index + 1}`); keys.forEach((header, index) => { record[header || `column_${index + 1}`] = cells[index] || ''; }); return record; }); const extractFromNativeTables = () => { const headerTables = Array.from(document.querySelectorAll('table')).filter((table) => table.querySelectorAll('thead th').length > 1); const headerTable = headerTables.sort((a, b) => b.querySelectorAll('thead th').length - a.querySelectorAll('thead th').length)[0]; const headers = headerTable ? Array.from(headerTable.querySelectorAll('thead th')).map((cell) => normalize(cell.textContent)) : []; const bodyTables = Array.from(document.querySelectorAll('table')).filter((table) => table.querySelectorAll('tbody tr').length > 0); const bodyTable = bodyTables.sort((a, b) => { const aSize = Math.max(...Array.from(a.querySelectorAll('tbody tr')).map((row) => row.querySelectorAll('td').length), 0); const bSize = Math.max(...Array.from(b.querySelectorAll('tbody tr')).map((row) => row.querySelectorAll('td').length), 0); return bSize - aSize; })[0]; if (!bodyTable) { return { headers, rows: [] }; } const rows = Array.from(bodyTable.querySelectorAll('tbody tr')) .map((row) => Array.from(row.querySelectorAll('td')).map((cell) => normalize(cell.innerText || cell.textContent))); return { headers, rows: toRecords(headers, rows) }; }; const extractFromNextTable = () => { const container = document.querySelector('.next-table, .next-table-inner, [class*="next-table"]'); if (!container) { return { headers: [], rows: [] }; } const headers = Array.from(container.querySelectorAll('.next-table-header .next-table-cell, .next-table-header th, [role="columnheader"]')) .map((cell) => normalize(cell.innerText || cell.textContent)) .filter(Boolean); const rowCandidates = Array.from(container.querySelectorAll('.next-table-body .next-table-row, .next-table-row, [role="row"]')); 
const rows = rowCandidates .map((row) => { const cells = Array.from(row.querySelectorAll('.next-table-cell, [role="gridcell"], [role="cell"], td')) .map((cell) => normalize(cell.innerText || cell.textContent)); return cells; }) .filter((cells) => cells.length > 0 && cells.some(Boolean)); return { headers, rows: toRecords(headers, rows) }; }; const nativeResult = extractFromNativeTables(); if (nativeResult.rows.length > 0) { return nativeResult; } const nextTableResult = extractFromNextTable(); if (nextTableResult.rows.length > 0) { return nextTableResult; } return nextTableResult.headers.length > 0 ? nextTableResult : nativeResult; }); } function isTargetClosedError(error) { const message = String(error?.message || error || ''); return message.includes('Target page, context or browser has been closed'); } function assertPageAvailable(page, label) { if (!page || page.isClosed?.()) { throw new Error(`页面在${label}前已被关闭。请检查是否手动关闭了浏览器,或浏览器是否异常退出,然后重新执行同步。`); } } async function waitForTableRows(page) { await runtimeCheckpoint('等待表格加载'); assertPageAvailable(page, '等待表格加载'); try { await page.waitForFunction(() => { const nativeRows = document.querySelectorAll('table tbody tr').length; const nextRows = document.querySelectorAll('.next-table-body .next-table-row, .next-table-row, [role="row"]').length; const emptyState = document.querySelector('.next-table-empty, .next-empty, [class*="empty"], [class*="no-data"]'); return nativeRows > 0 || nextRows > 0 || Boolean(emptyState); }, null, { timeout: 120000 }); } catch (error) { if (isTargetClosedError(error)) { throw new Error('等待消息表格加载时,浏览器页面已被关闭。请勿手动关闭浏览器窗口,并检查浏览器是否异常退出后重试。'); } await raiseIfSessionExpired(page, '等待表格加载'); throw error; } await sleep(800); } async function currentPageNumber(page) { assertPageAvailable(page, '读取当前页码'); try { const active = page.locator('.next-pagination-item.next-current'); if ((await active.count()) === 0) return 1; return Number.parseInt((await active.first().innerText()).trim(), 10) || 1; } 
catch (error) { if (isTargetClosedError(error)) { throw new Error('读取分页页码时,浏览器页面已被关闭。请勿手动关闭浏览器窗口,并检查浏览器是否异常退出后重试。'); } throw error; } } async function jumpToPage(page, targetPage, options = {}) { const { allowSequentialFallback = true } = options; if (targetPage <= 1) { return true; } const current = await currentPageNumber(page); if (current === targetPage) { return true; } const jumpInputCandidates = [ '.next-pagination-jump-input input', 'input[aria-label*="页码"]', 'input[aria-label*="页"]', ]; for (const selector of jumpInputCandidates) { const input = page.locator(selector).first(); if ((await input.count()) === 0) { continue; } await input.click().catch(() => null); await sleep(100); await page.keyboard.press('Control+A').catch(() => null); await page.keyboard.type(String(targetPage), { delay: 20 }).catch(() => null); await page.keyboard.press('Enter').catch(() => null); await sleep(1500); const afterJump = await currentPageNumber(page); if (afterJump === targetPage) { console.log(`[跳页] 已跳转到第 ${targetPage} 页`); return true; } } if (!allowSequentialFallback) { console.warn(`[跳页] 未找到可用跳页输入框,且当前模式禁止顺序兜底: target=${targetPage}`); return false; } console.warn(`[跳页] 未找到可用跳页输入框,尝试顺序翻到第 ${targetPage} 页`); let guard = 0; while (guard < targetPage + 5) { const currentPage = await currentPageNumber(page); if (currentPage >= targetPage) { return currentPage === targetPage; } const { moved } = await gotoNextPage(page); if (!moved) { return false; } guard += 1; } return false; } async function gotoNextPage(page) { await runtimeCheckpoint('翻页'); assertPageAvailable(page, '翻页'); const before = await currentPageNumber(page); try { // 用 Playwright locator 定位"下一页"按钮 const nextBtn = page.locator('button.next-pagination-item.next-next'); if ((await nextBtn.count()) === 0) { return { moved: false, reason: { code: 'next_button_missing', beforePage: before }, }; } const disabled = (await nextBtn.getAttribute('disabled')) != null; if (disabled) { return { moved: false, reason: { code: 
'next_button_disabled', beforePage: before }, }; } // 用 Playwright click(而非 DOM click),确保 React 事件正常触发 await nextBtn.click(); await sleep(2000); await raiseIfSessionExpired(page, `翻页 ${before} -> next`); const after = await currentPageNumber(page); console.log(`[翻页] ${before} -> ${after}`); if (before > 1 && after === 1) { throw new Error(`分页从第 ${before} 页异常回退到第 1 页,疑似登录态失效或页面会话已重置。请重新执行 npm run login 后再继续同步。`); } if (after < before) { throw new Error(`分页从第 ${before} 页异常回退到第 ${after} 页,疑似登录态失效或页面状态被重置。请重新执行 npm run login 后再继续同步。`); } if (before === after) { const fallbackTarget = before + 1; console.warn(`[翻页] next 点击后页码未推进,尝试跳页到 ${fallbackTarget}`); const jumped = await jumpToPage(page, fallbackTarget, { allowSequentialFallback: false }); if (jumped) { const afterJump = await currentPageNumber(page); console.log(`[翻页] fallback jump ${before} -> ${afterJump}`); return { moved: true, reason: { code: 'advanced_via_jump', beforePage: before, afterPage: afterJump }, }; } return { moved: false, reason: { code: 'page_number_not_advanced', beforePage: before, afterPage: after }, }; } return { moved: true, reason: { code: 'advanced', beforePage: before, afterPage: after }, }; } catch (error) { if (isTargetClosedError(error)) { throw new Error(`翻页到下一页时,浏览器页面在第 ${before} 页之后被关闭。请勿手动关闭浏览器窗口,并检查浏览器是否异常退出后重试。`); } throw error; } } async function trySetPageSize(page, pageSize) { await runtimeCheckpoint(`设置每页 ${pageSize}`); const input = page.locator('input[aria-label="请选择每页显示几条"]').first(); if ((await input.count()) === 0) return; await input.click().catch(() => null); await sleep(300); const option = page.locator(`text=${pageSize}`).last(); if ((await option.count()) === 0) { await page.keyboard.press('Escape').catch(() => null); return; } await option.click().catch(() => null); await sleep(1200); } async function setDateRange(page, start, end) { await runtimeCheckpoint(`设置订单日期 ${start} ~ ${end}`); console.log(`[订单日期] 设置: ${start} ~ ${end}`); await _fillDateRange(page, start, 
end); // 验证 const startActual = await page.locator('input[placeholder="起始日期"]').inputValue().catch(() => ''); const endActual = await page.locator('input[placeholder="结束日期"]').inputValue().catch(() => ''); // 如果结果不对,用反向顺序重试(先填开始再填结束) if (startActual !== start || endActual !== end) { console.log(`[订单日期] 首次结果不对: "${startActual}" ~ "${endActual}",反向重试`); await _fillDateRange(page, start, end, true); const s2 = await page.locator('input[placeholder="起始日期"]').inputValue().catch(() => ''); const e2 = await page.locator('input[placeholder="结束日期"]').inputValue().catch(() => ''); console.log(`[订单日期] 重试结果: "${s2}" ~ "${e2}"`); } else { console.log(`[订单日期] 结果: "${startActual}" ~ "${endActual}"`); } } async function _fillDateRange(page, start, end, startFirst = false) { await runtimeCheckpoint('填写订单日期'); const trigger = page.locator('input[placeholder="结束日期"]'); await trigger.click(); await sleep(1000); const panelStartInput = page.locator('.next-range-picker-panel-input-start-date input'); const panelEndInput = page.locator('.next-range-picker-panel-input-end-date input'); if (startFirst) { // 先填开始日期 await panelStartInput.click(); await sleep(100); await page.keyboard.press('Control+A'); await page.keyboard.type(start, { delay: 30 }); await sleep(300); // 再填结束日期 await panelEndInput.click(); await sleep(100); await page.keyboard.press('Control+A'); await page.keyboard.type(end, { delay: 30 }); await sleep(300); } else { // 先填结束日期(默认) await panelEndInput.click(); await sleep(100); await page.keyboard.press('Control+A'); await page.keyboard.type(end, { delay: 30 }); await sleep(300); // 再填开始日期 await panelStartInput.click(); await sleep(100); await page.keyboard.press('Control+A'); await page.keyboard.type(start, { delay: 30 }); await sleep(300); } await page.keyboard.press('Enter'); await sleep(500); await page.mouse.click(0, 0); await sleep(300); await page.keyboard.press('Escape'); await sleep(300); await page.locator('.next-overlay-wrapper.opened').waitFor({ state: 'hidden', 
timeout: 3000 }).catch(() => null); await sleep(300); } async function setMonthValue(page, month) { await runtimeCheckpoint(`设置账单月份 ${month}`); // 先尝试按 inputValue 匹配 YYYY-MM 格式 const inputs = page.locator('input'); const total = await inputs.count(); const allValues = []; for (let index = 0; index < total; index += 1) { const input = inputs.nth(index); const value = await input.inputValue().catch(() => ''); const placeholder = await input.getAttribute('placeholder').catch(() => ''); allValues.push({ index, value, placeholder }); if (/^\d{4}-\d{2}$/.test(value)) { console.log(`[账单月份] 通过 value 匹配到 input[${index}], 设置: ${month}`); await typeIntoDateInput(input, month, page); return; } } // 如果 value 为空,尝试按 placeholder 匹配月份选择器 for (const item of allValues) { if (item.placeholder && /月/.test(item.placeholder)) { console.log(`[账单月份] 通过 placeholder 匹配到 input[${item.index}], 设置: ${month}`); await typeIntoDateInput(inputs.nth(item.index), month, page); return; } } // 兜底:找任何看起来像日期/月份选择器的 input(排除搜索框等) for (const item of allValues) { const input = inputs.nth(item.index); const cls = await input.evaluate((el) => el.closest('[class*="date-picker"], [class*="month-picker"], [class*="range-picker"]')?.className || '').catch(() => ''); if (cls) { console.log(`[账单月份] 通过父级 class 匹配到 input[${item.index}] (${cls}), 设置: ${month}`); await typeIntoDateInput(input, month, page); return; } } console.error('[DEBUG] 账单页面所有 input:', JSON.stringify(allValues, null, 2)); throw new Error('未识别到账单佣金月份输入框,请打开页面确认结构是否变化。'); } /** * 用键盘输入日期值。 * 策略:focus → 全选 → 快速键入 → Tab 移开焦点(触发 blur 提交,但不会像 click 那样打开面板)。 * 即使面板弹出,快速键入 + Tab 也能在面板滚动前完成提交并关闭。 */ async function typeIntoDateInput(locator, value, page) { await runtimeCheckpoint(`填写日期输入 ${value}`); // 移除 readonly await locator.evaluate((node) => node.removeAttribute('readonly')); // focus 并全选当前内容 await locator.focus(); await sleep(100); await page.keyboard.press('Control+A'); await sleep(100); // 快速逐字符输入新值 await page.keyboard.type(value, { delay: 30 }); 
await sleep(200); // Tab 移开焦点 → 触发 onBlur 提交值 + 关闭面板 await page.keyboard.press('Tab'); await sleep(300); // 如果面板还在,Escape 兜底关闭 await page.keyboard.press('Escape'); await sleep(300); // 验证 const actual = await locator.inputValue().catch(() => ''); if (actual !== value) { console.warn(`[WARN] typeIntoDateInput: 期望 "${value}",实际 "${actual}"`); } else { console.log(`[日期设置] 成功: "${value}"`); } } async function clickQuery(page) { await runtimeCheckpoint('点击查询'); const button = page.locator('button:has-text("查询")').first(); await button.scrollIntoViewIfNeeded().catch(() => null); await button.evaluate((node) => { node.scrollIntoView({ block: 'center', inline: 'center', behavior: 'instant' }); }).catch(() => null); try { await button.click({ timeout: 8000 }); } catch (error) { console.warn(`[查询按钮] click 失败,尝试 Enter 兜底: ${error.message}`); await button.focus().catch(() => null); await page.keyboard.press('Enter'); } await sleep(1800); } function buildMonthlyDateWindows(startDate) { const start = new Date(`${startDate}T00:00:00+08:00`); const end = new Date(); const windows = []; const cursor = new Date(start.getFullYear(), start.getMonth(), 1); while (cursor <= end) { const windowStart = new Date(cursor); const windowEnd = new Date(cursor.getFullYear(), cursor.getMonth() + 1, 0); const actualEnd = windowEnd > end ? 
end : windowEnd; windows.push({ windowStart: formatDate(windowStart), windowEnd: formatDate(actualEnd), start: formatDate(windowStart), end: formatDate(actualEnd), }); cursor.setMonth(cursor.getMonth() + 1); } return windows; } function buildMonthList(startMonth) { const [year, month] = startMonth.split('-').map(Number); const cursor = new Date(year, month - 1, 1); const end = new Date(); const months = []; while (cursor <= end) { months.push(`${cursor.getFullYear()}-${String(cursor.getMonth() + 1).padStart(2, '0')}`); cursor.setMonth(cursor.getMonth() + 1); } return months; } function formatDate(date) { return `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}-${String(date.getDate()).padStart(2, '0')}`; } function dedupeByHash(records) { const seen = new Set(); return records.filter((record) => { const key = JSON.stringify(record); if (seen.has(key)) return false; seen.add(key); return true; }); } function collectValidOrderIds(records) { const ids = []; const seen = new Set(); for (const record of records) { // 支持两种字段名:normalized 后的 orderId 和原始的 订单号 const rawOrderId = String(record.orderId || record['订单号'] || '').trim(); if (!rawOrderId || rawOrderId.includes('没有数据')) { continue; } if (!isValidOrderId(rawOrderId)) { console.log(`[订单详情] 跳过无效订单号: ${rawOrderId}`); continue; } if (seen.has(rawOrderId)) { continue; } seen.add(rawOrderId); ids.push(rawOrderId); } return ids; } function collectValidAccountIds(records) { const ids = []; const seen = new Set(); for (const record of records) { const rawAccountId = String(record.accountId || '').trim(); if (!rawAccountId || rawAccountId.includes('没有数据')) { continue; } if (!isValidAccountId(rawAccountId)) { console.log(`[客户详情] 跳过无效 accountId: ${rawAccountId}`); continue; } if (seen.has(rawAccountId)) { continue; } seen.add(rawAccountId); ids.push(rawAccountId); } return ids; } function collectCustomerDetailTargets(records) { const targets = []; const seen = new Set(); for (const record of records) { const 
accountId = String(record.accountId || '').trim(); const loginName = String(record.loginName || '').trim(); const pageNum = Number.parseInt(String(record.listPageNum || 0), 10) || 0; if (!accountId || !isValidAccountId(accountId) || pageNum <= 0) { continue; } if (seen.has(accountId)) { continue; } seen.add(accountId); targets.push({ accountId, loginName, pageNum }); } return targets.sort((a, b) => a.pageNum - b.pageNum); } function collectOrderDetailTargets(records, cachedOrderIds = []) { const allowSet = new Set((cachedOrderIds || []).map((value) => String(value || '').trim()).filter(Boolean)); const targets = []; const seen = new Set(); for (const record of records) { const orderId = String(record.orderId || '').trim(); const pageNum = Number.parseInt(String(record.listPageNum || 0), 10) || 0; const windowStart = String(record.windowStart || '').trim(); const windowEnd = String(record.windowEnd || '').trim(); if (!orderId || !isValidOrderId(orderId) || pageNum <= 0) { continue; } if (allowSet.size > 0 && !allowSet.has(orderId)) { continue; } if (seen.has(orderId)) { continue; } seen.add(orderId); targets.push({ orderId, pageNum, windowStart, windowEnd }); } return targets.sort((a, b) => { const windowCompare = `${a.windowStart}|${a.windowEnd}`.localeCompare(`${b.windowStart}|${b.windowEnd}`); if (windowCompare !== 0) { return windowCompare; } return a.pageNum - b.pageNum; }); } async function clickCustomerDetailFromList(page, target) { const clicked = await page.evaluate(({ accountId, loginName }) => { const normalize = (value) => String(value || '').replace(/\s+/g, '').trim(); const rows = Array.from(document.querySelectorAll('table tbody tr')); const targetRow = rows.find((row) => { const text = normalize(row.innerText || row.textContent || ''); return text.includes(accountId) || (loginName && text.includes(loginName)); }); if (!targetRow) { return false; } const detailButton = Array.from(targetRow.querySelectorAll('button, a, span')) .find((node) => 
/详情/.test(String(node.textContent || '').trim())); if (!detailButton) { return false; } detailButton.scrollIntoView({ block: 'center', inline: 'center', behavior: 'instant' }); detailButton.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: true })); return true; }, target).catch(() => false); if (clicked) { await sleep(1200); } return clicked; } async function clickCustomerDetailFromListWithRetry(page, target) { const attempts = [target.pageNum, Math.max(1, target.pageNum - 1), target.pageNum + 1]; for (const pageNum of attempts) { if (pageNum > 0 && pageNum !== target.pageNum) { const reached = await jumpToCustomerPage(page, pageNum); if (!reached) { continue; } await waitForStableCustomerList(page); } const clicked = await clickCustomerDetailFromList(page, target); if (clicked) { return true; } } return false; } async function clickOrderDetailFromList(page, target) { const clicked = await page.evaluate(({ orderId }) => { const normalize = (value) => String(value || '').replace(/\s+/g, '').trim(); const rows = Array.from(document.querySelectorAll('table tbody tr')); const targetRow = rows.find((row) => { const text = normalize(row.innerText || row.textContent || ''); return text.includes(orderId); }); if (!targetRow) { return false; } const detailButton = Array.from(targetRow.querySelectorAll('button, a, span')) .find((node) => /详情/.test(String(node.textContent || '').trim())); if (!detailButton) { return false; } detailButton.scrollIntoView({ block: 'center', inline: 'center', behavior: 'instant' }); detailButton.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: true })); return true; }, target).catch(() => false); if (clicked) { await sleep(1200); } return clicked; } async function clickOrderDetailFromListWithRetry(page, target) { const attempts = [target.pageNum, Math.max(1, target.pageNum - 1), target.pageNum + 1]; for (const pageNum of attempts) { if (pageNum > 0 && pageNum !== target.pageNum) { const reached = await 
jumpToOrderPage(page, pageNum); if (!reached) { continue; } await waitForStableOrderList(page); } const clicked = await clickOrderDetailFromList(page, target); if (clicked) { return true; } } return false; } async function jumpToCustomerPage(page, pageNum) { const reached = await jumpToPage(page, pageNum); if (reached) { console.log(`[客户详情] 已跳转到第 ${pageNum} 页`); } return reached; } async function jumpToOrderPage(page, pageNum) { const reached = await jumpToPage(page, pageNum); if (reached) { console.log(`[订单详情] 已跳转到第 ${pageNum} 页`); } return reached; } async function waitForStableCustomerList(page) { await waitForTableRows(page).catch(() => null); await sleep(600); await waitForTableRows(page).catch(() => null); } async function waitForStableOrderList(page) { await waitForTableRows(page).catch(() => null); await sleep(600); await waitForTableRows(page).catch(() => null); } async function clickMessageDetailButton(page, rowText, rowIndex) { const clicked = await page.evaluate(({ rowTextValue, rowIndexValue }) => { const normalize = (value) => String(value || '').replace(/\s+/g, ' ').trim(); const target = normalize(rowTextValue); const rows = Array.from(document.querySelectorAll('.next-table-row, table tbody tr, [role="row"]')) .filter((row) => normalize(row.innerText || row.textContent || '')); const row = rows[rowIndexValue]; if (!row) { return { clicked: false, reason: 'row_not_found', rowCount: rows.length }; } const rowTextActual = normalize(row.innerText || row.textContent || ''); const clickableNodes = Array.from(row.querySelectorAll('button, a, [role="button"], .next-btn-text')); const preferred = clickableNodes.find((node) => { const text = normalize(node.innerText || node.textContent || ''); return text && rowTextActual.includes(text); }) || clickableNodes[0]; if (!preferred) { return { clicked: false, reason: 'clickable_node_not_found', rowTextActual }; } preferred.scrollIntoView({ block: 'center', inline: 'center', behavior: 'instant' }); 
preferred.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: true })); return { clicked: true, buttonText: normalize(preferred.innerText || preferred.textContent || ''), rowTextActual, matched: rowTextActual.includes(target), }; }, { rowTextValue: rowText, rowIndexValue: rowIndex }).catch(() => ({ clicked: false, reason: 'evaluate_failed' })); if (clicked.clicked) { await sleep(1200); } return clicked; } async function waitForMessageDetailDrawer(page) { await page.waitForFunction(() => { const header = document.querySelector('.next-drawer-header'); const body = document.querySelector('.next-drawer-body'); return !!header && !!body && String(header.textContent || '').includes('消息详情'); }, null, { timeout: 15000 }); await sleep(600); } async function extractMessageDetail(page) { return page.evaluate(() => { const normalize = (value) => String(value || '').replace(/\u00a0/g, ' ').trim(); const header = normalize(document.querySelector('.next-drawer-header')?.innerText || ''); const body = normalize(document.querySelector('.next-drawer-body')?.innerText || ''); const lines = body.split(/\r?\n/).map((line) => normalize(line)).filter(Boolean); const firstLine = lines[0] || ''; const extract = (label) => { const line = lines.find((item) => item.startsWith(`${label}:`) || item.startsWith(`${label}:`)); if (!line) return ''; return normalize(line.replace(`${label}:`, '').replace(`${label}:`, '')); }; const match = (pattern) => { const matched = body.match(pattern); return matched?.[1] ? 
normalize(matched[1]) : ''; }; const accountIdMatches = Array.from(body.matchAll(/账号ID[::]?(\d{6,})/g)).map((item) => normalize(item[1])).filter(Boolean); const classification = (() => { if (/退款/.test(header) || /退款/.test(body)) return 'refund'; if (/释放预警/.test(header) || /预计于【.*】释放/.test(body)) return 'release_warning'; if (/释放通知/.test(header) || /已释放/.test(body)) return 'release_notice'; if (/未支付提醒/.test(header) || /未支付/.test(body)) return 'unpaid_reminder'; if (/取消通知/.test(header) || /取消了一笔未支付订单/.test(body)) return 'order_cancel'; if (/余额-预警通知/.test(header) || /账户现金余额/.test(body)) return 'balance_warning'; if (/关联成功/.test(header) || /关联关系已完成建立/.test(body)) return 'association_success'; if (/注册成功/.test(header) || /受邀注册UID/.test(body)) return 'registration_success'; if (/变更已超期/.test(header) || /变更申请已超期/.test(body)) return 'change_overdue'; return 'general'; })(); const detailContent = lines.filter((line) => !/^(接收时间|客户账号|订单号|退款订单号|订单金额|退款金额|客户下单时间|退款时间|受邀注册UID)[::]/.test(line)); return { detailTitle: firstLine || header, detailContent: body, receivedAt: extract('接收时间'), customerName: extract('客户账号'), customerNo: extract('客户账号') || match(/贵司的代付(?:关联)?客户【[^/]+\/(\d{6,})】/) || match(/受邀注册UID[::]?(\d{6,})/) || accountIdMatches[0] || '', orderNo: extract('订单号') || extract('退款订单号'), orderAmount: extract('订单金额'), customerOrderTime: extract('客户下单时间'), refundOrderNo: extract('退款订单号'), refundAmount: extract('退款金额'), refundTime: extract('退款时间'), invitedRegisterUid: extract('受邀注册UID') || match(/受邀注册UID[::]?(\d{6,})/), accountIds: accountIdMatches.join(','), messageClassification: classification, status: '未读', title: firstLine || header, content: detailContent.join('\n'), }; }); } async function closeMessageDetailDrawer(page) { const closeButton = page.locator('.next-drawer-close, .next-dialog-close, .next-icon-close').first(); if ((await closeButton.count()) > 0) { await closeButton.click().catch(() => null); } else { await page.keyboard.press('Escape').catch(() => null); } 
await page.waitForFunction(() => !document.querySelector('.next-drawer-header'), null, { timeout: 10000 }).catch(() => null); await sleep(400); } async function fetchMessageApiRows(page, pageNum, pageSize) { return page.evaluate(async ({ currentPage, currentPageSize }) => { const response = await fetch(`/api/taskapi/msgbox/queryUserMsg.json?lv2CategoryId=0&pageNo=${currentPage}&pageSize=${currentPageSize}`, { credentials: 'include', }); const payload = await response.json(); return Array.isArray(payload?.data?.list) ? payload.data.list : []; }, { currentPage: pageNum, currentPageSize: pageSize }).catch(() => []); } function stripHtmlTags(value) { return String(value || '') .replace(/(\r?\n)?/gi, '\n') .replace(/<\/div>/gi, '\n') .replace(/<\/p>/gi, '\n') .replace(/<[^>]+>/g, '') .replace(/ /gi, ' ') .replace(/\r/g, '') .split('\n') .map((line) => line.trim()) .filter(Boolean) .join('\n'); } function mapApiMessageRecord(record) { if (!record || typeof record !== 'object') { return null; } return { msgId: String(record.id || '').trim(), title: String(record.title || '').trim(), detailTitle: String(record.title || '').trim(), detailContent: stripHtmlTags(record.htmlContent || record.content || ''), content: stripHtmlTags(record.htmlContent || record.content || ''), fromApp: String(record.fromApp || '').trim(), bizCode: String(record.bizCode || '').trim(), msgChannel: String(record.msgChannel || '').trim(), categoryName: String(record.categoryName || '').trim(), categoryId: String(record.lv3CategoryId || '').trim(), lv1CategoryId: String(record.lv1CategoryId || '').trim(), lv2CategoryId: String(record.lv2CategoryId || '').trim(), lv3CategoryId: String(record.lv3CategoryId || '').trim(), gmtCreated: record.createDate ? formatDateTime(new Date(record.createDate)) : '', gmtModified: record.updateDate ? formatDateTime(new Date(record.updateDate)) : '', status: Number(record.isRead) === 1 ? 
'已读' : '未读', }; } async function enrichMessageRowsWithDetails(page, pageRows, pageNum) { const enrichedRows = []; let detailSuccess = 0; let detailFailed = 0; const apiRows = await fetchMessageApiRows(page, pageNum, datasets.messages.pageSize); for (let index = 0; index < pageRows.length; index += 1) { const row = pageRows[index]; const rowText = String(row['消息标题'] || row['标题'] || row.title || row.column_1 || '').trim(); if (!rowText) { enrichedRows.push(row); continue; } const apiDetail = mapApiMessageRecord(apiRows[index]); if (apiDetail?.msgId) { detailSuccess += 1; enrichedRows.push({ ...row, ...apiDetail }); continue; } const clicked = await clickMessageDetailButton(page, rowText, index); if (!clicked.clicked) { detailFailed += 1; console.warn(`[消息详情] 打开失败: pageRow=${index + 1}, title="${rowText}", reason=${clicked.reason || 'unknown'}`); enrichedRows.push(row); continue; } try { await waitForMessageDetailDrawer(page); const detail = await extractMessageDetail(page); detailSuccess += 1; enrichedRows.push({ ...row, ...detail }); } catch (error) { detailFailed += 1; console.warn(`[消息详情] 提取失败: pageRow=${index + 1}, title="${rowText}", error=${error.message}`); enrichedRows.push(row); } finally { await closeMessageDetailDrawer(page); await waitForTableRows(page).catch(() => null); } } console.log(`[消息详情] 本页详情提取: success=${detailSuccess}, failed=${detailFailed}, total=${pageRows.length}`); return enrichedRows; } async function restoreOrderWindow(page, windowStart, windowEnd) { await waitUntilReady(page, datasets.orders.heading).catch(() => null); await setDateRange(page, windowStart, windowEnd); await clickQuery(page); await trySetPageSize(page, datasets.orders.pageSize).catch(() => null); await waitForStableOrderList(page).catch(() => null); } async function recoverCustomerListState(page, pageNum) { await waitUntilReady(page, datasets.customers.heading).catch(() => null); await trySetPageSize(page, datasets.customers.pageSize).catch(() => null); if (pageNum > 0) { 
await jumpToCustomerPage(page, pageNum).catch(() => null); await waitForStableCustomerList(page).catch(() => null); } } async function recoverOrderListState(page, pageNum, windowStart = '', windowEnd = '') { await waitUntilReady(page, datasets.orders.heading).catch(() => null); if (windowStart && windowEnd) { await restoreOrderWindow(page, windowStart, windowEnd).catch(() => null); } else { await trySetPageSize(page, datasets.orders.pageSize).catch(() => null); } if (pageNum > 0) { await jumpToOrderPage(page, pageNum).catch(() => null); await waitForStableOrderList(page).catch(() => null); } } function isValidOrderId(orderId) { const value = String(orderId || '').trim(); if (!value) return false; if (value.includes('�')) return false; return /^\d+$/.test(value); } function isValidAccountId(accountId) { const value = String(accountId || '').trim(); if (!value) return false; if (value.includes('�')) return false; return /^\d+$/.test(value); } async function extractOrderDetail(page) { return page.evaluate(() => { const text = document.body?.innerText || ''; const extract = (label) => { const lineBreakPattern = new RegExp(`${label}\\s*(?:\\r?\\n)+\\s*([^\\r\\n]+)`); const lineBreakMatch = text.match(lineBreakPattern); if (lineBreakMatch) return lineBreakMatch[1].trim(); const inlinePattern = new RegExp(`${label}\\s*[::]?\\s*([^\\r\\n]+)`); const inlineMatch = text.match(inlinePattern); return inlineMatch ? 
inlineMatch[1].trim() : ''; }; return { orderId: extract('订单号'), orderType: extract('订单类型'), status: extract('状态'), tradeType: extract('交易类型'), customerCategory: extract('客户分类'), dealerName: extract('二级经销商名称'), dealerUid: extract('二级经销商UID'), customerType: extract('客户类型'), opportunityId: extract('商机ID'), paymentTime: extract('支付时间'), orderTime: extract('下单时间'), productName: extract('产品名称'), productCode: extract('产品code'), originalPriceCny: extract('订单原价\\(CNY\\)'), paidAmountCny: extract('实付金额\\(CNY\\)'), discount: extract('订单折扣'), payableAmountCny: extract('应付金额(实付\\+代金券)\\(CNY\\)'), couponAmountCny: extract('代金券金额\\(CNY\\)'), }; }); } async function extractCustomerDetail(page) { return page.evaluate(() => { const normalize = (value) => String(value || '') .replace(/\u00a0/g, ' ') .trim(); const text = normalize(document.body?.innerText || '').replace(/\r/g, ''); const extract = (label, sourceText = text) => { const lineBreakPattern = new RegExp(`${label}\\s*(?:\\n)+\\s*([^\\n]+)`); const lineBreakMatch = sourceText.match(lineBreakPattern); if (lineBreakMatch) return normalize(lineBreakMatch[1]); const inlinePattern = new RegExp(`${label}\\s*[::]?\\s*([^\\n]+)`); const inlineMatch = sourceText.match(inlinePattern); return inlineMatch ? normalize(inlineMatch[1]) : ''; }; const normalizeAmount = (value) => normalize(value).replace(/[¥,]/g, '').trim(); const buildSection = (startLabel, endLabel = '') => { const start = text.indexOf(startLabel); if (start < 0) return ''; const end = endLabel ? 
text.indexOf(endLabel, start + startLabel.length) : -1; if (end > start) return text.slice(start, end); return text.slice(start); }; const lastMonthSection = buildSection('上月应付总金额(CNY)', '本月应付总金额(CNY)'); const currentMonthSection = buildSection('本月应付总金额(CNY)'); const extractAmountFromSection = (sectionText, label) => normalizeAmount(extract(label, sectionText)); let department = ''; const table = Array.from(document.querySelectorAll('table')).find((node) => (node.innerText || '').includes('所属部门'), ); if (table) { const rows = table.querySelectorAll('tbody tr'); for (const row of rows) { const cells = row.querySelectorAll('td'); if (cells.length >= 2) { const value = normalize(cells[1]?.innerText || cells[1]?.textContent || ''); if (value) { department = value; break; } } } } if (!department) { department = extract('所属部门'); } return { customerAccount: extract('客户账号'), customerName: extract('客户名称'), customerType: extract('客户类型'), tradeMode: extract('交易模式'), customerSource: extract('客户来源'), realNameStatus: extract('实名状态'), email: extract('邮箱') || extract('Email') || extract('电子邮箱'), relationDate: extract('关联日期'), phone: extract('手机号') || extract('手机') || extract('联系电话') || extract('联系手机'), remark: extract('备注') || extract('客户备注'), paymentNoticeStatus: extract('代为支付告知状态'), department, lastMonthPayableTotalCny: extractAmountFromSection(lastMonthSection, '上月应付总金额(CNY)'), lastMonthPrepayCny: extractAmountFromSection(lastMonthSection, '预付费金额'), lastMonthPostpayCny: extractAmountFromSection(lastMonthSection, '后付费金额'), currentMonthPayableTotalCny: extractAmountFromSection(currentMonthSection, '本月应付总金额(CNY)'), currentMonthPrepayCny: extractAmountFromSection(currentMonthSection, '预付费金额'), currentMonthPostpayCny: extractAmountFromSection(currentMonthSection, '后付费金额'), }; }); }