Files
aliyunApsSkill/aliyun-sync/aliyun-aps-sync/src/sync.js
2026-06-18 15:08:06 +08:00

3142 lines
113 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { chromium } from 'playwright';
import cron from 'node-cron';
import fs from 'node:fs';
import path from 'node:path';
import readline from 'node:readline';
import { config, datasets } from './config.js';
import { sendLoginAlert, sendRuntimeErrorAlert } from './notify.js';
import {
closeDbPool,
customerExists,
getExistingMessageIds,
getExistingMessageFingerprints,
getLatestBillConsumptionTimeFromDb,
getLatestMessageTimeFromDb,
getLatestOrderTimeFromDb,
hasDbConfig,
upsertBills,
upsertCustomerDetails,
upsertCustomers,
upsertMessages,
upsertOrderDetails,
upsertOrders,
} from './db.js';
import {
diffRecords,
loadCurrentState,
nowStamp,
saveCheckpoint,
saveDatasetRun,
saveDelta,
saveRunSummary,
withHash,
} from './storage.js';
// Promise-based delay: resolves after `ms` milliseconds.
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
// JSON-lines file that recordScheduleEvent() appends scheduler events to.
const scheduleEventFile = path.join(config.dataDir, 'runs', 'schedule-events.jsonl');
// Cached browser context; populated by getContext() and cleared by closeContextIfNeeded().
let _context = null;
// Lazily created singleton returned by getRuntimeController().
let _runtimeController = null;
// Browser handle returned by chromium.connectOverCDP(); only assigned in CDP mode.
let _browser = null;
// True when _context belongs to a browser attached over CDP; such a context is never closed by this module.
let _isAttachedBrowser = false;
// Names of the jobs currently executing through runLockedJob().
const runningJobs = new Set();
/**
 * Appends one scheduler event as a JSON line to `scheduleEventFile`.
 *
 * The written entry is `{ at: <ISO timestamp>, ...payload }`, so a payload that carries
 * its own `at` key overrides the generated timestamp. Any failure (creating the directory,
 * serialising the payload, appending) is logged as a warning and never thrown.
 *
 * @param {object} payload - Event fields to record.
 * @returns {void}
 */
function recordScheduleEvent(payload) {
  try {
    const targetDir = path.dirname(scheduleEventFile);
    fs.mkdirSync(targetDir, { recursive: true });
    const entry = { at: new Date().toISOString(), ...payload };
    const line = `${JSON.stringify(entry)}\n`;
    fs.appendFileSync(scheduleEventFile, line, 'utf8');
  } catch (error) {
    console.warn(`[schedule-event] 写入失败: ${error.message}`);
  }
}
// Text fragments that hasAuthKeywords() searches for in page body text;
// a match makes detectAuthRedirect() classify the page as a login/auth page.
const AUTH_PAGE_KEYWORDS = [
'RAM 用户登录',
'主账号登录',
'钉钉扫码登录',
'用户名',
'下一步',
'登录并使用 RAM',
];
/**
 * Releases the cached browser context, if any.
 *
 * - No cached context: does nothing.
 * - Context attached over CDP (`_isAttachedBrowser`): only drops the cached reference;
 *   the externally started browser is left untouched.
 * - Context launched by this module: closes it.
 *
 * The cached reference is cleared even when `close()` rejects (for example because the
 * browser already crashed), so the next `getContext()` call creates a fresh context
 * instead of handing out a dead one.
 *
 * @returns {Promise<void>}
 * @throws Whatever `context.close()` rejects with.
 */
async function closeContextIfNeeded() {
  if (!_context) return;
  if (_isAttachedBrowser) {
    _context = null;
    return;
  }
  try {
    await _context.close();
  } finally {
    // Always forget the context: a failed close leaves it unusable anyway.
    _context = null;
  }
}
/**
 * Returns the process-wide runtime controller, creating it on first use.
 *
 * The controller lets an operator steer a running sync from the terminal:
 * F7 pauses, F8 resumes, F9 requests termination. State lives in this closure and is
 * shared by every caller because the controller is cached in `_runtimeController`.
 * Once termination has been requested the flag is never reset.
 *
 * Returned API:
 * - `bind()`   – start listening for hotkeys (no-op when stdin is not a TTY or already bound).
 * - `unbind()` – stop listening and leave raw mode.
 * - `waitIfPaused(label)` – resolves once not paused; throws if termination was requested.
 * - `throwIfTerminated(label)` – throws if termination was requested.
 *
 * @returns {{bind: Function, unbind: Function, waitIfPaused: Function, throwIfTerminated: Function}}
 */
function getRuntimeController() {
  if (_runtimeController) return _runtimeController;
  let paused = false;
  let terminated = false;
  let keypressBound = false;

  const terminatedError = (label) => new Error(`[控制] 已终止:${label}`);

  function unbind() {
    if (!keypressBound) return;
    process.stdin.off('keypress', onKeypress);
    if (process.stdin.isTTY && typeof process.stdin.setRawMode === 'function') {
      process.stdin.setRawMode(false);
    }
    keypressBound = false;
  }

  function onKeypress(_str, key = {}) {
    // In raw mode the terminal no longer turns Ctrl+C into SIGINT, so forward it
    // ourselves; otherwise the process cannot be interrupted from the keyboard.
    if (key.ctrl && key.name === 'c') {
      if (process.listenerCount('SIGINT') > 0) {
        process.emit('SIGINT', 'SIGINT');
      } else {
        // No handler registered: mimic the default action (exit code 128 + SIGINT).
        unbind();
        process.exit(130);
      }
      return;
    }
    if (key.name === 'f7') {
      if (!paused) {
        paused = true;
        console.log('[控制] 已暂停F7。按 F8 继续,按 F9 终止。');
      }
      return;
    }
    if (key.name === 'f8') {
      if (paused) {
        paused = false;
        console.log('[控制] 已继续F8。');
      }
      return;
    }
    if (key.name === 'f9') {
      terminated = true;
      // Release any waiter so it can observe the termination flag.
      paused = false;
      console.log('[控制] 已请求终止F9将在安全检查点停止。');
    }
  }

  function bind() {
    if (keypressBound || !process.stdin.isTTY) return;
    readline.emitKeypressEvents(process.stdin);
    if (typeof process.stdin.setRawMode === 'function') {
      process.stdin.setRawMode(true);
    }
    process.stdin.resume();
    process.stdin.on('keypress', onKeypress);
    keypressBound = true;
    console.log('[控制] 热键已启用F7 暂停 / F8 继续 / F9 终止');
  }

  async function waitIfPaused(label = '任务') {
    if (terminated) {
      throw terminatedError(label);
    }
    // Poll every 300 ms until resumed; termination while paused aborts the wait.
    while (paused) {
      await sleep(300);
      if (terminated) {
        throw terminatedError(label);
      }
    }
  }

  function throwIfTerminated(label = '任务') {
    if (terminated) {
      throw terminatedError(label);
    }
  }

  _runtimeController = {
    bind,
    unbind,
    waitIfPaused,
    throwIfTerminated,
  };
  return _runtimeController;
}
/**
 * Cooperative checkpoint for long-running work: aborts if termination was requested,
 * otherwise waits while the run is paused.
 *
 * @param {string} label - Name of the current step, used in the termination error message.
 * @returns {Promise<void>}
 * @throws {Error} When the runtime controller reports that termination was requested.
 */
async function runtimeCheckpoint(label) {
  const runtime = getRuntimeController();
  runtime.throwIfTerminated(label);
  await runtime.waitIfPaused(label);
}
/**
 * Removes stale Chromium singleton lock entries from `config.userDataDir`.
 *
 * An entry counts as stale when its modification time is at least 10 minutes old;
 * younger entries are kept and logged as active. Errors on an individual entry are
 * logged and do not stop the others from being processed.
 *
 * Entries are inspected with `lstat` rather than `existsSync`/`stat`: those follow
 * symlinks, and a lock that is a dangling symlink would be reported as missing and
 * never cleaned up.
 *
 * @returns {void}
 */
function clearStaleBrowserProfileLocks() {
  const lockFiles = ['SingletonLock', 'SingletonCookie', 'SingletonSocket'];
  const now = Date.now();
  const staleMs = 10 * 60 * 1000;
  for (const fileName of lockFiles) {
    const filePath = path.join(config.userDataDir, fileName);
    try {
      let stat;
      try {
        // lstat describes the link itself, so dangling symlinks are still detected.
        stat = fs.lstatSync(filePath);
      } catch (error) {
        if (error.code === 'ENOENT') {
          continue;
        }
        throw error;
      }
      const ageMs = now - stat.mtimeMs;
      if (ageMs < staleMs) {
        console.log(`[浏览器锁] 检测到活跃锁文件,保留: ${fileName}`);
        continue;
      }
      fs.rmSync(filePath, { force: true });
      console.log(`[浏览器锁] 已清理陈旧锁文件: ${fileName}`);
    } catch (error) {
      console.warn(`[浏览器锁] 清理 ${fileName} 失败: ${error.message}`);
    }
  }
}
/**
 * Returns the shared browser context, creating it on first use.
 *
 * - `config.browserMode === 'cdp'`: attaches to an already running browser at
 *   `config.cdpUrl` and reuses its first context (or creates one). The context is
 *   flagged as attached so closeContextIfNeeded() will not close it.
 * - Otherwise: clears stale profile locks, launches a persistent context on
 *   `config.userDataDir` (honouring `browserChannel` / `browserExecutablePath`) and
 *   restores cookies from the saved storage state.
 *
 * @returns {Promise<object>} The cached or newly created browser context.
 * @throws {Error} A descriptive error when attaching or launching fails; the underlying
 *   error is available as `error.cause` so its stack is not lost.
 */
async function getContext() {
  if (_context) return _context;
  if (config.browserMode === 'cdp') {
    try {
      _browser = await chromium.connectOverCDP(config.cdpUrl);
      _isAttachedBrowser = true;
      const contexts = _browser.contexts();
      _context = contexts[0] || await _browser.newContext();
      console.log(`[CDP] 已附着到手动浏览器: ${config.cdpUrl}`);
      return _context;
    } catch (error) {
      throw new Error(
        `无法通过 CDP 连接到手动浏览器(${config.cdpUrl})。请先手动启动 Chrome 并开启远程调试端口。原始错误: ${error.message}`,
        { cause: error },
      );
    }
  }
  _isAttachedBrowser = false;
  clearStaleBrowserProfileLocks();
  const launchOptions = {
    headless: config.headless,
    acceptDownloads: true,
    downloadsPath: config.downloadDir,
  };
  if (config.browserChannel) {
    launchOptions.channel = config.browserChannel;
  }
  if (config.browserExecutablePath) {
    launchOptions.executablePath = config.browserExecutablePath;
  }
  try {
    _context = await chromium.launchPersistentContext(config.userDataDir, launchOptions);
  } catch (error) {
    // Describe which browser binary was targeted to make the failure actionable.
    const browserHint = config.browserExecutablePath
      ? `executablePath=${config.browserExecutablePath}`
      : config.browserChannel
        ? `channel=${config.browserChannel}`
        : 'bundled-chromium';
    throw new Error(
      `浏览器启动失败(${browserHint})。请确认没有其他浏览器占用 .browser 目录,或删除 .browser 后重新执行 npm run login。原始错误: ${error.message}`,
      { cause: error },
    );
  }
  await restoreStorageState(_context);
  return _context;
}
/**
 * Picks the page (tab) that subsequent navigation should use.
 *
 * In CDP mode a tab whose URL contains `targetUrl` is preferred; otherwise, and in every
 * other mode, the first open page is used, or a new page is created when none exists.
 * In CDP mode the chosen tab is logged and brought to the front (failures ignored).
 *
 * @param {object} context - Browser context exposing `pages()` and `newPage()`.
 * @param {string} [targetUrl=''] - URL fragment used to find an existing tab in CDP mode.
 * @returns {Promise<object>} The selected page.
 */
async function resolveActivePage(context, targetUrl = '') {
  const openPages = context.pages();
  const attached = config.browserMode === 'cdp';
  const matchingTab = attached && targetUrl
    ? openPages.find((candidate) => candidate.url().includes(targetUrl))
    : null;
  const page = matchingTab || openPages[0] || await context.newPage();
  if (!attached) {
    return page;
  }
  const tabIndex = openPages.indexOf(page);
  console.log(`[CDP] 使用 tab=${tabIndex >= 0 ? tabIndex : 'new'} url=${page.url() || '(blank)'}`);
  await page.bringToFront().catch(() => null);
  return page;
}
/**
 * Re-adds cookies from the saved storage-state snapshot to a browser context.
 *
 * Does nothing when `config.storageStateFile` is missing or holds no cookies. Read,
 * parse and `addCookies` failures are logged as a warning and swallowed, so the run
 * continues with whatever the browser profile already contains.
 *
 * @param {object} context - Browser context exposing `addCookies()`.
 * @returns {Promise<void>}
 */
async function restoreStorageState(context) {
  const snapshotFile = config.storageStateFile;
  if (!fs.existsSync(snapshotFile)) {
    return;
  }
  try {
    const { cookies } = JSON.parse(fs.readFileSync(config.storageStateFile, 'utf-8'));
    const hasCookies = Array.isArray(cookies) && cookies.length > 0;
    if (!hasCookies) {
      return;
    }
    await context.addCookies(cookies);
    console.log(`[storageState] 已恢复 ${cookies.length} 个 cookie`);
  } catch (error) {
    console.warn(`[storageState] 恢复失败,继续使用 .browser profile: ${error.message}`);
  }
}
/**
 * Writes the context's storage state to `config.storageStateFile` and logs the path.
 *
 * @param {object} context - Browser context exposing `storageState({ path })`.
 * @returns {Promise<void>}
 */
async function saveStorageState(context) {
  const saveOptions = { path: config.storageStateFile };
  await context.storageState(saveOptions);
  console.log(`[storageState] 已保存登录态快照: ${config.storageStateFile}`);
}
/**
 * Loads the most recently modified bills checkpoint.
 *
 * Looks at `*.json` files in `<config.dataDir>/checkpoints/bills` and parses the one with
 * the greatest modification time.
 *
 * @returns {object|null} The parsed checkpoint, or null when the directory is missing,
 *   holds no `.json` file, or the newest file cannot be read/parsed or is not an object.
 */
function loadLatestBillsCheckpoint() {
  const checkpointDir = path.join(config.dataDir, 'checkpoints', 'bills');
  if (!fs.existsSync(checkpointDir)) {
    return null;
  }
  let newest = null;
  for (const fileName of fs.readdirSync(checkpointDir)) {
    if (!fileName.endsWith('.json')) {
      continue;
    }
    const filePath = path.join(checkpointDir, fileName);
    const { mtimeMs } = fs.statSync(filePath);
    // Strict comparison keeps the first-listed file on equal timestamps.
    if (newest === null || mtimeMs > newest.mtimeMs) {
      newest = { fileName, filePath, mtimeMs };
    }
  }
  if (newest === null) {
    return null;
  }
  try {
    const parsed = JSON.parse(fs.readFileSync(newest.filePath, 'utf-8'));
    return parsed && typeof parsed === 'object' ? parsed : null;
  } catch (error) {
    console.warn(`[账单检查点] 读取失败,忽略断点续爬: ${error.message}`);
    return null;
  }
}
/**
 * Loads the most recently modified orders checkpoint.
 *
 * Looks at `*.json` files in `<config.dataDir>/checkpoints/orders` and parses the one with
 * the greatest modification time.
 *
 * @returns {object|null} The parsed checkpoint, or null when the directory is missing,
 *   holds no `.json` file, or the newest file cannot be read/parsed or is not an object.
 */
function loadLatestOrdersCheckpoint() {
  const checkpointDir = path.join(config.dataDir, 'checkpoints', 'orders');
  if (!fs.existsSync(checkpointDir)) {
    return null;
  }
  const describe = (fileName) => {
    const filePath = path.join(checkpointDir, fileName);
    return { fileName, filePath, mtimeMs: fs.statSync(filePath).mtimeMs };
  };
  // Keep the earlier entry on ties so the result matches a stable descending sort.
  const pickNewer = (best, entry) => (best === null || entry.mtimeMs > best.mtimeMs ? entry : best);
  const newest = fs.readdirSync(checkpointDir)
    .filter((fileName) => fileName.endsWith('.json'))
    .map(describe)
    .reduce(pickNewer, null);
  if (newest === null) {
    return null;
  }
  try {
    const parsed = JSON.parse(fs.readFileSync(newest.filePath, 'utf-8'));
    if (parsed && typeof parsed === 'object') {
      return parsed;
    }
    return null;
  } catch (error) {
    console.warn(`[订单检查点] 读取失败,忽略断点续爬: ${error.message}`);
    return null;
  }
}
/**
 * Returns a new Date that lies `days` calendar days before `dateValue` (local time).
 * The input is not modified.
 *
 * @param {Date|string|number} dateValue - Anything accepted by the Date constructor.
 * @param {number} days - Number of days to go back.
 * @returns {Date}
 */
function subtractDays(dateValue, days) {
  const shifted = new Date(dateValue);
  const targetDay = shifted.getDate() - days;
  shifted.setDate(targetDay);
  return shifted;
}
/**
 * Returns a new Date that lies `months` calendar months before `dateValue` (local time).
 * The input is not modified.
 *
 * The day of month is clamped to the length of the target month, so going back one month
 * from March 31 yields the last day of February instead of overflowing into March.
 *
 * @param {Date|string|number} dateValue - Anything accepted by the Date constructor.
 * @param {number} months - Number of months to go back.
 * @returns {Date}
 */
function subtractMonths(dateValue, months) {
  const next = new Date(dateValue);
  const originalDay = next.getDate();
  // Move to the 1st first so changing the month can never overflow.
  next.setDate(1);
  next.setMonth(next.getMonth() - months);
  // Day 0 of the following month is the last day of the target month.
  const monthEnd = new Date(next);
  monthEnd.setMonth(monthEnd.getMonth() + 1, 0);
  next.setDate(Math.min(originalDay, monthEnd.getDate()));
  return next;
}
/**
 * Returns a pseudo-random integer in the inclusive range [min, max].
 *
 * @param {number} min - Lower bound (inclusive).
 * @param {number} max - Upper bound (inclusive).
 * @returns {number}
 */
function randomIntBetween(min, max) {
  const span = max - min + 1;
  const offset = Math.floor(Math.random() * span);
  return offset + min;
}
/**
 * Parses a database-style date-time value into a Date.
 *
 * Strings have their first space replaced by `T` (`YYYY-MM-DD HH:mm:ss` becomes
 * `YYYY-MM-DDTHH:mm:ss`) before being handed to the Date constructor. Date instances are
 * accepted as well and returned as a copy; previously they were stringified, mangled by
 * the space replacement and always came back as null.
 *
 * @param {string|Date|null|undefined} value - Value to parse.
 * @returns {Date|null} The parsed date, or null for empty, invalid or unparsable input.
 */
function parseDbDateTime(value) {
  if (value instanceof Date) {
    return Number.isNaN(value.getTime()) ? null : new Date(value.getTime());
  }
  const normalized = String(value || '').trim();
  if (!normalized) {
    return null;
  }
  const parsed = new Date(normalized.replace(' ', 'T'));
  return Number.isNaN(parsed.getTime()) ? null : parsed;
}
/**
 * Formats a Date as `<formatDate(date)> HH:mm:ss` using local time, with each time
 * component zero-padded to two digits.
 *
 * @param {Date} date - Date to format.
 * @returns {string}
 */
function formatDateTime(date) {
  const datePart = formatDate(date);
  const twoDigits = (component) => String(component).padStart(2, '0');
  const timePart = [date.getHours(), date.getMinutes(), date.getSeconds()]
    .map((component) => twoDigits(component))
    .join(':');
  return `${datePart} ${timePart}`;
}
/**
 * Tells whether a database-style date-time value falls on the same calendar day as `date`,
 * by comparing their `formatDate` renderings.
 *
 * @param {*} value - Value parsed with parseDbDateTime().
 * @param {Date} date - Day to compare against.
 * @returns {boolean} False when `value` cannot be parsed.
 */
function isSameDate(value, date) {
  const parsedValue = parseDbDateTime(value);
  return parsedValue ? formatDate(parsedValue) === formatDate(date) : false;
}
/**
 * Returns a new Date that is `minutes` minutes after `date` (negative values go back).
 * The input is not modified.
 *
 * @param {Date|string|number} date - Anything accepted by the Date constructor.
 * @param {number} minutes - Minutes to add.
 * @returns {Date}
 */
function addMinutes(date, minutes) {
  const shifted = new Date(date);
  const targetMinute = shifted.getMinutes() + minutes;
  shifted.setMinutes(targetMinute);
  return shifted;
}
/**
 * Builds a comparison key for an order record from the fields whose change matters:
 * status, paid amount, original price, order type, customer category and creation time.
 *
 * Each field is stringified (falsy values become an empty string), trimmed, and the
 * results are joined with `|`.
 *
 * @param {object} record - Order record.
 * @returns {string}
 */
function buildOrderFingerprint(record) {
  const trackedFields = [
    'orderStatus',
    'actualPaidCny',
    'orderOriginalPriceCny',
    'orderType',
    'customerCategory',
    'createdAt',
  ];
  const parts = [];
  for (const field of trackedFields) {
    parts.push(String(record[field] || '').trim());
  }
  return parts.join('|');
}
/**
 * Tells whether an order status is one of the final statuses listed in
 * `config.hotFinalStatuses`. The status is stringified and trimmed before comparison;
 * an empty status is never final.
 *
 * @param {*} status - Order status.
 * @returns {boolean}
 */
function isFinalOrderStatus(status) {
  const trimmedStatus = String(status || '').trim();
  return trimmedStatus !== ''
    && config.hotFinalStatuses.some((finalStatus) => finalStatus === trimmedStatus);
}
/**
 * Runs `job` unless a job with the same name is already in flight.
 *
 * The name is held in `runningJobs` for the duration of the job and released whether the
 * job resolves or rejects.
 *
 * @param {string} jobName - Lock name.
 * @param {Function} job - Function returning the job's result (or a promise of it).
 * @returns {Promise<*>} The job's result, or `{ skipped: true, reason: 'already_running', jobName }`
 *   when the lock is taken.
 */
async function runLockedJob(jobName, job) {
  const alreadyRunning = runningJobs.has(jobName);
  if (alreadyRunning) {
    console.log(`[任务锁] ${jobName} 已在运行,跳过本次执行`);
    return { skipped: true, reason: 'already_running', jobName };
  }
  runningJobs.add(jobName);
  try {
    const outcome = await job();
    return outcome;
  } finally {
    runningJobs.delete(jobName);
  }
}
/**
 * Builds the order query window covering only the current day: a single-window list
 * whose start and end are both today's `formatDate` value.
 *
 * @returns {Array<object>} Result of buildSingleDateWindow(today, today).
 */
function buildTodayOrderWindow() {
  const currentDay = formatDate(new Date());
  const todayOnly = buildSingleDateWindow(currentDay, currentDay);
  return todayOnly;
}
/**
 * Lists the order ids in `nextRecords` that are new or whose fingerprint differs from the
 * record with the same id in `previousRecords`.
 *
 * Ids are stringified and trimmed; records without an id are ignored. When an id occurs
 * several times in `previousRecords`, the last occurrence is the one compared against.
 *
 * @param {Array<object>|null|undefined} previousRecords - Earlier snapshot.
 * @param {Array<object>|null|undefined} nextRecords - Current snapshot.
 * @returns {string[]} Unique ids in order of first appearance in `nextRecords`.
 */
function computeChangedOrderIds(previousRecords, nextRecords) {
  const idOf = (record) => String(record.orderId || '').trim();
  const previousById = new Map();
  for (const earlier of previousRecords || []) {
    const id = idOf(earlier);
    if (id) {
      previousById.set(id, earlier);
    }
  }
  const changedIds = new Set();
  for (const current of nextRecords || []) {
    const id = idOf(current);
    if (!id) {
      continue;
    }
    const earlier = previousById.get(id);
    const isChanged = !earlier
      || buildOrderFingerprint(earlier) !== buildOrderFingerprint(current);
    if (isChanged) {
      changedIds.add(id);
    }
  }
  return [...changedIds];
}
/**
 * Chooses which orders should have their detail page fetched.
 *
 * An order with a valid id is selected when any of the following holds:
 * - its id is in `changedOrderIds`;
 * - its status is not final and no detail record exists for it;
 * - its status is not final and its detail was last synced at or before
 *   `now - config.hotOrderDetailRefreshMinutes` (or has no parsable sync time).
 *
 * @param {Array<object>|null|undefined} orderRecords - Current order records.
 * @param {Array<*>|null|undefined} changedOrderIds - Ids flagged as new or changed.
 * @param {Array<object>|null|undefined} detailRecords - Existing order-detail records.
 * @returns {string[]} Unique order ids in order of first appearance in `orderRecords`.
 */
function selectOrderDetailCandidates(orderRecords, changedOrderIds, detailRecords) {
  const cleanId = (raw) => String(raw || '').trim();
  const changedIds = new Set(
    (changedOrderIds || []).map((raw) => cleanId(raw)).filter(Boolean),
  );
  const detailsById = new Map();
  for (const detail of detailRecords || []) {
    const id = cleanId(detail.orderId);
    if (id) {
      detailsById.set(id, detail);
    }
  }
  // Details synced at or before this instant are due for a refresh.
  const refreshBefore = addMinutes(new Date(), -config.hotOrderDetailRefreshMinutes);

  const needsDetail = (order, orderId) => {
    if (changedIds.has(orderId)) {
      return true;
    }
    if (isFinalOrderStatus(String(order.orderStatus || '').trim())) {
      return false;
    }
    const detail = detailsById.get(orderId);
    if (!detail) {
      return true;
    }
    const lastSyncedAt = parseDbDateTime(detail.detailSyncedAt || detail.__detailSyncedAt || '');
    return !lastSyncedAt || lastSyncedAt <= refreshBefore;
  };

  const selected = new Set();
  for (const order of orderRecords || []) {
    const orderId = cleanId(order.orderId);
    if (!orderId || !isValidOrderId(orderId)) {
      continue;
    }
    if (needsDetail(order, orderId)) {
      selected.add(orderId);
    }
  }
  return [...selected];
}
/**
 * Classifies the rows of one scraped order page against the previously known orders.
 *
 * @param {Map<string, object>} previousOrderMap - Known orders keyed by trimmed order id.
 * @param {Array<object>} normalizedPageRows - Normalised rows of the page.
 * @returns {{stableCount: number, changedCount: number, newCount: number, todayRowCount: number}}
 *   `newCount` – rows with no known order; `stableCount` / `changedCount` – known orders whose
 *   fingerprint is equal / different; `todayRowCount` – rows whose `createdAt` is today.
 */
function summarizeHotPage(previousOrderMap, normalizedPageRows) {
  const tally = { stableCount: 0, changedCount: 0, newCount: 0, todayRowCount: 0 };
  for (const row of normalizedPageRows) {
    if (isSameDate(row.createdAt, new Date())) {
      tally.todayRowCount += 1;
    }
    const known = previousOrderMap.get(String(row.orderId || '').trim());
    if (!known) {
      tally.newCount += 1;
      continue;
    }
    const unchanged = buildOrderFingerprint(known) === buildOrderFingerprint(row);
    tally[unchanged ? 'stableCount' : 'changedCount'] += 1;
  }
  return tally;
}
/**
 * Wraps one date range in the list-of-windows shape; the range is exposed under both
 * the `windowStart`/`windowEnd` and the `start`/`end` key pairs.
 *
 * @param {string} startDate - Range start.
 * @param {string} endDate - Range end.
 * @returns {Array<{windowStart: string, windowEnd: string, start: string, end: string}>}
 *   A list containing exactly one window.
 */
function buildSingleDateWindow(startDate, endDate) {
  const onlyWindow = {
    windowStart: startDate,
    windowEnd: endDate,
    start: startDate,
    end: endDate,
  };
  return [onlyWindow];
}
/**
 * Saves diagnostic artifacts for a failure under `<config.errorDir>/<dataset|general>/`.
 *
 * Always writes `<stamp>.json` holding the metadata plus capture time, page URL and stack
 * (or the error message when no stack is available). When a page is supplied and not
 * closed, also tries to save a full-page screenshot as `<stamp>.png`; a screenshot failure
 * is logged and does not throw.
 *
 * @param {object|null} page - Page to screenshot; may be null.
 * @param {object} [metadata={}] - Context such as dataset, label, error, errorMessage.
 * @returns {Promise<{jsonPath: string, screenshotPath: string}>} `screenshotPath` is an
 *   empty string when no screenshot was saved.
 */
async function captureErrorArtifacts(page, metadata = {}) {
  const stamp = nowStamp();
  const artifactDir = path.join(config.errorDir, metadata.dataset || 'general');
  fs.mkdirSync(artifactDir, { recursive: true });
  const artifactPath = (extension) => path.join(artifactDir, `${stamp}.${extension}`);
  const jsonPath = artifactPath('json');
  const screenshotPath = artifactPath('png');

  const report = {
    ...metadata,
    capturedAt: new Date().toISOString(),
    pageUrl: page?.url?.() || '',
    stack: metadata.error?.stack || metadata.errorMessage || '',
  };
  fs.writeFileSync(jsonPath, JSON.stringify(report, null, 2));

  const withoutScreenshot = { jsonPath, screenshotPath: '' };
  if (!page || page.isClosed?.()) {
    return withoutScreenshot;
  }
  try {
    await page.screenshot({ path: screenshotPath, fullPage: true, timeout: 5000, animations: 'disabled' });
  } catch (error) {
    console.error('[错误截图] 保存失败:', error.message);
    return withoutScreenshot;
  }
  return { jsonPath, screenshotPath };
}
/**
 * Reports a runtime failure: saves error artifacts and sends an alert with them attached.
 *
 * Reporting is best-effort. Callers await this function inside their `catch` block right
 * before rethrowing the original error, so a failure while writing artifacts or sending
 * the alert must not replace that error; it is logged and swallowed instead.
 *
 * @param {Error} error - The failure being reported.
 * @param {object|null} page - Page used for the URL and screenshot; may be null.
 * @param {object} [metadata={}] - Context: `label`, `dataset`, `mode`.
 * @returns {Promise<void>} Never rejects.
 */
async function reportRuntimeError(error, page, metadata = {}) {
  try {
    const artifacts = await captureErrorArtifacts(page, {
      ...metadata,
      errorMessage: error.message,
      error,
    });
    const subject = `[APS同步异常] ${metadata.label || metadata.dataset || 'sync'} failed`;
    const text = [
      `时间: ${new Date().toISOString()}`,
      `任务: ${metadata.label || ''}`,
      `数据集: ${metadata.dataset || ''}`,
      `模式: ${metadata.mode || ''}`,
      `URL: ${page?.url?.() || ''}`,
      `错误: ${error.message}`,
      `JSON: ${artifacts.jsonPath}`,
      artifacts.screenshotPath ? `截图: ${artifacts.screenshotPath}` : '截图: 保存失败',
    ].join('\n');
    const attachments = [{ filename: path.basename(artifacts.jsonPath), path: artifacts.jsonPath }];
    if (artifacts.screenshotPath) {
      attachments.push({ filename: path.basename(artifacts.screenshotPath), path: artifacts.screenshotPath });
    }
    await sendRuntimeErrorAlert({ subject, text, attachments });
  } catch (reportError) {
    // Keep the original sync error as the one the caller rethrows.
    console.error('[错误上报] 失败:', reportError?.message || reportError);
  }
}
/**
 * Returns up to the first 500 characters of the page's visible body text.
 *
 * @param {object} page - Page exposing `evaluate()`.
 * @returns {Promise<string>} The preview, `'(空)'` when the body has no text, or
 *   `'(无法获取)'` when the evaluation rejects.
 */
async function getPageBodyPreview(page) {
  // Runs inside the page, so it may only reference browser globals.
  const readBodyText = () => document.body?.innerText?.substring(0, 500) || '(空)';
  const onEvaluateFailure = () => '(无法获取)';
  return page.evaluate(readBodyText).catch(onEvaluateFailure);
}
/**
 * Tells whether a URL points at a sign-in page: an `account.aliyun.com` or
 * `signin.aliyun.com` host, or a URL containing `login.htm` or `/#/signin`.
 *
 * @param {string} url - URL to inspect.
 * @returns {boolean}
 */
function isAuthUrl(url) {
  if (/account\.aliyun\.com|signin\.aliyun\.com/.test(url)) {
    return true;
  }
  if (url.includes('login.htm')) {
    return true;
  }
  return url.includes('/#/signin');
}
/**
 * Tells whether the text contains at least one entry of `AUTH_PAGE_KEYWORDS`.
 *
 * @param {string} text - Text to search.
 * @returns {boolean}
 */
function hasAuthKeywords(text) {
  for (const keyword of AUTH_PAGE_KEYWORDS) {
    if (text.includes(keyword)) {
      return true;
    }
  }
  return false;
}
/**
 * Inspects the page's current URL and body preview to decide whether it is showing a
 * sign-in page.
 *
 * @param {object} page - Page exposing `url()`.
 * @returns {Promise<{currentUrl: string, bodyText: string, isAuthPage: boolean}>}
 *   `isAuthPage` is true when the URL looks like an auth URL or the body text contains an
 *   auth keyword.
 */
async function detectAuthRedirect(page) {
  const currentUrl = page.url();
  const bodyText = await getPageBodyPreview(page);
  const isAuthPage = isAuthUrl(currentUrl) || hasAuthKeywords(bodyText);
  return { currentUrl, bodyText, isAuthPage };
}
/**
 * Navigates to a dataset's page and waits until it is ready.
 *
 * @param {object} page - Page exposing `goto()`.
 * @param {{url: string, heading: string}} dataset - Dataset descriptor.
 * @param {number} [timeout=120000] - Timeout forwarded to waitUntilReady().
 * @param {object} [options={}] - Options forwarded to waitUntilReady().
 * @returns {Promise<void>}
 */
async function ensureDatasetAccessible(page, dataset, timeout = 120000, options = {}) {
  const navigation = { waitUntil: 'domcontentloaded' };
  await page.goto(dataset.url, navigation);
  await waitUntilReady(page, dataset.heading, timeout, options);
}
/**
 * Interactive login flow: opens the customers page, waits (up to 10 minutes) for the
 * operator to sign in, verifies that the bills page is reachable too, then saves the
 * storage state and releases the browser context.
 *
 * SIGINT/SIGTERM during the flow release the context and exit with code 130.
 *
 * The browser context is obtained inside the guarded section, so a launch failure still
 * removes the signal handlers and restores the terminal (hotkey raw mode) instead of
 * leaving them behind.
 *
 * @returns {Promise<void>}
 * @throws When the browser cannot be started or the pages never become ready.
 */
export async function login() {
  const runtimeController = getRuntimeController();
  runtimeController.bind();
  const cleanupAndExit = async (signal) => {
    console.log(`[login] 收到 ${signal},正在保存登录态并关闭浏览器...`);
    try {
      await closeContextIfNeeded();
    } catch (error) {
      console.error(`[login] 关闭浏览器失败: ${error.message}`);
    } finally {
      // Exit even if closing failed, and never leave the terminal in raw mode.
      runtimeController.unbind();
      process.exit(130);
    }
  };
  const onSigint = () => {
    void cleanupAndExit('SIGINT');
  };
  const onSigterm = () => {
    void cleanupAndExit('SIGTERM');
  };
  process.once('SIGINT', onSigint);
  process.once('SIGTERM', onSigterm);
  try {
    const context = await getContext();
    const page = await resolveActivePage(context, '/detail/my_customer/~/customer/list');
    await page.goto(datasets.customers.url, { waitUntil: 'domcontentloaded' });
    console.log('请在打开的浏览器里完成阿里云伙伴中心登录。检测到进入“我的客户”和“账单查询”页面后,脚本会自动保存登录态并关闭浏览器。');
    await waitUntilReady(page, datasets.customers.heading, 10 * 60 * 1000, { allowInteractiveAuth: true });
    console.log('[login] 我的客户页验证通过,继续验证账单页登录态...');
    await ensureDatasetAccessible(page, datasets.bills, 60 * 1000, { allowInteractiveAuth: true });
    await sleep(1000);
    await saveStorageState(context);
    console.log('登录态已写入 .browser 目录,且已验证“我的客户”和“账单查询”页面可访问,后续可直接执行 npm run sync 或 npm run bills。');
  } finally {
    process.off('SIGINT', onSigint);
    process.off('SIGTERM', onSigterm);
    try {
      await closeContextIfNeeded();
    } finally {
      runtimeController.unbind();
    }
  }
}
/**
 * Runs every dataset sync in sequence and saves a run summary.
 *
 * With `config.fullSync` the customers and customer details are synced first and the
 * remaining datasets run in full mode; otherwise orders, bills and messages run
 * incrementally. Order details are fetched for every valid order id in the saved orders
 * state.
 *
 * The browser context is obtained inside the guarded section so that a launch failure is
 * reported like any other error and the cleanup (DB pool, hotkey unbinding) still runs.
 *
 * @param {object} [options={}] - `resume` (default false) enables checkpoint resume; the
 *   whole object is forwarded to syncOrderDetails().
 * @returns {Promise<object>} Summary with `startedAt`, `finishedAt` and per-dataset results.
 * @throws Rethrows any sync error after reporting it.
 */
export async function syncAll(options = {}) {
  const runtimeController = getRuntimeController();
  runtimeController.bind();
  let page = null;
  const { resume = false } = options;
  try {
    const context = await getContext();
    const summary = { startedAt: new Date().toISOString(), datasets: {} };
    page = await resolveActivePage(context, '/detail/my_customer/~/customer/list');
    if (config.fullSync) {
      summary.datasets.customers = await syncCustomers(page, { resume });
      summary.datasets.customerDetails = await syncCustomerDetails(page, { resume });
    }
    summary.datasets.orders = await syncOrders(page, { incremental: !config.fullSync, resume });
    // After syncOrders finishes, read the order ids from the freshly saved orders state.
    const latestOrders = loadCurrentState('orders', datasets.orders.uniqueKey);
    const orderIdsForDetail = collectValidOrderIds(latestOrders.records || []);
    summary.datasets.orderDetails = await syncOrderDetails(page, orderIdsForDetail, options);
    summary.datasets.bills = await syncBills(page, { incremental: !config.fullSync, resume });
    summary.datasets.messages = await syncMessages(page, { incremental: !config.fullSync, resume });
    summary.finishedAt = new Date().toISOString();
    const stamp = nowStamp();
    saveRunSummary(stamp, summary);
    return summary;
  } catch (error) {
    await reportRuntimeError(error, page, { label: 'syncAll', dataset: 'all', mode: config.fullSync ? 'full' : 'incremental' });
    throw error;
  } finally {
    if (config.closeBrowser) {
      await closeContextIfNeeded();
    } else {
      console.log('浏览器保持运行');
    }
    await closeDbPool();
    runtimeController.unbind();
  }
}
/**
 * Syncs only the bills dataset and saves a run summary.
 *
 * The browser context is obtained inside the guarded section so that a launch failure is
 * reported like any other error and the cleanup (DB pool, hotkey unbinding) still runs.
 *
 * @param {object} [options={}] - Forwarded to syncBills(). `keepBrowserOpen: true` keeps
 *   the browser regardless of `config.closeBrowser`; `incremental` only affects the mode
 *   label used when reporting an error.
 * @returns {Promise<object>} Summary with `startedAt`, `finishedAt` and `datasets.bills`.
 * @throws Rethrows any sync error after reporting it.
 */
export async function syncBillsOnly(options = {}) {
  const runtimeController = getRuntimeController();
  runtimeController.bind();
  let page = null;
  try {
    const context = await getContext();
    const summary = { startedAt: new Date().toISOString(), datasets: {} };
    page = await resolveActivePage(context, '/detail/bill/~/costCenter/bill');
    summary.datasets.bills = await syncBills(page, options);
    summary.finishedAt = new Date().toISOString();
    const stamp = nowStamp();
    saveRunSummary(stamp, summary);
    return summary;
  } catch (error) {
    await reportRuntimeError(error, page, { label: 'syncBillsOnly', dataset: 'bills', mode: options.incremental ? 'incremental' : 'full' });
    throw error;
  } finally {
    if (options.keepBrowserOpen === true) {
      console.log('浏览器保持运行schedule bills');
    } else if (config.closeBrowser) {
      await closeContextIfNeeded();
    } else {
      console.log('浏览器保持运行');
    }
    await closeDbPool();
    runtimeController.unbind();
  }
}
/**
 * Syncs orders and then their details, and saves a run summary.
 *
 * With `options.hot` only the detail candidates chosen by selectOrderDetailCandidates()
 * are fetched; otherwise details are fetched for every valid order id in the saved
 * orders state.
 *
 * The browser context is obtained inside the guarded section so that a launch failure is
 * reported like any other error and the cleanup (DB pool, hotkey unbinding) still runs.
 *
 * @param {object} [options={}] - Forwarded to syncOrders(); `hot` selects the detail
 *   strategy and `incremental` only affects the mode label used when reporting an error.
 * @returns {Promise<object>} Summary with `startedAt`, `finishedAt`, `datasets.orders` and
 *   `datasets.orderDetails`.
 * @throws Rethrows any sync error after reporting it.
 */
export async function syncOrdersOnly(options = {}) {
  const runtimeController = getRuntimeController();
  runtimeController.bind();
  let page = null;
  try {
    const context = await getContext();
    const summary = { startedAt: new Date().toISOString(), datasets: {} };
    page = await resolveActivePage(context, '/detail/order/~/costCenter/order');
    const orderSyncResult = await syncOrders(page, options);
    summary.datasets.orders = orderSyncResult;
    const latestOrders = loadCurrentState('orders', datasets.orders.uniqueKey);
    const orderDetailsState = loadCurrentState('orderDetails', datasets.orderDetails.uniqueKey);
    const orderIdsForDetail = options.hot
      ? selectOrderDetailCandidates(latestOrders.records || [], orderSyncResult.changedOrderIds || [], orderDetailsState.records || [])
      : collectValidOrderIds(latestOrders.records || []);
    summary.datasets.orderDetails = await syncOrderDetails(page, orderIdsForDetail);
    summary.finishedAt = new Date().toISOString();
    const stamp = nowStamp();
    saveRunSummary(stamp, summary);
    return summary;
  } catch (error) {
    await reportRuntimeError(error, page, { label: 'syncOrdersOnly', dataset: 'orders', mode: options.incremental ? 'incremental' : 'full' });
    throw error;
  } finally {
    if (config.closeBrowser) {
      await closeContextIfNeeded();
    } else {
      console.log('浏览器保持运行');
    }
    await closeDbPool();
    runtimeController.unbind();
  }
}
/**
 * Syncs only the messages dataset and saves a run summary.
 *
 * The browser context is obtained inside the guarded section so that a launch failure is
 * reported like any other error and the cleanup (DB pool, hotkey unbinding) still runs.
 *
 * @param {object} [options={}] - Forwarded to syncMessages(); `incremental` only affects
 *   the mode label used when reporting an error.
 * @returns {Promise<object>} Summary with `startedAt`, `finishedAt` and `datasets.messages`.
 * @throws Rethrows any sync error after reporting it.
 */
export async function syncMessagesOnly(options = {}) {
  const runtimeController = getRuntimeController();
  runtimeController.bind();
  let page = null;
  try {
    const context = await getContext();
    const summary = { startedAt: new Date().toISOString(), datasets: {} };
    page = await resolveActivePage(context, '/message');
    summary.datasets.messages = await syncMessages(page, options);
    summary.finishedAt = new Date().toISOString();
    const stamp = nowStamp();
    saveRunSummary(stamp, summary);
    return summary;
  } catch (error) {
    await reportRuntimeError(error, page, { label: 'syncMessagesOnly', dataset: 'messages', mode: options.incremental ? 'incremental' : 'full' });
    throw error;
  } finally {
    if (config.closeBrowser) {
      await closeContextIfNeeded();
    } else {
      console.log('浏览器保持运行');
    }
    await closeDbPool();
    runtimeController.unbind();
  }
}
/**
 * Registers the cron-driven sync jobs and a once-a-minute heartbeat log.
 *
 * Two schedules are registered:
 * - `config.cron`: in `hot` schedule mode runs an incremental bills sync; in any other
 *   mode runs syncAll() (`full`) or syncAllIncremental().
 * - `config.hotCron`: runs syncHot(), but only in `hot` schedule mode.
 *
 * Every scheduled run goes through runLockedJob('schedule-shared', ...). The full and
 * incremental runs used to bypass the lock, so a run outlasting its cron interval would
 * overlap the next tick and both would drive the same shared browser page. Overlapping
 * ticks are now skipped.
 *
 * Failures of a scheduled run are logged (and recorded as schedule events for the hot and
 * bills tracks) but never thrown out of the cron callback.
 *
 * @returns {Promise<void>} Resolves once the schedules are registered.
 */
export async function scheduleSync() {
  console.log(`定时任务已启动: bills=${config.cron}, hot=${config.hotCron} (${config.timezone})`);
  setInterval(() => {
    console.log(`[${new Date().toISOString()}] 定时守护存活中: bills=${config.cron}, hot=${config.hotCron}, mode=${config.scheduleMode}`);
  }, 60 * 1000);
  cron.schedule(
    config.cron,
    async () => {
      if (config.scheduleMode === 'hot') {
        return runLockedJob('schedule-shared', async () => {
          try {
            recordScheduleEvent({ track: 'bills', status: 'started', mode: 'bills-incremental' });
            console.log(`[${new Date().toISOString()}] 开始执行账单定时同步 mode=bills-incremental`);
            const summary = await syncBillsOnly({ incremental: true, keepBrowserOpen: true });
            recordScheduleEvent({ track: 'bills', status: 'completed', mode: 'bills-incremental', summary });
            console.log(`[${new Date().toISOString()}] 账单定时同步完成`, JSON.stringify(summary, null, 2));
          } catch (error) {
            recordScheduleEvent({ track: 'bills', status: 'failed', mode: 'bills-incremental', error: error.message });
            console.error(`[${new Date().toISOString()}] 账单定时同步失败`, error);
          }
        });
      }
      // Guard full/incremental runs with the same lock so ticks never overlap.
      return runLockedJob('schedule-shared', async () => {
        try {
          console.log(`[${new Date().toISOString()}] 开始执行同步 mode=${config.scheduleMode}`);
          const summary = config.scheduleMode === 'full'
            ? await syncAll()
            : await syncAllIncremental();
          console.log(`[${new Date().toISOString()}] 同步完成`, JSON.stringify(summary, null, 2));
        } catch (error) {
          console.error(`[${new Date().toISOString()}] 同步失败`, error);
        }
      });
    },
    { timezone: config.timezone },
  );
  cron.schedule(
    config.hotCron,
    async () => {
      if (config.scheduleMode !== 'hot') {
        return;
      }
      return runLockedJob('schedule-shared', async () => {
        try {
          recordScheduleEvent({ track: 'hot', status: 'started', mode: 'hot' });
          console.log(`[${new Date().toISOString()}] 开始执行高频同步 mode=hot`);
          const summary = await syncHot({ keepBrowserOpen: true });
          if (summary?.skipped) {
            recordScheduleEvent({ track: 'hot', status: 'skipped', mode: 'hot', reason: summary.reason || 'already_running' });
          } else {
            recordScheduleEvent({ track: 'hot', status: 'completed', mode: 'hot', summary });
          }
          console.log(`[${new Date().toISOString()}] 高频同步完成`, JSON.stringify(summary, null, 2));
        } catch (error) {
          recordScheduleEvent({ track: 'hot', status: 'failed', mode: 'hot', error: error.message });
          console.error(`[${new Date().toISOString()}] 高频同步失败`, error);
        }
      });
    },
    { timezone: config.timezone },
  );
}
/**
 * High-frequency sync, serialised through the `hot-sync` job lock.
 *
 * Steps: today's orders (hot, incremental) → details for the selected candidate orders →
 * incremental messages → hot check of the customers list. A run summary is saved at the
 * end.
 *
 * The browser context is obtained inside the guarded section so that a launch failure is
 * reported like any other error and the cleanup (DB pool, hotkey unbinding) still runs.
 *
 * @param {object} [options={}] - `resume: true` enables checkpoint resume;
 *   `keepBrowserOpen: true` keeps the browser regardless of `config.closeBrowser`. The
 *   options are also spread into the syncOrders() call.
 * @returns {Promise<object>} The run summary, or the lock's skip object when another hot
 *   sync is already running.
 * @throws Rethrows any sync error after reporting it.
 */
export async function syncHot(options = {}) {
  return runLockedJob('hot-sync', async () => {
    const runtimeController = getRuntimeController();
    runtimeController.bind();
    let page = null;
    try {
      const context = await getContext();
      const summary = { startedAt: new Date().toISOString(), mode: 'hot', datasets: {} };
      page = await resolveActivePage(context, '/detail/order/~/costCenter/order');
      const orderSyncResult = await syncOrders(page, { ...options, hot: true, incremental: true, resume: options.resume === true });
      summary.datasets.orders = orderSyncResult;
      const latestOrders = loadCurrentState('orders', datasets.orders.uniqueKey);
      const orderDetailsState = loadCurrentState('orderDetails', datasets.orderDetails.uniqueKey);
      const orderIdsForDetail = selectOrderDetailCandidates(
        latestOrders.records || [],
        orderSyncResult.changedOrderIds || [],
        orderDetailsState.records || [],
      );
      summary.datasets.orderDetails = await syncOrderDetails(page, orderIdsForDetail, { resume: options.resume === true });
      page = await resolveActivePage(context, '/message');
      summary.datasets.messages = await syncMessages(page, { incremental: true, resume: options.resume === true, hot: true });
      page = await resolveActivePage(context, '/detail/my_customer/~/customer/list');
      summary.datasets.customerHot = await syncOneCustomerHot(page, { resume: options.resume === true });
      summary.finishedAt = new Date().toISOString();
      const stamp = nowStamp();
      saveRunSummary(stamp, summary);
      return summary;
    } catch (error) {
      await reportRuntimeError(error, page, { label: 'syncHot', dataset: 'hot', mode: 'hot' });
      throw error;
    } finally {
      if (options.keepBrowserOpen === true) {
        console.log('浏览器保持运行schedule hot');
      } else if (config.closeBrowser) {
        await closeContextIfNeeded();
      } else {
        console.log('浏览器保持运行');
      }
      await closeDbPool();
      runtimeController.unbind();
    }
  });
}
/**
 * Hot-path customer check: looks at the first page of the customers list and, if the
 * first row with an account id is not yet known, stores it together with its detail.
 *
 * @param {object} page - Page to drive.
 * @param {object} [options={}] - Accepted for call-site symmetry; not read by this function.
 * @returns {Promise<object>} One of:
 *   - `{ skipped: true, reason: 'no_customer_found' }`
 *   - `{ skipped: true, reason: 'customer_exists', accountId }`
 *   - `{ skipped: false, inserted: true, accountId, detail }` where `detail` is `'ok'`,
 *     `'click_failed'` or `'extract_failed:<message>'`.
 */
async function syncOneCustomerHot(page, options = {}) {
  await runtimeCheckpoint('高频同步客户');
  const customersDataset = datasets.customers;
  await page.goto(customersDataset.url, { waitUntil: 'domcontentloaded' });
  await waitUntilReady(page, customersDataset.heading);
  await trySetPageSize(page, customersDataset.pageSize);

  const firstPage = await extractTable(page);
  const firstPageRecords = normalizeDatasetRecords(customersDataset, firstPage.rows || [], { pageNum: 1 });
  const target = firstPageRecords.find((candidate) => String(candidate.accountId || '').trim());
  if (!target) {
    return { skipped: true, reason: 'no_customer_found' };
  }

  const alreadyKnown = await customerExists(target.accountId);
  if (alreadyKnown) {
    console.log(`[客户高频] accountId=${target.accountId} 已存在,停止本轮客户抓取`);
    return { skipped: true, reason: 'customer_exists', accountId: target.accountId };
  }

  await upsertCustomers([target]);
  const insertedWith = (detail) => ({ skipped: false, inserted: true, accountId: target.accountId, detail });

  const opened = await clickCustomerDetailFromListWithRetry(page, target);
  if (!opened) {
    return insertedWith('click_failed');
  }
  try {
    // Wait until the detail view's marker text shows up in the page body.
    await page.waitForFunction(
      (text) => document.body && document.body.innerText.includes(text),
      '详情',
      { timeout: 15000 },
    );
    await sleep(1000);
    const rawDetail = await extractCustomerDetail(page);
    const detailInput = { ...rawDetail, accountId: target.accountId, loginName: target.loginName };
    const normalizedDetail = normalizeDatasetRecords(datasets.customerDetails, [detailInput], { accountId: target.accountId });
    await upsertCustomerDetails(normalizedDetail);
    return insertedWith('ok');
  } catch (error) {
    return insertedWith(`extract_failed:${error.message}`);
  }
}
/**
 * Incremental sync of orders, order details, bills and messages with checkpoint resume
 * enabled, followed by saving a run summary.
 *
 * The browser context is obtained inside the guarded section so that a launch failure is
 * reported like any other error and the cleanup (DB pool, hotkey unbinding) still runs.
 *
 * @returns {Promise<object>} Summary with `startedAt`, `finishedAt`, `mode: 'incremental'`
 *   and per-dataset results.
 * @throws Rethrows any sync error after reporting it.
 */
export async function syncAllIncremental() {
  const runtimeController = getRuntimeController();
  runtimeController.bind();
  let page = null;
  try {
    const context = await getContext();
    const summary = { startedAt: new Date().toISOString(), mode: 'incremental', datasets: {} };
    page = await resolveActivePage(context, '/detail/order/~/costCenter/order');
    const orderSyncResult = await syncOrders(page, { incremental: true, resume: true });
    summary.datasets.orders = orderSyncResult;
    const latestOrders = loadCurrentState('orders', datasets.orders.uniqueKey);
    const orderIdsForDetail = collectValidOrderIds(latestOrders.records || []);
    summary.datasets.orderDetails = await syncOrderDetails(page, orderIdsForDetail, { resume: true });
    summary.datasets.bills = await syncBills(page, { incremental: true, resume: true });
    summary.datasets.messages = await syncMessages(page, { incremental: true, resume: true });
    summary.finishedAt = new Date().toISOString();
    const stamp = nowStamp();
    saveRunSummary(stamp, summary);
    return summary;
  } catch (error) {
    await reportRuntimeError(error, page, { label: 'syncAllIncremental', dataset: 'incremental', mode: 'incremental' });
    throw error;
  } finally {
    if (config.closeBrowser) {
      await closeContextIfNeeded();
    } else {
      console.log('浏览器保持运行');
    }
    await closeDbPool();
    runtimeController.unbind();
  }
}
/**
 * Scrapes the paged customers list, upserts the rows into the database when one is
 * configured, and persists the de-duplicated result.
 *
 * With `options.resume` the latest customers checkpoint supplies the already collected
 * records and the page number to continue from.
 *
 * @param {object} page - Page to drive.
 * @param {object} [options={}] - `resume` (default false) enables checkpoint resume.
 * @returns {Promise<*>} Whatever persistDataset() returns for the customers dataset.
 */
async function syncCustomers(page, options = {}) {
await runtimeCheckpoint('同步客户');
const dataset = datasets.customers;
const { resume = false } = options;
await page.goto(dataset.url, { waitUntil: 'domcontentloaded' });
await waitUntilReady(page, dataset.heading);
await trySetPageSize(page, dataset.pageSize);
const resumeCheckpoint = resume ? loadLatestCustomersCheckpoint() : null;
// A missing or unparsable checkpoint page number falls back to 0, i.e. "not resuming".
let resumeFromPage = Number.parseInt(String(resumeCheckpoint?.pageNum || 0), 10) || 0;
let shouldContinueScrape = true;
let allNormalizedRecords = Array.isArray(resumeCheckpoint?.records) ? resumeCheckpoint.records : [];
if (resumeFromPage > 0) {
console.log(`[客户续爬] 从 checkpoint 恢复: page=${resumeFromPage}, records=${allNormalizedRecords.length}`);
const moved = await moveCustomersToResumeStart(page, resumeFromPage);
if (!moved) {
// The logged message treats a failed move as "checkpoint was already on the last page".
console.log('[客户续爬] checkpoint 已在最后一页,无需继续抓取');
shouldContinueScrape = false;
}
}
let records = [];
if (shouldContinueScrape) {
records = await scrapePagedTable(page, dataset, {}, {
// Per page: accumulate the normalised rows, upsert them, and save a checkpoint.
onPage: async ({ pageNum, pageRows }) => {
const normalizedPageRows = normalizeDatasetRecords(dataset, pageRows, {});
allNormalizedRecords.push(...normalizedPageRows);
if (hasDbConfig()) {
await upsertCustomers(normalizedPageRows);
}
await saveCustomersCheckpoint(dataset, pageNum, allNormalizedRecords);
},
skipInitialPage: resumeFromPage > 0,
});
}
// Fresh (non-resumed) run: replace the per-page accumulation with a normalisation of the
// full scrape result, and upsert the de-duplicated set once more.
if (resumeFromPage === 0) {
allNormalizedRecords = normalizeDatasetRecords(dataset, records, {});
if (hasDbConfig()) {
await upsertCustomers(dedupeByHash(allNormalizedRecords));
}
}
return persistDataset(dataset, dedupeByHash(allNormalizedRecords), {});
}
/**
 * Visits the detail view of every customer found in the saved customers state, collects
 * the extracted details, and persists the de-duplicated result.
 *
 * For each target the function jumps to the list page the customer is on, clicks into the
 * detail view, extracts it, saves a checkpoint, upserts into the database when one is
 * configured, and navigates back to the list. Targets whose page cannot be reached, whose
 * row cannot be clicked, or whose detail view times out are skipped with a warning.
 *
 * @param {object} page - Page to drive.
 * @param {object} [options={}] - `resume` enables resuming from the latest checkpoint.
 * @returns {Promise<*>} Whatever persistDataset() returns for the customer-details dataset.
 */
async function syncCustomerDetails(page, options = {}) {
await runtimeCheckpoint('同步客户详情');
const dataset = datasets.customerDetails;
const customersState = loadCurrentState('customers', datasets.customers.uniqueKey);
const customerTargets = collectCustomerDetailTargets(customersState.records || []);
const resumeCheckpoint = options.resume ? loadLatestCustomerDetailsCheckpoint() : null;
if (customerTargets.length === 0) {
console.log('[客户详情] 本地无有效客户定位信息,跳过');
return persistDataset(dataset, [], {});
}
console.log(`[客户详情] 共 ${customerTargets.length} 个客户需要获取详情`);
const allDetails = Array.isArray(resumeCheckpoint?.records) ? resumeCheckpoint.records : [];
// List page currently shown; 0 means "unknown", which forces a jump for the first target.
let currentListPage = 0;
// NOTE(review): currentGroupKey is assigned below but never read in this function.
let currentGroupKey = '';
// Index into customerTargets to start from; 0 when there is no usable checkpoint.
let startIndex = Number.parseInt(String(resumeCheckpoint?.currentIndex || 0), 10) || 0;
if (startIndex > 0) {
console.log(`[客户详情续爬] 从 checkpoint 恢复: index=${startIndex}, records=${allDetails.length}`);
}
await page.goto(datasets.customers.url, { waitUntil: 'domcontentloaded' });
await waitUntilReady(page, datasets.customers.heading);
await trySetPageSize(page, datasets.customers.pageSize);
for (let index = startIndex; index < customerTargets.length; index += 1) {
await runtimeCheckpoint(`客户详情 ${index + 1}/${customerTargets.length}`);
const target = customerTargets[index];
console.log(`[客户详情] ${index + 1}/${customerTargets.length} accountId=${target.accountId} page=${target.pageNum}`);
// Random 1-3 second pause before handling each customer.
const pauseMs = randomIntBetween(1000, 3000);
console.log(`[客户详情] 随机等待 ${pauseMs}ms 后继续`);
await sleep(pauseMs);
// Only jump when the target sits on a different list page than the one shown.
if (target.pageNum > 0 && currentListPage !== target.pageNum) {
const reached = await jumpToCustomerPage(page, target.pageNum);
if (!reached) {
console.warn(`[客户详情] 无法跳到第 ${target.pageNum} 页,跳过 ${target.accountId}`);
continue;
}
currentListPage = target.pageNum;
}
const nextGroupKey = `${target.pageNum}`;
if (currentGroupKey !== nextGroupKey) {
currentGroupKey = nextGroupKey;
}
const clicked = await clickCustomerDetailFromListWithRetry(page, target);
if (!clicked) {
console.warn(`[客户详情] 列表中未找到 accountId=${target.accountId},跳过`);
continue;
}
try {
// Wait until the detail view's marker text shows up in the page body.
await page.waitForFunction(
(text) => document.body && document.body.innerText.includes(text),
'详情',
{ timeout: 15000 },
);
await sleep(1000);
} catch {
console.warn(`[客户详情] ${target.accountId} 详情页加载超时,跳过`);
// Best-effort return to the list; failures here are ignored.
await page.goBack({ waitUntil: 'domcontentloaded' }).catch(() => null);
await waitForStableCustomerList(page).catch(() => null);
continue;
}
const detail = await extractCustomerDetail(page);
allDetails.push({ ...detail, __context: { accountId: target.accountId } });
// The checkpoint stores index + 1, i.e. the next target to process on resume.
await saveCustomerDetailsCheckpoint(dataset, index + 1, allDetails);
if (hasDbConfig()) {
const normalizedDetail = normalizeDatasetRecords(dataset, [{ ...detail, __context: { accountId: target.accountId } }], {});
await upsertCustomerDetails(normalizedDetail);
}
// Best-effort return to the list; failures here are ignored.
await page.goBack({ waitUntil: 'domcontentloaded' }).catch(() => null);
await waitForStableCustomerList(page).catch(() => null);
currentListPage = target.pageNum;
}
return persistDataset(dataset, dedupeByHash(allDetails), {});
}
/**
 * Scrape the order list window by window and persist the normalized result.
 *
 * Window selection: `hot` uses buildTodayOrderWindow(); a non-incremental run uses
 * monthly windows starting at config.orderStartDate; otherwise
 * buildIncrementalOrderWindows() decides. With `resume`, windows before the
 * checkpointed window are dropped and scraping continues after the checkpointed page.
 *
 * @param {object} page - Browser page driven through the goto / date-range / query helpers.
 * @param {{ incremental?: boolean, resume?: boolean, hot?: boolean }} [options]
 * @returns {Promise<object>} The persistNormalizedDataset() result plus `changedOrderIds`
 *   and `hot` (the hot-mode statistics object, or undefined when `hot` is false).
 */
async function syncOrders(page, options = {}) {
await runtimeCheckpoint('同步订单');
const dataset = datasets.orders;
const { incremental = false, resume = false, hot = false } = options;
let windows;
if (hot) {
windows = buildTodayOrderWindow();
} else if (!incremental) {
windows = buildMonthlyDateWindows(config.orderStartDate);
} else {
windows = await buildIncrementalOrderWindows();
}
const resumeCheckpoint = resume ? loadLatestOrdersCheckpoint() : null;
if (resumeCheckpoint?.windowStart) {
// Drop every window that precedes the checkpointed one; if no window matches, keep them all.
const resumeIndex = windows.findIndex((window) => window.start === resumeCheckpoint.windowStart && window.end === resumeCheckpoint.windowEnd);
if (resumeIndex >= 0) {
windows = windows.slice(resumeIndex);
console.log(`[订单续爬] 从 checkpoint 恢复: ${resumeCheckpoint.windowStart} ~ ${resumeCheckpoint.windowEnd}, page=${resumeCheckpoint.pageNum || 1}, records=${(resumeCheckpoint.records || []).length}`);
}
}
// Snapshot of the previously persisted orders, used for hot-mode stability checks
// and for computing changedOrderIds at the end.
const previousState = loadCurrentState(dataset.name, dataset.uniqueKey);
const previousRecords = previousState.records || [];
const previousOrderMap = new Map(previousRecords.map((record) => [String(record.orderId || '').trim(), record]));
const allNormalizedRecords = [];
// Counters reported back to the caller when running in hot mode.
const hotStats = {
pagesScanned: 0,
stableRows: 0,
newRows: 0,
changedRows: 0,
stoppedEarly: false,
stopReason: '',
};
for (const window of windows) {
await runtimeCheckpoint(`订单窗口 ${window.start} ~ ${window.end}`);
// Each window starts from a fresh page load, then applies the date range and page size.
await page.goto(dataset.url, { waitUntil: 'domcontentloaded' });
await waitUntilReady(page, dataset.heading);
await setDateRange(page, window.start, window.end);
await clickQuery(page);
await trySetPageSize(page, dataset.pageSize);
let windowNormalizedRecords = [];
let resumeFromPage = 0;
let shouldContinueScrape = true;
// Only the window that matches the checkpoint is resumed; others are scraped from page one.
if (resumeCheckpoint?.windowStart === window.start && resumeCheckpoint?.windowEnd === window.end) {
windowNormalizedRecords = Array.isArray(resumeCheckpoint.records) ? resumeCheckpoint.records : [];
resumeFromPage = Number.parseInt(String(resumeCheckpoint.pageNum || 0), 10) || 0;
if (resumeFromPage > 0) {
const moved = await moveOrdersToResumeStart(page, resumeFromPage);
if (!moved) {
console.log(`[订单续爬] checkpoint 已在最后一页,无需继续抓取 window=${window.start}~${window.end}`);
shouldContinueScrape = false;
}
}
}
let records = [];
// Hot-mode streak counters; both reset whenever a page breaks the streak.
let stableRowsInARow = 0;
let stablePagesInARow = 0;
if (shouldContinueScrape) {
records = await scrapePagedTable(page, dataset, window, {
// Per page: normalize, upsert (when a DB is configured), then write a checkpoint.
onPage: async ({ pageNum, pageRows }) => {
const normalizedPageRows = normalizeDatasetRecords(dataset, pageRows, window);
windowNormalizedRecords.push(...normalizedPageRows);
if (hasDbConfig()) {
await upsertOrders(normalizedPageRows);
}
await saveOrdersCheckpoint(dataset, window, pageNum, windowNormalizedRecords);
if (hot) {
hotStats.pagesScanned += 1;
const pageSummary = summarizeHotPage(previousOrderMap, normalizedPageRows);
hotStats.stableRows += pageSummary.stableCount;
hotStats.newRows += pageSummary.newCount;
hotStats.changedRows += pageSummary.changedCount;
// A page with no new and no changed rows extends the stable-page streak.
if (pageSummary.changedCount === 0 && pageSummary.newCount === 0) {
stablePagesInARow += 1;
} else {
stablePagesInARow = 0;
}
// A non-empty page whose rows are all stable extends the stable-row streak.
if (pageSummary.stableCount === normalizedPageRows.length && normalizedPageRows.length > 0) {
stableRowsInARow += normalizedPageRows.length;
} else {
stableRowsInARow = 0;
}
}
},
// Hot mode stops early on a page cap or on either stability threshold.
shouldStop: hot
? async ({ pageNum }) => {
if (pageNum >= config.hotOrderMaxPagesPerRun) {
hotStats.stoppedEarly = true;
hotStats.stopReason = `max_pages:${config.hotOrderMaxPagesPerRun}`;
return true;
}
if (stableRowsInARow >= config.hotOrderStableThreshold) {
hotStats.stoppedEarly = true;
hotStats.stopReason = `stable_rows:${stableRowsInARow}`;
return true;
}
if (stablePagesInARow >= config.hotOrderStablePageThreshold) {
hotStats.stoppedEarly = true;
hotStats.stopReason = `stable_pages:${stablePagesInARow}`;
return true;
}
return false;
}
: undefined,
});
}
// Non-resumed window: rebuild the window's records from the full scrape result and
// upsert the de-duplicated set once more.
if (resumeFromPage === 0) {
windowNormalizedRecords = normalizeDatasetRecords(dataset, records, window);
if (hasDbConfig()) {
await upsertOrders(dedupeByHash(windowNormalizedRecords));
}
}
allNormalizedRecords.push(...windowNormalizedRecords);
}
const normalizedRecords = dedupeByHash(allNormalizedRecords);
const changedOrderIds = computeChangedOrderIds(previousRecords, normalizedRecords);
const persisted = persistNormalizedDataset(dataset, normalizedRecords);
return {
...persisted,
changedOrderIds,
hot: hot ? hotStats : undefined,
};
}
/**
 * Decide which date windows an incremental order sync should query.
 *
 * Precedence:
 *   1. `config.incrementalOrderStartDate` (validated) -> monthly windows from that date.
 *   2. No database configured -> a single window covering yesterday.
 *   3. Database watermark missing or unparsable -> a single window covering today.
 *   4. Otherwise -> one window from (watermark - overlap days) up to today.
 *
 * @returns {Promise<*>} Whatever the window builder helpers return.
 */
async function buildIncrementalOrderWindows() {
  const explicitStart = normalizeConfiguredDate(config.incrementalOrderStartDate);
  if (explicitStart) {
    const monthlyWindows = buildMonthlyDateWindows(explicitStart);
    console.log(`[增量模式] 订单从指定日期开始查询: ${explicitStart}`);
    return monthlyWindows;
  }

  if (!hasDbConfig()) {
    const previousDay = new Date();
    previousDay.setDate(previousDay.getDate() - 1);
    const previousDayText = formatDate(previousDay);
    console.log(`[增量模式] 未配置数据库,订单仅查询: ${previousDayText}`);
    return buildSingleDateWindow(previousDayText, previousDayText);
  }

  const dbLatestOrderTime = await getLatestOrderTimeFromDb();
  const now = new Date();
  const watermark = parseDbDateTime(dbLatestOrderTime);

  if (watermark) {
    // Re-query a few days before the watermark so late updates are picked up.
    const windowStart = formatDate(subtractDays(watermark, config.orderIncrementalOverlapDays));
    const windowEnd = formatDate(now);
    console.log(`[增量模式] 订单窗口: ${windowStart} ~ ${windowEnd} (db_last=${dbLatestOrderTime}, overlap=${config.orderIncrementalOverlapDays}d)`);
    return buildSingleDateWindow(windowStart, windowEnd);
  }

  const todayText = formatDate(now);
  console.log(`[增量模式] 数据库无订单水位,订单仅查询当天: ${todayText}`);
  return buildSingleDateWindow(todayText, todayText);
}
/**
 * Validate the configured incremental-order start date.
 *
 * @param {*} value - Raw configured value; falsy values mean "not configured".
 * @returns {string} The trimmed `YYYY-MM-DD` string, or '' when nothing is configured.
 * @throws {Error} When the value is not shaped like `YYYY-MM-DD`, or when it does not
 *   name a real calendar day (for example 2024-02-31 or 2023-02-29).
 */
function normalizeConfiguredDate(value) {
  const normalized = String(value || '').trim();
  if (!normalized) {
    return '';
  }
  const match = /^(\d{4})-(\d{2})-(\d{2})$/.exec(normalized);
  if (!match) {
    throw new Error(`ALIYUN_APS_INCREMENTAL_ORDER_START_DATE 格式无效: ${normalized},期望 YYYY-MM-DD`);
  }
  const [year, month, day] = match.slice(1).map(Number);
  // Round-trip through a Date: some engines silently roll an impossible day such as
  // "02-31" over into the next month instead of yielding an Invalid Date, so checking
  // for NaN alone is not enough. setUTCFullYear avoids the 0-99 -> 19xx year mapping.
  const probe = new Date(0);
  probe.setUTCFullYear(year, month - 1, day);
  const isRealCalendarDay = probe.getUTCFullYear() === year
    && probe.getUTCMonth() === month - 1
    && probe.getUTCDate() === day;
  if (!isRealCalendarDay) {
    throw new Error(`ALIYUN_APS_INCREMENTAL_ORDER_START_DATE 不是有效日期: ${normalized}`);
  }
  return normalized;
}
/**
 * Scrape the bill list month by month and persist the normalized result.
 *
 * A non-incremental run covers every month from config.billStartMonth. An incremental
 * run starts at the month of the value returned by getLatestBillConsumptionDate() (or
 * the current date when that is null) and afterwards keeps only records accepted by
 * isAfterLatestConsumptionDate(). With `resume`, months before the checkpointed month
 * are dropped and scraping continues after the checkpointed page.
 *
 * @param {object} page - Browser page driven through the goto / month / query helpers.
 * @param {{ resume?: boolean, incremental?: boolean }} [options]
 * @returns {Promise<{ stamp: *, stats: * }>} Whatever persistNormalizedDataset returns.
 */
async function syncBills(page, options = {}) {
await runtimeCheckpoint('同步账单');
const dataset = datasets.bills;
const { resume = false, incremental = false } = options;
let months;
// Stays null for full runs; a non-null value switches on the consumption-date filter below.
let latestConsumptionDate = null;
if (!incremental) {
months = buildMonthList(config.billStartMonth);
} else {
latestConsumptionDate = await getLatestBillConsumptionDate();
const startDate = latestConsumptionDate ? latestConsumptionDate.slice(0, 10) : formatDate(new Date());
const endDate = formatDate(new Date());
// Months are compared as "YYYY-MM" strings.
const startMonth = startDate.slice(0, 7);
const endMonth = endDate.slice(0, 7);
months = buildMonthList(startMonth).filter((month) => month <= endMonth);
console.log(`[增量模式] 账单窗口: ${startDate} ~ ${endDate}${latestConsumptionDate ? `, 数据库最新消费时间: ${latestConsumptionDate}` : ''}`);
}
const resumeCheckpoint = resume ? loadLatestBillsCheckpoint() : null;
if (resumeCheckpoint?.month) {
// Drop months that precede the checkpointed one; if it is not in the list, keep them all.
const resumeIndex = months.indexOf(resumeCheckpoint.month);
if (resumeIndex >= 0) {
months = months.slice(resumeIndex);
console.log(`[账单续爬] 从 checkpoint 恢复: month=${resumeCheckpoint.month}, page=${resumeCheckpoint.pageNum || 1}, records=${(resumeCheckpoint.records || []).length}`);
}
}
const allNormalizedRecords = [];
for (const month of months) {
await runtimeCheckpoint(`账单月份 ${month}`);
// Each month starts from a fresh page load, then applies the month filter and page size.
await page.goto(dataset.url, { waitUntil: 'domcontentloaded' });
await waitUntilReady(page, dataset.heading);
await setMonthValue(page, month);
await clickQuery(page);
await trySetPageSize(page, dataset.pageSize);
let monthNormalizedRecords = [];
let resumeFromPage = 0;
let shouldContinueScrape = true;
// Only the checkpointed month is resumed; every other month is scraped from page one.
if (resumeCheckpoint?.month === month) {
monthNormalizedRecords = Array.isArray(resumeCheckpoint.records) ? resumeCheckpoint.records : [];
resumeFromPage = Number.parseInt(String(resumeCheckpoint.pageNum || 0), 10) || 0;
if (resumeFromPage > 0) {
const moved = await moveBillsToResumeStart(page, resumeFromPage);
if (!moved) {
console.log(`[账单续爬] checkpoint 已在最后一页,无需继续抓取 month=${month}`);
shouldContinueScrape = false;
}
}
}
let rawRecords = [];
if (shouldContinueScrape) {
rawRecords = await scrapePagedTable(page, dataset, { month }, {
// Per page: normalize, upsert (when a DB is configured), then write a checkpoint.
onPage: async ({ pageNum, pageRows }) => {
const normalizedPageRows = normalizeDatasetRecords(dataset, pageRows, { month });
monthNormalizedRecords.push(...normalizedPageRows);
if (hasDbConfig()) {
await upsertBills(normalizedPageRows);
}
// In incremental mode the checkpoint stores only records that pass the date filter.
let checkpointRecords = monthNormalizedRecords;
if (latestConsumptionDate) {
checkpointRecords = monthNormalizedRecords.filter((record) => isAfterLatestConsumptionDate(record, latestConsumptionDate));
}
await saveBillsCheckpoint(dataset, month, pageNum, checkpointRecords);
},
});
}
// Non-resumed month: rebuild the month's records from the full scrape result.
if (resumeFromPage === 0) {
monthNormalizedRecords = normalizeDatasetRecords(dataset, rawRecords, { month });
}
if (latestConsumptionDate) {
const before = monthNormalizedRecords.length;
monthNormalizedRecords = monthNormalizedRecords.filter((record) => isAfterLatestConsumptionDate(record, latestConsumptionDate));
console.log(`[增量模式] 账单按消费时间过滤: ${before} -> ${monthNormalizedRecords.length}`);
}
allNormalizedRecords.push(...monthNormalizedRecords);
}
return persistNormalizedDataset(dataset, dedupeByHash(allNormalizedRecords));
}
/**
 * Scrape the message list page by page, enrich each row with its detail, and persist
 * the normalized result merged with the previously stored state.
 *
 * Modes:
 *   - full (neither `incremental` nor `hot`): keep only messages newer than three
 *     months ago and stop once a whole page is older than that watermark.
 *   - `hot`: stop on a page cap, on a page that is entirely present in the database,
 *     or on a page that is entirely older than (latest DB time - overlap minutes).
 *   - `incremental` / `hot` with a database: afterwards filter by the DB watermark.
 *   - `resume`: continue after the page stored in the latest messages checkpoint.
 *
 * @param {object} page - Browser page driven through the goto / paging helpers.
 * @param {{ incremental?: boolean, resume?: boolean, hot?: boolean }} [options]
 * @returns {Promise<{ stamp: *, stats: * }>} Whatever persistNormalizedDataset returns.
 */
async function syncMessages(page, options = {}) {
  await runtimeCheckpoint('同步消息');
  const dataset = datasets.messages;
  const { incremental = false, resume = false, hot = false } = options;
  const fullSyncWatermark = !incremental && !hot ? subtractMonths(new Date(), 3) : null;
  await page.goto(dataset.url, { waitUntil: 'domcontentloaded' });
  await waitUntilReady(page, dataset.heading);
  await trySetPageSize(page, dataset.pageSize);

  const resumeCheckpoint = resume ? loadLatestMessagesCheckpoint() : null;
  const resumeFromPage = Number.parseInt(String(resumeCheckpoint?.pageNum || 0), 10) || 0;
  let shouldContinueScrape = true;
  // When resuming, keep appending to the records stored in the checkpoint.
  let allNormalizedRecords = Array.isArray(resumeCheckpoint?.records) ? resumeCheckpoint.records : [];

  // True when every row of a non-empty page is older than the full-sync watermark.
  const shouldStopForFullSyncPage = (pageRows) => {
    if (!fullSyncWatermark) {
      return false;
    }
    const normalizedPageRows = normalizeDatasetRecords(dataset, pageRows, {});
    const pageTimeStats = getMessagePageTimeStats(normalizedPageRows);
    console.log(`[全量模式] 当前页时间范围: parsed=${pageTimeStats.parsed}/${pageTimeStats.total}, earliest=${pageTimeStats.earliest || 'N/A'}, latest=${pageTimeStats.latest || 'N/A'}, watermark=${formatDateTime(fullSyncWatermark)}`);
    return normalizedPageRows.length > 0
      && normalizedPageRows.every((record) => !isAfterLatestMessageTime(record, fullSyncWatermark));
  };

  if (resumeFromPage > 0) {
    console.log(`[消息续爬] 从 checkpoint 恢复: page=${resumeFromPage}, records=${allNormalizedRecords.length}`);
    // Jumps to the checkpointed page and then advances once, so on success the
    // browser is already showing the first page that has NOT been scraped yet.
    const moved = await moveMessagesToResumeStart(page, resumeFromPage);
    if (!moved) {
      console.log('[消息续爬] checkpoint 已在最后一页,无需继续抓取');
      shouldContinueScrape = false;
    } else if (fullSyncWatermark) {
      await waitForTableRows(page);
      const resumedPageData = await extractTable(page);
      if (shouldStopForFullSyncPage(resumedPageData.rows)) {
        console.log(`[全量模式] 当前续爬页已超出近三个月范围,停止继续抓取: page=${resumeFromPage + 1}, watermark=${formatDateTime(fullSyncWatermark)}`);
        shouldContinueScrape = false;
      }
    }
  }

  let records = [];
  let hotWatermark = null;
  let stopByExistingPage = false;
  // Results of the most recent onPage call. scrapePagedTable invokes onPage before
  // shouldStop for the same page, so shouldStop can reuse them instead of enriching
  // (and re-visiting the details of) the same rows a second time.
  let lastDetailedPageRows = [];
  let lastNormalizedPageRows = [];

  if (hot && hasDbConfig()) {
    const latestMessageTime = await getLatestMessageTimeFromDb();
    const latest = parseDbDateTime(latestMessageTime);
    if (latest) {
      hotWatermark = addMinutes(latest, -config.hotMessageOverlapMinutes);
    }
  }

  if (shouldContinueScrape) {
    // NOTE: `skipInitialPage` is intentionally not passed. After a successful resume
    // move the current page is the first unsaved one; skipping it would drop a page.
    records = await scrapePagedTable(page, dataset, {}, {
      onPage: async ({ pageNum, pageRows }) => {
        const detailedPageRows = await enrichMessageRowsWithDetails(page, pageRows, pageNum);
        const normalizedPageRows = normalizeDatasetRecords(dataset, detailedPageRows, {});
        lastDetailedPageRows = detailedPageRows;
        lastNormalizedPageRows = normalizedPageRows;
        const filteredPageRows = fullSyncWatermark
          ? normalizedPageRows.filter((record) => isAfterLatestMessageTime(record, fullSyncWatermark))
          : normalizedPageRows;
        let pageRowsToPersist = filteredPageRows;
        if (hasDbConfig() && filteredPageRows.length > 0) {
          // A row counts as already stored when its msgId is known, or, for rows
          // without msgId, when its title/time/order fingerprint is known.
          const pageMsgIds = filteredPageRows.map((record) => record.msgId).filter(Boolean);
          const existingIds = await getExistingMessageIds(pageMsgIds);
          const fingerprintCandidates = filteredPageRows
            .map((record) => String(record.receivedAt || record.gmtModified || record.gmtCreated || '').trim())
            .filter(Boolean);
          const existingFingerprintRows = await getExistingMessageFingerprints(fingerprintCandidates);
          const existingFingerprints = new Set(
            existingFingerprintRows.map((row) => buildMessageFingerprint({ title: row.title, receivedAt: row.received_at, orderNo: row.order_no })),
          );
          stopByExistingPage = filteredPageRows.length > 0 && filteredPageRows.every((record) => {
            if (record.msgId) {
              return existingIds.has(record.msgId);
            }
            return existingFingerprints.has(buildMessageFingerprint(record));
          });
          pageRowsToPersist = filteredPageRows.filter((record) => {
            if (record.msgId) {
              return !existingIds.has(record.msgId);
            }
            return !existingFingerprints.has(buildMessageFingerprint(record));
          });
          if (pageRowsToPersist.length > 0) {
            await upsertMessages(pageRowsToPersist);
          }
        }
        allNormalizedRecords.push(...pageRowsToPersist);
        await saveMessagesCheckpoint(dataset, pageNum, allNormalizedRecords);
      },
      shouldStop: hot
        ? async ({ pageNum }) => {
          if (pageNum >= config.hotMessageMaxPagesPerRun) {
            return true;
          }
          if (stopByExistingPage) {
            return true;
          }
          if (!hotWatermark) {
            return false;
          }
          return lastNormalizedPageRows.length > 0
            && lastNormalizedPageRows.every((record) => !isAfterLatestMessageTime(record, hotWatermark));
        }
        : fullSyncWatermark
          ? async () => stopByExistingPage || shouldStopForFullSyncPage(lastDetailedPageRows)
          : undefined,
    });
  }

  // Non-resumed run: rebuild the record list from the full scrape result.
  if (resumeFromPage === 0) {
    allNormalizedRecords = normalizeDatasetRecords(dataset, records, {});
  }

  if (fullSyncWatermark) {
    const before = allNormalizedRecords.length;
    allNormalizedRecords = allNormalizedRecords.filter((record) => isAfterLatestMessageTime(record, fullSyncWatermark));
    console.log(`[全量模式] 消息仅保留近三个月: ${before} -> ${allNormalizedRecords.length} (watermark=${formatDateTime(fullSyncWatermark)})`);
  }

  if ((incremental || hot) && hasDbConfig()) {
    try {
      const latestMessageTime = await getLatestMessageTimeFromDb();
      if (latestMessageTime) {
        const latest = parseDbDateTime(latestMessageTime);
        if (latest) {
          // Hot mode overlaps by minutes, incremental mode by days.
          const watermark = hot
            ? addMinutes(latest, -config.hotMessageOverlapMinutes)
            : subtractDays(latest, config.messageIncrementalOverlapDays);
          const before = allNormalizedRecords.length;
          allNormalizedRecords = allNormalizedRecords.filter((record) => isAfterLatestMessageTime(record, watermark));
          console.log(`[${hot ? '高频模式' : '增量模式'}] 消息按时间过滤: ${before} -> ${allNormalizedRecords.length} (db_last=${latestMessageTime}, overlap=${hot ? `${config.hotMessageOverlapMinutes}m` : `${config.messageIncrementalOverlapDays}d`})`);
        }
      }
    } catch (error) {
      // A failed watermark lookup only disables this filter; the sync itself continues.
      console.error(`[${hot ? '高频模式' : '增量模式'}] 查询数据库最新消息时间失败:`, error.message);
    }
  }

  // Messages are merged into the previous state rather than replacing it.
  const previousState = loadCurrentState(dataset.name, dataset.uniqueKey);
  return persistNormalizedDataset(dataset, dedupeByHash([...(previousState.records || []), ...allNormalizedRecords]));
}
/**
 * Write a messages checkpoint named `page-<pageNum>` containing the de-duplicated
 * records collected so far.
 *
 * @param {{ name: string }} dataset - Dataset descriptor; only `name` is read.
 * @param {number} pageNum - Page that has just been processed.
 * @param {Array<object>} normalizedRecords - Records accumulated so far.
 * @returns {Promise<void>}
 */
async function saveMessagesCheckpoint(dataset, pageNum, normalizedRecords) {
  const uniqueRecords = dedupeByHash(normalizedRecords);
  const recordCount = uniqueRecords.length;
  const payload = {
    pageNum,
    savedAt: new Date().toISOString(),
    stats: { total: recordCount },
    records: uniqueRecords,
  };
  saveCheckpoint(dataset.name, `page-${pageNum}`, payload);
  console.log(`[消息检查点] 已落盘: page=${pageNum}, records=${recordCount}`);
}
/**
 * Load the most recently modified `*.json` checkpoint under
 * `<dataDir>/checkpoints/messages`.
 *
 * @returns {object|null} The parsed checkpoint, or null when the directory or any
 *   JSON file is missing, when the newest file cannot be read or parsed, or when its
 *   content is not an object.
 */
function loadLatestMessagesCheckpoint() {
  const checkpointDir = path.join(config.dataDir, 'checkpoints', 'messages');
  if (!fs.existsSync(checkpointDir)) {
    return null;
  }

  // Single pass keeping the file with the greatest mtime; on ties the earlier
  // directory entry wins.
  let newest = null;
  for (const fileName of fs.readdirSync(checkpointDir)) {
    if (!fileName.endsWith('.json')) {
      continue;
    }
    const filePath = path.join(checkpointDir, fileName);
    const { mtimeMs } = fs.statSync(filePath);
    if (newest === null || mtimeMs > newest.mtimeMs) {
      newest = { fileName, filePath, mtimeMs };
    }
  }
  if (newest === null) {
    return null;
  }

  try {
    const parsed = JSON.parse(fs.readFileSync(newest.filePath, 'utf-8'));
    return parsed && typeof parsed === 'object' ? parsed : null;
  } catch (error) {
    console.warn(`[消息检查点] 读取失败,忽略断点续爬: ${error.message}`);
    return null;
  }
}
/**
 * Position the message list on the first page after the checkpointed one.
 *
 * @param {object} page - Browser page showing the message list.
 * @param {number} resumeFromPage - Page number stored in the checkpoint; values <= 0
 *   mean there is nothing to resume.
 * @returns {Promise<boolean>} True when scraping should continue from the current
 *   page, false when there is no page after the checkpointed one.
 * @throws {Error} When the checkpointed page itself cannot be reached.
 */
async function moveMessagesToResumeStart(page, resumeFromPage) {
  if (resumeFromPage <= 0) {
    return true;
  }
  const reached = await jumpToPage(page, resumeFromPage);
  if (!reached) {
    throw new Error(`消息续爬失败:无法定位到 checkpoint 页码 ${resumeFromPage}`);
  }
  const outcome = await gotoNextPage(page);
  // gotoNextPage is consumed elsewhere in this file as `{ moved, reason }`. Returning
  // that object as-is would always be truthy, so callers testing `!moved` could never
  // detect the last page. Reduce it to a real boolean (a plain boolean is accepted too).
  return typeof outcome === 'object' && outcome !== null
    ? Boolean(outcome.moved)
    : Boolean(outcome);
}
/**
 * Write the bills checkpoint for one month under the name `<month>-latest`.
 *
 * @param {{ name: string }} dataset - Dataset descriptor; only `name` is read.
 * @param {string} month - Month being scraped.
 * @param {number} pageNum - Page that has just been processed.
 * @param {Array<object>} normalizedRecords - Records accumulated for this month.
 * @returns {Promise<void>}
 */
async function saveBillsCheckpoint(dataset, month, pageNum, normalizedRecords) {
  const uniqueRecords = dedupeByHash(normalizedRecords);
  const recordCount = uniqueRecords.length;
  const payload = {
    month,
    pageNum,
    savedAt: new Date().toISOString(),
    stats: { total: recordCount },
    records: uniqueRecords,
  };
  saveCheckpoint(dataset.name, `${month}-latest`, payload);
  console.log(`[账单检查点] 已落盘: month=${month}, page=${pageNum}, records=${recordCount}`);
}
/**
 * Write the orders checkpoint for one date window.
 *
 * The checkpoint name is `<start>_<end>` with every character other than digits,
 * `_` and `-` replaced by `-`.
 *
 * @param {{ name: string }} dataset - Dataset descriptor; only `name` is read.
 * @param {{ start: string, end: string }} window - Date window being scraped.
 * @param {number} pageNum - Page that has just been processed.
 * @param {Array<object>} normalizedRecords - Records accumulated for this window.
 * @returns {Promise<void>}
 */
async function saveOrdersCheckpoint(dataset, window, pageNum, normalizedRecords) {
  const { start: windowStart, end: windowEnd } = window;
  const uniqueRecords = dedupeByHash(normalizedRecords);
  const recordCount = uniqueRecords.length;
  const rawName = `${windowStart}_${windowEnd}`;
  const safeName = rawName.replace(/[^0-9_-]/g, '-');
  const payload = {
    windowStart,
    windowEnd,
    pageNum,
    savedAt: new Date().toISOString(),
    stats: { total: recordCount },
    records: uniqueRecords,
  };
  saveCheckpoint(dataset.name, safeName, payload);
  console.log(`[订单检查点] 已落盘: ${windowStart} ~ ${windowEnd}, page=${pageNum}, records=${recordCount}`);
}
/**
 * Write a customers checkpoint named `page-<pageNum>` containing the de-duplicated
 * records collected so far.
 *
 * @param {{ name: string }} dataset - Dataset descriptor; only `name` is read.
 * @param {number} pageNum - Page that has just been processed.
 * @param {Array<object>} normalizedRecords - Records accumulated so far.
 * @returns {Promise<void>}
 */
async function saveCustomersCheckpoint(dataset, pageNum, normalizedRecords) {
  const uniqueRecords = dedupeByHash(normalizedRecords);
  const recordCount = uniqueRecords.length;
  const checkpointName = `page-${pageNum}`;
  const payload = {
    pageNum,
    savedAt: new Date().toISOString(),
    stats: { total: recordCount },
    records: uniqueRecords,
  };
  saveCheckpoint(dataset.name, checkpointName, payload);
  console.log(`[客户检查点] 已落盘: page=${pageNum}, records=${recordCount}`);
}
/**
 * Load the most recently modified `*.json` checkpoint under
 * `<dataDir>/checkpoints/customers`.
 *
 * @returns {object|null} The parsed checkpoint, or null when the directory or any
 *   JSON file is missing, when the newest file cannot be read or parsed, or when its
 *   content is not an object.
 */
function loadLatestCustomersCheckpoint() {
  const checkpointDir = path.join(config.dataDir, 'checkpoints', 'customers');
  if (!fs.existsSync(checkpointDir)) {
    return null;
  }

  // Single pass keeping the file with the greatest mtime; on ties the earlier
  // directory entry wins.
  let newest = null;
  for (const fileName of fs.readdirSync(checkpointDir)) {
    if (!fileName.endsWith('.json')) {
      continue;
    }
    const filePath = path.join(checkpointDir, fileName);
    const { mtimeMs } = fs.statSync(filePath);
    if (newest === null || mtimeMs > newest.mtimeMs) {
      newest = { fileName, filePath, mtimeMs };
    }
  }
  if (newest === null) {
    return null;
  }

  try {
    const parsed = JSON.parse(fs.readFileSync(newest.filePath, 'utf-8'));
    return parsed && typeof parsed === 'object' ? parsed : null;
  } catch (error) {
    console.warn(`[客户检查点] 读取失败,忽略断点续爬: ${error.message}`);
    return null;
  }
}
/**
 * Write the customer-details checkpoint (always named `latest`).
 *
 * @param {{ name: string }} dataset - Dataset descriptor; only `name` is read.
 * @param {number} currentIndex - Index stored in the checkpoint for resuming.
 * @param {Array<object>} records - Detail records accumulated so far.
 * @returns {Promise<void>}
 */
async function saveCustomerDetailsCheckpoint(dataset, currentIndex, records) {
  const uniqueRecords = dedupeByHash(records);
  const recordCount = uniqueRecords.length;
  const payload = {
    currentIndex,
    savedAt: new Date().toISOString(),
    stats: { total: recordCount },
    records: uniqueRecords,
  };
  saveCheckpoint(dataset.name, 'latest', payload);
  console.log(`[客户详情检查点] 已落盘: index=${currentIndex}, records=${recordCount}`);
}
/**
 * Load the customer-details checkpoint from `<dataDir>/checkpoints/customerDetails`.
 *
 * `latest.json` is preferred. When it does not exist, the most recently modified
 * `*.json` file in the directory is used instead. Any read or parse failure, on
 * either path, is logged and reported as "no checkpoint" so a damaged file never
 * aborts the sync.
 *
 * @returns {*} The parsed checkpoint content, or null when nothing usable exists.
 */
function loadLatestCustomerDetailsCheckpoint() {
  const checkpointDir = path.join(config.dataDir, 'checkpoints', 'customerDetails');
  if (!fs.existsSync(checkpointDir)) {
    return null;
  }
  try {
    let checkpointFile = path.join(checkpointDir, 'latest.json');
    if (!fs.existsSync(checkpointFile)) {
      // Fallback for directories without latest.json: take the newest JSON file.
      const fallbacks = fs.readdirSync(checkpointDir)
        .filter((fileName) => fileName.endsWith('.json'))
        .map((fileName) => {
          const filePath = path.join(checkpointDir, fileName);
          return { filePath, mtimeMs: fs.statSync(filePath).mtimeMs };
        })
        .sort((a, b) => b.mtimeMs - a.mtimeMs);
      if (fallbacks.length === 0) {
        return null;
      }
      checkpointFile = fallbacks[0].filePath;
    }
    return JSON.parse(fs.readFileSync(checkpointFile, 'utf-8'));
  } catch (error) {
    console.warn(`[客户详情检查点] 读取失败: ${error.message}`);
    return null;
  }
}
/**
 * Write the order-details checkpoint (always named `latest`).
 *
 * @param {{ name: string }} dataset - Dataset descriptor; only `name` is read.
 * @param {number} currentIndex - Index stored in the checkpoint for resuming.
 * @param {Array<object>} records - Detail records accumulated so far.
 * @returns {Promise<void>}
 */
async function saveOrderDetailsCheckpoint(dataset, currentIndex, records) {
  const uniqueRecords = dedupeByHash(records);
  const recordCount = uniqueRecords.length;
  const payload = {
    currentIndex,
    savedAt: new Date().toISOString(),
    stats: { total: recordCount },
    records: uniqueRecords,
  };
  saveCheckpoint(dataset.name, 'latest', payload);
  console.log(`[订单详情检查点] 已落盘: index=${currentIndex}, records=${recordCount}`);
}
/**
 * Load the order-details checkpoint from `<dataDir>/checkpoints/orderDetails`.
 *
 * `latest.json` is preferred. When it does not exist, the most recently modified
 * `*.json` file in the directory is used instead. Any read or parse failure, on
 * either path, is logged and reported as "no checkpoint" so a damaged file never
 * aborts the sync.
 *
 * @returns {*} The parsed checkpoint content, or null when nothing usable exists.
 */
function loadLatestOrderDetailsCheckpoint() {
  const checkpointDir = path.join(config.dataDir, 'checkpoints', 'orderDetails');
  if (!fs.existsSync(checkpointDir)) {
    return null;
  }
  try {
    let checkpointFile = path.join(checkpointDir, 'latest.json');
    if (!fs.existsSync(checkpointFile)) {
      // Fallback for directories without latest.json: take the newest JSON file.
      const fallbacks = fs.readdirSync(checkpointDir)
        .filter((fileName) => fileName.endsWith('.json'))
        .map((fileName) => {
          const filePath = path.join(checkpointDir, fileName);
          return { filePath, mtimeMs: fs.statSync(filePath).mtimeMs };
        })
        .sort((a, b) => b.mtimeMs - a.mtimeMs);
      if (fallbacks.length === 0) {
        return null;
      }
      checkpointFile = fallbacks[0].filePath;
    }
    return JSON.parse(fs.readFileSync(checkpointFile, 'utf-8'));
  } catch (error) {
    console.warn(`[订单详情检查点] 读取失败: ${error.message}`);
    return null;
  }
}
/**
 * Normalize raw records with the dataset's `normalize` function and attach a hash.
 *
 * @param {{ normalize: Function }} dataset - Dataset descriptor providing `normalize`.
 * @param {Array<object>} records - Raw records; a record's own `__context` (when
 *   truthy) takes precedence over the shared `context`.
 * @param {*} context - Fallback context handed to `dataset.normalize`.
 * @returns {Array<*>} One `withHash(...)` result per input record, in input order.
 */
function normalizeDatasetRecords(dataset, records, context) {
  const toHashedRecord = (rawRecord) => {
    const effectiveContext = rawRecord.__context || context;
    const normalizedRecord = dataset.normalize(rawRecord, effectiveContext);
    return withHash(normalizedRecord);
  };
  return records.map((rawRecord) => toHashedRecord(rawRecord));
}
/**
 * Position the bill list on the first page after the checkpointed one.
 *
 * @param {object} page - Browser page showing the bill list.
 * @param {number} resumeFromPage - Page number stored in the checkpoint; values <= 0
 *   mean there is nothing to resume.
 * @returns {Promise<boolean>} True when scraping should continue from the current
 *   page, false when there is no page after the checkpointed one.
 * @throws {Error} When the checkpointed page itself cannot be reached.
 */
async function moveBillsToResumeStart(page, resumeFromPage) {
  if (resumeFromPage <= 0) {
    return true;
  }
  const reached = await jumpToPage(page, resumeFromPage);
  if (!reached) {
    throw new Error(`账单续爬失败:无法定位到 checkpoint 页码 ${resumeFromPage}`);
  }
  const outcome = await gotoNextPage(page);
  // gotoNextPage is consumed elsewhere in this file as `{ moved, reason }`. Returning
  // that object as-is would always be truthy, so callers testing `!moved` could never
  // detect the last page. Reduce it to a real boolean (a plain boolean is accepted too).
  return typeof outcome === 'object' && outcome !== null
    ? Boolean(outcome.moved)
    : Boolean(outcome);
}
/**
 * Position the order list on the first page after the checkpointed one.
 *
 * @param {object} page - Browser page showing the order list.
 * @param {number} resumeFromPage - Page number stored in the checkpoint; values <= 0
 *   mean there is nothing to resume.
 * @returns {Promise<boolean>} True when scraping should continue from the current
 *   page, false when there is no page after the checkpointed one.
 * @throws {Error} When the checkpointed page itself cannot be reached.
 */
async function moveOrdersToResumeStart(page, resumeFromPage) {
  if (resumeFromPage <= 0) {
    return true;
  }
  const reached = await jumpToPage(page, resumeFromPage);
  if (!reached) {
    throw new Error(`订单续爬失败:无法定位到 checkpoint 页码 ${resumeFromPage}`);
  }
  const outcome = await gotoNextPage(page);
  // gotoNextPage is consumed elsewhere in this file as `{ moved, reason }`. Returning
  // that object as-is would always be truthy, so callers testing `!moved` could never
  // detect the last page. Reduce it to a real boolean (a plain boolean is accepted too).
  return typeof outcome === 'object' && outcome !== null
    ? Boolean(outcome.moved)
    : Boolean(outcome);
}
/**
 * Position the customer list on the first page after the checkpointed one.
 *
 * @param {object} page - Browser page showing the customer list.
 * @param {number} resumeFromPage - Page number stored in the checkpoint; values <= 0
 *   mean there is nothing to resume.
 * @returns {Promise<boolean>} True when scraping should continue from the current
 *   page, false when there is no page after the checkpointed one.
 * @throws {Error} When the checkpointed page itself cannot be reached.
 */
async function moveCustomersToResumeStart(page, resumeFromPage) {
  if (resumeFromPage <= 0) {
    return true;
  }
  const reached = await jumpToPage(page, resumeFromPage);
  if (!reached) {
    throw new Error(`客户续爬失败:无法定位到 checkpoint 页码 ${resumeFromPage}`);
  }
  const outcome = await gotoNextPage(page);
  // gotoNextPage is consumed elsewhere in this file as `{ moved, reason }`. Returning
  // that object as-is would always be truthy, so callers testing `!moved` could never
  // detect the last page. Reduce it to a real boolean (a plain boolean is accepted too).
  return typeof outcome === 'object' && outcome !== null
    ? Boolean(outcome.moved)
    : Boolean(outcome);
}
/**
 * Read the bill watermark used by incremental bill sync.
 *
 * @returns {Promise<string|null>} A `YYYY-MM-DD` string: the latest consumption time
 *   from the database minus `config.billIncrementalOverlapDays`, or the first ten
 *   characters of the raw value when it cannot be parsed. Null when no database is
 *   configured, when the stored value is empty or does not start with a date, or
 *   when the lookup fails.
 */
async function getLatestBillConsumptionDate() {
  if (!hasDbConfig()) {
    console.warn('[增量模式] 未配置数据库连接,无法读取账单水位,回退到当前日期');
    return null;
  }

  try {
    const dbLatest = await getLatestBillConsumptionTimeFromDb();
    const startsWithDate = Boolean(dbLatest) && /^\d{4}-\d{2}-\d{2}/.test(dbLatest);
    if (!startsWithDate) {
      return null;
    }
    const latestMoment = parseDbDateTime(dbLatest);
    return latestMoment
      ? formatDate(subtractDays(latestMoment, config.billIncrementalOverlapDays))
      : dbLatest.slice(0, 10);
  } catch (lookupError) {
    console.error('[增量模式] 查询数据库最新账单消费时间失败:', lookupError.message);
    return null;
  }
}
/**
 * Tell whether a bill record's consumption day is strictly later than the watermark.
 *
 * The day is taken from `record['消费时间']` or `record.consumeDate` (first ten
 * characters after trimming) and compared as a `YYYY-MM-DD` string.
 *
 * @param {object} record - Bill record.
 * @param {string} latestConsumptionDate - Watermark day as `YYYY-MM-DD`.
 * @returns {boolean} False when the record has no well-formed day or is not later.
 */
function isAfterLatestConsumptionDate(record, latestConsumptionDate) {
  const rawValue = record['消费时间'] || record.consumeDate || '';
  const dayText = String(rawValue).trim().slice(0, 10);
  const looksLikeDate = /^\d{4}-\d{2}-\d{2}$/.test(dayText);
  return looksLikeDate && dayText > latestConsumptionDate;
}
/**
 * Tell whether a message record is at or after the given watermark.
 *
 * The timestamp is the first truthy value among the modified-time fields followed by
 * the created-time fields (Chinese column names and `gmtModified` / `gmtCreated`).
 *
 * @param {object} record - Message record.
 * @param {Date} watermarkDate - Lower bound (inclusive).
 * @returns {boolean} False when the record has no parsable timestamp or is older.
 */
function isAfterLatestMessageTime(record, watermarkDate) {
  const timeFields = ['消息修改时间', '修改时间', 'gmtModified', '消息创建时间', '创建时间', 'gmtCreated'];
  let rawValue = '';
  for (const field of timeFields) {
    if (record[field]) {
      rawValue = record[field];
      break;
    }
  }
  const timeText = String(rawValue).trim();
  if (timeText === '') {
    return false;
  }
  const messageTime = parseDbDateTime(timeText);
  return messageTime ? messageTime >= watermarkDate : false;
}
/**
 * Extract a message record's timestamp.
 *
 * The first truthy value among the modified-time fields followed by the created-time
 * fields (Chinese column names and `gmtModified` / `gmtCreated`) is trimmed and
 * handed to `parseDbDateTime`.
 *
 * @param {object} record - Message record.
 * @returns {*} The `parseDbDateTime` result, or null when no timestamp text exists.
 */
function extractMessageTime(record) {
  const timeFields = ['消息修改时间', '修改时间', 'gmtModified', '消息创建时间', '创建时间', 'gmtCreated'];
  let rawValue = '';
  for (const field of timeFields) {
    if (record[field]) {
      rawValue = record[field];
      break;
    }
  }
  const timeText = String(rawValue).trim();
  return timeText === '' ? null : parseDbDateTime(timeText);
}
/**
 * Summarize the timestamps found on one page of message records.
 *
 * @param {Array<object>} records - Message records of a page.
 * @returns {{ total: number, parsed: number, earliest: string, latest: string }}
 *   `total` is the record count, `parsed` the number of records with a timestamp;
 *   `earliest` / `latest` are formatted with `formatDateTime`, or '' when none parsed.
 */
function getMessagePageTimeStats(records) {
  const timestamps = [];
  for (const record of records) {
    const messageTime = extractMessageTime(record);
    if (messageTime) {
      timestamps.push(messageTime);
    }
  }
  timestamps.sort((left, right) => left.getTime() - right.getTime());

  const hasTimestamps = timestamps.length > 0;
  return {
    total: records.length,
    parsed: timestamps.length,
    earliest: hasTimestamps ? formatDateTime(timestamps[0]) : '',
    latest: hasTimestamps ? formatDateTime(timestamps[timestamps.length - 1]) : '',
  };
}
/**
 * Build the `title__receivedAt__orderNo` fingerprint used to recognise a message
 * that has no msgId.
 *
 * Each part is the first truthy value of its candidate fields, stringified and
 * trimmed; a missing part becomes the empty string.
 *
 * @param {object} record - Message record or a `{ title, receivedAt, orderNo }` object.
 * @returns {string} The three parts joined with `__`.
 */
function buildMessageFingerprint(record) {
  const firstText = (...candidates) => String(candidates.find(Boolean) || '').trim();
  const parts = [
    firstText(record.title, record.detailTitle, record.column_1),
    firstText(record.receivedAt, record.gmtModified, record.gmtCreated, record.column_2),
    firstText(record.orderNo, record.refundOrderNo),
  ];
  return parts.join('__');
}
/**
 * Crawl the detail view of orders found in the locally stored order list.
 *
 * Targets come from the persisted `orders` state and `cachedOrderIds` via
 * collectOrderDetailTargets(). For each target the order list is restored to the
 * target's date window and page, the detail entry is opened, the detail is extracted,
 * a checkpoint is written and, when a database is configured, the record is upserted
 * right away. Targets whose page, row or detail view cannot be reached are logged and
 * skipped.
 *
 * @param {object} page - Browser page driven via goto / waitForFunction / goBack.
 * @param {Array<*>} cachedOrderIds - Passed through to collectOrderDetailTargets()
 *   (an empty array is used when falsy).
 * @param {{ resume?: boolean }} [options] - With `resume`, continue from the latest
 *   order-details checkpoint (its `currentIndex` and `records`).
 * @returns {Promise<{ stamp: *, stats: * }>} Whatever persistDataset returns.
 */
async function syncOrderDetails(page, cachedOrderIds, options = {}) {
await runtimeCheckpoint('同步订单详情');
const dataset = datasets.orderDetails;
const resumeCheckpoint = options.resume ? loadLatestOrderDetailsCheckpoint() : null;
const ordersState = loadCurrentState('orders', datasets.orders.uniqueKey);
const orderTargets = collectOrderDetailTargets(ordersState.records || [], cachedOrderIds || []);
if (orderTargets.length === 0) {
console.log('[订单详情] 本地无订单定位数据,跳过');
return persistDataset(dataset, [], {});
}
console.log(`[订单详情] 共 ${orderTargets.length} 个订单需要获取详情`);
// When resuming, keep appending to the records stored in the checkpoint.
const allDetails = Array.isArray(resumeCheckpoint?.records) ? resumeCheckpoint.records : [];
const startIndex = Number.parseInt(String(resumeCheckpoint?.currentIndex || 0), 10) || 0;
if (startIndex > 0) {
console.log(`[订单详情续爬] 从 checkpoint 恢复: index=${startIndex}, records=${allDetails.length}`);
}
// Navigation state: the list page, date window and window+page group assumed to be on screen.
let currentListPage = 0;
let currentWindowKey = '';
let currentGroupKey = '';
await page.goto(datasets.orders.url, { waitUntil: 'domcontentloaded' });
await waitUntilReady(page, datasets.orders.heading);
await trySetPageSize(page, datasets.orders.pageSize);
for (let index = startIndex; index < orderTargets.length; index += 1) {
await runtimeCheckpoint(`订单详情 ${index + 1}/${orderTargets.length}`);
const target = orderTargets[index];
console.log(`[订单详情] ${index + 1}/${orderTargets.length} orderId=${target.orderId} page=${target.pageNum} window=${target.windowStart}~${target.windowEnd}`);
// Re-apply the date window only when the target belongs to a different one.
const nextWindowKey = `${target.windowStart}|${target.windowEnd}`;
if (target.windowStart && target.windowEnd && currentWindowKey !== nextWindowKey) {
await restoreOrderWindow(page, target.windowStart, target.windowEnd);
currentWindowKey = nextWindowKey;
currentListPage = 1;
}
// Jump when either the page number or the window+page group differs from the current one.
const nextGroupKey = `${nextWindowKey}|${target.pageNum}`;
if (target.pageNum > 0 && (currentListPage !== target.pageNum || currentGroupKey !== nextGroupKey)) {
const reached = await jumpToOrderPage(page, target.pageNum);
if (!reached) {
console.warn(`[订单详情] 无法跳到第 ${target.pageNum} 页,跳过 ${target.orderId}`);
continue;
}
currentListPage = target.pageNum;
}
currentGroupKey = nextGroupKey;
const clicked = await clickOrderDetailFromListWithRetry(page, target);
if (!clicked) {
console.warn(`[订单详情] 列表中未找到 orderId=${target.orderId},跳过`);
continue;
}
try {
// The detail view counts as loaded once the page text contains "订单详情".
await page.waitForFunction(
(text) => document.body && document.body.innerText.includes(text),
'订单详情',
{ timeout: 15000 },
);
await sleep(1000);
} catch {
console.warn(`[订单详情] ${target.orderId} 详情页加载超时,跳过`);
// Best-effort return to the list; failures here are deliberately ignored.
await page.goBack({ waitUntil: 'domcontentloaded' }).catch(() => null);
await waitForStableOrderList(page).catch(() => null);
continue;
}
const detail = await extractOrderDetail(page);
// Fall back to the target's id when the extracted one does not pass isValidOrderId().
if (!isValidOrderId(detail.orderId)) {
detail.orderId = target.orderId;
}
const detailContext = { detailSyncedAt: new Date().toISOString() };
allDetails.push({ ...detail, __context: detailContext });
// The checkpoint stores the index of the NEXT target to process.
await saveOrderDetailsCheckpoint(dataset, index + 1, allDetails);
if (hasDbConfig()) {
const normalizedDetail = normalizeDatasetRecords(dataset, [{ ...detail, __context: detailContext }], detailContext);
await upsertOrderDetails(normalizedDetail);
}
await page.goBack({ waitUntil: 'domcontentloaded' }).catch(() => null);
await waitForStableOrderList(page).catch(() => null);
currentListPage = target.pageNum;
}
return persistDataset(dataset, dedupeByHash(allDetails), {});
}
/**
 * Normalize raw records for a dataset and persist the result.
 *
 * @param {object} dataset - Dataset descriptor.
 * @param {Array<object>} records - Raw records.
 * @param {*} context - Fallback normalization context.
 * @returns {{ stamp: *, stats: * }} Whatever `persistNormalizedDataset` returns.
 */
function persistDataset(dataset, records, context) {
  return persistNormalizedDataset(dataset, normalizeDatasetRecords(dataset, records, context));
}
/**
 * Diff already-normalized records against the stored state and save both the new
 * run and its delta.
 *
 * @param {{ name: string, uniqueKey: * }} dataset - Dataset descriptor.
 * @param {Array<object>} normalizedRecords - Records to persist.
 * @returns {{ stamp: *, stats: * }} The stamp returned by `saveDatasetRun` and the
 *   `stats` of the diff result.
 */
function persistNormalizedDataset(dataset, normalizedRecords) {
  const { name: datasetName, uniqueKey } = dataset;
  const diffResult = diffRecords(loadCurrentState(datasetName, uniqueKey), normalizedRecords, uniqueKey);
  const runStamp = saveDatasetRun(datasetName, diffResult);
  saveDelta(datasetName, runStamp, diffResult.delta);
  return { stamp: runStamp, stats: diffResult.stats };
}
/**
 * Wait until the page body contains `heading`, failing fast when the browser is
 * sitting on a login/authentication page instead.
 *
 * @param {object} page - Browser page to wait on.
 * @param {string} heading - Text that must appear in document.body.innerText.
 * @param {number} [timeout=120000] - Timeout in milliseconds for the text to appear.
 * @param {{ allowInteractiveAuth?: boolean }} [options] - When `allowInteractiveAuth`
 *   is true, an auth page does not throw up front; the function keeps waiting so a
 *   user can finish logging in.
 * @returns {Promise<void>}
 * @throws {Error} When an auth page is detected and interactive auth is not allowed,
 *   when interactive login times out, or (rethrown) when the wait fails otherwise.
 */
async function waitUntilReady(page, heading, timeout = 120000, options = {}) {
await runtimeCheckpoint(`等待页面 ${heading}`);
const { allowInteractiveAuth = false } = options;
await page.waitForLoadState('domcontentloaded');
console.log(`[waitUntilReady] 当前URL: ${page.url()}`);
console.log(`[waitUntilReady] 等待页面出现: "${heading}"`);
// First check: are we already on a login/auth page?
const initialState = await detectAuthRedirect(page);
if (initialState.isAuthPage) {
console.error(`[waitUntilReady] 检测到登录页/鉴权页: ${initialState.currentUrl}`);
console.error(`[waitUntilReady] 页面内容前500字: ${initialState.bodyText}`);
// The login alert is only sent when the URL itself is an auth URL; a failed alert is logged, not thrown.
if (!allowInteractiveAuth && isAuthUrl(initialState.currentUrl)) {
try {
await sendLoginAlert(initialState.currentUrl);
} catch (notifyErr) {
console.error('[通知] 发送登录提醒失败:', notifyErr.message);
}
}
if (!allowInteractiveAuth) {
throw new Error(`当前页面仍处于登录/鉴权页,无法进入「${heading}」。请重新执行 npm run login并确认该账号对该页面有访问权限。`);
}
console.log(`[waitUntilReady] 允许交互式登录,等待用户完成认证后进入「${heading}」...`);
}
try {
await page.waitForFunction(
(text) => document.body && document.body.innerText.includes(text),
heading,
{ timeout },
);
} catch (err) {
// On timeout, print diagnostic information.
const { currentUrl, bodyText, isAuthPage } = await detectAuthRedirect(page);
console.error(`[waitUntilReady] 超时当前URL: ${currentUrl}`);
console.error(`[waitUntilReady] 页面内容前500字: ${bodyText}`);
if (isAuthPage && !allowInteractiveAuth) {
try {
await sendLoginAlert(currentUrl);
} catch (notifyErr) {
console.error('[通知] 发送登录提醒失败:', notifyErr.message);
}
throw new Error(`当前页面停留在登录/鉴权页,未能进入「${heading}」。请重新执行 npm run login并确认该账号对该页面有访问权限。`);
}
if (isAuthPage && allowInteractiveAuth) {
throw new Error(`交互式登录超时,仍未进入「${heading}」。请确认已在浏览器中完成 RAM/阿里云登录,并且当前账号有访问该页面的权限。`);
}
// Not an auth page: surface the original wait error unchanged.
throw err;
}
// Final check after the heading text was found: the page may still be classified as an auth page.
const finalState = await detectAuthRedirect(page);
if (finalState.isAuthPage && !allowInteractiveAuth) {
throw new Error(`当前页面仍处于登录/鉴权页,未成功进入「${heading}」。请重新执行 npm run login并确认该账号对该页面有访问权限。`);
}
// Fixed pause before handing the page back to the caller.
await sleep(1500);
}
/**
 * Walk a paginated table from the current page onwards and collect every row.
 *
 * Each collected row is a copy of the extracted row with `__context` set to
 * `{ ...context, pageNum }`. Paging ends when `gotoNextPage` reports no move, when a
 * page key (`<pageNum>-<rowCount>`) repeats, or when `shouldStop` resolves truthy.
 *
 * @param {object} page - Browser page showing the table.
 * @param {{ name: string }} dataset - Dataset descriptor; `name` is used for the
 *   runtime checkpoint label.
 * @param {object} context - Context merged into every row's `__context`.
 * @param {object} [options]
 * @param {Function} [options.onPage] - Awaited with `{ pageData, pageNum, pageRows }`
 *   after each page is collected.
 * @param {boolean} [options.skipInitialPage=false] - Skip the first page encountered
 *   without collecting it.
 * @param {Function} [options.shouldStop] - Awaited with
 *   `{ pageData, pageNum, pageRows, pages }` after `onPage`; truthy ends paging.
 * @returns {Promise<Array<object>>} All collected rows in page order.
 */
async function scrapePagedTable(page, dataset, context, options = {}) {
  const { onPage, skipInitialPage = false, shouldStop } = options;
  const collectedRows = [];
  const seenPageKeys = new Set();
  let skipPending = skipInitialPage;

  const formatStopReason = (reason) => {
    if (!reason) {
      return 'unknown';
    }
    const { code, beforePage, afterPage } = reason;
    const parts = [];
    if (beforePage != null) {
      parts.push(`before=${beforePage}`);
    }
    if (afterPage != null) {
      parts.push(`after=${afterPage}`);
    }
    return parts.length === 0 ? code : `${code} (${parts.join(', ')})`;
  };

  // Try to move to the next page; log why paging ends when it cannot.
  const advance = async (stopLabel) => {
    const { moved, reason } = await gotoNextPage(page);
    if (!moved) {
      console.log(`[抓取] ${stopLabel}: ${formatStopReason(reason)}`);
    }
    return moved;
  };

  for (;;) {
    await runtimeCheckpoint(`抓取 ${dataset.name} 分页`);
    await waitForTableRows(page);
    const pageData = await extractTable(page);
    const pageNum = await currentPageNumber(page);
    const pageKey = `${pageNum}-${pageData.rows.length}`;
    console.log(`[抓取] 第${pageNum}页, ${pageData.rows.length}行, key="${pageKey}"`);

    if (skipPending) {
      console.log(`[抓取] 跳过 checkpoint 已保存页: ${pageNum}`);
      skipPending = false;
      if (await advance('checkpoint 已停止续爬')) {
        continue;
      }
      break;
    }

    // Seeing the same page key twice means paging is no longer making progress.
    if (seenPageKeys.has(pageKey)) {
      console.log(`[抓取] 重复页面key停止翻页`);
      break;
    }
    seenPageKeys.add(pageKey);

    const pageRows = pageData.rows.map((row) => ({ ...row, __context: { ...context, pageNum } }));
    collectedRows.push(...pageRows);

    if (onPage) {
      await onPage({ pageData, pageNum, pageRows });
    }
    if (shouldStop) {
      const stopNow = await shouldStop({ pageData, pageNum, pageRows, pages: collectedRows });
      if (stopNow) {
        console.log(`[抓取] 满足停止条件,在第${pageNum}页提前停止`);
        break;
      }
    }

    if (!(await advance('停止翻页'))) {
      break;
    }
  }

  console.log(`[抓取] 共采集 ${collectedRows.length} 条记录`);
  return collectedRows;
}
/**
 * Abort the current operation when the page has landed on a login/auth screen.
 *
 * Returns silently when `detectAuthRedirect` reports a normal page. Otherwise
 * logs diagnostics, attempts to send a login alert (a failed alert is only
 * logged) and throws.
 *
 * @param {object} page - Playwright page to inspect.
 * @param {string} label - Description of the step being performed, used in logs and the error.
 * @throws {Error} When the page is detected as an auth page.
 */
async function raiseIfSessionExpired(page, label) {
  const authState = await detectAuthRedirect(page);
  if (authState.isAuthPage) {
    console.error(`[鉴权] ${label} 时检测到登录页/鉴权页: ${authState.currentUrl}`);
    console.error(`[鉴权] 页面内容前500字: ${authState.bodyText}`);
    try {
      await sendLoginAlert(authState.currentUrl);
    } catch (notifyErr) {
      // Notification is best-effort; the session error below is what matters.
      console.error('[通知] 发送登录提醒失败:', notifyErr.message);
    }
    throw new Error(`运行过程中登录态失效(${label})。请重新执行 npm run login 后再继续同步。`);
  }
}
/**
 * Read the data table rendered on the page and return it as header names plus
 * one record object per non-empty row.
 *
 * Two strategies run inside the browser: native `<table>` markup first, then
 * the `.next-table` / ARIA-role markup. The first strategy yielding rows wins;
 * with no rows anywhere, the next-table result is returned if it at least
 * found headers, otherwise the native result.
 *
 * @param {object} page - Playwright page.
 * @returns {Promise<{headers: string[], rows: object[]}>}
 */
async function extractTable(page) {
  return page.evaluate(() => {
    // Collapse non-breaking spaces and whitespace around line breaks, then trim.
    const cleanText = (value) =>
      String(value || '')
        .replace(/\u00a0/g, ' ')
        .replace(/\s+\n/g, '\n')
        .replace(/\n\s+/g, '\n')
        .trim();
    const cellText = (cell) => cleanText(cell.innerText || cell.textContent);

    // First candidate with the largest measure (undefined for an empty list).
    const pickLargest = (candidates, measure) => {
      let best;
      let bestSize = -Infinity;
      for (const candidate of candidates) {
        const size = measure(candidate);
        if (size > bestSize) {
          best = candidate;
          bestSize = size;
        }
      }
      return best;
    };

    // Turn rows of cell strings into objects keyed by header (or column_N when a header is missing).
    const toRecords = (headers, rows) => {
      const records = [];
      for (const rawCells of rows) {
        const cells = rawCells.map((cell) => cleanText(cell));
        if (!cells.some(Boolean)) {
          continue;
        }
        const keys = headers.length ? headers : cells.map((_, index) => `column_${index + 1}`);
        const record = {};
        keys.forEach((header, index) => {
          record[header || `column_${index + 1}`] = cells[index] || '';
        });
        records.push(record);
      }
      return records;
    };

    const extractFromNativeTables = () => {
      const allTables = Array.from(document.querySelectorAll('table'));
      const headerCellCount = (table) => table.querySelectorAll('thead th').length;
      const widestRow = (table) =>
        Math.max(...Array.from(table.querySelectorAll('tbody tr')).map((row) => row.querySelectorAll('td').length), 0);

      // Header comes from the table with the most header cells (at least two).
      const headerTable = pickLargest(allTables.filter((table) => headerCellCount(table) > 1), headerCellCount);
      const headers = headerTable
        ? Array.from(headerTable.querySelectorAll('thead th')).map((cell) => cleanText(cell.textContent))
        : [];

      // Body comes from the table whose widest row has the most cells.
      const bodyTable = pickLargest(
        allTables.filter((table) => table.querySelectorAll('tbody tr').length > 0),
        widestRow,
      );
      if (!bodyTable) {
        return { headers, rows: [] };
      }
      const rows = Array.from(bodyTable.querySelectorAll('tbody tr'))
        .map((row) => Array.from(row.querySelectorAll('td')).map(cellText));
      return { headers, rows: toRecords(headers, rows) };
    };

    const extractFromNextTable = () => {
      const container = document.querySelector('.next-table, .next-table-inner, [class*="next-table"]');
      if (!container) {
        return { headers: [], rows: [] };
      }
      const headerCells = container.querySelectorAll('.next-table-header .next-table-cell, .next-table-header th, [role="columnheader"]');
      const headers = Array.from(headerCells).map(cellText).filter(Boolean);
      const rowNodes = Array.from(container.querySelectorAll('.next-table-body .next-table-row, .next-table-row, [role="row"]'));
      const rows = rowNodes
        .map((row) => Array.from(row.querySelectorAll('.next-table-cell, [role="gridcell"], [role="cell"], td')).map(cellText))
        .filter((cells) => cells.length > 0 && cells.some(Boolean));
      return { headers, rows: toRecords(headers, rows) };
    };

    const fromNative = extractFromNativeTables();
    if (fromNative.rows.length > 0) {
      return fromNative;
    }
    const fromNextTable = extractFromNextTable();
    if (fromNextTable.rows.length > 0) {
      return fromNextTable;
    }
    return fromNextTable.headers.length > 0 ? fromNextTable : fromNative;
  });
}
/**
 * Tell whether an error carries the "target closed" message, i.e. the page,
 * context or browser went away underneath the operation.
 *
 * @param {unknown} error - Error object, string, or anything else.
 * @returns {boolean}
 */
function isTargetClosedError(error) {
  const CLOSED_MARKER = 'Target page, context or browser has been closed';
  const text = String(error?.message || error || '');
  return text.includes(CLOSED_MARKER);
}
/**
 * Throw when the page handle is missing or reports itself as closed.
 *
 * @param {object} page - Playwright page (or any object with an optional `isClosed()`).
 * @param {string} label - Name of the step about to run, embedded in the error message.
 * @throws {Error} When `page` is falsy or `page.isClosed()` returns truthy.
 */
function assertPageAvailable(page, label) {
  const usable = Boolean(page) && !page.isClosed?.();
  if (usable) {
    return;
  }
  throw new Error(`页面在${label}前已被关闭。请检查是否手动关闭了浏览器,或浏览器是否异常退出,然后重新执行同步。`);
}
/**
 * Block until the page shows table rows (native or next-table markup) or an
 * empty-state element, waiting up to 120 seconds, then pause briefly.
 *
 * @param {object} page - Playwright page.
 * @throws {Error} When the page was closed, when the session turned out to be
 *   expired, or the original wait error otherwise.
 */
async function waitForTableRows(page) {
  await runtimeCheckpoint('等待表格加载');
  assertPageAvailable(page, '等待表格加载');

  // Evaluated in the browser: true once rows or an empty placeholder exist.
  const tableOrEmptyStateVisible = () => {
    if (document.querySelectorAll('table tbody tr').length > 0) {
      return true;
    }
    if (document.querySelectorAll('.next-table-body .next-table-row, .next-table-row, [role="row"]').length > 0) {
      return true;
    }
    return Boolean(document.querySelector('.next-table-empty, .next-empty, [class*="empty"], [class*="no-data"]'));
  };

  try {
    await page.waitForFunction(tableOrEmptyStateVisible, null, { timeout: 120000 });
  } catch (error) {
    if (isTargetClosedError(error)) {
      throw new Error('等待消息表格加载时,浏览器页面已被关闭。请勿手动关闭浏览器窗口,并检查浏览器是否异常退出后重试。');
    }
    // A timeout may really be a redirect to the login page; surface that first.
    await raiseIfSessionExpired(page, '等待表格加载');
    throw error;
  }
  await sleep(800);
}
/**
 * Read the page number highlighted in the pager.
 *
 * @param {object} page - Playwright page.
 * @returns {Promise<number>} The active page number, or 1 when no active pager
 *   item exists or its text is not a number.
 * @throws {Error} When the page is closed before or during the read.
 */
async function currentPageNumber(page) {
  assertPageAvailable(page, '读取当前页码');
  try {
    const activeItem = page.locator('.next-pagination-item.next-current');
    const matches = await activeItem.count();
    if (matches === 0) {
      return 1;
    }
    const label = await activeItem.first().innerText();
    return Number.parseInt(label.trim(), 10) || 1;
  } catch (error) {
    if (!isTargetClosedError(error)) {
      throw error;
    }
    throw new Error('读取分页页码时,浏览器页面已被关闭。请勿手动关闭浏览器窗口,并检查浏览器是否异常退出后重试。');
  }
}
/**
 * Navigate the paginated list to `targetPage`.
 *
 * Strategy: if the pager already shows the target, do nothing; otherwise type
 * the number into the first jump input that works; otherwise (when allowed)
 * click "next" repeatedly until the target is reached.
 *
 * Fix over the previous version: a request for page 1 used to return `true`
 * unconditionally, even when the pager was on a later page. The current page
 * is now checked first, so the result reflects where the pager really is.
 * Targets below 1 still return `true` immediately (nothing to navigate to).
 *
 * @param {object} page - Playwright page.
 * @param {number} targetPage - 1-based page number to reach.
 * @param {object} [options]
 * @param {boolean} [options.allowSequentialFallback=true] - Permit clicking
 *   "next" repeatedly when the jump input is missing or ineffective.
 * @returns {Promise<boolean>} `true` when the pager ends up on `targetPage`.
 */
async function jumpToPage(page, targetPage, options = {}) {
  const { allowSequentialFallback = true } = options;
  if (targetPage < 1) {
    return true;
  }
  const current = await currentPageNumber(page);
  if (current === targetPage) {
    return true;
  }

  const jumpInputCandidates = [
    '.next-pagination-jump-input input',
    'input[aria-label*="页码"]',
    'input[aria-label*="页"]',
  ];
  for (const selector of jumpInputCandidates) {
    const input = page.locator(selector).first();
    if ((await input.count()) === 0) {
      continue;
    }
    // Individual keystroke failures are tolerated; the page-number check below decides success.
    await input.click().catch(() => null);
    await sleep(100);
    await page.keyboard.press('Control+A').catch(() => null);
    await page.keyboard.type(String(targetPage), { delay: 20 }).catch(() => null);
    await page.keyboard.press('Enter').catch(() => null);
    await sleep(1500);
    const afterJump = await currentPageNumber(page);
    if (afterJump === targetPage) {
      console.log(`[跳页] 已跳转到第 ${targetPage}`);
      return true;
    }
  }

  if (!allowSequentialFallback) {
    console.warn(`[跳页] 未找到可用跳页输入框,且当前模式禁止顺序兜底: target=${targetPage}`);
    return false;
  }

  console.warn(`[跳页] 未找到可用跳页输入框,尝试顺序翻到第 ${targetPage}`);
  // Sequential fallback can only move forward; the guard bounds the number of clicks.
  let guard = 0;
  while (guard < targetPage + 5) {
    const currentPage = await currentPageNumber(page);
    if (currentPage >= targetPage) {
      return currentPage === targetPage;
    }
    const { moved } = await gotoNextPage(page);
    if (!moved) {
      return false;
    }
    guard += 1;
  }
  return false;
}
/**
 * Click the pager's "next" button and report whether the page advanced.
 *
 * Fix over the previous version: the "next" locator is narrowed with
 * `.first()`, matching how `currentPageNumber` reads the pager. Without it,
 * `getAttribute`/`click` raise a Playwright strict-mode error whenever the
 * selector matches more than one button (for example a pager above and below
 * the table).
 *
 * @param {object} page - Playwright page.
 * @returns {Promise<{moved: boolean, reason: {code: string, beforePage: number, afterPage?: number}}>}
 *   `moved` is true when the page number increased (directly or via the jump
 *   fallback); `reason.code` explains the outcome.
 * @throws {Error} When the page number goes backwards after the click, the
 *   session has expired, or the page was closed.
 */
async function gotoNextPage(page) {
  await runtimeCheckpoint('翻页');
  assertPageAvailable(page, '翻页');
  const before = await currentPageNumber(page);
  try {
    // Locate the "next page" button through a Playwright locator.
    const nextBtn = page.locator('button.next-pagination-item.next-next').first();
    if ((await nextBtn.count()) === 0) {
      return {
        moved: false,
        reason: { code: 'next_button_missing', beforePage: before },
      };
    }
    const disabled = (await nextBtn.getAttribute('disabled')) != null;
    if (disabled) {
      return {
        moved: false,
        reason: { code: 'next_button_disabled', beforePage: before },
      };
    }

    // Use Playwright's click rather than a DOM click so React handlers fire normally.
    await nextBtn.click();
    await sleep(2000);
    await raiseIfSessionExpired(page, `翻页 ${before} -> next`);
    const after = await currentPageNumber(page);
    console.log(`[翻页] ${before} -> ${after}`);

    // Going backwards means the list state was reset underneath us.
    if (before > 1 && after === 1) {
      throw new Error(`分页从第 ${before} 页异常回退到第 1 页,疑似登录态失效或页面会话已重置。请重新执行 npm run login 后再继续同步。`);
    }
    if (after < before) {
      throw new Error(`分页从第 ${before} 页异常回退到第 ${after} 页,疑似登录态失效或页面状态被重置。请重新执行 npm run login 后再继续同步。`);
    }

    if (before === after) {
      // The click did not advance the pager: try the jump input once, without sequential fallback.
      const fallbackTarget = before + 1;
      console.warn(`[翻页] next 点击后页码未推进,尝试跳页到 ${fallbackTarget}`);
      const jumped = await jumpToPage(page, fallbackTarget, { allowSequentialFallback: false });
      if (jumped) {
        const afterJump = await currentPageNumber(page);
        console.log(`[翻页] fallback jump ${before} -> ${afterJump}`);
        return {
          moved: true,
          reason: { code: 'advanced_via_jump', beforePage: before, afterPage: afterJump },
        };
      }
      return {
        moved: false,
        reason: { code: 'page_number_not_advanced', beforePage: before, afterPage: after },
      };
    }

    return {
      moved: true,
      reason: { code: 'advanced', beforePage: before, afterPage: after },
    };
  } catch (error) {
    if (isTargetClosedError(error)) {
      throw new Error(`翻页到下一页时,浏览器页面在第 ${before} 页之后被关闭。请勿手动关闭浏览器窗口,并检查浏览器是否异常退出后重试。`);
    }
    throw error;
  }
}
/**
 * Best-effort attempt to switch the pager's "rows per page" selector.
 *
 * Does nothing when the selector input is absent. When the requested size is
 * not found among the visible texts, the dropdown is dismissed with Escape.
 * Click failures are ignored.
 *
 * @param {object} page - Playwright page.
 * @param {number|string} pageSize - Size to select; matched by visible text.
 */
async function trySetPageSize(page, pageSize) {
  await runtimeCheckpoint(`设置每页 ${pageSize}`);
  const sizeSelect = page.locator('input[aria-label="请选择每页显示几条"]').first();
  const hasSelect = (await sizeSelect.count()) > 0;
  if (!hasSelect) {
    return;
  }
  await sizeSelect.click().catch(() => null);
  await sleep(300);

  const choice = page.locator(`text=${pageSize}`).last();
  if ((await choice.count()) > 0) {
    await choice.click().catch(() => null);
    await sleep(1200);
    return;
  }
  // No matching option: close the dropdown again.
  await page.keyboard.press('Escape').catch(() => null);
}
/**
 * Fill the order list's date-range picker and verify the resulting values.
 *
 * The range is filled once (end date first); if either input does not show
 * the requested value afterwards, it is filled a second time with the start
 * date first. The outcome of the retry is only logged.
 *
 * @param {object} page - Playwright page.
 * @param {string} start - Start date text to enter.
 * @param {string} end - End date text to enter.
 */
async function setDateRange(page, start, end) {
  await runtimeCheckpoint(`设置订单日期 ${start} ~ ${end}`);
  console.log(`[订单日期] 设置: ${start} ~ ${end}`);

  // Current value of a range input, or '' when it cannot be read.
  const readInput = (placeholder) =>
    page.locator(`input[placeholder="${placeholder}"]`).inputValue().catch(() => '');

  await _fillDateRange(page, start, end);

  // Verify what the picker actually accepted.
  const startActual = await readInput('起始日期');
  const endActual = await readInput('结束日期');
  const accepted = startActual === start && endActual === end;
  if (accepted) {
    console.log(`[订单日期] 结果: "${startActual}" ~ "${endActual}"`);
    return;
  }

  // Wrong result: retry in the opposite order (start first, then end).
  console.log(`[订单日期] 首次结果不对: "${startActual}" ~ "${endActual}",反向重试`);
  await _fillDateRange(page, start, end, true);
  const retriedStart = await readInput('起始日期');
  const retriedEnd = await readInput('结束日期');
  console.log(`[订单日期] 重试结果: "${retriedStart}" ~ "${retriedEnd}"`);
}
/**
 * Open the range-picker panel and type both dates into its inputs.
 *
 * @param {object} page - Playwright page.
 * @param {string} start - Start date text.
 * @param {string} end - End date text.
 * @param {boolean} [startFirst=false] - Type the start date before the end
 *   date; by default the end date is typed first.
 */
async function _fillDateRange(page, start, end, startFirst = false) {
  await runtimeCheckpoint('填写订单日期');
  // Clicking the end-date input opens the picker panel.
  await page.locator('input[placeholder="结束日期"]').click();
  await sleep(1000);

  const panelStartInput = page.locator('.next-range-picker-panel-input-start-date input');
  const panelEndInput = page.locator('.next-range-picker-panel-input-end-date input');
  const startStep = [panelStartInput, start];
  const endStep = [panelEndInput, end];
  const fillOrder = startFirst ? [startStep, endStep] : [endStep, startStep];

  for (const [input, text] of fillOrder) {
    // Focus the field, select its content and overwrite it.
    await input.click();
    await sleep(100);
    await page.keyboard.press('Control+A');
    await page.keyboard.type(text, { delay: 30 });
    await sleep(300);
  }

  // Commit the values, then make sure the overlay is dismissed.
  await page.keyboard.press('Enter');
  await sleep(500);
  await page.mouse.click(0, 0);
  await sleep(300);
  await page.keyboard.press('Escape');
  await sleep(300);
  await page.locator('.next-overlay-wrapper.opened').waitFor({ state: 'hidden', timeout: 3000 }).catch(() => null);
  await sleep(300);
}
/**
 * Find the bill page's month input and type `month` into it.
 *
 * The input is located in three passes over all `<input>` elements:
 * 1. the first whose current value looks like `YYYY-MM`;
 * 2. the first (among those scanned) whose placeholder contains "月";
 * 3. the first (among those scanned) nested inside a date/month/range picker container.
 *
 * @param {object} page - Playwright page.
 * @param {string} month - Month text to enter.
 * @throws {Error} When no candidate input is found.
 */
async function setMonthValue(page, month) {
  await runtimeCheckpoint(`设置账单月份 ${month}`);
  const inputs = page.locator('input');
  const inputCount = await inputs.count();
  const scanned = [];

  // Pass 1: match on an existing YYYY-MM value.
  for (let index = 0; index < inputCount; index += 1) {
    const candidate = inputs.nth(index);
    const value = await candidate.inputValue().catch(() => '');
    const placeholder = await candidate.getAttribute('placeholder').catch(() => '');
    scanned.push({ index, value, placeholder });
    if (/^\d{4}-\d{2}$/.test(value)) {
      console.log(`[账单月份] 通过 value 匹配到 input[${index}], 设置: ${month}`);
      await typeIntoDateInput(candidate, month, page);
      return;
    }
  }

  // Pass 2: the value was empty, so look for a month-like placeholder.
  const byPlaceholder = scanned.find((item) => item.placeholder && /月/.test(item.placeholder));
  if (byPlaceholder) {
    console.log(`[账单月份] 通过 placeholder 匹配到 input[${byPlaceholder.index}], 设置: ${month}`);
    await typeIntoDateInput(inputs.nth(byPlaceholder.index), month, page);
    return;
  }

  // Pass 3: fall back to any input that sits inside a picker container (excludes search boxes etc.).
  for (const { index } of scanned) {
    const candidate = inputs.nth(index);
    const cls = await candidate
      .evaluate((el) => el.closest('[class*="date-picker"], [class*="month-picker"], [class*="range-picker"]')?.className || '')
      .catch(() => '');
    if (!cls) {
      continue;
    }
    console.log(`[账单月份] 通过父级 class 匹配到 input[${index}] (${cls}), 设置: ${month}`);
    await typeIntoDateInput(candidate, month, page);
    return;
  }

  console.error('[DEBUG] 账单页面所有 input:', JSON.stringify(scanned, null, 2));
  throw new Error('未识别到账单佣金月份输入框,请打开页面确认结构是否变化。');
}
/**
 * Enter a date value into an input using the keyboard.
 *
 * Sequence: drop `readonly`, focus, select all, type the value, press Tab to
 * move focus away, then press Escape in case a picker panel is still open.
 * The final value is read back and the result is logged; a mismatch only
 * produces a warning.
 *
 * @param {object} locator - Playwright locator of the input.
 * @param {string} value - Text to type.
 * @param {object} page - Playwright page owning the keyboard.
 */
async function typeIntoDateInput(locator, value, page) {
  await runtimeCheckpoint(`填写日期输入 ${value}`);

  // Make the field editable.
  await locator.evaluate((node) => node.removeAttribute('readonly'));

  // Focus and select whatever is currently in the field.
  await locator.focus();
  await sleep(100);
  await page.keyboard.press('Control+A');
  await sleep(100);

  // Type the replacement value character by character.
  await page.keyboard.type(value, { delay: 30 });
  await sleep(200);

  // Tab moves focus away from the field.
  await page.keyboard.press('Tab');
  await sleep(300);

  // Escape as a fallback in case the panel is still showing.
  await page.keyboard.press('Escape');
  await sleep(300);

  // Verify the field content.
  const actual = await locator.inputValue().catch(() => '');
  if (actual === value) {
    console.log(`[日期设置] 成功: "${value}"`);
  } else {
    console.warn(`[WARN] typeIntoDateInput: 期望 "${value}",实际 "${actual}"`);
  }
}
/**
 * Press the first "查询" (query) button on the page.
 *
 * The button is scrolled into view first (failures ignored). If the click
 * does not succeed within 8 seconds, the button is focused and Enter is
 * pressed instead.
 *
 * @param {object} page - Playwright page.
 */
async function clickQuery(page) {
  await runtimeCheckpoint('点击查询');
  const queryButton = page.locator('button:has-text("查询")').first();

  await queryButton.scrollIntoViewIfNeeded().catch(() => null);
  const centerInViewport = (node) => {
    node.scrollIntoView({ block: 'center', inline: 'center', behavior: 'instant' });
  };
  await queryButton.evaluate(centerInViewport).catch(() => null);

  try {
    await queryButton.click({ timeout: 8000 });
  } catch (error) {
    // Keyboard fallback when the click cannot be performed.
    console.warn(`[查询按钮] click 失败,尝试 Enter 兜底: ${error.message}`);
    await queryButton.focus().catch(() => null);
    await page.keyboard.press('Enter');
  }
  await sleep(1800);
}
/**
 * Build one date window per calendar month, from the month containing
 * `startDate` up to today.
 *
 * Every window starts on the 1st of its month and ends on the month's last
 * day, except that the final window is capped at today. Each entry carries
 * the same two dates under both `windowStart`/`windowEnd` and `start`/`end`.
 *
 * Fix over the previous version: `startDate` is parsed as midnight at +08:00,
 * but its year and month were then read with host-local getters. On a host
 * west of UTC+8 that instant falls on the previous day, so a start date on
 * the 1st produced an extra window for the preceding month. The calendar
 * fields are now read in UTC+8, matching the offset used for parsing.
 *
 * @param {string} startDate - Date text that forms a valid date-time when
 *   followed by `T00:00:00+08:00` (e.g. `2024-01-15`).
 * @returns {Array<{windowStart: string, windowEnd: string, start: string, end: string}>}
 *   Windows in chronological order; empty when `startDate` is unparsable or
 *   lies in a future month.
 */
function buildMonthlyDateWindows(startDate) {
  const start = new Date(`${startDate}T00:00:00+08:00`);
  if (Number.isNaN(start.getTime())) {
    return [];
  }

  // Shift by +8h so the UTC getters yield the calendar fields as seen in UTC+8.
  const UTC8_OFFSET_MS = 8 * 60 * 60 * 1000;
  const startInUtc8 = new Date(start.getTime() + UTC8_OFFSET_MS);

  const end = new Date();
  const windows = [];
  const cursor = new Date(startInUtc8.getUTCFullYear(), startInUtc8.getUTCMonth(), 1);
  while (cursor <= end) {
    const windowStart = new Date(cursor);
    // Day 0 of the following month is the last day of the current one.
    const windowEnd = new Date(cursor.getFullYear(), cursor.getMonth() + 1, 0);
    const actualEnd = windowEnd > end ? end : windowEnd;
    windows.push({
      windowStart: formatDate(windowStart),
      windowEnd: formatDate(actualEnd),
      start: formatDate(windowStart),
      end: formatDate(actualEnd),
    });
    cursor.setMonth(cursor.getMonth() + 1);
  }
  return windows;
}
/**
 * List every month from `startMonth` through the current month.
 *
 * @param {string} startMonth - First month as `YYYY-MM`.
 * @returns {string[]} Months as `YYYY-MM`, oldest first; empty when
 *   `startMonth` is in the future.
 */
function buildMonthList(startMonth) {
  const [startYear, startMonthNumber] = startMonth.split('-').map(Number);
  const today = new Date();
  const months = [];
  for (
    const cursor = new Date(startYear, startMonthNumber - 1, 1);
    cursor <= today;
    cursor.setMonth(cursor.getMonth() + 1)
  ) {
    const monthText = String(cursor.getMonth() + 1).padStart(2, '0');
    months.push(`${cursor.getFullYear()}-${monthText}`);
  }
  return months;
}
/**
 * Format a Date as `YYYY-MM-DD` using its local-time fields.
 *
 * @param {Date} date
 * @returns {string}
 */
function formatDate(date) {
  const twoDigits = (part) => String(part).padStart(2, '0');
  return [date.getFullYear(), twoDigits(date.getMonth() + 1), twoDigits(date.getDate())].join('-');
}
/**
 * Drop records whose JSON serialization has already been seen, keeping the
 * first occurrence and the original order.
 *
 * @param {object[]} records
 * @returns {object[]} New array holding the retained record objects.
 */
function dedupeByHash(records) {
  const seenKeys = new Set();
  const unique = [];
  for (const record of records) {
    const key = JSON.stringify(record);
    if (!seenKeys.has(key)) {
      seenKeys.add(key);
      unique.push(record);
    }
  }
  return unique;
}
/**
 * Collect the distinct, valid order ids found in `records`, in first-seen order.
 *
 * The id is read from `orderId` (normalized field) or, failing that, from the
 * raw `订单号` column. Blank values and "no data" placeholders are skipped
 * silently; ids rejected by `isValidOrderId` are skipped with a log line.
 *
 * @param {object[]} records
 * @returns {string[]}
 */
function collectValidOrderIds(records) {
  const uniqueIds = new Set();
  for (const record of records) {
    const candidate = String(record.orderId || record['订单号'] || '').trim();
    const isPlaceholder = !candidate || candidate.includes('没有数据');
    if (isPlaceholder) {
      continue;
    }
    if (!isValidOrderId(candidate)) {
      console.log(`[订单详情] 跳过无效订单号: ${candidate}`);
      continue;
    }
    // A Set keeps insertion order, so repeated ids are ignored without reordering.
    uniqueIds.add(candidate);
  }
  return [...uniqueIds];
}
/**
 * Collect the distinct, valid account ids found in `records`, in first-seen order.
 *
 * Blank values and "no data" placeholders are skipped silently; ids rejected
 * by `isValidAccountId` are skipped with a log line.
 *
 * @param {object[]} records - Records carrying an `accountId` field.
 * @returns {string[]}
 */
function collectValidAccountIds(records) {
  const uniqueIds = new Set();
  for (const record of records) {
    const candidate = String(record.accountId || '').trim();
    const isPlaceholder = !candidate || candidate.includes('没有数据');
    if (isPlaceholder) {
      continue;
    }
    if (!isValidAccountId(candidate)) {
      console.log(`[客户详情] 跳过无效 accountId: ${candidate}`);
      continue;
    }
    // A Set keeps insertion order, so repeated ids are ignored without reordering.
    uniqueIds.add(candidate);
  }
  return [...uniqueIds];
}
/**
 * Build the list of customers whose detail view should be opened.
 *
 * A record qualifies when it has a valid `accountId` and a positive
 * `listPageNum`. Only the first record per account is kept. The result is
 * ordered by list page number.
 *
 * @param {object[]} records
 * @returns {Array<{accountId: string, loginName: string, pageNum: number}>}
 */
function collectCustomerDetailTargets(records) {
  const targetsByAccount = new Map();
  for (const record of records) {
    const accountId = String(record.accountId || '').trim();
    const loginName = String(record.loginName || '').trim();
    const pageNum = Number.parseInt(String(record.listPageNum || 0), 10) || 0;
    const usable = accountId && isValidAccountId(accountId) && pageNum > 0;
    if (!usable || targetsByAccount.has(accountId)) {
      continue;
    }
    targetsByAccount.set(accountId, { accountId, loginName, pageNum });
  }
  return [...targetsByAccount.values()].sort((left, right) => left.pageNum - right.pageNum);
}
/**
 * Build the list of orders whose detail view should be opened.
 *
 * A record qualifies when it has a valid `orderId` and a positive
 * `listPageNum`. When `cachedOrderIds` is non-empty it acts as an allow-list.
 * Only the first record per order is kept. The result is ordered by date
 * window (`windowStart|windowEnd`) and then by list page number.
 *
 * @param {object[]} records
 * @param {Array<string|number>} [cachedOrderIds=[]] - Optional allow-list of order ids.
 * @returns {Array<{orderId: string, pageNum: number, windowStart: string, windowEnd: string}>}
 */
function collectOrderDetailTargets(records, cachedOrderIds = []) {
  const allowedIds = new Set(
    (cachedOrderIds || []).map((value) => String(value || '').trim()).filter(Boolean),
  );
  const targetsByOrder = new Map();
  for (const record of records) {
    const orderId = String(record.orderId || '').trim();
    const pageNum = Number.parseInt(String(record.listPageNum || 0), 10) || 0;
    const windowStart = String(record.windowStart || '').trim();
    const windowEnd = String(record.windowEnd || '').trim();
    const usable = orderId && isValidOrderId(orderId) && pageNum > 0;
    if (!usable) {
      continue;
    }
    const blockedByAllowList = allowedIds.size > 0 && !allowedIds.has(orderId);
    if (blockedByAllowList || targetsByOrder.has(orderId)) {
      continue;
    }
    targetsByOrder.set(orderId, { orderId, pageNum, windowStart, windowEnd });
  }

  const windowKey = (target) => `${target.windowStart}|${target.windowEnd}`;
  return [...targetsByOrder.values()].sort((left, right) => {
    const byWindow = windowKey(left).localeCompare(windowKey(right));
    return byWindow !== 0 ? byWindow : left.pageNum - right.pageNum;
  });
}
/**
 * Click the "详情" (detail) control of the customer row on the current list page.
 *
 * The row is the first `table tbody tr` whose whitespace-stripped text
 * contains the account id or, if given, the login name. The click is a
 * synthetic bubbling MouseEvent dispatched in the page.
 *
 * @param {object} page - Playwright page.
 * @param {{accountId: string, loginName?: string}} target
 * @returns {Promise<boolean>} `true` when a detail control was clicked;
 *   `false` when the row or control was not found or the evaluation failed.
 */
async function clickCustomerDetailFromList(page, target) {
  const clickInPage = ({ accountId, loginName }) => {
    const compact = (value) => String(value || '').replace(/\s+/g, '').trim();
    const matchesTarget = (row) => {
      const text = compact(row.innerText || row.textContent || '');
      return text.includes(accountId) || (loginName && text.includes(loginName));
    };
    const targetRow = Array.from(document.querySelectorAll('table tbody tr')).find(matchesTarget);
    if (!targetRow) {
      return false;
    }
    const isDetailControl = (node) => /详情/.test(String(node.textContent || '').trim());
    const detailButton = Array.from(targetRow.querySelectorAll('button, a, span')).find(isDetailControl);
    if (!detailButton) {
      return false;
    }
    detailButton.scrollIntoView({ block: 'center', inline: 'center', behavior: 'instant' });
    detailButton.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: true }));
    return true;
  };

  const clicked = await page.evaluate(clickInPage, target).catch(() => false);
  if (clicked) {
    // Give the detail view time to open.
    await sleep(1200);
  }
  return clicked;
}
/**
 * Try to open a customer's detail view, looking on the expected list page
 * first, then on the previous page, then on the next page.
 *
 * The first attempt assumes the list is already on `target.pageNum` and does
 * not navigate. A neighbouring page that cannot be reached is skipped.
 *
 * @param {object} page - Playwright page.
 * @param {{accountId: string, loginName?: string, pageNum: number}} target
 * @returns {Promise<boolean>} `true` as soon as one attempt clicks the detail control.
 */
async function clickCustomerDetailFromListWithRetry(page, target) {
  const expectedPage = target.pageNum;
  const candidatePages = [expectedPage, Math.max(1, expectedPage - 1), expectedPage + 1];
  for (const candidate of candidatePages) {
    const needsNavigation = candidate > 0 && candidate !== expectedPage;
    if (needsNavigation) {
      const reached = await jumpToCustomerPage(page, candidate);
      if (!reached) {
        continue;
      }
      await waitForStableCustomerList(page);
    }
    if (await clickCustomerDetailFromList(page, target)) {
      return true;
    }
  }
  return false;
}
/**
 * Click the "详情" (detail) control of the order row on the current list page.
 *
 * The row is the first `table tbody tr` whose whitespace-stripped text
 * contains the order id. The click is a synthetic bubbling MouseEvent
 * dispatched in the page.
 *
 * @param {object} page - Playwright page.
 * @param {{orderId: string}} target
 * @returns {Promise<boolean>} `true` when a detail control was clicked;
 *   `false` when the row or control was not found or the evaluation failed.
 */
async function clickOrderDetailFromList(page, target) {
  const clickInPage = ({ orderId }) => {
    const compact = (value) => String(value || '').replace(/\s+/g, '').trim();
    const matchesTarget = (row) => compact(row.innerText || row.textContent || '').includes(orderId);
    const targetRow = Array.from(document.querySelectorAll('table tbody tr')).find(matchesTarget);
    if (!targetRow) {
      return false;
    }
    const isDetailControl = (node) => /详情/.test(String(node.textContent || '').trim());
    const detailButton = Array.from(targetRow.querySelectorAll('button, a, span')).find(isDetailControl);
    if (!detailButton) {
      return false;
    }
    detailButton.scrollIntoView({ block: 'center', inline: 'center', behavior: 'instant' });
    detailButton.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: true }));
    return true;
  };

  const clicked = await page.evaluate(clickInPage, target).catch(() => false);
  if (clicked) {
    // Give the detail view time to open.
    await sleep(1200);
  }
  return clicked;
}
/**
 * Try to open an order's detail view, looking on the expected list page
 * first, then on the previous page, then on the next page.
 *
 * The first attempt assumes the list is already on `target.pageNum` and does
 * not navigate. A neighbouring page that cannot be reached is skipped.
 *
 * @param {object} page - Playwright page.
 * @param {{orderId: string, pageNum: number}} target
 * @returns {Promise<boolean>} `true` as soon as one attempt clicks the detail control.
 */
async function clickOrderDetailFromListWithRetry(page, target) {
  const expectedPage = target.pageNum;
  const candidatePages = [expectedPage, Math.max(1, expectedPage - 1), expectedPage + 1];
  for (const candidate of candidatePages) {
    const needsNavigation = candidate > 0 && candidate !== expectedPage;
    if (needsNavigation) {
      const reached = await jumpToOrderPage(page, candidate);
      if (!reached) {
        continue;
      }
      await waitForStableOrderList(page);
    }
    if (await clickOrderDetailFromList(page, target)) {
      return true;
    }
  }
  return false;
}
/**
 * Jump the customer list to `pageNum`, logging when the jump succeeds.
 *
 * @param {object} page - Playwright page.
 * @param {number} pageNum - Target page number.
 * @returns {Promise<boolean>} Result of `jumpToPage`.
 */
async function jumpToCustomerPage(page, pageNum) {
  const reached = await jumpToPage(page, pageNum);
  if (!reached) {
    return reached;
  }
  console.log(`[客户详情] 已跳转到第 ${pageNum}`);
  return reached;
}
/**
 * Jump the order list to `pageNum`, logging when the jump succeeds.
 *
 * @param {object} page - Playwright page.
 * @param {number} pageNum - Target page number.
 * @returns {Promise<boolean>} Result of `jumpToPage`.
 */
async function jumpToOrderPage(page, pageNum) {
  const reached = await jumpToPage(page, pageNum);
  if (!reached) {
    return reached;
  }
  console.log(`[订单详情] 已跳转到第 ${pageNum}`);
  return reached;
}
/**
 * Wait for the customer list table twice with a short pause in between, so a
 * re-render right after the first wait is also covered. Wait failures are ignored.
 *
 * @param {object} page - Playwright page.
 */
async function waitForStableCustomerList(page) {
  const waitQuietly = () => waitForTableRows(page).catch(() => null);
  await waitQuietly();
  await sleep(600);
  await waitQuietly();
}
/**
 * Wait for the order list table twice with a short pause in between, so a
 * re-render right after the first wait is also covered. Wait failures are ignored.
 *
 * @param {object} page - Playwright page.
 */
async function waitForStableOrderList(page) {
  const waitQuietly = () => waitForTableRows(page).catch(() => null);
  await waitQuietly();
  await sleep(600);
  await waitQuietly();
}
/**
 * Click the clickable element of the message row at `rowIndex`.
 *
 * Rows are the non-empty elements matching the next-table / native table /
 * ARIA row selectors, in document order. Within the row, the first clickable
 * node whose text appears in the row text is preferred, otherwise the first
 * clickable node. The click is a synthetic bubbling MouseEvent.
 *
 * @param {object} page - Playwright page.
 * @param {string} rowText - Expected row title; only used to report `matched`.
 * @param {number} rowIndex - Zero-based index among the non-empty rows.
 * @returns {Promise<object>} `{ clicked: true, buttonText, rowTextActual, matched }`
 *   on success, or `{ clicked: false, reason, ... }` when nothing was clicked.
 */
async function clickMessageDetailButton(page, rowText, rowIndex) {
  const clickInPage = ({ rowTextValue, rowIndexValue }) => {
    const squash = (value) => String(value || '').replace(/\s+/g, ' ').trim();
    const textOf = (node) => squash(node.innerText || node.textContent || '');
    const expected = squash(rowTextValue);

    const rows = Array.from(document.querySelectorAll('.next-table-row, table tbody tr, [role="row"]'))
      .filter((candidate) => textOf(candidate));
    const row = rows[rowIndexValue];
    if (!row) {
      return { clicked: false, reason: 'row_not_found', rowCount: rows.length };
    }

    const rowTextActual = textOf(row);
    const clickableNodes = Array.from(row.querySelectorAll('button, a, [role="button"], .next-btn-text'));
    const withVisibleText = clickableNodes.find((node) => {
      const text = textOf(node);
      return text && rowTextActual.includes(text);
    });
    const preferred = withVisibleText || clickableNodes[0];
    if (!preferred) {
      return { clicked: false, reason: 'clickable_node_not_found', rowTextActual };
    }

    preferred.scrollIntoView({ block: 'center', inline: 'center', behavior: 'instant' });
    preferred.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: true }));
    return {
      clicked: true,
      buttonText: textOf(preferred),
      rowTextActual,
      matched: rowTextActual.includes(expected),
    };
  };

  const outcome = await page
    .evaluate(clickInPage, { rowTextValue: rowText, rowIndexValue: rowIndex })
    .catch(() => ({ clicked: false, reason: 'evaluate_failed' }));
  if (outcome.clicked) {
    // Give the drawer time to start opening.
    await sleep(1200);
  }
  return outcome;
}
/**
 * Wait (up to 15 seconds) for the message detail drawer: both the drawer
 * header and body must exist and the header must contain "消息详情".
 *
 * @param {object} page - Playwright page.
 */
async function waitForMessageDetailDrawer(page) {
  const drawerIsOpen = () => {
    const header = document.querySelector('.next-drawer-header');
    const body = document.querySelector('.next-drawer-body');
    if (!header || !body) {
      return false;
    }
    return String(header.textContent || '').includes('消息详情');
  };
  await page.waitForFunction(drawerIsOpen, null, { timeout: 15000 });
  await sleep(600);
}
/**
 * Parse the open message detail drawer into a flat record.
 *
 * Labelled lines such as "接收时间:…" are read from the drawer body, a
 * classification is derived from keywords in the header/body, and the body
 * text minus the labelled lines is returned as `content`.
 *
 * Fix over the previous version: a labelled line is recognised only when the
 * label is directly followed by an ASCII or fullwidth colon, and exactly that
 * label-plus-colon prefix is removed. Previously the second alternative had
 * no separator, so any line merely beginning with the label matched, and a
 * fullwidth colon was left at the front of the extracted value. The regular
 * expressions likewise accept both colon forms, written as `\uFF1A` so the
 * source carries no ambiguous characters.
 *
 * @param {object} page - Playwright page with the drawer open.
 * @returns {Promise<object>} Record with `detailTitle`, `detailContent` (full
 *   body text), `receivedAt`, `customerName`, `customerNo`, `orderNo`,
 *   `orderAmount`, `customerOrderTime`, `refundOrderNo`, `refundAmount`,
 *   `refundTime`, `invitedRegisterUid`, `accountIds` (comma-joined),
 *   `messageClassification`, `status` (always '未读'), `title` and `content`.
 */
async function extractMessageDetail(page) {
  return page.evaluate(() => {
    // Separators accepted after a label: ASCII colon and fullwidth colon (U+FF1A).
    const COLONS = [':', '\uFF1A'];
    const normalize = (value) => String(value || '').replace(/\u00a0/g, ' ').trim();
    const header = normalize(document.querySelector('.next-drawer-header')?.innerText || '');
    const body = normalize(document.querySelector('.next-drawer-body')?.innerText || '');
    const lines = body.split(/\r?\n/).map((line) => normalize(line)).filter(Boolean);
    const firstLine = lines[0] || '';

    // Value of the first line that starts with "<label><colon>", or ''.
    const extract = (label) => {
      const line = lines.find((item) => COLONS.some((colon) => item.startsWith(`${label}${colon}`)));
      if (!line) return '';
      // Both separators are a single UTF-16 unit, so the prefix length is label.length + 1.
      return normalize(line.slice(label.length + 1));
    };
    // First capture group of `pattern` in the body, or ''.
    const match = (pattern) => {
      const matched = body.match(pattern);
      return matched?.[1] ? normalize(matched[1]) : '';
    };

    const accountIdMatches = Array.from(body.matchAll(/账号ID[:\uFF1A]?(\d{6,})/g))
      .map((item) => normalize(item[1]))
      .filter(Boolean);

    // First matching rule wins, so the order of the checks is significant.
    const classification = (() => {
      if (/退款/.test(header) || /退款/.test(body)) return 'refund';
      if (/释放预警/.test(header) || /预计于【.*】释放/.test(body)) return 'release_warning';
      if (/释放通知/.test(header) || /已释放/.test(body)) return 'release_notice';
      if (/未支付提醒/.test(header) || /未支付/.test(body)) return 'unpaid_reminder';
      if (/取消通知/.test(header) || /取消了一笔未支付订单/.test(body)) return 'order_cancel';
      if (/余额-预警通知/.test(header) || /账户现金余额/.test(body)) return 'balance_warning';
      if (/关联成功/.test(header) || /关联关系已完成建立/.test(body)) return 'association_success';
      if (/注册成功/.test(header) || /受邀注册UID/.test(body)) return 'registration_success';
      if (/变更已超期/.test(header) || /变更申请已超期/.test(body)) return 'change_overdue';
      return 'general';
    })();

    // Free-text lines: everything that is not a labelled field.
    const labelledLine = /^(接收时间|客户账号|订单号|退款订单号|订单金额|退款金额|客户下单时间|退款时间|受邀注册UID)[:\uFF1A]/;
    const detailContent = lines.filter((line) => !labelledLine.test(line));

    return {
      detailTitle: firstLine || header,
      detailContent: body,
      receivedAt: extract('接收时间'),
      customerName: extract('客户账号'),
      customerNo: extract('客户账号') || match(/贵司的代付(?:关联)?客户【[^/]+\/(\d{6,})】/) || match(/受邀注册UID[:\uFF1A]?(\d{6,})/) || accountIdMatches[0] || '',
      orderNo: extract('订单号') || extract('退款订单号'),
      orderAmount: extract('订单金额'),
      customerOrderTime: extract('客户下单时间'),
      refundOrderNo: extract('退款订单号'),
      refundAmount: extract('退款金额'),
      refundTime: extract('退款时间'),
      invitedRegisterUid: extract('受邀注册UID') || match(/受邀注册UID[:\uFF1A]?(\d{6,})/),
      accountIds: accountIdMatches.join(','),
      messageClassification: classification,
      status: '未读',
      title: firstLine || header,
      content: detailContent.join('\n'),
    };
  });
}
/**
 * Close the message detail drawer and wait (up to 10 seconds) for its header
 * to disappear. Uses the first close control found, or Escape when there is
 * none. Failures are ignored.
 *
 * @param {object} page - Playwright page.
 */
async function closeMessageDetailDrawer(page) {
  const closeControl = page.locator('.next-drawer-close, .next-dialog-close, .next-icon-close').first();
  const hasCloseControl = (await closeControl.count()) > 0;
  if (hasCloseControl) {
    await closeControl.click().catch(() => null);
  } else {
    await page.keyboard.press('Escape').catch(() => null);
  }

  const drawerIsGone = () => !document.querySelector('.next-drawer-header');
  await page.waitForFunction(drawerIsGone, null, { timeout: 10000 }).catch(() => null);
  await sleep(400);
}
/**
 * Fetch one page of messages from the message-box endpoint, from inside the
 * browser page so the request carries the page's credentials.
 *
 * @param {object} page - Playwright page.
 * @param {number} pageNum - Page number sent as `pageNo`.
 * @param {number} pageSize - Page size sent as `pageSize`.
 * @returns {Promise<object[]>} `payload.data.list` when it is an array,
 *   otherwise `[]`; also `[]` when the in-page request fails.
 */
async function fetchMessageApiRows(page, pageNum, pageSize) {
  const queryInPage = async ({ currentPage, currentPageSize }) => {
    const response = await fetch(`/api/taskapi/msgbox/queryUserMsg.json?lv2CategoryId=0&pageNo=${currentPage}&pageSize=${currentPageSize}`, {
      credentials: 'include',
    });
    const payload = await response.json();
    const list = payload?.data?.list;
    return Array.isArray(list) ? list : [];
  };
  const args = { currentPage: pageNum, currentPageSize: pageSize };
  return page.evaluate(queryInPage, args).catch(() => []);
}
/**
 * Convert an HTML fragment to plain text.
 *
 * `<br>`, `</div>` and `</p>` become line breaks, all remaining tags are
 * removed, `&nbsp;` becomes a space, and the result is returned as trimmed,
 * non-empty lines joined by `\n`.
 *
 * @param {unknown} value - HTML text; falsy values are treated as ''.
 * @returns {string}
 */
function stripHtmlTags(value) {
  const text = String(value || '')
    .replace(/<br\s*\/?>(\r?\n)?/gi, '\n')
    .replace(/<\/div>/gi, '\n')
    .replace(/<\/p>/gi, '\n')
    .replace(/<[^>]+>/g, '')
    .replace(/&nbsp;/gi, ' ')
    .replace(/\r/g, '');

  const keptLines = [];
  for (const rawLine of text.split('\n')) {
    const line = rawLine.trim();
    if (line) {
      keptLines.push(line);
    }
  }
  return keptLines.join('\n');
}
/**
 * Map one raw message returned by the message-box API onto the field names
 * used for stored message records.
 *
 * @param {object} record - Raw API message.
 * @returns {object|null} Mapped record, or `null` when `record` is not an object.
 */
function mapApiMessageRecord(record) {
  if (record === null || typeof record !== 'object') {
    return null;
  }

  const text = (value) => String(value || '').trim();
  const stamp = (value) => (value ? formatDateTime(new Date(value)) : '');
  const title = text(record.title);
  // HTML body preferred over plain content; the same text fills both content fields.
  const plainBody = stripHtmlTags(record.htmlContent || record.content || '');
  const lv3CategoryId = text(record.lv3CategoryId);

  return {
    msgId: text(record.id),
    title,
    detailTitle: title,
    detailContent: plainBody,
    content: plainBody,
    fromApp: text(record.fromApp),
    bizCode: text(record.bizCode),
    msgChannel: text(record.msgChannel),
    categoryName: text(record.categoryName),
    categoryId: lv3CategoryId,
    lv1CategoryId: text(record.lv1CategoryId),
    lv2CategoryId: text(record.lv2CategoryId),
    lv3CategoryId,
    gmtCreated: stamp(record.createDate),
    gmtModified: stamp(record.updateDate),
    status: Number(record.isRead) === 1 ? '已读' : '未读',
  };
}
/**
 * Add detail fields to every message row scraped from one list page.
 *
 * For each row with a title, the API record at the same index is used when it
 * maps to a record with a `msgId`; otherwise the row's detail drawer is
 * opened and parsed. Rows without a title, and rows whose detail cannot be
 * obtained, are passed through unchanged.
 *
 * @param {object} page - Playwright page showing the message list.
 * @param {object[]} pageRows - Rows scraped from the current list page.
 * @param {number} pageNum - Page number, used for the API request.
 * @returns {Promise<object[]>} One entry per input row, in the same order.
 */
async function enrichMessageRowsWithDetails(page, pageRows, pageNum) {
  const apiRows = await fetchMessageApiRows(page, pageNum, datasets.messages.pageSize);
  const enrichedRows = [];
  const stats = { success: 0, failed: 0 };

  for (const [index, row] of pageRows.entries()) {
    const rowText = String(row['消息标题'] || row['标题'] || row.title || row.column_1 || '').trim();
    if (!rowText) {
      enrichedRows.push(row);
      continue;
    }

    // Preferred source: the API record at the same position as the table row.
    const apiDetail = mapApiMessageRecord(apiRows[index]);
    if (apiDetail?.msgId) {
      stats.success += 1;
      enrichedRows.push({ ...row, ...apiDetail });
      continue;
    }

    // Fallback: open the drawer in the UI and parse it.
    const clickOutcome = await clickMessageDetailButton(page, rowText, index);
    if (!clickOutcome.clicked) {
      stats.failed += 1;
      console.warn(`[消息详情] 打开失败: pageRow=${index + 1}, title="${rowText}", reason=${clickOutcome.reason || 'unknown'}`);
      enrichedRows.push(row);
      continue;
    }

    try {
      await waitForMessageDetailDrawer(page);
      const detail = await extractMessageDetail(page);
      stats.success += 1;
      enrichedRows.push({ ...row, ...detail });
    } catch (error) {
      stats.failed += 1;
      console.warn(`[消息详情] 提取失败: pageRow=${index + 1}, title="${rowText}", error=${error.message}`);
      enrichedRows.push(row);
    } finally {
      // Always return to the list so the next row can be processed.
      await closeMessageDetailDrawer(page);
      await waitForTableRows(page).catch(() => null);
    }
  }

  console.log(`[消息详情] 本页详情提取: success=${stats.success}, failed=${stats.failed}, total=${pageRows.length}`);
  return enrichedRows;
}
/**
 * Re-apply an order date window: wait for the orders page, set the date
 * range, run the query, set the page size and wait for the list. Only the
 * date entry and the query click may throw; the other steps are best-effort.
 *
 * @param {object} page - Playwright page.
 * @param {string} windowStart - Start date of the window.
 * @param {string} windowEnd - End date of the window.
 */
async function restoreOrderWindow(page, windowStart, windowEnd) {
  const ignoreFailure = () => null;
  await waitUntilReady(page, datasets.orders.heading).catch(ignoreFailure);
  await setDateRange(page, windowStart, windowEnd);
  await clickQuery(page);
  await trySetPageSize(page, datasets.orders.pageSize).catch(ignoreFailure);
  await waitForStableOrderList(page).catch(ignoreFailure);
}
/**
 * Best-effort restore of the customer list: wait for the page, set the page
 * size and, for a positive `pageNum`, jump back to that page. Every step
 * ignores failures.
 *
 * @param {object} page - Playwright page.
 * @param {number} pageNum - List page to return to.
 */
async function recoverCustomerListState(page, pageNum) {
  const ignoreFailure = () => null;
  await waitUntilReady(page, datasets.customers.heading).catch(ignoreFailure);
  await trySetPageSize(page, datasets.customers.pageSize).catch(ignoreFailure);
  const shouldJump = pageNum > 0;
  if (shouldJump) {
    await jumpToCustomerPage(page, pageNum).catch(ignoreFailure);
    await waitForStableCustomerList(page).catch(ignoreFailure);
  }
}
/**
 * Best-effort restore of the order list: wait for the page, re-apply the date
 * window when both bounds are given (otherwise just set the page size) and,
 * for a positive `pageNum`, jump back to that page. Every step ignores failures.
 *
 * @param {object} page - Playwright page.
 * @param {number} pageNum - List page to return to.
 * @param {string} [windowStart=''] - Start date of the window to restore.
 * @param {string} [windowEnd=''] - End date of the window to restore.
 */
async function recoverOrderListState(page, pageNum, windowStart = '', windowEnd = '') {
  const ignoreFailure = () => null;
  await waitUntilReady(page, datasets.orders.heading).catch(ignoreFailure);

  const hasWindow = Boolean(windowStart && windowEnd);
  if (hasWindow) {
    await restoreOrderWindow(page, windowStart, windowEnd).catch(ignoreFailure);
  } else {
    await trySetPageSize(page, datasets.orders.pageSize).catch(ignoreFailure);
  }

  const shouldJump = pageNum > 0;
  if (shouldJump) {
    await jumpToOrderPage(page, pageNum).catch(ignoreFailure);
    await waitForStableOrderList(page).catch(ignoreFailure);
  }
}
/**
 * Check that an order id consists solely of digits after trimming.
 *
 * The digits-only pattern already rejects empty strings and any text holding
 * a corrupted (mis-decoded) character, so the earlier explicit checks for
 * those cases, including a garbled literal, were redundant and are removed.
 * Results are unchanged for every input.
 *
 * @param {unknown} orderId - Candidate id; falsy values count as empty.
 * @returns {boolean}
 */
function isValidOrderId(orderId) {
  return /^\d+$/.test(String(orderId || '').trim());
}
/**
 * Check that an account id consists solely of digits after trimming.
 *
 * The digits-only pattern already rejects empty strings and any text holding
 * a corrupted (mis-decoded) character, so the earlier explicit checks for
 * those cases, including a garbled literal, were redundant and are removed.
 * Results are unchanged for every input.
 *
 * @param {unknown} accountId - Candidate id; falsy values count as empty.
 * @returns {boolean}
 */
function isValidAccountId(accountId) {
  return /^\d+$/.test(String(accountId || '').trim());
}
/**
 * Read the order detail fields from the page's visible text.
 *
 * For each label, the value is taken from the next line when the label is
 * followed by a line break, otherwise from the rest of the label's own line
 * (after an optional ASCII or fullwidth colon).
 *
 * Fix over the previous version: labels were interpolated into the regular
 * expression as hand-escaped patterns, and the "payable amount" label left
 * its first pair of parentheses unescaped. That pair became a capture group,
 * so the pattern could not match the literal parentheses and, had it matched,
 * would have returned the label fragment rather than the value. Labels are
 * now written as plain text and escaped in one place.
 *
 * @param {object} page - Playwright page showing an order detail view.
 * @returns {Promise<object>} Record of strings ('' when a label is absent):
 *   `orderId`, `orderType`, `status`, `tradeType`, `customerCategory`,
 *   `dealerName`, `dealerUid`, `customerType`, `opportunityId`, `paymentTime`,
 *   `orderTime`, `productName`, `productCode`, `originalPriceCny`,
 *   `paidAmountCny`, `discount`, `payableAmountCny`, `couponAmountCny`.
 */
async function extractOrderDetail(page) {
  return page.evaluate(() => {
    const text = document.body?.innerText || '';
    // Escape every regex metacharacter so a label is always matched literally.
    const escapeRegExp = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const extract = (label) => {
      const literal = escapeRegExp(label);
      // Layout 1: label on one line, value on the next non-empty line.
      const lineBreakMatch = text.match(new RegExp(`${literal}\\s*(?:\\r?\\n)+\\s*([^\\r\\n]+)`));
      if (lineBreakMatch) return lineBreakMatch[1].trim();
      // Layout 2: "label: value" on one line (ASCII or fullwidth colon, optional).
      const inlineMatch = text.match(new RegExp(`${literal}\\s*[:\\uFF1A]?\\s*([^\\r\\n]+)`));
      return inlineMatch ? inlineMatch[1].trim() : '';
    };
    return {
      orderId: extract('订单号'),
      orderType: extract('订单类型'),
      status: extract('状态'),
      tradeType: extract('交易类型'),
      customerCategory: extract('客户分类'),
      dealerName: extract('二级经销商名称'),
      dealerUid: extract('二级经销商UID'),
      customerType: extract('客户类型'),
      opportunityId: extract('商机ID'),
      paymentTime: extract('支付时间'),
      orderTime: extract('下单时间'),
      productName: extract('产品名称'),
      productCode: extract('产品code'),
      originalPriceCny: extract('订单原价(CNY)'),
      paidAmountCny: extract('实付金额(CNY)'),
      discount: extract('订单折扣'),
      payableAmountCny: extract('应付金额(实付+代金券)(CNY)'),
      couponAmountCny: extract('代金券金额(CNY)'),
    };
  });
}
/**
 * Reads the customer detail fields out of the current page.
 *
 * Text fields are located by their on-page label: the value is the first
 * non-empty line after the label or, failing that, the rest of the label's
 * own line (an optional ASCII or full-width colon is skipped). Missing labels
 * yield the empty string. Labels are matched as literal text.
 *
 * The department is read from the second cell of the first body row that has
 * a non-empty second cell, in the first table whose text contains `所属部门`;
 * when no such value exists the label-based lookup is used instead.
 *
 * Monthly amounts are looked up inside two slices of the page text: from the
 * last-month total label up to the current-month total label, and from the
 * current-month total label to the end. Currency signs and thousands
 * separators (half- and full-width) are stripped from them.
 *
 * @param {object} page - Object exposing `evaluate(fn)`; presumably a
 *   Playwright `Page` — confirm against callers.
 * @returns {Promise<object>} Plain object of strings keyed by
 *   `customerAccount`, `customerName`, `customerType`, `tradeMode`,
 *   `customerSource`, `realNameStatus`, `email`, `relationDate`, `phone`,
 *   `remark`, `paymentNoticeStatus`, `department`,
 *   `lastMonthPayableTotalCny`, `lastMonthPrepayCny`, `lastMonthPostpayCny`,
 *   `currentMonthPayableTotalCny`, `currentMonthPrepayCny` and
 *   `currentMonthPostpayCny`.
 */
async function extractCustomerDetail(page) {
  // Every helper lives inside the callback: it is handed to `page.evaluate`
  // and reads the page's `document`, so it must not depend on variables of
  // the enclosing module.
  return page.evaluate(() => {
    // Non-breaking spaces become plain spaces so that trimming removes them.
    const normalize = (value) =>
      String(value || '')
        .replace(/\u00a0/g, ' ')
        .trim();

    const text = normalize(document.body?.innerText || '').replace(/\r/g, '');

    const escapeRegExp = (literal) => literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

    const extract = (label, sourceText = text) => {
      const labelPattern = escapeRegExp(label);

      // Preferred layout: label on one line, value on a following line.
      const nextLine = sourceText.match(new RegExp(`${labelPattern}\\s*(?:\\n)+\\s*([^\\n]+)`));
      if (nextLine) return normalize(nextLine[1]);

      // Fallback layout: "label: value" on a single line.
      const sameLine = sourceText.match(new RegExp(`${labelPattern}\\s*[:\\uFF1A]?\\s*([^\\n]+)`));
      return sameLine ? normalize(sameLine[1]) : '';
    };

    // Tries alternative labels in order and returns the first non-empty value.
    const extractFirst = (...labels) => {
      for (const label of labels) {
        const found = extract(label);
        if (found) return found;
      }
      return '';
    };

    // Drops yen signs and comma separators, half-width and full-width alike.
    const normalizeAmount = (value) =>
      normalize(value)
        .replace(/[\u00A5\uFFE5,\uFF0C]/g, '')
        .trim();

    // Slice of the page text from `startLabel` up to (not including) the next
    // `endLabel`, or to the end of the text when no end is given or found.
    const buildSection = (startLabel, endLabel = '') => {
      const start = text.indexOf(startLabel);
      if (start < 0) return '';
      const end = endLabel ? text.indexOf(endLabel, start + startLabel.length) : -1;
      return end > start ? text.slice(start, end) : text.slice(start);
    };

    const lastMonthSection = buildSection('上月应付总金额CNY', '本月应付总金额CNY');
    const currentMonthSection = buildSection('本月应付总金额CNY');
    const amountIn = (sectionText, label) => normalizeAmount(extract(label, sectionText));

    let tableDepartment = '';
    const departmentTable = Array.from(document.querySelectorAll('table')).find((node) =>
      (node.innerText || '').includes('所属部门'),
    );
    if (departmentTable) {
      for (const row of departmentTable.querySelectorAll('tbody tr')) {
        const cells = row.querySelectorAll('td');
        if (cells.length < 2) continue;
        const cellText = normalize(cells[1]?.innerText || cells[1]?.textContent || '');
        if (!cellText) continue;
        tableDepartment = cellText;
        break;
      }
    }
    const department = tableDepartment || extract('所属部门');

    return {
      customerAccount: extract('客户账号'),
      customerName: extract('客户名称'),
      customerType: extract('客户类型'),
      tradeMode: extract('交易模式'),
      customerSource: extract('客户来源'),
      realNameStatus: extract('实名状态'),
      email: extractFirst('邮箱', 'Email', '电子邮箱'),
      relationDate: extract('关联日期'),
      phone: extractFirst('手机号', '手机', '联系电话', '联系手机'),
      remark: extractFirst('备注', '客户备注'),
      paymentNoticeStatus: extract('代为支付告知状态'),
      department,
      lastMonthPayableTotalCny: amountIn(lastMonthSection, '上月应付总金额CNY'),
      lastMonthPrepayCny: amountIn(lastMonthSection, '预付费金额'),
      lastMonthPostpayCny: amountIn(lastMonthSection, '后付费金额'),
      currentMonthPayableTotalCny: amountIn(currentMonthSection, '本月应付总金额CNY'),
      currentMonthPrepayCny: amountIn(currentMonthSection, '预付费金额'),
      currentMonthPostpayCny: amountIn(currentMonthSection, '后付费金额'),
    };
  });
}