3142 lines
113 KiB
JavaScript
3142 lines
113 KiB
JavaScript
import { chromium } from 'playwright';
|
||
import cron from 'node-cron';
|
||
import fs from 'node:fs';
|
||
import path from 'node:path';
|
||
import readline from 'node:readline';
|
||
import { config, datasets } from './config.js';
|
||
import { sendLoginAlert, sendRuntimeErrorAlert } from './notify.js';
|
||
import {
|
||
closeDbPool,
|
||
customerExists,
|
||
getExistingMessageIds,
|
||
getExistingMessageFingerprints,
|
||
getLatestBillConsumptionTimeFromDb,
|
||
getLatestMessageTimeFromDb,
|
||
getLatestOrderTimeFromDb,
|
||
hasDbConfig,
|
||
upsertBills,
|
||
upsertCustomerDetails,
|
||
upsertCustomers,
|
||
upsertMessages,
|
||
upsertOrderDetails,
|
||
upsertOrders,
|
||
} from './db.js';
|
||
import {
|
||
diffRecords,
|
||
loadCurrentState,
|
||
nowStamp,
|
||
saveCheckpoint,
|
||
saveDatasetRun,
|
||
saveDelta,
|
||
saveRunSummary,
|
||
withHash,
|
||
} from './storage.js';
|
||
|
||
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise that settles once the timer fires.
 */
const sleep = (ms) => new Promise((wake) => {
  setTimeout(wake, ms);
});
|
||
// Append-only JSONL file that receives scheduler lifecycle events (see recordScheduleEvent).
const scheduleEventFile = path.join(config.dataDir, 'runs', 'schedule-events.jsonl');

// --- Shared module state -------------------------------------------------
// Cached browser context returned by getContext(); null when none is open.
let _context = null;
// Memoized hotkey controller created by getRuntimeController().
let _runtimeController = null;
// Browser handle; only assigned when attaching over CDP in getContext().
let _browser = null;
// True when the cached context belongs to a browser attached over CDP, so
// closeContextIfNeeded() must not close it.
let _isAttachedBrowser = false;
// Names of jobs currently executing; consulted by runLockedJob().
const runningJobs = new Set();
|
||
|
||
/**
 * Append one scheduler event as a JSON line to the schedule event log.
 *
 * The entry gets an ISO `at` timestamp; keys in `payload` (including `at`)
 * override it. Any failure is logged as a warning and never thrown.
 *
 * @param {object} payload - Event fields to record.
 * @returns {void}
 */
function recordScheduleEvent(payload) {
  try {
    const entry = { at: new Date().toISOString(), ...payload };
    const serialized = `${JSON.stringify(entry)}\n`;
    const targetDir = path.dirname(scheduleEventFile);
    fs.mkdirSync(targetDir, { recursive: true });
    fs.appendFileSync(scheduleEventFile, serialized, 'utf8');
  } catch (error) {
    console.warn(`[schedule-event] 写入失败: ${error.message}`);
  }
}
|
||
|
||
// Text fragments whose presence in a page body marks it as a sign-in page
// (matched by hasAuthKeywords).
const AUTH_PAGE_KEYWORDS = [
  'RAM 用户登录',
  '主账号登录',
  '钉钉扫码登录',
  '用户名',
  '下一步',
  '登录并使用 RAM',
];
|
||
|
||
/**
 * Release the cached browser context, if there is one.
 *
 * When the context belongs to a browser attached over CDP, only the cached
 * handle is dropped and the browser is left untouched. Otherwise the context
 * is closed.
 *
 * @returns {Promise<void>}
 * @throws Propagates any error raised by `context.close()`.
 */
async function closeContextIfNeeded() {
  if (!_context) return;
  if (_isAttachedBrowser) {
    _context = null;
    return;
  }
  try {
    await _context.close();
  } finally {
    // Always drop the handle, even when close() rejects (for example because
    // the browser already exited); otherwise getContext() would keep handing
    // out a dead context.
    _context = null;
  }
}
|
||
|
||
/**
 * Return the memoized hotkey controller, creating it on first use.
 *
 * The controller exposes:
 * - `bind()`: start listening for F7 (pause), F8 (resume) and F9 (terminate)
 *   on stdin; a no-op when stdin is not a TTY or already bound.
 * - `unbind()`: stop listening and restore the terminal mode.
 * - `waitIfPaused(label)`: resolves once not paused; throws if terminated.
 * - `throwIfTerminated(label)`: throws if termination was requested.
 *
 * The termination flag is never reset once F9 has been pressed.
 *
 * @returns {{bind: Function, unbind: Function, waitIfPaused: Function, throwIfTerminated: Function}}
 */
function getRuntimeController() {
  if (_runtimeController) return _runtimeController;

  let paused = false;
  let terminated = false;
  let keypressBound = false;

  const onKeypress = (_str, key = {}) => {
    if (key.ctrl && key.name === 'c') {
      // In raw mode the terminal no longer turns Ctrl+C into SIGINT, so the
      // keypress has to be forwarded manually. Prefer registered handlers;
      // fall back to signalling the process for the default behavior.
      if (process.listenerCount('SIGINT') > 0) {
        process.emit('SIGINT', 'SIGINT');
      } else {
        process.kill(process.pid, 'SIGINT');
      }
      return;
    }
    if (key.name === 'f7') {
      if (!paused) {
        paused = true;
        console.log('[控制] 已暂停(F7)。按 F8 继续,按 F9 终止。');
      }
      return;
    }
    if (key.name === 'f8') {
      if (paused) {
        paused = false;
        console.log('[控制] 已继续(F8)。');
      }
      return;
    }
    if (key.name === 'f9') {
      terminated = true;
      // Clear the pause so a paused waiter wakes up and observes termination.
      paused = false;
      console.log('[控制] 已请求终止(F9),将在安全检查点停止。');
    }
  };

  const bind = () => {
    if (keypressBound || !process.stdin.isTTY) return;
    readline.emitKeypressEvents(process.stdin);
    if (typeof process.stdin.setRawMode === 'function') {
      process.stdin.setRawMode(true);
    }
    process.stdin.resume();
    process.stdin.on('keypress', onKeypress);
    keypressBound = true;
    console.log('[控制] 热键已启用:F7 暂停 / F8 继续 / F9 终止');
  };

  const unbind = () => {
    if (!keypressBound) return;
    process.stdin.off('keypress', onKeypress);
    if (process.stdin.isTTY && typeof process.stdin.setRawMode === 'function') {
      process.stdin.setRawMode(false);
    }
    // Undo the resume() from bind() so a flowing stdin does not keep the
    // event loop alive after the job has finished.
    process.stdin.pause();
    keypressBound = false;
  };

  const throwIfTerminated = (label = '任务') => {
    if (terminated) {
      throw new Error(`[控制] 已终止:${label}`);
    }
  };

  const waitIfPaused = async (label = '任务') => {
    throwIfTerminated(label);
    while (paused) {
      await sleep(300);
      throwIfTerminated(label);
    }
  };

  _runtimeController = {
    bind,
    unbind,
    waitIfPaused,
    throwIfTerminated,
  };

  return _runtimeController;
}
|
||
|
||
/**
 * Cooperative checkpoint: throw if termination was requested, otherwise wait
 * while the run is paused.
 *
 * @param {string} label - Task name included in the termination error.
 * @returns {Promise<void>}
 */
async function runtimeCheckpoint(label) {
  const { throwIfTerminated, waitIfPaused } = getRuntimeController();
  throwIfTerminated(label);
  await waitIfPaused(label);
}
|
||
|
||
/**
 * Remove Chromium profile lock entries in `config.userDataDir` that have not
 * been modified for at least ten minutes; newer ones are kept.
 *
 * Entries are inspected with `lstat` rather than `existsSync`/`stat`: those
 * follow symbolic links, so a lock that is a dangling symlink would be
 * reported as missing and never cleaned up.
 * NOTE(review): on POSIX Chromium is understood to create these entries as
 * symlinks — confirm against the target platform.
 *
 * Failures for an individual entry are logged and do not abort the loop.
 *
 * @returns {void}
 */
function clearStaleBrowserProfileLocks() {
  const lockFiles = ['SingletonLock', 'SingletonCookie', 'SingletonSocket'];
  const now = Date.now();
  const staleMs = 10 * 60 * 1000;

  for (const fileName of lockFiles) {
    const filePath = path.join(config.userDataDir, fileName);
    try {
      // lstat inspects the link itself; undefined means nothing is there.
      const stat = fs.lstatSync(filePath, { throwIfNoEntry: false });
      if (!stat) {
        continue;
      }
      const ageMs = now - stat.mtimeMs;
      if (ageMs < staleMs) {
        console.log(`[浏览器锁] 检测到活跃锁文件,保留: ${fileName}`);
        continue;
      }
      fs.rmSync(filePath, { force: true });
      console.log(`[浏览器锁] 已清理陈旧锁文件: ${fileName}`);
    } catch (error) {
      console.warn(`[浏览器锁] 清理 ${fileName} 失败: ${error.message}`);
    }
  }
}
|
||
|
||
/**
 * Return the shared browser context, creating it on first use.
 *
 * - `config.browserMode === 'cdp'`: attach to an already running browser via
 *   `config.cdpUrl` and reuse its first context (or create one).
 * - otherwise: clear stale profile locks, launch a persistent context on
 *   `config.userDataDir`, then restore cookies from the storage snapshot.
 *
 * The cached handle is dropped automatically when the context closes or the
 * attached browser disconnects, so the next call creates a fresh one instead
 * of returning a dead context.
 *
 * @returns {Promise<object>} The Playwright browser context.
 * @throws {Error} When attaching or launching fails; the original error is
 *   available as `cause`.
 */
async function getContext() {
  if (_context) return _context;

  // Forget `context` when `emitter` reports that it went away, but only if it
  // is still the cached one.
  const forgetContextOn = (emitter, eventName, context) => {
    emitter.on(eventName, () => {
      if (_context === context) {
        _context = null;
      }
    });
  };

  if (config.browserMode === 'cdp') {
    try {
      _browser = await chromium.connectOverCDP(config.cdpUrl);
      _isAttachedBrowser = true;
      const contexts = _browser.contexts();
      _context = contexts[0] || await _browser.newContext();
      forgetContextOn(_context, 'close', _context);
      forgetContextOn(_browser, 'disconnected', _context);
      console.log(`[CDP] 已附着到手动浏览器: ${config.cdpUrl}`);
      return _context;
    } catch (error) {
      throw new Error(
        `无法通过 CDP 连接到手动浏览器(${config.cdpUrl})。请先手动启动 Chrome 并开启远程调试端口。原始错误: ${error.message}`,
        { cause: error },
      );
    }
  }

  _isAttachedBrowser = false;
  clearStaleBrowserProfileLocks();
  const launchOptions = {
    headless: config.headless,
    acceptDownloads: true,
    downloadsPath: config.downloadDir,
  };
  if (config.browserChannel) {
    launchOptions.channel = config.browserChannel;
  }
  if (config.browserExecutablePath) {
    launchOptions.executablePath = config.browserExecutablePath;
  }
  try {
    _context = await chromium.launchPersistentContext(config.userDataDir, launchOptions);
  } catch (error) {
    // Describe which browser binary was requested to make the failure actionable.
    const browserHint = config.browserExecutablePath
      ? `executablePath=${config.browserExecutablePath}`
      : config.browserChannel
        ? `channel=${config.browserChannel}`
        : 'bundled-chromium';
    throw new Error(
      `浏览器启动失败(${browserHint})。请确认没有其他浏览器占用 .browser 目录,或删除 .browser 后重新执行 npm run login。原始错误: ${error.message}`,
      { cause: error },
    );
  }
  forgetContextOn(_context, 'close', _context);
  await restoreStorageState(_context);
  return _context;
}
|
||
|
||
/**
 * Pick the page to drive inside `context`.
 *
 * In CDP mode an open tab whose URL contains `targetUrl` is preferred.
 * Otherwise the first open page is used, and a new page is created when the
 * context has none. In CDP mode the chosen tab is logged and brought to the
 * front (failures to do so are ignored).
 *
 * @param {object} context - Browser context exposing `pages()` and `newPage()`.
 * @param {string} [targetUrl=''] - URL fragment used to find an existing tab.
 * @returns {Promise<object>} The selected page.
 */
async function resolveActivePage(context, targetUrl = '') {
  const openPages = context.pages();
  const attached = config.browserMode === 'cdp';

  const matchingTab = attached && targetUrl
    ? openPages.find((candidate) => candidate.url().includes(targetUrl))
    : null;
  const page = matchingTab || openPages[0] || await context.newPage();

  if (attached) {
    const tabIndex = openPages.indexOf(page);
    console.log(`[CDP] 使用 tab=${tabIndex >= 0 ? tabIndex : 'new'} url=${page.url() || '(blank)'}`);
    await page.bringToFront().catch(() => null);
  }

  return page;
}
|
||
|
||
/**
 * Re-apply cookies from the saved storage snapshot to `context`.
 *
 * Does nothing when the snapshot file is missing or holds no cookies. Read,
 * parse and cookie errors are logged as warnings and not rethrown.
 *
 * @param {object} context - Browser context exposing `addCookies()`.
 * @returns {Promise<void>}
 */
async function restoreStorageState(context) {
  const snapshotFile = config.storageStateFile;
  if (!fs.existsSync(snapshotFile)) return;

  try {
    const state = JSON.parse(fs.readFileSync(snapshotFile, 'utf-8'));
    const hasCookies = Array.isArray(state.cookies) && state.cookies.length > 0;
    if (!hasCookies) return;
    await context.addCookies(state.cookies);
    console.log(`[storageState] 已恢复 ${state.cookies.length} 个 cookie`);
  } catch (error) {
    console.warn(`[storageState] 恢复失败,继续使用 .browser profile: ${error.message}`);
  }
}
|
||
|
||
/**
 * Write the context's storage state to `config.storageStateFile`.
 *
 * @param {object} context - Browser context exposing `storageState()`.
 * @returns {Promise<void>}
 */
async function saveStorageState(context) {
  const { storageStateFile } = config;
  await context.storageState({ path: storageStateFile });
  console.log(`[storageState] 已保存登录态快照: ${storageStateFile}`);
}
|
||
|
||
/**
 * Read the most recently modified `*.json` checkpoint of one dataset.
 *
 * Shared implementation for the bills and orders checkpoint loaders. Any
 * filesystem or parse failure (including a file disappearing between listing
 * and stat) is logged as a warning and reported as "no checkpoint".
 *
 * @param {string} datasetName - Sub-directory under `<dataDir>/checkpoints`.
 * @param {string} logLabel - Label used in the warning message.
 * @returns {object|null} Parsed checkpoint, or null when none is usable.
 */
function readNewestCheckpointFile(datasetName, logLabel) {
  const checkpointDir = path.join(config.dataDir, 'checkpoints', datasetName);
  try {
    if (!fs.existsSync(checkpointDir)) {
      return null;
    }

    let newest = null;
    for (const fileName of fs.readdirSync(checkpointDir)) {
      if (!fileName.endsWith('.json')) {
        continue;
      }
      const filePath = path.join(checkpointDir, fileName);
      const { mtimeMs } = fs.statSync(filePath);
      // Strict comparison keeps the first-listed file on mtime ties.
      if (!newest || mtimeMs > newest.mtimeMs) {
        newest = { filePath, mtimeMs };
      }
    }
    if (!newest) {
      return null;
    }

    const latest = JSON.parse(fs.readFileSync(newest.filePath, 'utf-8'));
    return latest && typeof latest === 'object' ? latest : null;
  } catch (error) {
    console.warn(`[${logLabel}] 读取失败,忽略断点续爬: ${error.message}`);
    return null;
  }
}

/**
 * Load the newest bills checkpoint from `<dataDir>/checkpoints/bills`.
 *
 * @returns {object|null} Parsed checkpoint, or null when none is usable.
 */
function loadLatestBillsCheckpoint() {
  return readNewestCheckpointFile('bills', '账单检查点');
}

/**
 * Load the newest orders checkpoint from `<dataDir>/checkpoints/orders`.
 *
 * @returns {object|null} Parsed checkpoint, or null when none is usable.
 */
function loadLatestOrdersCheckpoint() {
  return readNewestCheckpointFile('orders', '订单检查点');
}
|
||
|
||
/**
 * Return a new Date that is `days` calendar days before `dateValue`.
 * The input is not modified.
 *
 * @param {Date|number|string} dateValue - Anything accepted by `new Date()`.
 * @param {number} days - Number of days to go back.
 * @returns {Date}
 */
function subtractDays(dateValue, days) {
  const base = new Date(dateValue);
  return new Date(base.setDate(base.getDate() - days));
}
|
||
|
||
/**
 * Return a new Date that is `months` calendar months before `dateValue`.
 *
 * The day of month is clamped to the last day of the target month, so
 * March 31 minus one month yields the last day of February instead of
 * overflowing back into March. The input is not modified.
 *
 * @param {Date|number|string} dateValue - Anything accepted by `new Date()`.
 * @param {number} months - Number of months to go back.
 * @returns {Date}
 */
function subtractMonths(dateValue, months) {
  const next = new Date(dateValue);
  const dayOfMonth = next.getDate();
  // Move to the 1st before changing the month so setMonth cannot overflow.
  next.setDate(1);
  next.setMonth(next.getMonth() - months);
  // Day 0 of the following month is the last day of the target month.
  const lastDayOfTarget = new Date(next.getFullYear(), next.getMonth() + 1, 0).getDate();
  next.setDate(Math.min(dayOfMonth, lastDayOfTarget));
  return next;
}
|
||
|
||
/**
 * Return a pseudo-random integer in the inclusive range [min, max].
 *
 * @param {number} min - Lower bound (inclusive).
 * @param {number} max - Upper bound (inclusive).
 * @returns {number}
 */
function randomIntBetween(min, max) {
  const span = max - min + 1;
  const offset = Math.floor(Math.random() * span);
  return offset + min;
}
|
||
|
||
/**
 * Parse a database-style timestamp into a Date.
 *
 * Accepted inputs:
 * - a `Date` instance (returned as a copy; previously it was stringified and
 *   always came back as null),
 * - `'YYYY-MM-DD HH:mm:ss'` (the first space is replaced with `T`),
 * - `'YYYY-MM-DD'`, treated as local midnight so it is interpreted in the same
 *   time zone as the datetime form instead of as UTC.
 *
 * @param {Date|string|null|undefined} value - Raw value to parse.
 * @returns {Date|null} Parsed date, or null for empty or unparseable input.
 */
function parseDbDateTime(value) {
  if (value instanceof Date) {
    return Number.isNaN(value.getTime()) ? null : new Date(value.getTime());
  }

  const normalized = String(value || '').trim();
  if (!normalized) {
    return null;
  }

  // A bare ISO date would be parsed as UTC; pin it to local midnight instead.
  const isDateOnly = /^\d{4}-\d{2}-\d{2}$/.test(normalized);
  const isoLike = isDateOnly ? `${normalized}T00:00:00` : normalized.replace(' ', 'T');
  const parsed = new Date(isoLike);
  return Number.isNaN(parsed.getTime()) ? null : parsed;
}
|
||
|
||
/**
 * Format a Date as `<formatDate(date)> HH:mm:ss` using local time fields.
 *
 * @param {Date} date - Date to format.
 * @returns {string}
 */
function formatDateTime(date) {
  const datePart = formatDate(date);
  const twoDigits = (value) => String(value).padStart(2, '0');
  const timePart = [date.getHours(), date.getMinutes(), date.getSeconds()]
    .map((value) => twoDigits(value))
    .join(':');
  return `${datePart} ${timePart}`;
}
|
||
|
||
/**
 * Tell whether a raw timestamp falls on the same formatted calendar date as
 * `date`.
 *
 * @param {*} value - Raw timestamp handed to `parseDbDateTime`.
 * @param {Date} date - Date to compare against.
 * @returns {boolean} False when `value` cannot be parsed.
 */
function isSameDate(value, date) {
  const parsed = parseDbDateTime(value);
  return parsed ? formatDate(parsed) === formatDate(date) : false;
}
|
||
|
||
/**
 * Return a new Date that is `minutes` minutes after `date` (negative values
 * go back in time). The input is not modified.
 *
 * @param {Date|number|string} date - Anything accepted by `new Date()`.
 * @param {number} minutes - Minutes to add.
 * @returns {Date}
 */
function addMinutes(date, minutes) {
  const base = new Date(date);
  return new Date(base.setMinutes(base.getMinutes() + minutes));
}
|
||
|
||
/**
 * Build a change-detection fingerprint for an order record.
 *
 * The fingerprint joins six trimmed fields with `|`; falsy field values
 * contribute an empty segment.
 *
 * @param {object} record - Order record.
 * @returns {string}
 */
function buildOrderFingerprint(record) {
  const trackedFields = [
    'orderStatus',
    'actualPaidCny',
    'orderOriginalPriceCny',
    'orderType',
    'customerCategory',
    'createdAt',
  ];
  const segments = [];
  for (const field of trackedFields) {
    segments.push(String(record[field] || '').trim());
  }
  return segments.join('|');
}
|
||
|
||
/**
 * Tell whether an order status is listed in `config.hotFinalStatuses`.
 *
 * @param {*} status - Raw status; trimmed before comparison.
 * @returns {boolean} False for empty statuses.
 */
function isFinalOrderStatus(status) {
  const normalized = String(status || '').trim();
  return normalized ? config.hotFinalStatuses.includes(normalized) : false;
}
|
||
|
||
/**
 * Run `job` unless a job with the same name is already in flight.
 *
 * @param {string} jobName - Lock key.
 * @param {Function} job - Function to execute; its result is awaited.
 * @returns {Promise<*>} The job's result, or
 *   `{ skipped: true, reason: 'already_running', jobName }` when skipped.
 */
async function runLockedJob(jobName, job) {
  const alreadyRunning = runningJobs.has(jobName);
  if (alreadyRunning) {
    console.log(`[任务锁] ${jobName} 已在运行,跳过本次执行`);
    return { skipped: true, reason: 'already_running', jobName };
  }

  runningJobs.add(jobName);
  try {
    const outcome = await job();
    return outcome;
  } finally {
    // Release the lock whether the job succeeded or threw.
    runningJobs.delete(jobName);
  }
}
|
||
|
||
/**
 * Build the single-day query window covering today.
 *
 * @returns {Array<object>} Result of `buildSingleDateWindow(today, today)`.
 */
function buildTodayOrderWindow() {
  const todayText = formatDate(new Date());
  return buildSingleDateWindow(todayText, todayText);
}
|
||
|
||
/**
 * List the order ids in `nextRecords` that are new or whose fingerprint
 * differs from the record with the same id in `previousRecords`.
 *
 * Records without an order id are ignored. Ids are returned once each, in
 * order of first appearance.
 *
 * @param {Array<object>|null|undefined} previousRecords - Earlier snapshot.
 * @param {Array<object>|null|undefined} nextRecords - Current snapshot.
 * @returns {string[]}
 */
function computeChangedOrderIds(previousRecords, nextRecords) {
  const idOf = (record) => String(record.orderId || '').trim();

  // Later duplicates overwrite earlier ones for the same id.
  const previousById = new Map();
  for (const record of previousRecords || []) {
    const id = idOf(record);
    if (id) {
      previousById.set(id, record);
    }
  }

  const changed = new Set();
  for (const record of nextRecords || []) {
    const id = idOf(record);
    if (!id) {
      continue;
    }
    const before = previousById.get(id);
    const isNew = !before;
    if (isNew || buildOrderFingerprint(before) !== buildOrderFingerprint(record)) {
      changed.add(id);
    }
  }
  return [...changed];
}
|
||
|
||
/**
 * Choose which orders need their detail page (re)fetched.
 *
 * An order with a valid id is selected when:
 * - its id is in `changedOrderIds`, or
 * - its status is not final AND it either has no detail record, or that
 *   record's sync time is missing or at least
 *   `config.hotOrderDetailRefreshMinutes` old.
 *
 * @param {Array<object>|null|undefined} orderRecords - Current orders.
 * @param {Array<*>|null|undefined} changedOrderIds - Ids known to have changed.
 * @param {Array<object>|null|undefined} detailRecords - Stored detail records.
 * @returns {string[]} Unique order ids in order of first appearance.
 */
function selectOrderDetailCandidates(orderRecords, changedOrderIds, detailRecords) {
  const normalizeId = (value) => String(value || '').trim();

  const changedSet = new Set();
  for (const rawId of changedOrderIds || []) {
    const id = normalizeId(rawId);
    if (id) {
      changedSet.add(id);
    }
  }

  const detailById = new Map();
  for (const record of detailRecords || []) {
    const id = normalizeId(record.orderId);
    if (id) {
      detailById.set(id, record);
    }
  }

  const refreshBefore = addMinutes(new Date(), -config.hotOrderDetailRefreshMinutes);

  // Decide for one order with an already validated id.
  const needsDetail = (record, orderId) => {
    if (changedSet.has(orderId)) {
      return true;
    }
    if (isFinalOrderStatus(String(record.orderStatus || '').trim())) {
      return false;
    }
    const detail = detailById.get(orderId);
    if (!detail) {
      return true;
    }
    const lastSyncedAt = parseDbDateTime(detail.detailSyncedAt || detail.__detailSyncedAt || '');
    return !lastSyncedAt || lastSyncedAt <= refreshBefore;
  };

  const selected = new Set();
  for (const record of orderRecords || []) {
    const orderId = normalizeId(record.orderId);
    if (!orderId || !isValidOrderId(orderId)) {
      continue;
    }
    if (needsDetail(record, orderId)) {
      selected.add(orderId);
    }
  }
  return [...selected];
}
|
||
|
||
/**
 * Classify the rows of one scraped page against previously known orders.
 *
 * @param {Map<string, object>} previousOrderMap - Known orders keyed by id.
 * @param {Array<object>} normalizedPageRows - Rows of the current page.
 * @returns {{stableCount: number, changedCount: number, newCount: number, todayRowCount: number}}
 *   Rows with an unchanged fingerprint, a changed fingerprint, no previous
 *   record, and rows created today.
 */
function summarizeHotPage(previousOrderMap, normalizedPageRows) {
  const tally = { stableCount: 0, changedCount: 0, newCount: 0, todayRowCount: 0 };

  for (const record of normalizedPageRows) {
    if (isSameDate(record.createdAt, new Date())) {
      tally.todayRowCount += 1;
    }

    const previous = previousOrderMap.get(String(record.orderId || '').trim());
    if (!previous) {
      tally.newCount += 1;
    } else if (buildOrderFingerprint(previous) === buildOrderFingerprint(record)) {
      tally.stableCount += 1;
    } else {
      tally.changedCount += 1;
    }
  }

  return tally;
}
|
||
|
||
/**
 * Wrap a start/end pair into a one-element list of query windows.
 *
 * @param {string} startDate - Window start.
 * @param {string} endDate - Window end.
 * @returns {Array<{windowStart: string, windowEnd: string, start: string, end: string}>}
 */
function buildSingleDateWindow(startDate, endDate) {
  const span = {
    windowStart: startDate,
    windowEnd: endDate,
    start: startDate,
    end: endDate,
  };
  return [span];
}
|
||
|
||
/**
 * Persist diagnostics for a failure: a JSON file with the metadata and, when
 * a live page is available, a full-page screenshot.
 *
 * Files are written to `<config.errorDir>/<metadata.dataset || 'general'>`
 * and named after `nowStamp()`. A failing screenshot is logged, not thrown.
 *
 * @param {object|null} page - Page to screenshot; may be null or closed.
 * @param {object} [metadata={}] - Context merged into the JSON payload.
 * @returns {Promise<{jsonPath: string, screenshotPath: string}>}
 *   `screenshotPath` is '' when no screenshot was saved.
 */
async function captureErrorArtifacts(page, metadata = {}) {
  const stamp = nowStamp();
  const artifactDir = path.join(config.errorDir, metadata.dataset || 'general');
  fs.mkdirSync(artifactDir, { recursive: true });

  const artifactFile = (extension) => path.join(artifactDir, `${stamp}.${extension}`);
  const jsonPath = artifactFile('json');
  const screenshotPath = artifactFile('png');

  const payload = {
    ...metadata,
    capturedAt: new Date().toISOString(),
    pageUrl: page?.url?.() || '',
    stack: metadata.error?.stack || metadata.errorMessage || '',
  };
  fs.writeFileSync(jsonPath, JSON.stringify(payload, null, 2));

  const pageUsable = Boolean(page) && !page.isClosed?.();
  if (!pageUsable) {
    return { jsonPath, screenshotPath: '' };
  }

  try {
    await page.screenshot({ path: screenshotPath, fullPage: true, timeout: 5000, animations: 'disabled' });
    return { jsonPath, screenshotPath };
  } catch (error) {
    console.error('[错误截图] 保存失败:', error.message);
    return { jsonPath, screenshotPath: '' };
  }
}
|
||
|
||
/**
 * Capture diagnostics for a runtime failure and send an alert.
 *
 * Reporting is best-effort: callers invoke this from a `catch` block and then
 * rethrow the original error, so a failure while writing artifacts or sending
 * the alert is logged here instead of being thrown, which would otherwise
 * replace the error that actually caused the run to fail.
 *
 * @param {Error|*} error - The failure being reported.
 * @param {object|null} page - Page at the time of failure; may be null.
 * @param {object} [metadata={}] - Extra context (`label`, `dataset`, `mode`).
 * @returns {Promise<void>}
 */
async function reportRuntimeError(error, page, metadata = {}) {
  // Tolerate non-Error throwables such as strings or null.
  const errorMessage = error?.message ?? String(error);

  try {
    const artifacts = await captureErrorArtifacts(page, {
      ...metadata,
      errorMessage,
      error,
    });
    const subject = `[APS同步异常] ${metadata.label || metadata.dataset || 'sync'} failed`;
    const text = [
      `时间: ${new Date().toISOString()}`,
      `任务: ${metadata.label || ''}`,
      `数据集: ${metadata.dataset || ''}`,
      `模式: ${metadata.mode || ''}`,
      `URL: ${page?.url?.() || ''}`,
      `错误: ${errorMessage}`,
      `JSON: ${artifacts.jsonPath}`,
      artifacts.screenshotPath ? `截图: ${artifacts.screenshotPath}` : '截图: 保存失败',
    ].join('\n');

    const attachments = [{ filename: path.basename(artifacts.jsonPath), path: artifacts.jsonPath }];
    if (artifacts.screenshotPath) {
      attachments.push({ filename: path.basename(artifacts.screenshotPath), path: artifacts.screenshotPath });
    }

    await sendRuntimeErrorAlert({ subject, text, attachments });
  } catch (reportError) {
    console.error(`[错误上报] 上报失败,已忽略: ${reportError?.message ?? reportError}`);
  }
}
|
||
|
||
/**
 * Return up to the first 500 characters of the page's visible body text.
 *
 * @param {object} page - Page exposing `evaluate()`.
 * @returns {Promise<string>} '(空)' when the body has no text, '(无法获取)'
 *   when the evaluation rejects.
 */
async function getPageBodyPreview(page) {
  // Start the evaluation outside the try so only a rejected evaluation, not a
  // synchronous throw from the call itself, maps to the fallback text.
  const pendingPreview = page.evaluate(() => document.body?.innerText?.substring(0, 500) || '(空)');
  try {
    return await pendingPreview;
  } catch {
    return '(无法获取)';
  }
}
|
||
|
||
/**
 * Tell whether a URL points at a sign-in page: an Aliyun account/sign-in host,
 * or a path containing `login.htm` or `/#/signin`.
 *
 * @param {string} url - URL to inspect.
 * @returns {boolean}
 */
function isAuthUrl(url) {
  if (/account\.aliyun\.com|signin\.aliyun\.com/.test(url)) {
    return true;
  }
  if (url.includes('login.htm')) {
    return true;
  }
  return url.includes('/#/signin');
}
|
||
|
||
/**
 * Tell whether `text` contains any of the sign-in page keywords.
 *
 * @param {string} text - Page text to scan.
 * @returns {boolean}
 */
function hasAuthKeywords(text) {
  for (const keyword of AUTH_PAGE_KEYWORDS) {
    if (text.includes(keyword)) {
      return true;
    }
  }
  return false;
}
|
||
|
||
/**
 * Inspect the current page and report whether it looks like a sign-in page,
 * judged by its URL or by keywords in its body preview.
 *
 * @param {object} page - Page to inspect.
 * @returns {Promise<{currentUrl: string, bodyText: string, isAuthPage: boolean}>}
 */
async function detectAuthRedirect(page) {
  const currentUrl = page.url();
  const bodyText = await getPageBodyPreview(page);
  const isAuthPage = isAuthUrl(currentUrl) || hasAuthKeywords(bodyText);
  return { currentUrl, bodyText, isAuthPage };
}
|
||
|
||
/**
 * Navigate to a dataset's page and wait until its heading is ready.
 *
 * @param {object} page - Page to drive.
 * @param {{url: string, heading: string}} dataset - Dataset descriptor.
 * @param {number} [timeout=120000] - Readiness timeout passed to `waitUntilReady`.
 * @param {object} [options={}] - Extra options passed to `waitUntilReady`.
 * @returns {Promise<void>}
 */
async function ensureDatasetAccessible(page, dataset, timeout = 120000, options = {}) {
  const { url, heading } = dataset;
  await page.goto(url, { waitUntil: 'domcontentloaded' });
  await waitUntilReady(page, heading, timeout, options);
}
|
||
|
||
/**
 * Interactive login flow.
 *
 * Opens the customers page, waits (up to ten minutes) for the user to finish
 * signing in, verifies the bills page is reachable as well, then saves the
 * storage state snapshot. The browser context is released afterwards.
 *
 * The browser is acquired inside the guarded section so that a launch failure
 * still removes the signal handlers and unbinds the hotkeys. On SIGINT/SIGTERM
 * the context is closed and the process exits with code 130 even if closing
 * fails.
 *
 * @returns {Promise<void>}
 * @throws Propagates browser start, navigation and readiness errors.
 */
export async function login() {
  const runtimeController = getRuntimeController();
  runtimeController.bind();

  const cleanupAndExit = async (signal) => {
    console.log(`[login] 收到 ${signal},正在保存登录态并关闭浏览器...`);
    try {
      await closeContextIfNeeded();
    } catch (error) {
      console.error(`[login] 关闭浏览器失败: ${error.message}`);
    } finally {
      process.exit(130);
    }
  };

  const onSigint = () => {
    void cleanupAndExit('SIGINT');
  };
  const onSigterm = () => {
    void cleanupAndExit('SIGTERM');
  };

  process.once('SIGINT', onSigint);
  process.once('SIGTERM', onSigterm);

  try {
    const context = await getContext();
    const page = await resolveActivePage(context, '/detail/my_customer/~/customer/list');
    await page.goto(datasets.customers.url, { waitUntil: 'domcontentloaded' });
    console.log('请在打开的浏览器里完成阿里云伙伴中心登录。检测到进入“我的客户”和“账单查询”页面后,脚本会自动保存登录态并关闭浏览器。');
    await waitUntilReady(page, datasets.customers.heading, 10 * 60 * 1000, { allowInteractiveAuth: true });
    console.log('[login] 我的客户页验证通过,继续验证账单页登录态...');
    await ensureDatasetAccessible(page, datasets.bills, 60 * 1000, { allowInteractiveAuth: true });
    await sleep(1000);
    await saveStorageState(context);
    console.log('登录态已写入 .browser 目录,且已验证“我的客户”和“账单查询”页面可访问,后续可直接执行 npm run sync 或 npm run bills。');
  } finally {
    process.off('SIGINT', onSigint);
    process.off('SIGTERM', onSigterm);
    try {
      await closeContextIfNeeded();
    } finally {
      // Restore the terminal even when closing the browser fails.
      runtimeController.unbind();
    }
  }
}
|
||
|
||
/**
 * Run a sync over every dataset.
 *
 * When `config.fullSync` is set, customers and customer details are synced
 * first; orders, bills and messages then run in full mode, otherwise in
 * incremental mode. Order details are fetched for the valid order ids found
 * in the stored orders state. A run summary is saved on success.
 *
 * The browser is acquired inside the guarded section so that a start failure
 * is reported and still releases the DB pool and the hotkey binding.
 *
 * @param {object} [options={}] - `resume` continues from checkpoints; the
 *   whole object is also forwarded to `syncOrderDetails`.
 * @returns {Promise<object>} Summary with per-dataset results and timestamps.
 * @throws Rethrows any sync error after reporting it.
 */
export async function syncAll(options = {}) {
  const runtimeController = getRuntimeController();
  runtimeController.bind();
  let page = null;
  const { resume = false } = options;

  try {
    const context = await getContext();
    const summary = { startedAt: new Date().toISOString(), datasets: {} };
    page = await resolveActivePage(context, '/detail/my_customer/~/customer/list');

    if (config.fullSync) {
      summary.datasets.customers = await syncCustomers(page, { resume });
      summary.datasets.customerDetails = await syncCustomerDetails(page, { resume });
    }

    summary.datasets.orders = await syncOrders(page, { incremental: !config.fullSync, resume });

    // Once syncOrders has finished, read the order ids from the stored orders state.
    const latestOrders = loadCurrentState('orders', datasets.orders.uniqueKey);
    const orderIdsForDetail = collectValidOrderIds(latestOrders.records || []);

    summary.datasets.orderDetails = await syncOrderDetails(page, orderIdsForDetail, options);
    summary.datasets.bills = await syncBills(page, { incremental: !config.fullSync, resume });
    summary.datasets.messages = await syncMessages(page, { incremental: !config.fullSync, resume });
    summary.finishedAt = new Date().toISOString();

    const stamp = nowStamp();
    saveRunSummary(stamp, summary);
    return summary;
  } catch (error) {
    await reportRuntimeError(error, page, { label: 'syncAll', dataset: 'all', mode: config.fullSync ? 'full' : 'incremental' });
    throw error;
  } finally {
    if (config.closeBrowser) {
      await closeContextIfNeeded();
    } else {
      console.log('浏览器保持运行');
    }
    await closeDbPool();
    runtimeController.unbind();
  }
}
|
||
|
||
/**
 * Sync only the bills dataset and save a run summary.
 *
 * The browser is acquired inside the guarded section so that a start failure
 * is reported and still releases the DB pool and the hotkey binding.
 *
 * @param {object} [options={}] - Forwarded to `syncBills`. `keepBrowserOpen`
 *   set to `true` leaves the browser running regardless of
 *   `config.closeBrowser`; `incremental` selects the reported mode.
 * @returns {Promise<object>} Summary with the bills result and timestamps.
 * @throws Rethrows any sync error after reporting it.
 */
export async function syncBillsOnly(options = {}) {
  const runtimeController = getRuntimeController();
  runtimeController.bind();
  let page = null;

  try {
    const context = await getContext();
    const summary = { startedAt: new Date().toISOString(), datasets: {} };
    page = await resolveActivePage(context, '/detail/bill/~/costCenter/bill');

    summary.datasets.bills = await syncBills(page, options);
    summary.finishedAt = new Date().toISOString();

    const stamp = nowStamp();
    saveRunSummary(stamp, summary);
    return summary;
  } catch (error) {
    await reportRuntimeError(error, page, { label: 'syncBillsOnly', dataset: 'bills', mode: options.incremental ? 'incremental' : 'full' });
    throw error;
  } finally {
    if (options.keepBrowserOpen === true) {
      console.log('浏览器保持运行(schedule bills)');
    } else if (config.closeBrowser) {
      await closeContextIfNeeded();
    } else {
      console.log('浏览器保持运行');
    }
    await closeDbPool();
    runtimeController.unbind();
  }
}
|
||
|
||
/**
 * Sync orders and then their details, and save a run summary.
 *
 * With `options.hot`, details are fetched only for the candidates chosen by
 * `selectOrderDetailCandidates`; otherwise for every valid order id in the
 * stored orders state.
 *
 * The browser is acquired inside the guarded section so that a start failure
 * is reported and still releases the DB pool and the hotkey binding.
 *
 * @param {object} [options={}] - Forwarded to `syncOrders`; `hot` and
 *   `incremental` are also read here.
 * @returns {Promise<object>} Summary with orders/orderDetails results.
 * @throws Rethrows any sync error after reporting it.
 */
export async function syncOrdersOnly(options = {}) {
  const runtimeController = getRuntimeController();
  runtimeController.bind();
  let page = null;

  try {
    const context = await getContext();
    const summary = { startedAt: new Date().toISOString(), datasets: {} };
    page = await resolveActivePage(context, '/detail/order/~/costCenter/order');
    const orderSyncResult = await syncOrders(page, options);
    summary.datasets.orders = orderSyncResult;
    const latestOrders = loadCurrentState('orders', datasets.orders.uniqueKey);
    const orderDetailsState = loadCurrentState('orderDetails', datasets.orderDetails.uniqueKey);
    const orderIdsForDetail = options.hot
      ? selectOrderDetailCandidates(latestOrders.records || [], orderSyncResult.changedOrderIds || [], orderDetailsState.records || [])
      : collectValidOrderIds(latestOrders.records || []);
    summary.datasets.orderDetails = await syncOrderDetails(page, orderIdsForDetail);
    summary.finishedAt = new Date().toISOString();

    const stamp = nowStamp();
    saveRunSummary(stamp, summary);
    return summary;
  } catch (error) {
    await reportRuntimeError(error, page, { label: 'syncOrdersOnly', dataset: 'orders', mode: options.incremental ? 'incremental' : 'full' });
    throw error;
  } finally {
    if (config.closeBrowser) {
      await closeContextIfNeeded();
    } else {
      console.log('浏览器保持运行');
    }
    await closeDbPool();
    runtimeController.unbind();
  }
}
|
||
|
||
/**
 * Sync only the messages dataset and save a run summary.
 *
 * The browser is acquired inside the guarded section so that a start failure
 * is reported and still releases the DB pool and the hotkey binding.
 *
 * @param {object} [options={}] - Forwarded to `syncMessages`; `incremental`
 *   selects the reported mode.
 * @returns {Promise<object>} Summary with the messages result and timestamps.
 * @throws Rethrows any sync error after reporting it.
 */
export async function syncMessagesOnly(options = {}) {
  const runtimeController = getRuntimeController();
  runtimeController.bind();
  let page = null;

  try {
    const context = await getContext();
    const summary = { startedAt: new Date().toISOString(), datasets: {} };
    page = await resolveActivePage(context, '/message');
    summary.datasets.messages = await syncMessages(page, options);
    summary.finishedAt = new Date().toISOString();

    const stamp = nowStamp();
    saveRunSummary(stamp, summary);
    return summary;
  } catch (error) {
    await reportRuntimeError(error, page, { label: 'syncMessagesOnly', dataset: 'messages', mode: options.incremental ? 'incremental' : 'full' });
    throw error;
  } finally {
    if (config.closeBrowser) {
      await closeContextIfNeeded();
    } else {
      console.log('浏览器保持运行');
    }
    await closeDbPool();
    runtimeController.unbind();
  }
}
|
||
|
||
/**
 * Start the cron-driven scheduler.
 *
 * Two schedules are registered, both in `config.timezone`:
 * - `config.cron`: in `hot` schedule mode runs an incremental bills sync;
 *   otherwise runs `syncAll()` (`full` mode) or `syncAllIncremental()`.
 * - `config.hotCron`: runs `syncHot()` in `hot` schedule mode only.
 *
 * Every tick runs under the shared `schedule-shared` lock, including the
 * non-hot path, so a tick that fires while a previous run is still in
 * progress is skipped instead of driving the shared browser concurrently.
 * A heartbeat line is logged every minute. Job failures are logged and never
 * propagate out of the cron callbacks.
 *
 * @returns {Promise<void>}
 */
export async function scheduleSync() {
  console.log(`定时任务已启动: bills=${config.cron}, hot=${config.hotCron} (${config.timezone})`);
  setInterval(() => {
    console.log(`[${new Date().toISOString()}] 定时守护存活中: bills=${config.cron}, hot=${config.hotCron}, mode=${config.scheduleMode}`);
  }, 60 * 1000);

  // Incremental bills sync used by the main schedule in hot mode.
  const runScheduledBills = async () => {
    try {
      recordScheduleEvent({ track: 'bills', status: 'started', mode: 'bills-incremental' });
      console.log(`[${new Date().toISOString()}] 开始执行账单定时同步 mode=bills-incremental`);
      const summary = await syncBillsOnly({ incremental: true, keepBrowserOpen: true });
      recordScheduleEvent({ track: 'bills', status: 'completed', mode: 'bills-incremental', summary });
      console.log(`[${new Date().toISOString()}] 账单定时同步完成`, JSON.stringify(summary, null, 2));
    } catch (error) {
      recordScheduleEvent({ track: 'bills', status: 'failed', mode: 'bills-incremental', error: error.message });
      console.error(`[${new Date().toISOString()}] 账单定时同步失败`, error);
    }
  };

  // Full or incremental sync used by the main schedule outside hot mode.
  const runScheduledSync = async () => {
    try {
      console.log(`[${new Date().toISOString()}] 开始执行同步 mode=${config.scheduleMode}`);
      const summary = config.scheduleMode === 'full'
        ? await syncAll()
        : await syncAllIncremental();
      console.log(`[${new Date().toISOString()}] 同步完成`, JSON.stringify(summary, null, 2));
    } catch (error) {
      console.error(`[${new Date().toISOString()}] 同步失败`, error);
    }
  };

  // High-frequency sync used by the hot schedule.
  const runScheduledHot = async () => {
    try {
      recordScheduleEvent({ track: 'hot', status: 'started', mode: 'hot' });
      console.log(`[${new Date().toISOString()}] 开始执行高频同步 mode=hot`);
      const summary = await syncHot({ keepBrowserOpen: true });
      if (summary?.skipped) {
        recordScheduleEvent({ track: 'hot', status: 'skipped', mode: 'hot', reason: summary.reason || 'already_running' });
      } else {
        recordScheduleEvent({ track: 'hot', status: 'completed', mode: 'hot', summary });
      }
      console.log(`[${new Date().toISOString()}] 高频同步完成`, JSON.stringify(summary, null, 2));
    } catch (error) {
      recordScheduleEvent({ track: 'hot', status: 'failed', mode: 'hot', error: error.message });
      console.error(`[${new Date().toISOString()}] 高频同步失败`, error);
    }
  };

  cron.schedule(
    config.cron,
    async () => {
      const job = config.scheduleMode === 'hot' ? runScheduledBills : runScheduledSync;
      return runLockedJob('schedule-shared', job);
    },
    { timezone: config.timezone },
  );

  cron.schedule(
    config.hotCron,
    async () => {
      if (config.scheduleMode !== 'hot') {
        return;
      }
      return runLockedJob('schedule-shared', runScheduledHot);
    },
    { timezone: config.timezone },
  );
}
|
||
|
||
/**
 * High-frequency sync: incremental orders, details for selected orders,
 * incremental messages and the newest customer. Runs under the `hot-sync`
 * lock, so an overlapping call returns the lock's "skipped" result.
 *
 * The browser is acquired inside the guarded section so that a start failure
 * is reported and still releases the DB pool and the hotkey binding.
 *
 * @param {object} [options={}] - `resume === true` continues from
 *   checkpoints; `keepBrowserOpen === true` leaves the browser running
 *   regardless of `config.closeBrowser`. The object is also spread into the
 *   options passed to `syncOrders`.
 * @returns {Promise<object>} Run summary, or the skipped marker from the lock.
 * @throws Rethrows any sync error after reporting it.
 */
export async function syncHot(options = {}) {
  return runLockedJob('hot-sync', async () => {
    const runtimeController = getRuntimeController();
    runtimeController.bind();
    let page = null;
    const resume = options.resume === true;

    try {
      const context = await getContext();
      const summary = { startedAt: new Date().toISOString(), mode: 'hot', datasets: {} };
      page = await resolveActivePage(context, '/detail/order/~/costCenter/order');
      const orderSyncResult = await syncOrders(page, { ...options, hot: true, incremental: true, resume });
      summary.datasets.orders = orderSyncResult;

      const latestOrders = loadCurrentState('orders', datasets.orders.uniqueKey);
      const orderDetailsState = loadCurrentState('orderDetails', datasets.orderDetails.uniqueKey);
      const orderIdsForDetail = selectOrderDetailCandidates(
        latestOrders.records || [],
        orderSyncResult.changedOrderIds || [],
        orderDetailsState.records || [],
      );

      summary.datasets.orderDetails = await syncOrderDetails(page, orderIdsForDetail, { resume });
      page = await resolveActivePage(context, '/message');
      summary.datasets.messages = await syncMessages(page, { incremental: true, resume, hot: true });
      page = await resolveActivePage(context, '/detail/my_customer/~/customer/list');
      summary.datasets.customerHot = await syncOneCustomerHot(page, { resume });
      summary.finishedAt = new Date().toISOString();

      const stamp = nowStamp();
      saveRunSummary(stamp, summary);
      return summary;
    } catch (error) {
      await reportRuntimeError(error, page, { label: 'syncHot', dataset: 'hot', mode: 'hot' });
      throw error;
    } finally {
      if (options.keepBrowserOpen === true) {
        console.log('浏览器保持运行(schedule hot)');
      } else if (config.closeBrowser) {
        await closeContextIfNeeded();
      } else {
        console.log('浏览器保持运行');
      }
      await closeDbPool();
      runtimeController.unbind();
    }
  });
}
|
||
|
||
/**
 * Hot-path customer sync: look at the first page of the customer list, take
 * the first row that has an account id, and store that customer plus its
 * detail if it is not already known.
 *
 * Outcomes:
 * - `{ skipped: true, reason: 'no_customer_found' }` when no row has an id,
 * - `{ skipped: true, reason: 'customer_exists', accountId }` when known,
 * - `{ skipped: false, inserted: true, accountId, detail }` otherwise, where
 *   `detail` is `'ok'`, `'click_failed'` or `'extract_failed:<message>'`.
 *
 * @param {object} page - Page to drive.
 * @param {object} [options={}] - Accepted for signature parity; not read here.
 * @returns {Promise<object>}
 */
async function syncOneCustomerHot(page, options = {}) {
  await runtimeCheckpoint('高频同步客户');
  const dataset = datasets.customers;
  await page.goto(dataset.url, { waitUntil: 'domcontentloaded' });
  await waitUntilReady(page, dataset.heading);
  await trySetPageSize(page, dataset.pageSize);

  const pageData = await extractTable(page);
  const firstPageRows = normalizeDatasetRecords(dataset, pageData.rows || [], { pageNum: 1 });
  const target = firstPageRows.find((row) => String(row.accountId || '').trim());
  if (!target) {
    return { skipped: true, reason: 'no_customer_found' };
  }

  const alreadyKnown = await customerExists(target.accountId);
  if (alreadyKnown) {
    console.log(`[客户高频] accountId=${target.accountId} 已存在,停止本轮客户抓取`);
    return { skipped: true, reason: 'customer_exists', accountId: target.accountId };
  }

  // The list row is stored first; the detail fetch below is best-effort.
  await upsertCustomers([target]);
  const insertedWith = (detail) => ({ skipped: false, inserted: true, accountId: target.accountId, detail });

  const clicked = await clickCustomerDetailFromListWithRetry(page, target);
  if (!clicked) {
    return insertedWith('click_failed');
  }

  try {
    await page.waitForFunction(
      (text) => document.body && document.body.innerText.includes(text),
      '详情',
      { timeout: 15000 },
    );
    await sleep(1000);
    const detail = await extractCustomerDetail(page);
    const detailInput = { ...detail, accountId: target.accountId, loginName: target.loginName };
    const normalizedDetail = normalizeDatasetRecords(datasets.customerDetails, [detailInput], { accountId: target.accountId });
    await upsertCustomerDetails(normalizedDetail);
    return insertedWith('ok');
  } catch (error) {
    return insertedWith(`extract_failed:${error.message}`);
  }
}
|
||
|
||
/**
 * Incremental sync of orders, order details, bills and messages, always
 * resuming from checkpoints. A run summary is saved on success.
 *
 * The browser is acquired inside the guarded section so that a start failure
 * is reported and still releases the DB pool and the hotkey binding.
 *
 * @returns {Promise<object>} Summary with per-dataset results and timestamps.
 * @throws Rethrows any sync error after reporting it.
 */
export async function syncAllIncremental() {
  const runtimeController = getRuntimeController();
  runtimeController.bind();
  let page = null;

  try {
    const context = await getContext();
    const summary = { startedAt: new Date().toISOString(), mode: 'incremental', datasets: {} };
    page = await resolveActivePage(context, '/detail/order/~/costCenter/order');
    const orderSyncResult = await syncOrders(page, { incremental: true, resume: true });
    summary.datasets.orders = orderSyncResult;
    const latestOrders = loadCurrentState('orders', datasets.orders.uniqueKey);
    const orderIdsForDetail = collectValidOrderIds(latestOrders.records || []);
    summary.datasets.orderDetails = await syncOrderDetails(page, orderIdsForDetail, { resume: true });
    summary.datasets.bills = await syncBills(page, { incremental: true, resume: true });
    summary.datasets.messages = await syncMessages(page, { incremental: true, resume: true });
    summary.finishedAt = new Date().toISOString();

    const stamp = nowStamp();
    saveRunSummary(stamp, summary);
    return summary;
  } catch (error) {
    await reportRuntimeError(error, page, { label: 'syncAllIncremental', dataset: 'incremental', mode: 'incremental' });
    throw error;
  } finally {
    if (config.closeBrowser) {
      await closeContextIfNeeded();
    } else {
      console.log('浏览器保持运行');
    }
    await closeDbPool();
    runtimeController.unbind();
  }
}
|
||
|
||
/**
 * Scrapes the paged customer list, upserting and checkpointing page by page,
 * then persists the de-duplicated result.
 *
 * @param {object} page - Playwright page used for navigation and scraping.
 * @param {object} [options]
 * @param {boolean} [options.resume=false] - Continue after the page recorded in the latest checkpoint.
 * @returns {Promise<*>} Whatever `persistDataset` returns for the customers dataset.
 */
async function syncCustomers(page, options = {}) {
  await runtimeCheckpoint('同步客户');
  const dataset = datasets.customers;
  const { resume = false } = options;

  await page.goto(dataset.url, { waitUntil: 'domcontentloaded' });
  await waitUntilReady(page, dataset.heading);
  await trySetPageSize(page, dataset.pageSize);

  const checkpoint = resume ? loadLatestCustomersCheckpoint() : null;
  const resumeFromPage = Number.parseInt(String(checkpoint?.pageNum || 0), 10) || 0;
  const resuming = resumeFromPage > 0;
  let collected = Array.isArray(checkpoint?.records) ? checkpoint.records : [];
  let keepScraping = true;

  if (resuming) {
    console.log(`[客户续爬] 从 checkpoint 恢复: page=${resumeFromPage}, records=${collected.length}`);
    const moved = await moveCustomersToResumeStart(page, resumeFromPage);
    if (!moved) {
      // No page after the checkpointed one: everything was already collected.
      console.log('[客户续爬] checkpoint 已在最后一页,无需继续抓取');
      keepScraping = false;
    }
  }

  // Per-page hook: accumulate, upsert this page, then checkpoint everything so far.
  const handlePage = async ({ pageNum, pageRows }) => {
    const normalizedRows = normalizeDatasetRecords(dataset, pageRows, {});
    collected.push(...normalizedRows);
    if (hasDbConfig()) {
      await upsertCustomers(normalizedRows);
    }
    await saveCustomersCheckpoint(dataset, pageNum, collected);
  };

  const scraped = keepScraping
    ? await scrapePagedTable(page, dataset, {}, { onPage: handlePage, skipInitialPage: resuming })
    : [];

  if (!resuming) {
    // Fresh run: the final record set is rebuilt from the scraper's return value.
    collected = normalizeDatasetRecords(dataset, scraped, {});
    if (hasDbConfig()) {
      await upsertCustomers(dedupeByHash(collected));
    }
  }

  return persistDataset(dataset, dedupeByHash(collected), {});
}
|
||
|
||
/**
 * Opens the detail view of every customer found in the persisted customer state,
 * extracts the detail record, and checkpoints / upserts after each customer.
 *
 * Customers whose list page cannot be reached, whose row cannot be clicked, or whose
 * detail view does not load within 15s are skipped (no checkpoint is written for them).
 *
 * @param {object} page - Playwright page used for navigation and scraping.
 * @param {object} [options]
 * @param {boolean} [options.resume] - Start from the index stored in the latest checkpoint.
 * @returns {Promise<*>} Whatever `persistDataset` returns for the customer-details dataset.
 */
async function syncCustomerDetails(page, options = {}) {
  await runtimeCheckpoint('同步客户详情');
  const dataset = datasets.customerDetails;
  const customersState = loadCurrentState('customers', datasets.customers.uniqueKey);
  const customerTargets = collectCustomerDetailTargets(customersState.records || []);
  const resumeCheckpoint = options.resume ? loadLatestCustomerDetailsCheckpoint() : null;

  if (customerTargets.length === 0) {
    console.log('[客户详情] 本地无有效客户定位信息,跳过');
    return persistDataset(dataset, [], {});
  }

  console.log(`[客户详情] 共 ${customerTargets.length} 个客户需要获取详情`);
  const allDetails = Array.isArray(resumeCheckpoint?.records) ? resumeCheckpoint.records : [];
  const startIndex = Number.parseInt(String(resumeCheckpoint?.currentIndex || 0), 10) || 0;
  let currentListPage = 0;
  if (startIndex > 0) {
    console.log(`[客户详情续爬] 从 checkpoint 恢复: index=${startIndex}, records=${allDetails.length}`);
  }

  // Best-effort return from a detail view to the list; failures are deliberately ignored.
  const returnToList = async () => {
    await page.goBack({ waitUntil: 'domcontentloaded' }).catch(() => null);
    await waitForStableCustomerList(page).catch(() => null);
  };

  await page.goto(datasets.customers.url, { waitUntil: 'domcontentloaded' });
  await waitUntilReady(page, datasets.customers.heading);
  await trySetPageSize(page, datasets.customers.pageSize);

  for (let index = startIndex; index < customerTargets.length; index += 1) {
    await runtimeCheckpoint(`客户详情 ${index + 1}/${customerTargets.length}`);
    const target = customerTargets[index];
    console.log(`[客户详情] ${index + 1}/${customerTargets.length} accountId=${target.accountId} page=${target.pageNum}`);

    // Randomised pause between customers.
    const pauseMs = randomIntBetween(1000, 3000);
    console.log(`[客户详情] 随机等待 ${pauseMs}ms 后继续`);
    await sleep(pauseMs);

    // Only jump when the target lives on a different list page than the one currently shown.
    if (target.pageNum > 0 && currentListPage !== target.pageNum) {
      const reached = await jumpToCustomerPage(page, target.pageNum);
      if (!reached) {
        console.warn(`[客户详情] 无法跳到第 ${target.pageNum} 页,跳过 ${target.accountId}`);
        continue;
      }
      currentListPage = target.pageNum;
    }

    const clicked = await clickCustomerDetailFromListWithRetry(page, target);
    if (!clicked) {
      console.warn(`[客户详情] 列表中未找到 accountId=${target.accountId},跳过`);
      continue;
    }

    try {
      await page.waitForFunction(
        (text) => document.body && document.body.innerText.includes(text),
        '详情',
        { timeout: 15000 },
      );
      await sleep(1000);
    } catch {
      console.warn(`[客户详情] ${target.accountId} 详情页加载超时,跳过`);
      await returnToList();
      continue;
    }

    const detail = await extractCustomerDetail(page);
    // `__context` carries the accountId through to normalizeDatasetRecords.
    const detailRecord = { ...detail, __context: { accountId: target.accountId } };
    allDetails.push(detailRecord);
    await saveCustomerDetailsCheckpoint(dataset, index + 1, allDetails);
    if (hasDbConfig()) {
      // Normalise a copy so the accumulated raw record cannot be altered by normalisation.
      const normalizedDetail = normalizeDatasetRecords(dataset, [{ ...detailRecord }], {});
      await upsertCustomerDetails(normalizedDetail);
    }

    await returnToList();
    currentListPage = target.pageNum;
  }

  return persistDataset(dataset, dedupeByHash(allDetails), {});
}
|
||
|
||
/**
 * Scrapes orders for one or more date windows, upserting and checkpointing page by
 * page, then persists the de-duplicated result.
 *
 * Window selection: `hot` uses today's window, `incremental` uses
 * `buildIncrementalOrderWindows()`, otherwise monthly windows from `config.orderStartDate`.
 * In hot mode scanning stops early on a page cap or once enough consecutive
 * unchanged rows / pages have been seen.
 *
 * @param {object} page - Playwright page used for navigation and scraping.
 * @param {object} [options]
 * @param {boolean} [options.incremental=false]
 * @param {boolean} [options.resume=false] - Continue from the latest orders checkpoint.
 * @param {boolean} [options.hot=false]
 * @returns {Promise<object>} The persisted-dataset result plus `changedOrderIds` and,
 *   in hot mode, the `hot` statistics object (otherwise `hot` is `undefined`).
 */
async function syncOrders(page, options = {}) {
  await runtimeCheckpoint('同步订单');
  const dataset = datasets.orders;
  const { incremental = false, resume = false, hot = false } = options;

  let windows;
  if (hot) {
    windows = buildTodayOrderWindow();
  } else if (incremental) {
    windows = await buildIncrementalOrderWindows();
  } else {
    windows = buildMonthlyDateWindows(config.orderStartDate);
  }

  // When resuming, drop every window that precedes the checkpointed one.
  const checkpoint = resume ? loadLatestOrdersCheckpoint() : null;
  if (checkpoint?.windowStart) {
    const checkpointIndex = windows.findIndex(
      (candidate) => candidate.start === checkpoint.windowStart && candidate.end === checkpoint.windowEnd,
    );
    if (checkpointIndex >= 0) {
      windows = windows.slice(checkpointIndex);
      console.log(`[订单续爬] 从 checkpoint 恢复: ${checkpoint.windowStart} ~ ${checkpoint.windowEnd}, page=${checkpoint.pageNum || 1}, records=${(checkpoint.records || []).length}`);
    }
  }

  const previousRecords = loadCurrentState(dataset.name, dataset.uniqueKey).records || [];
  const previousOrderMap = new Map();
  for (const record of previousRecords) {
    previousOrderMap.set(String(record.orderId || '').trim(), record);
  }

  const collected = [];
  const hotStats = {
    pagesScanned: 0,
    stableRows: 0,
    newRows: 0,
    changedRows: 0,
    stoppedEarly: false,
    stopReason: '',
  };
  const stopEarly = (reason) => {
    hotStats.stoppedEarly = true;
    hotStats.stopReason = reason;
    return true;
  };

  for (const dateWindow of windows) {
    await runtimeCheckpoint(`订单窗口 ${dateWindow.start} ~ ${dateWindow.end}`);
    await page.goto(dataset.url, { waitUntil: 'domcontentloaded' });
    await waitUntilReady(page, dataset.heading);
    await setDateRange(page, dateWindow.start, dateWindow.end);
    await clickQuery(page);
    await trySetPageSize(page, dataset.pageSize);

    let windowRecords = [];
    let resumeFromPage = 0;
    let keepScraping = true;

    const isCheckpointWindow = checkpoint?.windowStart === dateWindow.start
      && checkpoint?.windowEnd === dateWindow.end;
    if (isCheckpointWindow) {
      windowRecords = Array.isArray(checkpoint.records) ? checkpoint.records : [];
      resumeFromPage = Number.parseInt(String(checkpoint.pageNum || 0), 10) || 0;
      if (resumeFromPage > 0) {
        const moved = await moveOrdersToResumeStart(page, resumeFromPage);
        if (!moved) {
          console.log(`[订单续爬] checkpoint 已在最后一页,无需继续抓取 window=${dateWindow.start}~${dateWindow.end}`);
          keepScraping = false;
        }
      }
    }

    // Hot-mode stability counters, reset for every window.
    let stableRowsInARow = 0;
    let stablePagesInARow = 0;

    const handlePage = async ({ pageNum, pageRows }) => {
      const normalizedRows = normalizeDatasetRecords(dataset, pageRows, dateWindow);
      windowRecords.push(...normalizedRows);
      if (hasDbConfig()) {
        await upsertOrders(normalizedRows);
      }
      await saveOrdersCheckpoint(dataset, dateWindow, pageNum, windowRecords);

      if (!hot) {
        return;
      }

      hotStats.pagesScanned += 1;
      const { stableCount, newCount, changedCount } = summarizeHotPage(previousOrderMap, normalizedRows);
      hotStats.stableRows += stableCount;
      hotStats.newRows += newCount;
      hotStats.changedRows += changedCount;

      const pageHasNoChanges = changedCount === 0 && newCount === 0;
      stablePagesInARow = pageHasNoChanges ? stablePagesInARow + 1 : 0;

      const everyRowStable = stableCount === normalizedRows.length && normalizedRows.length > 0;
      stableRowsInARow = everyRowStable ? stableRowsInARow + normalizedRows.length : 0;
    };

    const hotShouldStop = async ({ pageNum }) => {
      if (pageNum >= config.hotOrderMaxPagesPerRun) {
        return stopEarly(`max_pages:${config.hotOrderMaxPagesPerRun}`);
      }
      if (stableRowsInARow >= config.hotOrderStableThreshold) {
        return stopEarly(`stable_rows:${stableRowsInARow}`);
      }
      if (stablePagesInARow >= config.hotOrderStablePageThreshold) {
        return stopEarly(`stable_pages:${stablePagesInARow}`);
      }
      return false;
    };

    // NOTE(review): unlike the customers and messages syncs, no `skipInitialPage` is
    // passed here after a resume jump — confirm this is intended.
    const scraped = keepScraping
      ? await scrapePagedTable(page, dataset, dateWindow, {
        onPage: handlePage,
        shouldStop: hot ? hotShouldStop : undefined,
      })
      : [];

    if (resumeFromPage === 0) {
      // Fresh window: rebuild the window's records from the scraper's return value.
      windowRecords = normalizeDatasetRecords(dataset, scraped, dateWindow);
      if (hasDbConfig()) {
        await upsertOrders(dedupeByHash(windowRecords));
      }
    }

    collected.push(...windowRecords);
  }

  const normalizedRecords = dedupeByHash(collected);
  const changedOrderIds = computeChangedOrderIds(previousRecords, normalizedRecords);
  const persisted = persistNormalizedDataset(dataset, normalizedRecords);

  return {
    ...persisted,
    changedOrderIds,
    hot: hot ? hotStats : undefined,
  };
}
|
||
|
||
/**
 * Chooses the date window(s) for an incremental order sync.
 *
 * Precedence:
 *  1. `config.incrementalOrderStartDate`, when set: monthly windows from that date.
 *  2. No database configured: yesterday only.
 *  3. Database has no parseable latest order time: today only.
 *  4. Otherwise: from (latest order time - `config.orderIncrementalOverlapDays`) to today.
 *
 * @returns {Promise<*>} The window list produced by the window-builder helpers.
 * @throws {Error} When the configured start date is malformed (from `normalizeConfiguredDate`).
 */
async function buildIncrementalOrderWindows() {
  const configuredStartDate = normalizeConfiguredDate(config.incrementalOrderStartDate);
  if (configuredStartDate) {
    const monthlyWindows = buildMonthlyDateWindows(configuredStartDate);
    console.log(`[增量模式] 订单从指定日期开始查询: ${configuredStartDate}`);
    return monthlyWindows;
  }

  if (!hasDbConfig()) {
    const yesterday = new Date();
    yesterday.setDate(yesterday.getDate() - 1);
    const day = formatDate(yesterday);
    console.log(`[增量模式] 未配置数据库,订单仅查询: ${day}`);
    return buildSingleDateWindow(day, day);
  }

  const latestOrderTime = await getLatestOrderTimeFromDb();
  const runAt = new Date();
  const latestParsed = parseDbDateTime(latestOrderTime);

  if (latestParsed) {
    const overlapDays = config.orderIncrementalOverlapDays;
    const startDate = formatDate(subtractDays(latestParsed, overlapDays));
    const endDate = formatDate(runAt);
    console.log(`[增量模式] 订单窗口: ${startDate} ~ ${endDate} (db_last=${latestOrderTime}, overlap=${config.orderIncrementalOverlapDays}d)`);
    return buildSingleDateWindow(startDate, endDate);
  }

  const today = formatDate(runAt);
  console.log(`[增量模式] 数据库无订单水位,订单仅查询当天: ${today}`);
  return buildSingleDateWindow(today, today);
}
|
||
|
||
/**
 * Validates an optional `YYYY-MM-DD` start date taken from configuration.
 *
 * The calendar check compares the parsed components against a round-tripped Date
 * instead of relying on `new Date(string)`: lenient parsers roll an out-of-range day
 * such as `2024-02-31` over into the next month rather than yielding Invalid Date.
 *
 * @param {*} value - Raw configured value; falsy or blank values mean "not configured".
 * @returns {string} The trimmed date string, or `''` when not configured.
 * @throws {Error} When the value is not `YYYY-MM-DD` or is not a real calendar date.
 */
function normalizeConfiguredDate(value) {
  const normalized = String(value || '').trim();
  if (!normalized) {
    return '';
  }

  const match = /^(\d{4})-(\d{2})-(\d{2})$/.exec(normalized);
  if (!match) {
    throw new Error(`ALIYUN_APS_INCREMENTAL_ORDER_START_DATE 格式无效: ${normalized},期望 YYYY-MM-DD`);
  }

  const year = Number(match[1]);
  const month = Number(match[2]);
  const day = Number(match[3]);

  // setUTCFullYear (unlike Date.UTC) does not remap years 0-99 to 1900-1999.
  const probe = new Date(0);
  probe.setUTCFullYear(year, month - 1, day);
  const isRealDate = probe.getUTCFullYear() === year
    && probe.getUTCMonth() === month - 1
    && probe.getUTCDate() === day;
  if (!isRealDate) {
    throw new Error(`ALIYUN_APS_INCREMENTAL_ORDER_START_DATE 不是有效日期: ${normalized}`);
  }

  return normalized;
}
|
||
|
||
/**
 * Scrapes bills month by month, upserting and checkpointing page by page,
 * then persists the de-duplicated result.
 *
 * Full mode covers every month from `config.billStartMonth`. Incremental mode starts
 * at the month of the bill watermark returned by `getLatestBillConsumptionDate()`
 * (or the current month when there is none) and keeps only records whose
 * consumption date is after that watermark.
 *
 * @param {object} page - Playwright page used for navigation and scraping.
 * @param {object} [options]
 * @param {boolean} [options.resume=false] - Continue from the latest bills checkpoint.
 * @param {boolean} [options.incremental=false]
 * @returns {Promise<*>} Whatever `persistNormalizedDataset` returns for the bills dataset.
 */
async function syncBills(page, options = {}) {
  await runtimeCheckpoint('同步账单');
  const dataset = datasets.bills;
  const { resume = false, incremental = false } = options;

  let months;
  let latestConsumptionDate = null;

  if (incremental) {
    latestConsumptionDate = await getLatestBillConsumptionDate();
    const startDate = latestConsumptionDate ? latestConsumptionDate.slice(0, 10) : formatDate(new Date());
    const endDate = formatDate(new Date());
    const startMonth = startDate.slice(0, 7);
    const endMonth = endDate.slice(0, 7);
    months = buildMonthList(startMonth).filter((candidate) => candidate <= endMonth);
    const watermarkNote = latestConsumptionDate ? `, 数据库最新消费时间: ${latestConsumptionDate}` : '';
    console.log(`[增量模式] 账单窗口: ${startDate} ~ ${endDate}${watermarkNote}`);
  } else {
    months = buildMonthList(config.billStartMonth);
  }

  // When resuming, drop every month that precedes the checkpointed one.
  const checkpoint = resume ? loadLatestBillsCheckpoint() : null;
  if (checkpoint?.month) {
    const checkpointIndex = months.indexOf(checkpoint.month);
    if (checkpointIndex >= 0) {
      months = months.slice(checkpointIndex);
      console.log(`[账单续爬] 从 checkpoint 恢复: month=${checkpoint.month}, page=${checkpoint.pageNum || 1}, records=${(checkpoint.records || []).length}`);
    }
  }

  // Keeps only records whose consumption date is after the incremental watermark.
  const keepAfterWatermark = (records) => records.filter(
    (record) => isAfterLatestConsumptionDate(record, latestConsumptionDate),
  );

  const collected = [];

  for (const month of months) {
    await runtimeCheckpoint(`账单月份 ${month}`);
    await page.goto(dataset.url, { waitUntil: 'domcontentloaded' });
    await waitUntilReady(page, dataset.heading);
    await setMonthValue(page, month);
    await clickQuery(page);
    await trySetPageSize(page, dataset.pageSize);

    let monthRecords = [];
    let resumeFromPage = 0;
    let keepScraping = true;

    if (checkpoint?.month === month) {
      monthRecords = Array.isArray(checkpoint.records) ? checkpoint.records : [];
      resumeFromPage = Number.parseInt(String(checkpoint.pageNum || 0), 10) || 0;
      if (resumeFromPage > 0) {
        const moved = await moveBillsToResumeStart(page, resumeFromPage);
        if (!moved) {
          console.log(`[账单续爬] checkpoint 已在最后一页,无需继续抓取 month=${month}`);
          keepScraping = false;
        }
      }
    }

    const handlePage = async ({ pageNum, pageRows }) => {
      const normalizedRows = normalizeDatasetRecords(dataset, pageRows, { month });
      monthRecords.push(...normalizedRows);
      if (hasDbConfig()) {
        await upsertBills(normalizedRows);
      }
      // The checkpoint stores only records newer than the watermark (when there is one).
      const checkpointRecords = latestConsumptionDate ? keepAfterWatermark(monthRecords) : monthRecords;
      await saveBillsCheckpoint(dataset, month, pageNum, checkpointRecords);
    };

    // NOTE(review): unlike the customers and messages syncs, no `skipInitialPage` is
    // passed here after a resume jump — confirm this is intended.
    const rawRecords = keepScraping
      ? await scrapePagedTable(page, dataset, { month }, { onPage: handlePage })
      : [];

    if (resumeFromPage === 0) {
      // Fresh month: rebuild the month's records from the scraper's return value.
      monthRecords = normalizeDatasetRecords(dataset, rawRecords, { month });
    }

    if (latestConsumptionDate) {
      const before = monthRecords.length;
      monthRecords = keepAfterWatermark(monthRecords);
      console.log(`[增量模式] 账单按消费时间过滤: ${before} -> ${monthRecords.length}`);
    }

    collected.push(...monthRecords);
  }

  return persistNormalizedDataset(dataset, dedupeByHash(collected));
}
|
||
|
||
/**
 * Scrapes the paged message list (enriching each page with message details),
 * upserts rows that are not already stored, checkpoints after every page, and
 * finally merges the result into the persisted message state.
 *
 * Modes:
 *  - full (neither `incremental` nor `hot`): keeps only messages from the last three
 *    months and stops once a whole page is older than that.
 *  - `hot`: stops on a page cap, on a page consisting only of stored messages, or on
 *    a page entirely older than (DB latest - `config.hotMessageOverlapMinutes`).
 *  - `incremental`: no early stop; results are filtered by the DB watermark afterwards.
 *
 * @param {object} page - Playwright page used for navigation and scraping.
 * @param {object} [options]
 * @param {boolean} [options.incremental=false]
 * @param {boolean} [options.resume=false] - Continue after the page in the latest checkpoint.
 * @param {boolean} [options.hot=false]
 * @returns {Promise<*>} Whatever `persistNormalizedDataset` returns for the messages dataset.
 */
async function syncMessages(page, options = {}) {
  await runtimeCheckpoint('同步消息');
  const dataset = datasets.messages;
  const { incremental = false, resume = false, hot = false } = options;
  const fullSyncWatermark = incremental || hot ? null : subtractMonths(new Date(), 3);

  await page.goto(dataset.url, { waitUntil: 'domcontentloaded' });
  await waitUntilReady(page, dataset.heading);
  await trySetPageSize(page, dataset.pageSize);

  const checkpoint = resume ? loadLatestMessagesCheckpoint() : null;
  const resumeFromPage = Number.parseInt(String(checkpoint?.pageNum || 0), 10) || 0;
  const resuming = resumeFromPage > 0;
  let collected = Array.isArray(checkpoint?.records) ? checkpoint.records : [];
  let keepScraping = true;

  // Predicate factory: "is this record at or after the given watermark?"
  const notOlderThan = (watermark) => (record) => isAfterLatestMessageTime(record, watermark);

  // Full mode only: true when the page is non-empty and every row is older than the watermark.
  const pageIsOutsideFullSyncWindow = (pageRows) => {
    if (!fullSyncWatermark) {
      return false;
    }
    const normalizedRows = normalizeDatasetRecords(dataset, pageRows, {});
    const pageTimeStats = getMessagePageTimeStats(normalizedRows);
    console.log(`[全量模式] 当前页时间范围: parsed=${pageTimeStats.parsed}/${pageTimeStats.total}, earliest=${pageTimeStats.earliest || 'N/A'}, latest=${pageTimeStats.latest || 'N/A'}, watermark=${formatDateTime(fullSyncWatermark)}`);
    return normalizedRows.length > 0 && !normalizedRows.some(notOlderThan(fullSyncWatermark));
  };

  if (resuming) {
    console.log(`[消息续爬] 从 checkpoint 恢复: page=${resumeFromPage}, records=${collected.length}`);
    const moved = await moveMessagesToResumeStart(page, resumeFromPage);
    if (!moved) {
      console.log('[消息续爬] checkpoint 已在最后一页,无需继续抓取');
      keepScraping = false;
    } else if (fullSyncWatermark) {
      // Check the page we landed on before starting the scrape loop.
      await waitForTableRows(page);
      const resumedPageData = await extractTable(page);
      if (pageIsOutsideFullSyncWindow(resumedPageData.rows)) {
        console.log(`[全量模式] 当前续爬页已超出近三个月范围,停止继续抓取: page=${resumeFromPage + 1}, watermark=${formatDateTime(fullSyncWatermark)}`);
        keepScraping = false;
      }
    }
  }

  let hotWatermark = null;
  let stopByExistingPage = false;
  if (hot && hasDbConfig()) {
    const latestStored = parseDbDateTime(await getLatestMessageTimeFromDb());
    if (latestStored) {
      hotWatermark = addMinutes(latestStored, -config.hotMessageOverlapMinutes);
    }
  }

  const handlePage = async ({ pageNum, pageRows }) => {
    const detailedRows = await enrichMessageRowsWithDetails(page, pageRows, pageNum);
    const normalizedRows = normalizeDatasetRecords(dataset, detailedRows, {});
    const inWindowRows = fullSyncWatermark
      ? normalizedRows.filter(notOlderThan(fullSyncWatermark))
      : normalizedRows;
    let rowsToPersist = inWindowRows;

    if (hasDbConfig() && inWindowRows.length > 0) {
      const pageMsgIds = inWindowRows.map((record) => record.msgId).filter(Boolean);
      const existingIds = await getExistingMessageIds(pageMsgIds);

      const timestamps = inWindowRows
        .map((record) => String(record.receivedAt || record.gmtModified || record.gmtCreated || '').trim())
        .filter(Boolean);
      const storedRows = await getExistingMessageFingerprints(timestamps);
      const existingFingerprints = new Set(
        storedRows.map((row) => buildMessageFingerprint({ title: row.title, receivedAt: row.received_at, orderNo: row.order_no })),
      );

      // A row counts as stored by msgId when it has one, otherwise by fingerprint.
      const alreadyStored = (record) => (
        record.msgId
          ? existingIds.has(record.msgId)
          : existingFingerprints.has(buildMessageFingerprint(record))
      );

      stopByExistingPage = inWindowRows.every((record) => alreadyStored(record));
      rowsToPersist = inWindowRows.filter((record) => !alreadyStored(record));
      if (rowsToPersist.length > 0) {
        await upsertMessages(rowsToPersist);
      }
    }

    collected.push(...rowsToPersist);
    await saveMessagesCheckpoint(dataset, pageNum, collected);
  };

  // NOTE(review): both stop callbacks call enrichMessageRowsWithDetails again for a page
  // the page hook also enriches — confirm the repeated enrichment is intended.
  let shouldStop;
  if (hot) {
    shouldStop = async ({ pageNum, pageRows }) => {
      if (pageNum >= config.hotMessageMaxPagesPerRun) {
        return true;
      }
      if (stopByExistingPage) {
        return true;
      }
      if (!hotWatermark) {
        return false;
      }
      const detailedRows = await enrichMessageRowsWithDetails(page, pageRows, pageNum);
      const normalizedRows = normalizeDatasetRecords(dataset, detailedRows, {});
      return normalizedRows.length > 0 && !normalizedRows.some(notOlderThan(hotWatermark));
    };
  } else if (fullSyncWatermark) {
    shouldStop = async ({ pageNum, pageRows }) => {
      const detailedRows = await enrichMessageRowsWithDetails(page, pageRows, pageNum);
      return stopByExistingPage || pageIsOutsideFullSyncWindow(detailedRows);
    };
  }

  const scraped = keepScraping
    ? await scrapePagedTable(page, dataset, {}, {
      onPage: handlePage,
      skipInitialPage: resuming,
      shouldStop,
    })
    : [];

  if (!resuming) {
    // Fresh run: the record set is rebuilt from the scraper's return value.
    collected = normalizeDatasetRecords(dataset, scraped, {});
  }

  if (fullSyncWatermark) {
    const before = collected.length;
    collected = collected.filter(notOlderThan(fullSyncWatermark));
    console.log(`[全量模式] 消息仅保留近三个月: ${before} -> ${collected.length} (watermark=${formatDateTime(fullSyncWatermark)})`);
  }

  if ((incremental || hot) && hasDbConfig()) {
    const modeLabel = hot ? '高频模式' : '增量模式';
    try {
      // NOTE(review): this reads the DB's latest message time after this run's upserts;
      // presumably the watermark can therefore move past rows just collected — confirm.
      const latestMessageTime = await getLatestMessageTimeFromDb();
      const latestStored = latestMessageTime ? parseDbDateTime(latestMessageTime) : null;
      if (latestStored) {
        const watermark = hot
          ? addMinutes(latestStored, -config.hotMessageOverlapMinutes)
          : subtractDays(latestStored, config.messageIncrementalOverlapDays);
        const overlapLabel = hot
          ? `${config.hotMessageOverlapMinutes}m`
          : `${config.messageIncrementalOverlapDays}d`;
        const before = collected.length;
        collected = collected.filter(notOlderThan(watermark));
        console.log(`[${modeLabel}] 消息按时间过滤: ${before} -> ${collected.length} (db_last=${latestMessageTime}, overlap=${overlapLabel})`);
      }
    } catch (error) {
      // Best effort: a failed watermark lookup leaves the collected records unfiltered.
      console.error(`[${modeLabel}] 查询数据库最新消息时间失败:`, error.message);
    }
  }

  const previousState = loadCurrentState(dataset.name, dataset.uniqueKey);
  return persistNormalizedDataset(dataset, dedupeByHash([...(previousState.records || []), ...collected]));
}
|
||
|
||
/**
 * Writes a message checkpoint named `page-<pageNum>` containing the de-duplicated
 * records collected so far.
 *
 * @param {object} dataset - Dataset descriptor; `dataset.name` selects the checkpoint group.
 * @param {number} pageNum - Last page that was fully processed.
 * @param {Array<object>} normalizedRecords - Records accumulated up to and including that page.
 * @returns {Promise<void>}
 */
async function saveMessagesCheckpoint(dataset, pageNum, normalizedRecords) {
  const records = dedupeByHash(normalizedRecords);
  const total = records.length;
  const payload = {
    pageNum,
    savedAt: new Date().toISOString(),
    stats: { total },
    records,
  };
  saveCheckpoint(dataset.name, `page-${pageNum}`, payload);
  console.log(`[消息检查点] 已落盘: page=${pageNum}, records=${total}`);
}
|
||
|
||
/**
 * Loads the most recently modified `.json` checkpoint from
 * `<dataDir>/checkpoints/messages`.
 *
 * @returns {object|null} The parsed checkpoint, or `null` when the directory or any
 *   checkpoint file is missing, the content is not an object, or reading/parsing fails.
 */
function loadLatestMessagesCheckpoint() {
  const checkpointDir = path.join(config.dataDir, 'checkpoints', 'messages');
  if (!fs.existsSync(checkpointDir)) {
    return null;
  }

  // Track the newest file; on equal mtimes the earlier directory entry wins.
  let newest = null;
  for (const fileName of fs.readdirSync(checkpointDir)) {
    if (!fileName.endsWith('.json')) {
      continue;
    }
    const filePath = path.join(checkpointDir, fileName);
    const { mtimeMs } = fs.statSync(filePath);
    if (newest === null || mtimeMs > newest.mtimeMs) {
      newest = { fileName, filePath, mtimeMs };
    }
  }
  if (newest === null) {
    return null;
  }

  try {
    const parsed = JSON.parse(fs.readFileSync(newest.filePath, 'utf-8'));
    const isObject = Boolean(parsed) && typeof parsed === 'object';
    return isObject ? parsed : null;
  } catch (error) {
    console.warn(`[消息检查点] 读取失败,忽略断点续爬: ${error.message}`);
    return null;
  }
}
|
||
|
||
/**
 * Positions the message list on the page after the checkpointed one.
 *
 * @param {object} page - Playwright page showing the message list.
 * @param {number} resumeFromPage - Last page already processed; `<= 0` means nothing to skip.
 * @returns {Promise<*>} `true` when there is nothing to skip, otherwise the result of
 *   `gotoNextPage` (falsy when the checkpointed page has no successor).
 * @throws {Error} When the checkpointed page cannot be reached.
 */
async function moveMessagesToResumeStart(page, resumeFromPage) {
  if (resumeFromPage <= 0) {
    return true;
  }

  const onCheckpointPage = await jumpToPage(page, resumeFromPage);
  if (onCheckpointPage) {
    return gotoNextPage(page);
  }

  throw new Error(`消息续爬失败:无法定位到 checkpoint 页码 ${resumeFromPage}`);
}
|
||
|
||
/**
 * Writes the bills checkpoint for one month under the name `<month>-latest`.
 *
 * @param {object} dataset - Dataset descriptor; `dataset.name` selects the checkpoint group.
 * @param {string} month - Month being scraped.
 * @param {number} pageNum - Last page that was fully processed.
 * @param {Array<object>} normalizedRecords - Records accumulated for the month so far.
 * @returns {Promise<void>}
 */
async function saveBillsCheckpoint(dataset, month, pageNum, normalizedRecords) {
  const records = dedupeByHash(normalizedRecords);
  const total = records.length;
  const payload = {
    month,
    pageNum,
    savedAt: new Date().toISOString(),
    stats: { total },
    records,
  };
  saveCheckpoint(dataset.name, `${month}-latest`, payload);
  console.log(`[账单检查点] 已落盘: month=${month}, page=${pageNum}, records=${total}`);
}
|
||
|
||
/**
 * Writes the orders checkpoint for one date window. The checkpoint name is
 * `<start>_<end>` with every character other than digits, `_` and `-` replaced by `-`.
 *
 * @param {object} dataset - Dataset descriptor; `dataset.name` selects the checkpoint group.
 * @param {{start: string, end: string}} window - Date window being scraped.
 * @param {number} pageNum - Last page that was fully processed.
 * @param {Array<object>} normalizedRecords - Records accumulated for the window so far.
 * @returns {Promise<void>}
 */
async function saveOrdersCheckpoint(dataset, window, pageNum, normalizedRecords) {
  const records = dedupeByHash(normalizedRecords);
  const total = records.length;
  const { start, end } = window;
  const safeName = `${start}_${end}`.replace(/[^0-9_-]/g, '-');
  const payload = {
    windowStart: start,
    windowEnd: end,
    pageNum,
    savedAt: new Date().toISOString(),
    stats: { total },
    records,
  };
  saveCheckpoint(dataset.name, safeName, payload);
  console.log(`[订单检查点] 已落盘: ${start} ~ ${end}, page=${pageNum}, records=${total}`);
}
|
||
|
||
/**
 * Writes a customer checkpoint named `page-<pageNum>` containing the de-duplicated
 * records collected so far.
 *
 * @param {object} dataset - Dataset descriptor; `dataset.name` selects the checkpoint group.
 * @param {number} pageNum - Last page that was fully processed.
 * @param {Array<object>} normalizedRecords - Records accumulated up to and including that page.
 * @returns {Promise<void>}
 */
async function saveCustomersCheckpoint(dataset, pageNum, normalizedRecords) {
  const records = dedupeByHash(normalizedRecords);
  const total = records.length;
  const payload = {
    pageNum,
    savedAt: new Date().toISOString(),
    stats: { total },
    records,
  };
  saveCheckpoint(dataset.name, `page-${pageNum}`, payload);
  console.log(`[客户检查点] 已落盘: page=${pageNum}, records=${total}`);
}
|
||
|
||
/**
 * Loads the most recently modified `.json` checkpoint from
 * `<dataDir>/checkpoints/customers`.
 *
 * @returns {object|null} The parsed checkpoint, or `null` when the directory or any
 *   checkpoint file is missing, the content is not an object, or reading/parsing fails.
 */
function loadLatestCustomersCheckpoint() {
  const checkpointDir = path.join(config.dataDir, 'checkpoints', 'customers');
  if (!fs.existsSync(checkpointDir)) {
    return null;
  }

  // Track the newest file; on equal mtimes the earlier directory entry wins.
  let newest = null;
  for (const fileName of fs.readdirSync(checkpointDir)) {
    if (!fileName.endsWith('.json')) {
      continue;
    }
    const filePath = path.join(checkpointDir, fileName);
    const { mtimeMs } = fs.statSync(filePath);
    if (newest === null || mtimeMs > newest.mtimeMs) {
      newest = { fileName, filePath, mtimeMs };
    }
  }
  if (newest === null) {
    return null;
  }

  try {
    const parsed = JSON.parse(fs.readFileSync(newest.filePath, 'utf-8'));
    const isObject = Boolean(parsed) && typeof parsed === 'object';
    return isObject ? parsed : null;
  } catch (error) {
    console.warn(`[客户检查点] 读取失败,忽略断点续爬: ${error.message}`);
    return null;
  }
}
|
||
|
||
/**
 * Overwrites the single `latest` customer-details checkpoint with the resume index
 * and the de-duplicated records collected so far.
 *
 * @param {object} dataset - Dataset descriptor; `dataset.name` selects the checkpoint group.
 * @param {number} currentIndex - Index of the next customer target to process.
 * @param {Array<object>} records - Detail records accumulated so far.
 * @returns {Promise<void>}
 */
async function saveCustomerDetailsCheckpoint(dataset, currentIndex, records) {
  const uniqueRecords = dedupeByHash(records);
  const total = uniqueRecords.length;
  const payload = {
    currentIndex,
    savedAt: new Date().toISOString(),
    stats: { total },
    records: uniqueRecords,
  };
  saveCheckpoint(dataset.name, 'latest', payload);
  console.log(`[客户详情检查点] 已落盘: index=${currentIndex}, records=${total}`);
}
|
||
|
||
/**
 * Loads the customer-details checkpoint from `<dataDir>/checkpoints/customerDetails`.
 *
 * Prefers `latest.json`; when it is absent, falls back to the most recently modified
 * `.json` file in the directory. Every read/parse failure — including in the fallback
 * path — is logged and reported as "no checkpoint" instead of aborting the sync.
 *
 * @returns {object|null} The parsed checkpoint, or `null` when none is usable.
 */
function loadLatestCustomerDetailsCheckpoint() {
  const checkpointDir = path.join(config.dataDir, 'checkpoints', 'customerDetails');
  if (!fs.existsSync(checkpointDir)) {
    return null;
  }

  try {
    let checkpointFile = path.join(checkpointDir, 'latest.json');
    if (!fs.existsSync(checkpointFile)) {
      // Fallback: newest remaining checkpoint by modification time.
      const candidates = fs.readdirSync(checkpointDir)
        .filter((fileName) => fileName.endsWith('.json'))
        .map((fileName) => {
          const filePath = path.join(checkpointDir, fileName);
          return { filePath, mtimeMs: fs.statSync(filePath).mtimeMs };
        })
        .sort((a, b) => b.mtimeMs - a.mtimeMs);
      if (candidates.length === 0) {
        return null;
      }
      checkpointFile = candidates[0].filePath;
    }

    const parsed = JSON.parse(fs.readFileSync(checkpointFile, 'utf-8'));
    // Callers only read properties off the checkpoint; non-object content carries none.
    return parsed && typeof parsed === 'object' ? parsed : null;
  } catch (error) {
    console.warn(`[客户详情检查点] 读取失败: ${error.message}`);
    return null;
  }
}
|
||
|
||
/**
 * Overwrites the single `latest` order-details checkpoint with the resume index
 * and the de-duplicated records collected so far.
 *
 * @param {object} dataset - Dataset descriptor; `dataset.name` selects the checkpoint group.
 * @param {number} currentIndex - Resume index stored in the checkpoint.
 * @param {Array<object>} records - Detail records accumulated so far.
 * @returns {Promise<void>}
 */
async function saveOrderDetailsCheckpoint(dataset, currentIndex, records) {
  const uniqueRecords = dedupeByHash(records);
  const total = uniqueRecords.length;
  const payload = {
    currentIndex,
    savedAt: new Date().toISOString(),
    stats: { total },
    records: uniqueRecords,
  };
  saveCheckpoint(dataset.name, 'latest', payload);
  console.log(`[订单详情检查点] 已落盘: index=${currentIndex}, records=${total}`);
}
|
||
|
||
/**
 * Loads the order-details checkpoint from `<dataDir>/checkpoints/orderDetails`.
 *
 * Prefers `latest.json`; when it is absent, falls back to the most recently modified
 * `.json` file in the directory. Every read/parse failure — including in the fallback
 * path — is logged and reported as "no checkpoint" instead of aborting the sync.
 *
 * @returns {object|null} The parsed checkpoint, or `null` when none is usable.
 */
function loadLatestOrderDetailsCheckpoint() {
  const checkpointDir = path.join(config.dataDir, 'checkpoints', 'orderDetails');
  if (!fs.existsSync(checkpointDir)) {
    return null;
  }

  try {
    let checkpointFile = path.join(checkpointDir, 'latest.json');
    if (!fs.existsSync(checkpointFile)) {
      // Fallback: newest remaining checkpoint by modification time.
      const candidates = fs.readdirSync(checkpointDir)
        .filter((fileName) => fileName.endsWith('.json'))
        .map((fileName) => {
          const filePath = path.join(checkpointDir, fileName);
          return { filePath, mtimeMs: fs.statSync(filePath).mtimeMs };
        })
        .sort((a, b) => b.mtimeMs - a.mtimeMs);
      if (candidates.length === 0) {
        return null;
      }
      checkpointFile = candidates[0].filePath;
    }

    const parsed = JSON.parse(fs.readFileSync(checkpointFile, 'utf-8'));
    // Callers only read properties off the checkpoint; non-object content carries none.
    return parsed && typeof parsed === 'object' ? parsed : null;
  } catch (error) {
    console.warn(`[订单详情检查点] 读取失败: ${error.message}`);
    return null;
  }
}
|
||
|
||
/**
 * Normalises raw records with the dataset's own `normalize` function and passes
 * each result through `withHash`.
 *
 * @param {object} dataset - Dataset descriptor exposing `normalize(record, context)`.
 * @param {Array<object>} records - Raw records to normalise.
 * @param {object} context - Shared context, used unless a record carries its own `__context`.
 * @returns {Array<object>} One `withHash` result per input record.
 */
function normalizeDatasetRecords(dataset, records, context) {
  const normalizeOne = (record) => {
    // A record-level `__context` takes precedence over the shared context.
    const normalized = dataset.normalize(record, record.__context || context);
    return withHash(normalized);
  };
  return records.map(normalizeOne);
}
|
||
|
||
/**
 * Positions the bill list on the page after the checkpointed one.
 *
 * @param {object} page - Playwright page showing the bill list.
 * @param {number} resumeFromPage - Last page already processed; `<= 0` means nothing to skip.
 * @returns {Promise<*>} `true` when there is nothing to skip, otherwise the result of
 *   `gotoNextPage` (falsy when the checkpointed page has no successor).
 * @throws {Error} When the checkpointed page cannot be reached.
 */
async function moveBillsToResumeStart(page, resumeFromPage) {
  if (resumeFromPage <= 0) {
    return true;
  }

  const onCheckpointPage = await jumpToPage(page, resumeFromPage);
  if (onCheckpointPage) {
    return gotoNextPage(page);
  }

  throw new Error(`账单续爬失败:无法定位到 checkpoint 页码 ${resumeFromPage}`);
}
|
||
|
||
/**
 * Positions the order list on the page after the checkpointed one.
 *
 * @param {object} page - Playwright page showing the order list.
 * @param {number} resumeFromPage - Last page already processed; `<= 0` means nothing to skip.
 * @returns {Promise<*>} `true` when there is nothing to skip, otherwise the result of
 *   `gotoNextPage` (falsy when the checkpointed page has no successor).
 * @throws {Error} When the checkpointed page cannot be reached.
 */
async function moveOrdersToResumeStart(page, resumeFromPage) {
  if (resumeFromPage <= 0) {
    return true;
  }

  const onCheckpointPage = await jumpToPage(page, resumeFromPage);
  if (onCheckpointPage) {
    return gotoNextPage(page);
  }

  throw new Error(`订单续爬失败:无法定位到 checkpoint 页码 ${resumeFromPage}`);
}
|
||
|
||
/**
 * Positions the customer list on the page after the checkpointed one.
 *
 * @param {object} page - Playwright page showing the customer list.
 * @param {number} resumeFromPage - Last page already processed; `<= 0` means nothing to skip.
 * @returns {Promise<*>} `true` when there is nothing to skip, otherwise the result of
 *   `gotoNextPage` (falsy when the checkpointed page has no successor).
 * @throws {Error} When the checkpointed page cannot be reached.
 */
async function moveCustomersToResumeStart(page, resumeFromPage) {
  if (resumeFromPage <= 0) {
    return true;
  }

  const onCheckpointPage = await jumpToPage(page, resumeFromPage);
  if (onCheckpointPage) {
    return gotoNextPage(page);
  }

  throw new Error(`客户续爬失败:无法定位到 checkpoint 页码 ${resumeFromPage}`);
}
|
||
|
||
/**
 * Reads the bill watermark used by incremental bill syncs.
 *
 * @returns {Promise<string|null>} A `YYYY-MM-DD` date: the DB's latest consumption time
 *   moved back by `config.billIncrementalOverlapDays`, or its first ten characters when
 *   it cannot be parsed. `null` when no database is configured, the stored value does
 *   not start with a date, or the query fails.
 */
async function getLatestBillConsumptionDate() {
  if (!hasDbConfig()) {
    console.warn('[增量模式] 未配置数据库连接,无法读取账单水位,回退到当前日期');
    return null;
  }

  try {
    const latest = await getLatestBillConsumptionTimeFromDb();
    const startsWithDate = Boolean(latest) && /^\d{4}-\d{2}-\d{2}/.test(latest);
    if (!startsWithDate) {
      return null;
    }

    const parsed = parseDbDateTime(latest);
    return parsed
      ? formatDate(subtractDays(parsed, config.billIncrementalOverlapDays))
      : latest.slice(0, 10);
  } catch (error) {
    console.error('[增量模式] 查询数据库最新账单消费时间失败:', error.message);
    return null;
  }
}
|
||
|
||
/**
 * Tells whether a bill record's consumption date is strictly later than the
 * watermark date.
 *
 * @param {object} record - Bill record; the date is read from `消费时间` or `consumeDate`.
 * @param {string} latestConsumptionDate - Watermark compared as a string (`YYYY-MM-DD`).
 * @returns {boolean} `false` when the record has no `YYYY-MM-DD` date prefix.
 */
function isAfterLatestConsumptionDate(record, latestConsumptionDate) {
  const rawValue = record['消费时间'] || record.consumeDate || '';
  const day = String(rawValue).trim().slice(0, 10);
  // ISO dates order correctly under plain string comparison.
  return /^\d{4}-\d{2}-\d{2}$/.test(day) && day > latestConsumptionDate;
}
|
||
|
||
/**
 * Tells whether a message record's timestamp is at or after the watermark.
 *
 * The timestamp is the first non-empty value among the modified-time fields,
 * then the created-time fields.
 *
 * @param {object} record - Message record (raw or normalized field names).
 * @param {Date} watermarkDate - Watermark compared against the parsed time with `>=`.
 * @returns {boolean} `false` when no timestamp is present or it cannot be parsed.
 */
function isAfterLatestMessageTime(record, watermarkDate) {
  const candidates = [
    record['消息修改时间'],
    record['修改时间'],
    record.gmtModified,
    record['消息创建时间'],
    record['创建时间'],
    record.gmtCreated,
  ];
  const rawTime = String(candidates.find(Boolean) || '').trim();
  const parsed = rawTime ? parseDbDateTime(rawTime) : null;
  return parsed ? parsed >= watermarkDate : false;
}
|
||
|
||
/**
 * Extracts a message record's timestamp, preferring modified-time fields over
 * created-time fields.
 *
 * @param {object} record - Message record (raw or normalized field names).
 * @returns {*} Result of `parseDbDateTime` for the first non-empty field, or
 *   `null` when every candidate field is empty.
 */
function extractMessageTime(record) {
  const candidates = [
    record['消息修改时间'],
    record['修改时间'],
    record.gmtModified,
    record['消息创建时间'],
    record['创建时间'],
    record.gmtCreated,
  ];
  const rawTime = String(candidates.find(Boolean) || '').trim();
  return rawTime ? parseDbDateTime(rawTime) : null;
}
|
||
|
||
/**
 * Summarizes the timestamps found on one page of message records.
 *
 * @param {object[]} records - Message records of a single page.
 * @returns {{total: number, parsed: number, earliest: string, latest: string}}
 *   `total` is the record count, `parsed` the number of records whose time
 *   could be extracted; `earliest`/`latest` are formatted with `formatDateTime`
 *   and are empty strings when nothing could be parsed.
 */
function getMessagePageTimeStats(records) {
  const times = records
    .map((record) => extractMessageTime(record))
    .filter(Boolean);
  times.sort((left, right) => left.getTime() - right.getTime());

  const hasTimes = times.length > 0;
  return {
    total: records.length,
    parsed: times.length,
    earliest: hasTimes ? formatDateTime(times[0]) : '',
    latest: hasTimes ? formatDateTime(times[times.length - 1]) : '',
  };
}
|
||
|
||
/**
 * Builds a fingerprint string for a message from its title, receive time and
 * related order number.
 *
 * @param {object} record - Message record.
 * @returns {string} `title__receivedAt__orderNo`, each part trimmed and empty
 *   when no source field has a value.
 */
function buildMessageFingerprint(record) {
  // First truthy candidate, stringified and trimmed; '' when none is set.
  const firstValue = (...candidates) => String(candidates.find(Boolean) || '').trim();

  const parts = [
    firstValue(record.title, record.detailTitle, record.column_1),
    firstValue(record.receivedAt, record.gmtModified, record.gmtCreated, record.column_2),
    firstValue(record.orderNo, record.refundOrderNo),
  ];
  return parts.join('__');
}
|
||
|
||
/**
 * Opens the detail view of each known order from the order list, extracts it,
 * checkpoints after every order and finally persists the collected details.
 *
 * @param {object} page - Playwright page used for navigation.
 * @param {Array<string>} cachedOrderIds - Optional allow-list of order ids; an
 *   empty or missing list means every locally known order is a target.
 * @param {object} [options]
 * @param {boolean} [options.resume] - Continue from the latest order-detail checkpoint.
 * @returns {{stamp: *, stats: *}} Result of `persistDataset` for the order-details dataset.
 */
async function syncOrderDetails(page, cachedOrderIds, options = {}) {
  await runtimeCheckpoint('同步订单详情');
  const dataset = datasets.orderDetails;
  const resumeCheckpoint = options.resume ? loadLatestOrderDetailsCheckpoint() : null;

  const ordersState = loadCurrentState('orders', datasets.orders.uniqueKey);
  const orderTargets = collectOrderDetailTargets(ordersState.records || [], cachedOrderIds || []);
  const targetCount = orderTargets.length;

  if (targetCount === 0) {
    console.log('[订单详情] 本地无订单定位数据,跳过');
    return persistDataset(dataset, [], {});
  }

  console.log(`[订单详情] 共 ${targetCount} 个订单需要获取详情`);
  const allDetails = Array.isArray(resumeCheckpoint?.records) ? resumeCheckpoint.records : [];
  const startIndex = Number.parseInt(String(resumeCheckpoint?.currentIndex || 0), 10) || 0;
  if (startIndex > 0) {
    console.log(`[订单详情续爬] 从 checkpoint 恢复: index=${startIndex}, records=${allDetails.length}`);
  }

  // Best-effort return from a detail view to the order list.
  const returnToOrderList = async () => {
    await page.goBack({ waitUntil: 'domcontentloaded' }).catch(() => null);
    await waitForStableOrderList(page).catch(() => null);
  };

  // Where the list is believed to be; used to skip redundant navigation.
  let currentListPage = 0;
  let currentWindowKey = '';
  let currentGroupKey = '';

  await page.goto(datasets.orders.url, { waitUntil: 'domcontentloaded' });
  await waitUntilReady(page, datasets.orders.heading);
  await trySetPageSize(page, datasets.orders.pageSize);

  for (let index = startIndex; index < targetCount; index += 1) {
    const position = `${index + 1}/${targetCount}`;
    await runtimeCheckpoint(`订单详情 ${position}`);
    const target = orderTargets[index];
    const { orderId, pageNum, windowStart, windowEnd } = target;
    console.log(`[订单详情] ${position} orderId=${orderId} page=${pageNum} window=${windowStart}~${windowEnd}`);

    // Re-apply the date window only when it differs from the active one.
    const nextWindowKey = `${windowStart}|${windowEnd}`;
    const windowChanged = currentWindowKey !== nextWindowKey;
    if (windowStart && windowEnd && windowChanged) {
      await restoreOrderWindow(page, windowStart, windowEnd);
      currentWindowKey = nextWindowKey;
      currentListPage = 1;
    }

    // Jump when the list page or the (window, page) group changed.
    const nextGroupKey = `${nextWindowKey}|${pageNum}`;
    const needsJump = pageNum > 0 && (currentListPage !== pageNum || currentGroupKey !== nextGroupKey);
    if (needsJump) {
      const reached = await jumpToOrderPage(page, pageNum);
      if (!reached) {
        console.warn(`[订单详情] 无法跳到第 ${pageNum} 页,跳过 ${orderId}`);
        continue;
      }
      currentListPage = pageNum;
    }
    currentGroupKey = nextGroupKey;

    const clicked = await clickOrderDetailFromListWithRetry(page, target);
    if (!clicked) {
      console.warn(`[订单详情] 列表中未找到 orderId=${orderId},跳过`);
      continue;
    }

    try {
      await page.waitForFunction(
        (text) => document.body && document.body.innerText.includes(text),
        '订单详情',
        { timeout: 15000 },
      );
      await sleep(1000);
    } catch {
      console.warn(`[订单详情] ${orderId} 详情页加载超时,跳过`);
      await returnToOrderList();
      continue;
    }

    const detail = await extractOrderDetail(page);
    if (!isValidOrderId(detail.orderId)) {
      // Fall back to the id the list row was located by.
      detail.orderId = target.orderId;
    }
    const detailContext = { detailSyncedAt: new Date().toISOString() };
    allDetails.push({ ...detail, __context: detailContext });
    await saveOrderDetailsCheckpoint(dataset, index + 1, allDetails);

    if (hasDbConfig()) {
      const normalizedDetail = normalizeDatasetRecords(
        dataset,
        [{ ...detail, __context: detailContext }],
        detailContext,
      );
      await upsertOrderDetails(normalizedDetail);
    }

    await returnToOrderList();
    currentListPage = target.pageNum;
  }

  return persistDataset(dataset, dedupeByHash(allDetails), {});
}
|
||
|
||
/**
 * Normalizes raw records for a dataset and persists the result.
 *
 * @param {object} dataset - Dataset definition.
 * @param {object[]} records - Raw records to normalize.
 * @param {object} context - Fallback normalization context.
 * @returns {{stamp: *, stats: *}} Result of `persistNormalizedDataset`.
 */
function persistDataset(dataset, records, context) {
  return persistNormalizedDataset(
    dataset,
    normalizeDatasetRecords(dataset, records, context),
  );
}
|
||
|
||
/**
 * Diffs normalized records against the stored state of a dataset, then saves
 * the new run and its delta.
 *
 * @param {object} dataset - Dataset definition (`name`, `uniqueKey`).
 * @param {object[]} normalizedRecords - Records already normalized for this dataset.
 * @returns {{stamp: *, stats: *}} The run stamp from `saveDatasetRun` and the diff stats.
 */
function persistNormalizedDataset(dataset, normalizedRecords) {
  const { name, uniqueKey } = dataset;
  const previousState = loadCurrentState(name, uniqueKey);
  const nextState = diffRecords(previousState, normalizedRecords, uniqueKey);

  const stamp = saveDatasetRun(name, nextState);
  saveDelta(name, stamp, nextState.delta);

  return { stamp, stats: nextState.stats };
}
|
||
|
||
/**
 * Waits until the page body contains `heading`, failing with an actionable
 * error when the page is on a login/authorization screen instead.
 *
 * @param {object} page - Playwright page.
 * @param {string} heading - Text that must appear in the page body.
 * @param {number} [timeout=120000] - Maximum wait for the heading, in milliseconds.
 * @param {object} [options]
 * @param {boolean} [options.allowInteractiveAuth=false] - When true, an auth
 *   page is tolerated so a user can finish logging in within the timeout.
 * @returns {Promise<void>}
 * @throws {Error} When the page stays on an auth page, or the heading never appears.
 */
async function waitUntilReady(page, heading, timeout = 120000, options = {}) {
  await runtimeCheckpoint(`等待页面 ${heading}`);
  const { allowInteractiveAuth = false } = options;
  await page.waitForLoadState('domcontentloaded');
  console.log(`[waitUntilReady] 当前URL: ${page.url()}`);
  console.log(`[waitUntilReady] 等待页面出现: "${heading}"`);

  // Sending the login alert is best-effort; a failed notification is only logged.
  const alertLoginRequired = async (url) => {
    try {
      await sendLoginAlert(url);
    } catch (notifyErr) {
      console.error('[通知] 发送登录提醒失败:', notifyErr.message);
    }
  };

  const initialState = await detectAuthRedirect(page);
  if (initialState.isAuthPage) {
    console.error(`[waitUntilReady] 检测到登录页/鉴权页: ${initialState.currentUrl}`);
    console.error(`[waitUntilReady] 页面内容前500字: ${initialState.bodyText}`);
    if (!allowInteractiveAuth) {
      if (isAuthUrl(initialState.currentUrl)) {
        await alertLoginRequired(initialState.currentUrl);
      }
      throw new Error(`当前页面仍处于登录/鉴权页,无法进入「${heading}」。请重新执行 npm run login,并确认该账号对该页面有访问权限。`);
    }
    console.log(`[waitUntilReady] 允许交互式登录,等待用户完成认证后进入「${heading}」...`);
  }

  try {
    await page.waitForFunction(
      (text) => document.body && document.body.innerText.includes(text),
      heading,
      { timeout },
    );
  } catch (err) {
    // Print diagnostics before deciding which error to surface.
    const { currentUrl, bodyText, isAuthPage } = await detectAuthRedirect(page);
    console.error(`[waitUntilReady] 超时!当前URL: ${currentUrl}`);
    console.error(`[waitUntilReady] 页面内容前500字: ${bodyText}`);
    if (isAuthPage) {
      if (allowInteractiveAuth) {
        throw new Error(`交互式登录超时,仍未进入「${heading}」。请确认已在浏览器中完成 RAM/阿里云登录,并且当前账号有访问该页面的权限。`);
      }
      await alertLoginRequired(currentUrl);
      throw new Error(`当前页面停留在登录/鉴权页,未能进入「${heading}」。请重新执行 npm run login,并确认该账号对该页面有访问权限。`);
    }
    throw err;
  }

  // The heading text matched, but the page may still be an auth page.
  const finalState = await detectAuthRedirect(page);
  if (finalState.isAuthPage && !allowInteractiveAuth) {
    throw new Error(`当前页面仍处于登录/鉴权页,未成功进入「${heading}」。请重新执行 npm run login,并确认该账号对该页面有访问权限。`);
  }
  await sleep(1500);
}
|
||
|
||
/**
 * Scrapes a paginated table page by page until pagination ends, a page repeats,
 * or the caller's stop condition is met.
 *
 * @param {object} page - Playwright page showing the table.
 * @param {object} dataset - Dataset definition; only `name` is used here, for labels.
 * @param {object} context - Context copied into each row's `__context` together with `pageNum`.
 * @param {object} [options]
 * @param {Function} [options.onPage] - Awaited with `{ pageData, pageNum, pageRows }` after each page.
 * @param {boolean} [options.skipInitialPage=false] - Skip the page currently shown
 *   (already saved by a checkpoint) and start from the next one.
 * @param {Function} [options.shouldStop] - Awaited with `{ pageData, pageNum, pageRows, pages }`;
 *   a truthy result stops scraping after the current page.
 * @returns {Promise<object[]>} All collected rows, each carrying `__context`.
 */
async function scrapePagedTable(page, dataset, context, options = {}) {
  const { onPage, skipInitialPage = false, shouldStop } = options;
  const pages = [];
  const visited = new Set();
  let pendingInitialSkip = skipInitialPage;

  // Renders a gotoNextPage `reason` as "code (before=…, after=…)".
  const describeStopReason = (reason) => {
    if (!reason) {
      return 'unknown';
    }
    const details = [
      ['before', reason.beforePage],
      ['after', reason.afterPage],
    ]
      .filter(([, value]) => value != null)
      .map(([label, value]) => `${label}=${value}`);
    return details.length > 0 ? `${reason.code} (${details.join(', ')})` : reason.code;
  };

  for (;;) {
    await runtimeCheckpoint(`抓取 ${dataset.name} 分页`);
    await waitForTableRows(page);
    const pageData = await extractTable(page);
    const pageNum = await currentPageNumber(page);
    const rowCount = pageData.rows.length;
    const pageKey = `${pageNum}-${rowCount}`;
    console.log(`[抓取] 第${pageNum}页, ${rowCount}行, key="${pageKey}"`);

    if (pendingInitialSkip) {
      console.log(`[抓取] 跳过 checkpoint 已保存页: ${pageNum}`);
      pendingInitialSkip = false;
      const skipStep = await gotoNextPage(page);
      if (!skipStep.moved) {
        console.log(`[抓取] checkpoint 已停止续爬: ${describeStopReason(skipStep.reason)}`);
        break;
      }
      continue;
    }

    // Seeing the same page number and row count again means pagination stalled.
    if (visited.has(pageKey)) {
      console.log('[抓取] 重复页面key,停止翻页');
      break;
    }
    visited.add(pageKey);

    const pageRows = pageData.rows.map((row) => ({ ...row, __context: { ...context, pageNum } }));
    pages.push(...pageRows);
    if (onPage) {
      await onPage({ pageData, pageNum, pageRows });
    }

    if (shouldStop && await shouldStop({ pageData, pageNum, pageRows, pages })) {
      console.log(`[抓取] 满足停止条件,在第${pageNum}页提前停止`);
      break;
    }

    const step = await gotoNextPage(page);
    if (!step.moved) {
      console.log(`[抓取] 停止翻页: ${describeStopReason(step.reason)}`);
      break;
    }
  }

  console.log(`[抓取] 共采集 ${pages.length} 条记录`);
  return pages;
}
|
||
|
||
/**
 * Throws when the page has been redirected to a login/authorization screen,
 * after logging diagnostics and trying to send a login alert.
 *
 * @param {object} page - Playwright page to inspect.
 * @param {string} label - Description of the running step, used in logs and the error.
 * @returns {Promise<void>} Resolves without effect when the page is not an auth page.
 * @throws {Error} When the session appears to have expired.
 */
async function raiseIfSessionExpired(page, label) {
  const authState = await detectAuthRedirect(page);
  if (!authState.isAuthPage) {
    return;
  }

  console.error(`[鉴权] ${label} 时检测到登录页/鉴权页: ${authState.currentUrl}`);
  console.error(`[鉴权] 页面内容前500字: ${authState.bodyText}`);

  // The alert is best-effort; the session error below is raised either way.
  await sendLoginAlert(authState.currentUrl).catch((notifyErr) => {
    console.error('[通知] 发送登录提醒失败:', notifyErr.message);
  });

  throw new Error(`运行过程中登录态失效(${label})。请重新执行 npm run login 后再继续同步。`);
}
|
||
|
||
/**
 * Extracts the table currently shown on the page as header names plus one
 * record object per non-empty row.
 *
 * Native `<table>` markup is tried first; markup matched by the `next-table`
 * selectors is the fallback.
 *
 * @param {object} page - Playwright page.
 * @returns {Promise<{headers: string[], rows: object[]}>} Rows are keyed by
 *   header text, or by `column_N` where a header is missing or empty.
 */
async function extractTable(page) {
  return page.evaluate(() => {
    // Converts NBSP to space, strips whitespace around line breaks, then trims.
    const clean = (value) =>
      String(value || '')
        .replace(/\u00a0/g, ' ')
        .replace(/\s+\n/g, '\n')
        .replace(/\n\s+/g, '\n')
        .trim();

    const cellText = (cell) => clean(cell.innerText || cell.textContent);

    // Turns rows of cell strings into records, dropping fully empty rows.
    const toRecords = (headers, rows) => {
      const records = [];
      for (const rawCells of rows) {
        const cells = rawCells.map((cell) => clean(cell));
        if (!cells.some(Boolean)) {
          continue;
        }
        const keys = headers.length ? headers : cells.map((_, index) => `column_${index + 1}`);
        const record = {};
        keys.forEach((header, index) => {
          record[header || `column_${index + 1}`] = cells[index] || '';
        });
        records.push(record);
      }
      return records;
    };

    const extractFromNativeTables = () => {
      const headerCellCount = (table) => table.querySelectorAll('thead th').length;
      const widestRowSize = (table) =>
        Math.max(...Array.from(table.querySelectorAll('tbody tr')).map((row) => row.querySelectorAll('td').length), 0);

      // Header source: the table with the most header cells (at least two).
      const headerTable = Array.from(document.querySelectorAll('table'))
        .filter((table) => headerCellCount(table) > 1)
        .sort((a, b) => headerCellCount(b) - headerCellCount(a))[0];
      const headers = headerTable
        ? Array.from(headerTable.querySelectorAll('thead th')).map((cell) => clean(cell.textContent))
        : [];

      // Body source: the table whose widest row has the most cells.
      const bodyTable = Array.from(document.querySelectorAll('table'))
        .filter((table) => table.querySelectorAll('tbody tr').length > 0)
        .sort((a, b) => widestRowSize(b) - widestRowSize(a))[0];
      if (!bodyTable) {
        return { headers, rows: [] };
      }

      const rows = Array.from(bodyTable.querySelectorAll('tbody tr'))
        .map((row) => Array.from(row.querySelectorAll('td')).map((cell) => cellText(cell)));
      return { headers, rows: toRecords(headers, rows) };
    };

    const extractFromNextTable = () => {
      const container = document.querySelector('.next-table, .next-table-inner, [class*="next-table"]');
      if (!container) {
        return { headers: [], rows: [] };
      }

      const headers = Array.from(container.querySelectorAll('.next-table-header .next-table-cell, .next-table-header th, [role="columnheader"]'))
        .map((cell) => cellText(cell))
        .filter(Boolean);

      const rows = Array.from(container.querySelectorAll('.next-table-body .next-table-row, .next-table-row, [role="row"]'))
        .map((row) =>
          Array.from(row.querySelectorAll('.next-table-cell, [role="gridcell"], [role="cell"], td'))
            .map((cell) => cellText(cell)))
        .filter((cells) => cells.length > 0 && cells.some(Boolean));

      return { headers, rows: toRecords(headers, rows) };
    };

    const nativeResult = extractFromNativeTables();
    if (nativeResult.rows.length > 0) {
      return nativeResult;
    }

    // No native rows: use the fallback if it found rows or at least headers.
    const nextTableResult = extractFromNextTable();
    if (nextTableResult.rows.length > 0 || nextTableResult.headers.length > 0) {
      return nextTableResult;
    }
    return nativeResult;
  });
}
|
||
|
||
/**
 * Tells whether an error carries the "Target page, context or browser has
 * been closed" message.
 *
 * @param {*} error - Error object, string, or nothing.
 * @returns {boolean} `true` when the error message (or the stringified value)
 *   contains that text.
 */
function isTargetClosedError(error) {
  const closedMarker = 'Target page, context or browser has been closed';
  const text = String(error?.message || error || '');
  return text.includes(closedMarker);
}
|
||
|
||
/**
 * Guards an operation against a missing or already closed page.
 *
 * @param {object} page - Playwright page, or any object with an optional `isClosed()`.
 * @param {string} label - Name of the operation about to run, used in the error text.
 * @throws {Error} When `page` is falsy or reports itself closed.
 */
function assertPageAvailable(page, label) {
  const usable = Boolean(page) && !page.isClosed?.();
  if (usable) {
    return;
  }
  throw new Error(`页面在${label}前已被关闭。请检查是否手动关闭了浏览器,或浏览器是否异常退出,然后重新执行同步。`);
}
|
||
|
||
/**
 * Waits (up to 120 s) until the page shows table rows or an empty-state
 * element, then pauses briefly.
 *
 * @param {object} page - Playwright page.
 * @returns {Promise<void>}
 * @throws {Error} When the page was closed, the session expired, or the wait timed out.
 */
async function waitForTableRows(page) {
  await runtimeCheckpoint('等待表格加载');
  assertPageAvailable(page, '等待表格加载');

  // Runs in the browser: true once rows or an empty-state marker exist.
  const tableHasSettled = () => {
    const nativeRows = document.querySelectorAll('table tbody tr').length;
    const nextRows = document.querySelectorAll('.next-table-body .next-table-row, .next-table-row, [role="row"]').length;
    const emptyState = document.querySelector('.next-table-empty, .next-empty, [class*="empty"], [class*="no-data"]');
    return nativeRows > 0 || nextRows > 0 || Boolean(emptyState);
  };

  try {
    await page.waitForFunction(tableHasSettled, null, { timeout: 120000 });
  } catch (error) {
    if (isTargetClosedError(error)) {
      throw new Error('等待消息表格加载时,浏览器页面已被关闭。请勿手动关闭浏览器窗口,并检查浏览器是否异常退出后重试。');
    }
    // Prefer a session-expired error over the raw timeout when it applies.
    await raiseIfSessionExpired(page, '等待表格加载');
    throw error;
  }
  await sleep(800);
}
|
||
|
||
/**
 * Reads the active page number from the pagination control.
 *
 * @param {object} page - Playwright page.
 * @returns {Promise<number>} The active page number; 1 when there is no active
 *   pagination item or its text is not a number.
 * @throws {Error} When the page is closed before or during the read.
 */
async function currentPageNumber(page) {
  assertPageAvailable(page, '读取当前页码');
  try {
    const activeItem = page.locator('.next-pagination-item.next-current');
    const matches = await activeItem.count();
    if (matches === 0) {
      return 1;
    }
    const label = await activeItem.first().innerText();
    return Number.parseInt(label.trim(), 10) || 1;
  } catch (error) {
    if (isTargetClosedError(error)) {
      throw new Error('读取分页页码时,浏览器页面已被关闭。请勿手动关闭浏览器窗口,并检查浏览器是否异常退出后重试。');
    }
    throw error;
  }
}
|
||
|
||
/**
 * Moves the paginated list to `targetPage`.
 *
 * The jump-to-page input is tried first. If that does not land on the target,
 * the list is advanced page by page with `gotoNextPage` (unless disabled),
 * which can only move forward.
 *
 * @param {object} page - Playwright page showing the list.
 * @param {number} targetPage - 1-based page number. Values below 1 are not
 *   real pages and are treated as "nothing to do".
 * @param {object} [options]
 * @param {boolean} [options.allowSequentialFallback=true] - Allow the
 *   page-by-page fallback when the jump input is unavailable or ineffective.
 * @returns {Promise<boolean>} `true` only when the list is on `targetPage`.
 */
async function jumpToPage(page, targetPage, options = {}) {
  const { allowSequentialFallback = true } = options;
  if (targetPage < 1) {
    return true;
  }

  // Page 1 used to be reported as reached without looking at the pagination,
  // even when the list was on a later page. Always verify the actual page.
  const current = await currentPageNumber(page);
  if (current === targetPage) {
    return true;
  }

  const jumpInputCandidates = [
    '.next-pagination-jump-input input',
    'input[aria-label*="页码"]',
    'input[aria-label*="页"]',
  ];

  for (const selector of jumpInputCandidates) {
    const input = page.locator(selector).first();
    if ((await input.count()) === 0) {
      continue;
    }
    // Each UI step is best-effort; success is judged by the page number afterwards.
    await input.click().catch(() => null);
    await sleep(100);
    await page.keyboard.press('Control+A').catch(() => null);
    await page.keyboard.type(String(targetPage), { delay: 20 }).catch(() => null);
    await page.keyboard.press('Enter').catch(() => null);
    await sleep(1500);
    const afterJump = await currentPageNumber(page);
    if (afterJump === targetPage) {
      console.log(`[跳页] 已跳转到第 ${targetPage} 页`);
      return true;
    }
  }

  if (!allowSequentialFallback) {
    console.warn(`[跳页] 未找到可用跳页输入框,且当前模式禁止顺序兜底: target=${targetPage}`);
    return false;
  }

  console.warn(`[跳页] 未找到可用跳页输入框,尝试顺序翻到第 ${targetPage} 页`);
  // The guard bounds the loop in case the page number never advances.
  let guard = 0;
  while (guard < targetPage + 5) {
    const currentPage = await currentPageNumber(page);
    if (currentPage >= targetPage) {
      // Overshooting, or already being past the target, counts as a failure.
      return currentPage === targetPage;
    }
    const { moved } = await gotoNextPage(page);
    if (!moved) {
      return false;
    }
    guard += 1;
  }
  return false;
}
|
||
|
||
/**
 * Clicks the pagination "next" button and reports whether the list advanced.
 *
 * @param {object} page - Playwright page showing the list.
 * @returns {Promise<{moved: boolean, reason: {code: string, beforePage: number, afterPage?: number}}>}
 *   `moved` says whether a later page is now shown; `reason.code` is one of
 *   `next_button_missing`, `next_button_disabled`, `page_number_not_advanced`,
 *   `advanced_via_jump` or `advanced`.
 * @throws {Error} When the page number moves backwards, the session expired,
 *   or the browser page was closed.
 */
async function gotoNextPage(page) {
  await runtimeCheckpoint('翻页');
  assertPageAvailable(page, '翻页');
  const before = await currentPageNumber(page);

  // Builds the result object; `afterPage` is only present once it is known.
  const outcome = (moved, code, afterPage) => ({
    moved,
    reason: afterPage === undefined
      ? { code, beforePage: before }
      : { code, beforePage: before, afterPage },
  });

  try {
    // Locate the "next page" button with a Playwright locator.
    const nextBtn = page.locator('button.next-pagination-item.next-next');
    if ((await nextBtn.count()) === 0) {
      return outcome(false, 'next_button_missing');
    }

    const disabledAttr = await nextBtn.getAttribute('disabled');
    if (disabledAttr != null) {
      return outcome(false, 'next_button_disabled');
    }

    // Use Playwright's click (not a DOM click) so React handlers fire normally.
    await nextBtn.click();
    await sleep(2000);
    await raiseIfSessionExpired(page, `翻页 ${before} -> next`);

    const after = await currentPageNumber(page);
    console.log(`[翻页] ${before} -> ${after}`);

    // Going backwards means the list was reset underneath us.
    if (before > 1 && after === 1) {
      throw new Error(`分页从第 ${before} 页异常回退到第 1 页,疑似登录态失效或页面会话已重置。请重新执行 npm run login 后再继续同步。`);
    }
    if (after < before) {
      throw new Error(`分页从第 ${before} 页异常回退到第 ${after} 页,疑似登录态失效或页面状态被重置。请重新执行 npm run login 后再继续同步。`);
    }

    if (before !== after) {
      return outcome(true, 'advanced', after);
    }

    // The click did not advance the page number: try the jump input once.
    const fallbackTarget = before + 1;
    console.warn(`[翻页] next 点击后页码未推进,尝试跳页到 ${fallbackTarget}`);
    const jumped = await jumpToPage(page, fallbackTarget, { allowSequentialFallback: false });
    if (!jumped) {
      return outcome(false, 'page_number_not_advanced', after);
    }
    const afterJump = await currentPageNumber(page);
    console.log(`[翻页] fallback jump ${before} -> ${afterJump}`);
    return outcome(true, 'advanced_via_jump', afterJump);
  } catch (error) {
    if (isTargetClosedError(error)) {
      throw new Error(`翻页到下一页时,浏览器页面在第 ${before} 页之后被关闭。请勿手动关闭浏览器窗口,并检查浏览器是否异常退出后重试。`);
    }
    throw error;
  }
}
|
||
|
||
/**
 * Best-effort attempt to switch the list to `pageSize` rows per page through
 * the page-size selector. Does nothing when the selector or option is missing.
 *
 * @param {object} page - Playwright page.
 * @param {number|string} pageSize - Option text to pick in the selector.
 * @returns {Promise<void>}
 */
async function trySetPageSize(page, pageSize) {
  await runtimeCheckpoint(`设置每页 ${pageSize}`);

  const sizeSelector = page.locator('input[aria-label="请选择每页显示几条"]').first();
  const selectorPresent = (await sizeSelector.count()) > 0;
  if (!selectorPresent) {
    return;
  }

  await sizeSelector.click().catch(() => null);
  await sleep(300);

  const sizeOption = page.locator(`text=${pageSize}`).last();
  const optionPresent = (await sizeOption.count()) > 0;
  if (!optionPresent) {
    // Close the dropdown again when the wanted size is not offered.
    await page.keyboard.press('Escape').catch(() => null);
    return;
  }

  await sizeOption.click().catch(() => null);
  await sleep(1200);
}
|
||
|
||
/**
 * Sets the order date range filter and retries once, filling the fields in the
 * opposite order, when the inputs do not show the requested values.
 *
 * @param {object} page - Playwright page showing the order list.
 * @param {string} start - Start date text to enter.
 * @param {string} end - End date text to enter.
 * @returns {Promise<void>} The retry result is only logged, not enforced.
 */
async function setDateRange(page, start, end) {
  await runtimeCheckpoint(`设置订单日期 ${start} ~ ${end}`);
  console.log(`[订单日期] 设置: ${start} ~ ${end}`);

  // Reads what the two range inputs currently display ('' when unreadable).
  const readDisplayedRange = async () => {
    const shownStart = await page.locator('input[placeholder="起始日期"]').inputValue().catch(() => '');
    const shownEnd = await page.locator('input[placeholder="结束日期"]').inputValue().catch(() => '');
    return [shownStart, shownEnd];
  };

  await _fillDateRange(page, start, end);

  // Verify the result.
  const [startActual, endActual] = await readDisplayedRange();
  if (startActual === start && endActual === end) {
    console.log(`[订单日期] 结果: "${startActual}" ~ "${endActual}"`);
    return;
  }

  // Wrong result: retry in the opposite order (start first, then end).
  console.log(`[订单日期] 首次结果不对: "${startActual}" ~ "${endActual}",反向重试`);
  await _fillDateRange(page, start, end, true);
  const [retryStart, retryEnd] = await readDisplayedRange();
  console.log(`[订单日期] 重试结果: "${retryStart}" ~ "${retryEnd}"`);
}
|
||
|
||
/**
 * Types a start and end date into the range-picker panel and closes it.
 *
 * @param {object} page - Playwright page.
 * @param {string} start - Start date text.
 * @param {string} end - End date text.
 * @param {boolean} [startFirst=false] - Fill the start field before the end
 *   field; by default the end field is filled first.
 * @returns {Promise<void>}
 */
async function _fillDateRange(page, start, end, startFirst = false) {
  await runtimeCheckpoint('填写订单日期');

  // Clicking the end-date input opens the picker panel.
  await page.locator('input[placeholder="结束日期"]').click();
  await sleep(1000);

  const panelStartInput = page.locator('.next-range-picker-panel-input-start-date input');
  const panelEndInput = page.locator('.next-range-picker-panel-input-end-date input');

  // Replaces the content of one panel input with `text`.
  const overwrite = async (input, text) => {
    await input.click();
    await sleep(100);
    await page.keyboard.press('Control+A');
    await page.keyboard.type(text, { delay: 30 });
    await sleep(300);
  };

  const fillOrder = startFirst
    ? [[panelStartInput, start], [panelEndInput, end]]
    : [[panelEndInput, end], [panelStartInput, start]];
  for (const [input, text] of fillOrder) {
    await overwrite(input, text);
  }

  // Confirm, then dismiss the panel by several means in turn.
  await page.keyboard.press('Enter');
  await sleep(500);
  await page.mouse.click(0, 0);
  await sleep(300);
  await page.keyboard.press('Escape');
  await sleep(300);
  await page.locator('.next-overlay-wrapper.opened')
    .waitFor({ state: 'hidden', timeout: 3000 })
    .catch(() => null);
  await sleep(300);
}
|
||
|
||
/**
 * Finds the bill month input on the page and types `month` into it.
 *
 * Inputs are matched in three passes: a current value shaped like `YYYY-MM`,
 * then a placeholder containing "月", then an ancestor whose class looks like
 * a date/month/range picker.
 *
 * @param {object} page - Playwright page showing the bill list.
 * @param {string} month - Month text to enter.
 * @returns {Promise<void>}
 * @throws {Error} When no input matches any of the three passes.
 */
async function setMonthValue(page, month) {
  await runtimeCheckpoint(`设置账单月份 ${month}`);

  const inputs = page.locator('input');
  const inputCount = await inputs.count();
  const snapshots = [];

  // Pass 1: an input whose value already has the YYYY-MM shape.
  for (let index = 0; index < inputCount; index += 1) {
    const candidate = inputs.nth(index);
    const value = await candidate.inputValue().catch(() => '');
    const placeholder = await candidate.getAttribute('placeholder').catch(() => '');
    snapshots.push({ index, value, placeholder });

    if (/^\d{4}-\d{2}$/.test(value)) {
      console.log(`[账单月份] 通过 value 匹配到 input[${index}], 设置: ${month}`);
      await typeIntoDateInput(candidate, month, page);
      return;
    }
  }

  // Pass 2: values were empty, so match a month-like placeholder instead.
  const byPlaceholder = snapshots.find((item) => item.placeholder && /月/.test(item.placeholder));
  if (byPlaceholder) {
    console.log(`[账单月份] 通过 placeholder 匹配到 input[${byPlaceholder.index}], 设置: ${month}`);
    await typeIntoDateInput(inputs.nth(byPlaceholder.index), month, page);
    return;
  }

  // Pass 3 (fallback): any input inside something that looks like a date or
  // month picker, which excludes search boxes and the like.
  for (const item of snapshots) {
    const candidate = inputs.nth(item.index);
    const cls = await candidate
      .evaluate((el) => el.closest('[class*="date-picker"], [class*="month-picker"], [class*="range-picker"]')?.className || '')
      .catch(() => '');
    if (!cls) {
      continue;
    }
    console.log(`[账单月份] 通过父级 class 匹配到 input[${item.index}] (${cls}), 设置: ${month}`);
    await typeIntoDateInput(candidate, month, page);
    return;
  }

  console.error('[DEBUG] 账单页面所有 input:', JSON.stringify(snapshots, null, 2));
  throw new Error('未识别到账单佣金月份输入框,请打开页面确认结构是否变化。');
}
|
||
|
||
/**
 * Enters a date value with the keyboard.
 *
 * Strategy: focus, select all, type quickly, then Tab away so the blur commits
 * the value without opening the picker panel the way a click would. Escape is
 * pressed afterwards in case a panel is still open. A mismatch between the
 * expected and actual value is only logged as a warning.
 *
 * @param {object} locator - Playwright locator of the date input.
 * @param {string} value - Text to enter.
 * @param {object} page - Playwright page that owns the keyboard.
 * @returns {Promise<void>}
 */
async function typeIntoDateInput(locator, value, page) {
  await runtimeCheckpoint(`填写日期输入 ${value}`);

  // Make the input editable, then focus it and select its current content.
  await locator.evaluate((node) => node.removeAttribute('readonly'));
  await locator.focus();
  await sleep(100);
  await page.keyboard.press('Control+A');
  await sleep(100);

  // Type the new value character by character.
  await page.keyboard.type(value, { delay: 30 });
  await sleep(200);

  // Tab moves focus away: blur commits the value and closes the panel.
  await page.keyboard.press('Tab');
  await sleep(300);

  // Escape as a fallback in case the panel is still open.
  await page.keyboard.press('Escape');
  await sleep(300);

  // Verify what the input shows now.
  const actual = await locator.inputValue().catch(() => '');
  if (actual === value) {
    console.log(`[日期设置] 成功: "${value}"`);
  } else {
    console.warn(`[WARN] typeIntoDateInput: 期望 "${value}",实际 "${actual}"`);
  }
}
|
||
|
||
/**
 * Clicks the first "查询" (query) button, falling back to focusing it and
 * pressing Enter when the click fails.
 *
 * @param {object} page - Playwright page.
 * @returns {Promise<void>}
 */
async function clickQuery(page) {
  await runtimeCheckpoint('点击查询');
  const queryButton = page.locator('button:has-text("查询")').first();

  // Bring the button into view, then center it; both steps are best-effort.
  await queryButton.scrollIntoViewIfNeeded().catch(() => null);
  await queryButton
    .evaluate((node) => {
      node.scrollIntoView({ block: 'center', inline: 'center', behavior: 'instant' });
    })
    .catch(() => null);

  try {
    await queryButton.click({ timeout: 8000 });
  } catch (error) {
    console.warn(`[查询按钮] click 失败,尝试 Enter 兜底: ${error.message}`);
    await queryButton.focus().catch(() => null);
    await page.keyboard.press('Enter');
  }
  await sleep(1800);
}
|
||
|
||
/**
 * Builds one date window per calendar month, from the month containing
 * `startDate` up to today. The last window ends today.
 *
 * @param {string} startDate - Start date as `YYYY-MM-DD`.
 * @returns {Array<{windowStart: string, windowEnd: string, start: string, end: string}>}
 *   Windows in chronological order, formatted with `formatDate`; `start`/`end`
 *   duplicate `windowStart`/`windowEnd`. Empty when `startDate` is in the
 *   future or cannot be parsed.
 */
function buildMonthlyDateWindows(startDate) {
  // Take year and month straight from the text. The previous approach parsed
  // the date as a +08:00 instant and then read its month in the process-local
  // timezone, which gave the previous month for a first-of-month start on
  // hosts west of +08:00 (for example UTC).
  const match = /^(\d{4})-(\d{2})-\d{2}$/.exec(String(startDate).trim());
  const monthIndex = match ? Number(match[2]) - 1 : -1;

  let cursor;
  if (match && monthIndex >= 0 && monthIndex <= 11) {
    cursor = new Date(Number(match[1]), monthIndex, 1);
  } else {
    // Any other input keeps the legacy parse.
    const start = new Date(`${startDate}T00:00:00+08:00`);
    cursor = new Date(start.getFullYear(), start.getMonth(), 1);
  }

  const end = new Date();
  const windows = [];

  // An invalid cursor compares false, so the loop is skipped and [] is returned.
  while (cursor <= end) {
    const windowStart = new Date(cursor);
    // Day 0 of the following month is the last day of this month.
    const windowEnd = new Date(cursor.getFullYear(), cursor.getMonth() + 1, 0);
    const actualEnd = windowEnd > end ? end : windowEnd;
    windows.push({
      windowStart: formatDate(windowStart),
      windowEnd: formatDate(actualEnd),
      start: formatDate(windowStart),
      end: formatDate(actualEnd),
    });
    cursor.setMonth(cursor.getMonth() + 1);
  }

  return windows;
}
|
||
|
||
/**
 * Lists every month from `startMonth` up to the current month.
 *
 * @param {string} startMonth - First month as `YYYY-MM`.
 * @returns {string[]} Months as `YYYY-MM` in chronological order; empty when
 *   `startMonth` is in the future.
 */
function buildMonthList(startMonth) {
  const [startYear, startMonthNumber] = startMonth.split('-').map(Number);
  const now = new Date();
  const months = [];

  for (
    let cursor = new Date(startYear, startMonthNumber - 1, 1);
    cursor <= now;
    cursor.setMonth(cursor.getMonth() + 1)
  ) {
    const monthText = String(cursor.getMonth() + 1).padStart(2, '0');
    months.push(`${cursor.getFullYear()}-${monthText}`);
  }

  return months;
}
|
||
|
||
/**
 * Formats a date as `YYYY-MM-DD` using its local-time fields.
 *
 * @param {Date} date - Date to format.
 * @returns {string} Month and day are zero-padded to two digits.
 */
function formatDate(date) {
  const twoDigits = (part) => String(part).padStart(2, '0');
  return [
    date.getFullYear(),
    twoDigits(date.getMonth() + 1),
    twoDigits(date.getDate()),
  ].join('-');
}
|
||
|
||
/**
 * Removes duplicate records, keeping the first occurrence of each.
 *
 * Two records count as duplicates when their `JSON.stringify` output is
 * identical.
 *
 * @param {object[]} records - Records to deduplicate.
 * @returns {object[]} New array with the surviving records in original order.
 */
function dedupeByHash(records) {
  const seenKeys = new Set();
  return records.filter((record) => {
    const key = JSON.stringify(record);
    const isFirst = !seenKeys.has(key);
    if (isFirst) {
      seenKeys.add(key);
    }
    return isFirst;
  });
}
|
||
|
||
/**
 * Collects the distinct valid order ids from a list of records.
 *
 * @param {object[]} records - Records carrying `orderId` (normalized) or
 *   `订单号` (raw) as the order id field.
 * @returns {string[]} Unique ids in order of first appearance. Empty values,
 *   "没有数据" placeholders and ids rejected by `isValidOrderId` are left out.
 */
function collectValidOrderIds(records) {
  // A Set keeps insertion order, so it serves as both the seen-set and the result.
  const uniqueIds = new Set();
  for (const record of records) {
    // Both field names are supported: normalized `orderId` and raw `订单号`.
    const rawOrderId = String(record.orderId || record['订单号'] || '').trim();
    const isPlaceholder = !rawOrderId || rawOrderId.includes('没有数据');
    if (isPlaceholder) {
      continue;
    }
    if (!isValidOrderId(rawOrderId)) {
      console.log(`[订单详情] 跳过无效订单号: ${rawOrderId}`);
      continue;
    }
    uniqueIds.add(rawOrderId);
  }
  return [...uniqueIds];
}
|
||
|
||
/**
 * Collects the distinct valid account ids from a list of records.
 *
 * @param {object[]} records - Records carrying `accountId`.
 * @returns {string[]} Unique ids in order of first appearance. Empty values,
 *   "没有数据" placeholders and ids rejected by `isValidAccountId` are left out.
 */
function collectValidAccountIds(records) {
  // A Set keeps insertion order, so it serves as both the seen-set and the result.
  const uniqueIds = new Set();
  for (const record of records) {
    const rawAccountId = String(record.accountId || '').trim();
    const isPlaceholder = !rawAccountId || rawAccountId.includes('没有数据');
    if (isPlaceholder) {
      continue;
    }
    if (!isValidAccountId(rawAccountId)) {
      console.log(`[客户详情] 跳过无效 accountId: ${rawAccountId}`);
      continue;
    }
    uniqueIds.add(rawAccountId);
  }
  return [...uniqueIds];
}
|
||
|
||
/**
 * Builds the list of customers whose detail view should be visited.
 *
 * @param {object[]} records - Customer records with `accountId`, `loginName`
 *   and `listPageNum` (the list page the customer was seen on).
 * @returns {Array<{accountId: string, loginName: string, pageNum: number}>}
 *   One target per distinct valid account id with a positive page number,
 *   sorted by page number.
 */
function collectCustomerDetailTargets(records) {
  const targetsByAccount = new Map();
  for (const record of records) {
    const accountId = String(record.accountId || '').trim();
    const loginName = String(record.loginName || '').trim();
    const pageNum = Number.parseInt(String(record.listPageNum || 0), 10) || 0;

    const locatable = Boolean(accountId) && isValidAccountId(accountId) && pageNum > 0;
    if (!locatable || targetsByAccount.has(accountId)) {
      continue;
    }
    targetsByAccount.set(accountId, { accountId, loginName, pageNum });
  }
  return [...targetsByAccount.values()].sort((left, right) => left.pageNum - right.pageNum);
}
|
||
|
||
/**
 * Builds the list of orders whose detail view should be visited.
 *
 * @param {object[]} records - Order records with `orderId`, `listPageNum`,
 *   `windowStart` and `windowEnd`.
 * @param {Array<string>} [cachedOrderIds=[]] - Optional allow-list; when it has
 *   at least one non-empty id, only those orders become targets.
 * @returns {Array<{orderId: string, pageNum: number, windowStart: string, windowEnd: string}>}
 *   One target per distinct valid order id with a positive page number, sorted
 *   by date window and then by page number.
 */
function collectOrderDetailTargets(records, cachedOrderIds = []) {
  const allowSet = new Set(
    (cachedOrderIds || [])
      .map((value) => String(value || '').trim())
      .filter(Boolean),
  );
  const targetsByOrder = new Map();

  for (const record of records) {
    const orderId = String(record.orderId || '').trim();
    const pageNum = Number.parseInt(String(record.listPageNum || 0), 10) || 0;
    const windowStart = String(record.windowStart || '').trim();
    const windowEnd = String(record.windowEnd || '').trim();

    const locatable = Boolean(orderId) && isValidOrderId(orderId) && pageNum > 0;
    if (!locatable) {
      continue;
    }
    const filteredOut = allowSet.size > 0 && !allowSet.has(orderId);
    if (filteredOut || targetsByOrder.has(orderId)) {
      continue;
    }
    targetsByOrder.set(orderId, { orderId, pageNum, windowStart, windowEnd });
  }

  const windowKey = (target) => `${target.windowStart}|${target.windowEnd}`;
  return [...targetsByOrder.values()].sort(
    (left, right) => windowKey(left).localeCompare(windowKey(right)) || left.pageNum - right.pageNum,
  );
}
|
||
|
||
/**
 * Finds the customer's row in the list shown on the page and clicks its
 * "详情" (detail) control.
 *
 * @param {object} page - Playwright page showing the customer list.
 * @param {{accountId: string, loginName: string}} target - Customer to open;
 *   a row matches when its text contains the account id or the login name.
 * @returns {Promise<boolean>} `true` when a detail control was found and
 *   clicked; `false` otherwise, including when the in-page lookup fails.
 */
async function clickCustomerDetailFromList(page, target) {
  const clicked = await page
    .evaluate(({ accountId, loginName }) => {
      // Row text is compared with all whitespace removed.
      const squash = (value) => String(value || '').replace(/\s+/g, '').trim();

      const matchingRow = Array.from(document.querySelectorAll('table tbody tr')).find((row) => {
        const rowText = squash(row.innerText || row.textContent || '');
        return rowText.includes(accountId) || (loginName && rowText.includes(loginName));
      });
      if (!matchingRow) {
        return false;
      }

      const detailControl = Array.from(matchingRow.querySelectorAll('button, a, span'))
        .find((node) => /详情/.test(String(node.textContent || '').trim()));
      if (!detailControl) {
        return false;
      }

      detailControl.scrollIntoView({ block: 'center', inline: 'center', behavior: 'instant' });
      detailControl.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: true }));
      return true;
    }, target)
    .catch(() => false);

  if (clicked) {
    // Give the detail view a moment to start loading.
    await sleep(1200);
  }
  return clicked;
}
|
||
|
||
/**
 * Try to open a customer's detail view, looking on the expected list page
 * first and then on the neighbouring pages (previous, then next).
 *
 * The first attempt assumes the list already shows `target.pageNum` and does
 * not navigate; the other attempts jump to the candidate page and wait for
 * the list to settle before clicking.
 *
 * @param {object} page - Playwright page showing the customer list.
 * @param {{accountId: string, loginName?: string, pageNum: number}} target
 * @returns {Promise<boolean>} true as soon as one attempt dispatched a click.
 */
async function clickCustomerDetailFromListWithRetry(page, target) {
  const expectedPage = target.pageNum;
  const candidatePages = [expectedPage, Math.max(1, expectedPage - 1), expectedPage + 1];

  for (const candidate of candidatePages) {
    const needsJump = candidate > 0 && candidate !== expectedPage;
    if (needsJump) {
      const arrived = await jumpToCustomerPage(page, candidate);
      if (!arrived) {
        continue;
      }
      await waitForStableCustomerList(page);
    }

    if (await clickCustomerDetailFromList(page, target)) {
      return true;
    }
  }

  return false;
}
|
||
|
||
/**
 * Click the "详情" control of the order row shown on the current list page.
 *
 * The first table row whose whitespace-stripped text contains the order id is
 * used; if that row has no detail control nothing is clicked.
 *
 * @param {object} page - Playwright page currently showing the order list.
 * @param {{orderId: string}} target - Order to open.
 * @returns {Promise<boolean>} true when a click was dispatched; false when the
 *   row or control was not found, or the in-page script failed.
 */
async function clickOrderDetailFromList(page, target) {
  const openInPage = ({ orderId }) => {
    const compact = (value) => String(value || '').replace(/\s+/g, '').trim();

    for (const row of Array.from(document.querySelectorAll('table tbody tr'))) {
      const rowText = compact(row.innerText || row.textContent || '');
      if (!rowText.includes(orderId)) {
        continue;
      }

      // Only the first matching row is considered.
      const trigger = Array.from(row.querySelectorAll('button, a, span'))
        .find((node) => /详情/.test(String(node.textContent || '').trim()));
      if (!trigger) {
        return false;
      }

      trigger.scrollIntoView({ block: 'center', inline: 'center', behavior: 'instant' });
      trigger.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: true }));
      return true;
    }

    return false;
  };

  const opened = await page.evaluate(openInPage, target).catch(() => false);
  if (!opened) {
    return opened;
  }

  // Let the detail view start rendering before the caller reads it.
  await sleep(1200);
  return opened;
}
|
||
|
||
/**
 * Try to open an order's detail view, looking on the expected list page first
 * and then on the neighbouring pages (previous, then next).
 *
 * The first attempt assumes the list already shows `target.pageNum` and does
 * not navigate; the other attempts jump to the candidate page and wait for
 * the list to settle before clicking.
 *
 * @param {object} page - Playwright page showing the order list.
 * @param {{orderId: string, pageNum: number}} target
 * @returns {Promise<boolean>} true as soon as one attempt dispatched a click.
 */
async function clickOrderDetailFromListWithRetry(page, target) {
  const expectedPage = target.pageNum;
  const candidatePages = [expectedPage, Math.max(1, expectedPage - 1), expectedPage + 1];

  for (const candidate of candidatePages) {
    const needsJump = candidate > 0 && candidate !== expectedPage;
    if (needsJump) {
      const arrived = await jumpToOrderPage(page, candidate);
      if (!arrived) {
        continue;
      }
      await waitForStableOrderList(page);
    }

    if (await clickOrderDetailFromList(page, target)) {
      return true;
    }
  }

  return false;
}
|
||
|
||
/**
 * Jump the customer list to `pageNum` via `jumpToPage` and log on success.
 *
 * @param {object} page - Playwright page showing the customer list.
 * @param {number} pageNum - Page number to jump to.
 * @returns {Promise<*>} Whatever `jumpToPage` reported (truthy on success).
 */
async function jumpToCustomerPage(page, pageNum) {
  const arrived = await jumpToPage(page, pageNum);
  if (!arrived) {
    return arrived;
  }
  console.log(`[客户详情] 已跳转到第 ${pageNum} 页`);
  return arrived;
}
|
||
|
||
/**
 * Jump the order list to `pageNum` via `jumpToPage` and log on success.
 *
 * @param {object} page - Playwright page showing the order list.
 * @param {number} pageNum - Page number to jump to.
 * @returns {Promise<*>} Whatever `jumpToPage` reported (truthy on success).
 */
async function jumpToOrderPage(page, pageNum) {
  const arrived = await jumpToPage(page, pageNum);
  if (!arrived) {
    return arrived;
  }
  console.log(`[订单详情] 已跳转到第 ${pageNum} 页`);
  return arrived;
}
|
||
|
||
/**
 * Wait for the customer table to settle: wait for rows, pause 600 ms, then
 * wait for rows again. Failures of either wait are ignored.
 *
 * @param {object} page - Playwright page showing the customer list.
 * @returns {Promise<void>}
 */
async function waitForStableCustomerList(page) {
  const rowsOrNull = () => waitForTableRows(page).catch(() => null);

  await rowsOrNull();
  await sleep(600);
  await rowsOrNull();
}
|
||
|
||
/**
 * Wait for the order table to settle: wait for rows, pause 600 ms, then wait
 * for rows again. Failures of either wait are ignored.
 *
 * @param {object} page - Playwright page showing the order list.
 * @returns {Promise<void>}
 */
async function waitForStableOrderList(page) {
  const rowsOrNull = () => waitForTableRows(page).catch(() => null);

  await rowsOrNull();
  await sleep(600);
  await rowsOrNull();
}
|
||
|
||
/**
 * Click the first usable control inside the message row at `rowIndex`.
 *
 * Rows are taken from `.next-table-row, table tbody tr, [role="row"]`,
 * ignoring rows without text. The row is chosen purely by index; `rowText`
 * is only used to report, via `matched`, whether the clicked row contains
 * the expected text.
 *
 * @param {object} page - Playwright page showing the message list.
 * @param {string} rowText - Text the row is expected to contain.
 * @param {number} rowIndex - Zero-based index among the non-empty rows.
 * @returns {Promise<object>} `{ clicked: true, buttonText, rowTextActual, matched }`
 *   on success, otherwise `{ clicked: false, reason, ... }` with `reason` one
 *   of `row_not_found`, `clickable_node_not_found`, `evaluate_failed`.
 */
async function clickMessageDetailButton(page, rowText, rowIndex) {
  const payload = { rowTextValue: rowText, rowIndexValue: rowIndex };

  const outcome = await page
    .evaluate(({ rowTextValue, rowIndexValue }) => {
      const squash = (value) => String(value || '').replace(/\s+/g, ' ').trim();
      const textOf = (node) => squash(node.innerText || node.textContent || '');
      const wanted = squash(rowTextValue);

      const candidates = [];
      for (const element of Array.from(document.querySelectorAll('.next-table-row, table tbody tr, [role="row"]'))) {
        if (textOf(element)) {
          candidates.push(element);
        }
      }

      const hit = candidates[rowIndexValue];
      if (!hit) {
        return { clicked: false, reason: 'row_not_found', rowCount: candidates.length };
      }

      const rowTextActual = textOf(hit);
      const clickables = Array.from(hit.querySelectorAll('button, a, [role="button"], .next-btn-text'));

      // Default to the first control; prefer the first one whose label is part of the row text.
      let chosen = clickables[0];
      for (const node of clickables) {
        const label = textOf(node);
        if (label && rowTextActual.includes(label)) {
          chosen = node;
          break;
        }
      }

      if (!chosen) {
        return { clicked: false, reason: 'clickable_node_not_found', rowTextActual };
      }

      chosen.scrollIntoView({ block: 'center', inline: 'center', behavior: 'instant' });
      chosen.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: true }));
      return {
        clicked: true,
        buttonText: textOf(chosen),
        rowTextActual,
        matched: rowTextActual.includes(wanted),
      };
    }, payload)
    .catch(() => ({ clicked: false, reason: 'evaluate_failed' }));

  if (outcome.clicked) {
    // Leave time for the detail drawer to start opening.
    await sleep(1200);
  }
  return outcome;
}
|
||
|
||
/**
 * Wait (up to 15 s) until a drawer with both header and body is present and
 * the header contains "消息详情", then pause 600 ms.
 *
 * @param {object} page - Playwright page.
 * @returns {Promise<void>} Rejects if the drawer does not appear in time.
 */
async function waitForMessageDetailDrawer(page) {
  const drawerIsOpen = () => {
    const headerNode = document.querySelector('.next-drawer-header');
    const bodyNode = document.querySelector('.next-drawer-body');
    if (!headerNode || !bodyNode) {
      return false;
    }
    return String(headerNode.textContent || '').includes('消息详情');
  };

  await page.waitForFunction(drawerIsOpen, null, { timeout: 15000 });
  await sleep(600);
}
|
||
|
||
/**
 * Read the open message drawer and turn its text into a structured record.
 *
 * Labelled lines ("接收时间", "订单号", …) are recognised with either an ASCII
 * colon or a full-width colon (U+FF1A) after the label. The message class is
 * derived from keywords in the drawer header/body, first match wins.
 *
 * @param {object} page - Playwright page with the message drawer open.
 * @returns {Promise<object>} Record with `detailTitle`, `detailContent`
 *   (full body text), `content` (body without the labelled lines), the
 *   extracted fields, `accountIds` (comma-joined), `messageClassification`
 *   and a fixed `status` of '未读'.
 */
async function extractMessageDetail(page) {
  return page.evaluate(() => {
    const normalize = (value) => String(value || '').replace(/\u00a0/g, ' ').trim();
    const header = normalize(document.querySelector('.next-drawer-header')?.innerText || '');
    const body = normalize(document.querySelector('.next-drawer-body')?.innerText || '');
    const lines = body.split(/\r?\n/).map((line) => normalize(line)).filter(Boolean);
    const firstLine = lines[0] || '';

    // Labels may be followed by ":" or by the full-width "\uFF1A".
    const isColon = (char) => char === ':' || char === '\uFF1A';

    // Value of the first line that starts with `<label><colon>`.
    const extract = (label) => {
      const line = lines.find((item) => item.startsWith(label) && isColon(item.charAt(label.length)));
      return line ? normalize(line.slice(label.length + 1)) : '';
    };

    // First capture group of `pattern` in the whole body, or ''.
    const match = (pattern) => {
      const matched = body.match(pattern);
      return matched?.[1] ? normalize(matched[1]) : '';
    };

    const accountIdMatches = Array.from(body.matchAll(/账号ID[:\uFF1A]?(\d{6,})/g))
      .map((item) => normalize(item[1]))
      .filter(Boolean);

    // Order matters: earlier rules win when several keywords are present.
    const classification = (() => {
      if (/退款/.test(header) || /退款/.test(body)) return 'refund';
      if (/释放预警/.test(header) || /预计于【.*】释放/.test(body)) return 'release_warning';
      if (/释放通知/.test(header) || /已释放/.test(body)) return 'release_notice';
      if (/未支付提醒/.test(header) || /未支付/.test(body)) return 'unpaid_reminder';
      if (/取消通知/.test(header) || /取消了一笔未支付订单/.test(body)) return 'order_cancel';
      if (/余额-预警通知/.test(header) || /账户现金余额/.test(body)) return 'balance_warning';
      if (/关联成功/.test(header) || /关联关系已完成建立/.test(body)) return 'association_success';
      if (/注册成功/.test(header) || /受邀注册UID/.test(body)) return 'registration_success';
      if (/变更已超期/.test(header) || /变更申请已超期/.test(body)) return 'change_overdue';
      return 'general';
    })();

    // Body lines that are not one of the labelled key/value lines.
    const freeTextLines = lines.filter(
      (line) => !/^(接收时间|客户账号|订单号|退款订单号|订单金额|退款金额|客户下单时间|退款时间|受邀注册UID)[:\uFF1A]/.test(line),
    );

    return {
      detailTitle: firstLine || header,
      detailContent: body,
      receivedAt: extract('接收时间'),
      customerName: extract('客户账号'),
      customerNo: extract('客户账号')
        || match(/贵司的代付(?:关联)?客户【[^/]+\/(\d{6,})】/)
        || match(/受邀注册UID[:\uFF1A]?(\d{6,})/)
        || accountIdMatches[0]
        || '',
      orderNo: extract('订单号') || extract('退款订单号'),
      orderAmount: extract('订单金额'),
      customerOrderTime: extract('客户下单时间'),
      refundOrderNo: extract('退款订单号'),
      refundAmount: extract('退款金额'),
      refundTime: extract('退款时间'),
      invitedRegisterUid: extract('受邀注册UID') || match(/受邀注册UID[:\uFF1A]?(\d{6,})/),
      accountIds: accountIdMatches.join(','),
      messageClassification: classification,
      status: '未读',
      title: firstLine || header,
      content: freeTextLines.join('\n'),
    };
  });
}
|
||
|
||
/**
 * Close the message drawer: click the first close control if one exists,
 * otherwise press Escape. Then wait up to 10 s for the drawer header to
 * disappear and pause 400 ms. All failures are ignored.
 *
 * @param {object} page - Playwright page.
 * @returns {Promise<void>}
 */
async function closeMessageDetailDrawer(page) {
  const closer = page.locator('.next-drawer-close, .next-dialog-close, .next-icon-close').first();
  const hasCloser = (await closer.count()) > 0;

  const dismissal = hasCloser ? closer.click() : page.keyboard.press('Escape');
  await dismissal.catch(() => null);

  const drawerGone = () => !document.querySelector('.next-drawer-header');
  await page.waitForFunction(drawerGone, null, { timeout: 10000 }).catch(() => null);
  await sleep(400);
}
|
||
|
||
/**
 * Fetch one page of messages from the message-box API from inside the page,
 * so the request reuses the page's cookies.
 *
 * @param {object} page - Playwright page on the console origin.
 * @param {number} pageNum - Page number sent as `pageNo`.
 * @param {number} pageSize - Page size sent as `pageSize`.
 * @returns {Promise<Array<object>>} `payload.data.list` when it is an array;
 *   otherwise, or on any failure, an empty array.
 */
async function fetchMessageApiRows(page, pageNum, pageSize) {
  const query = { currentPage: pageNum, currentPageSize: pageSize };

  const loadInPage = async ({ currentPage, currentPageSize }) => {
    const url = `/api/taskapi/msgbox/queryUserMsg.json?lv2CategoryId=0&pageNo=${currentPage}&pageSize=${currentPageSize}`;
    const response = await fetch(url, {
      credentials: 'include',
    });
    const payload = await response.json();
    const list = payload?.data?.list;
    return Array.isArray(list) ? list : [];
  };

  return page.evaluate(loadInPage, query).catch(() => []);
}
|
||
|
||
/**
 * Convert an HTML fragment into plain text with one trimmed, non-empty line
 * per row.
 *
 * `<br>`, `</div>` and `</p>` become line breaks, every remaining tag is
 * dropped, and non-breaking spaces (the `&nbsp;` entity or a literal U+00A0)
 * become ordinary spaces. Other entities are left untouched.
 *
 * @param {*} value - HTML source; falsy values are treated as ''.
 * @returns {string} Lines joined with '\n'; '' when nothing remains.
 */
function stripHtmlTags(value) {
  return String(value || '')
    .replace(/<br\s*\/?>(\r?\n)?/gi, '\n')
    .replace(/<\/div>/gi, '\n')
    .replace(/<\/p>/gi, '\n')
    .replace(/<[^>]+>/g, '')
    // This step used to replace a plain space with a space (a no-op), which
    // let `&nbsp;` leak into the extracted message text.
    .replace(/&nbsp;|\u00a0/gi, ' ')
    .replace(/\r/g, '')
    .split('\n')
    .map((line) => line.trim())
    .filter(Boolean)
    .join('\n');
}
|
||
|
||
/**
 * Map one raw message-box API entry onto the scraper's message record shape.
 *
 * @param {*} record - Raw API entry.
 * @returns {object|null} null when `record` is not an object; otherwise the
 *   mapped record with trimmed string ids/names, HTML-stripped content
 *   (taken from `htmlContent`, falling back to `content`), formatted
 *   create/update timestamps and `status` '已读' when `isRead` equals 1,
 *   else '未读'.
 */
function mapApiMessageRecord(record) {
  if (!record || typeof record !== 'object') {
    return null;
  }

  const text = (value) => String(value || '').trim();
  const stamp = (value) => (value ? formatDateTime(new Date(value)) : '');

  const title = text(record.title);
  const plainBody = stripHtmlTags(record.htmlContent || record.content || '');
  const level3Category = text(record.lv3CategoryId);
  const wasRead = Number(record.isRead) === 1;

  return {
    msgId: text(record.id),
    title,
    detailTitle: title,
    detailContent: plainBody,
    content: plainBody,
    fromApp: text(record.fromApp),
    bizCode: text(record.bizCode),
    msgChannel: text(record.msgChannel),
    categoryName: text(record.categoryName),
    // The record's category id is the level-3 category id.
    categoryId: level3Category,
    lv1CategoryId: text(record.lv1CategoryId),
    lv2CategoryId: text(record.lv2CategoryId),
    lv3CategoryId: level3Category,
    gmtCreated: stamp(record.createDate),
    gmtModified: stamp(record.updateDate),
    status: wasRead ? '已读' : '未读',
  };
}
|
||
|
||
/**
 * Add detail fields to every message row scraped from one list page.
 *
 * The API page with the same number is fetched once and its entries are
 * paired with the table rows by position. When the API entry at a row's
 * index has a message id it is merged into the row; otherwise the row's
 * detail drawer is opened, scraped and closed again. Rows without a title
 * are passed through untouched, as are rows whose detail could not be read.
 *
 * @param {object} page - Playwright page showing the message list.
 * @param {Array<object>} pageRows - Rows scraped from the current list page.
 * @param {number} pageNum - Current list page number.
 * @returns {Promise<Array<object>>} One output row per input row, same order.
 */
async function enrichMessageRowsWithDetails(page, pageRows, pageNum) {
  const tally = { success: 0, failed: 0 };
  const apiRows = await fetchMessageApiRows(page, pageNum, datasets.messages.pageSize);

  // Resolve a single row to its enriched form, updating the tally as a side effect.
  const resolveRow = async (row, index) => {
    const rowText = String(row['消息标题'] || row['标题'] || row.title || row.column_1 || '').trim();
    if (!rowText) {
      return row;
    }

    // Cheap path: API entry at the same position.
    const apiDetail = mapApiMessageRecord(apiRows[index]);
    if (apiDetail?.msgId) {
      tally.success += 1;
      return { ...row, ...apiDetail };
    }

    // Fallback: open the drawer for this row and scrape it.
    const clicked = await clickMessageDetailButton(page, rowText, index);
    if (!clicked.clicked) {
      tally.failed += 1;
      console.warn(`[消息详情] 打开失败: pageRow=${index + 1}, title="${rowText}", reason=${clicked.reason || 'unknown'}`);
      return row;
    }

    try {
      await waitForMessageDetailDrawer(page);
      const detail = await extractMessageDetail(page);
      tally.success += 1;
      return { ...row, ...detail };
    } catch (error) {
      tally.failed += 1;
      console.warn(`[消息详情] 提取失败: pageRow=${index + 1}, title="${rowText}", error=${error.message}`);
      return row;
    } finally {
      // Always restore the list view before the next row is handled.
      await closeMessageDetailDrawer(page);
      await waitForTableRows(page).catch(() => null);
    }
  };

  const enrichedRows = [];
  for (let index = 0; index < pageRows.length; index += 1) {
    enrichedRows.push(await resolveRow(pageRows[index], index));
  }

  console.log(`[消息详情] 本页详情提取: success=${tally.success}, failed=${tally.failed}, total=${pageRows.length}`);
  return enrichedRows;
}
|
||
|
||
/**
 * Re-apply an order query window: wait for the orders page, set the date
 * range, run the query, then restore the page size and wait for the list.
 * Setting the date range and clicking query may reject; the other steps are
 * best effort.
 *
 * @param {object} page - Playwright page on the orders view.
 * @param {string} windowStart - Start of the date range.
 * @param {string} windowEnd - End of the date range.
 * @returns {Promise<void>}
 */
async function restoreOrderWindow(page, windowStart, windowEnd) {
  const ignore = () => null;

  await waitUntilReady(page, datasets.orders.heading).catch(ignore);

  // These two are not guarded: a failure here propagates to the caller.
  await setDateRange(page, windowStart, windowEnd);
  await clickQuery(page);

  await trySetPageSize(page, datasets.orders.pageSize).catch(ignore);
  await waitForStableOrderList(page).catch(ignore);
}
|
||
|
||
/**
 * Best-effort restore of the customer list: wait for the page, re-apply the
 * page size and, for a positive `pageNum`, jump back to that page and wait
 * for the list to settle. Every step's failure is ignored.
 *
 * @param {object} page - Playwright page on the customers view.
 * @param {number} pageNum - List page to return to; non-positive means stay.
 * @returns {Promise<void>}
 */
async function recoverCustomerListState(page, pageNum) {
  const ignore = () => null;

  await waitUntilReady(page, datasets.customers.heading).catch(ignore);
  await trySetPageSize(page, datasets.customers.pageSize).catch(ignore);

  if (!(pageNum > 0)) {
    return;
  }
  await jumpToCustomerPage(page, pageNum).catch(ignore);
  await waitForStableCustomerList(page).catch(ignore);
}
|
||
|
||
/**
 * Best-effort restore of the order list: wait for the page, then either
 * re-apply the query window (when both bounds are given) or just the page
 * size, and finally jump back to `pageNum` when it is positive. Every step's
 * failure is ignored.
 *
 * @param {object} page - Playwright page on the orders view.
 * @param {number} pageNum - List page to return to; non-positive means stay.
 * @param {string} [windowStart=''] - Start of the query window.
 * @param {string} [windowEnd=''] - End of the query window.
 * @returns {Promise<void>}
 */
async function recoverOrderListState(page, pageNum, windowStart = '', windowEnd = '') {
  const ignore = () => null;

  await waitUntilReady(page, datasets.orders.heading).catch(ignore);

  const hasWindow = Boolean(windowStart && windowEnd);
  const restoreStep = hasWindow
    ? restoreOrderWindow(page, windowStart, windowEnd)
    : trySetPageSize(page, datasets.orders.pageSize);
  await restoreStep.catch(ignore);

  if (!(pageNum > 0)) {
    return;
  }
  await jumpToOrderPage(page, pageNum).catch(ignore);
  await waitForStableOrderList(page).catch(ignore);
}
|
||
|
||
/**
 * Tell whether a value is a usable order id: after trimming it must consist
 * of one or more ASCII digits.
 *
 * The digits-only pattern already rejects empty strings and any garbled or
 * replacement characters, so no separate checks are needed.
 *
 * @param {*} orderId - Candidate id; falsy values are treated as ''.
 * @returns {boolean}
 */
function isValidOrderId(orderId) {
  return /^\d+$/.test(String(orderId || '').trim());
}
|
||
|
||
/**
 * Tell whether a value is a usable account id: after trimming it must
 * consist of one or more ASCII digits.
 *
 * The digits-only pattern already rejects empty strings and any garbled or
 * replacement characters, so no separate checks are needed.
 *
 * @param {*} accountId - Candidate id; falsy values are treated as ''.
 * @returns {boolean}
 */
function isValidAccountId(accountId) {
  return /^\d+$/.test(String(accountId || '').trim());
}
|
||
|
||
/**
 * Scrape the order detail fields from the text of the current page.
 *
 * Each field is located by its label: first as "label, line break, value",
 * then as "label, optional colon, value on the same line". Labels are plain
 * text and are regex-escaped here; parentheses and the colon are accepted in
 * both ASCII and full-width form, so a label like "订单原价(CNY)" matches
 * however the page renders it. Labels are not anchored, so a label that is a
 * suffix of another label can match that other label's line.
 *
 * @param {object} page - Playwright page showing an order detail view.
 * @returns {Promise<object>} Field name → extracted text ('' when absent).
 */
async function extractOrderDetail(page) {
  return page.evaluate(() => {
    const text = document.body?.innerText || '';

    // Escape a plain label for use in a RegExp, accepting "(" / ")" in either width.
    const toPattern = (label) =>
      label
        .replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
        .replace(/\\\(/g, '[(\\uFF08]')
        .replace(/\\\)/g, '[)\\uFF09]');

    const extract = (label) => {
      const pattern = toPattern(label);

      const lineBreakMatch = text.match(new RegExp(`${pattern}\\s*(?:\\r?\\n)+\\s*([^\\r\\n]+)`));
      if (lineBreakMatch) return lineBreakMatch[1].trim();

      const inlineMatch = text.match(new RegExp(`${pattern}\\s*[:\\uFF1A]?\\s*([^\\r\\n]+)`));
      return inlineMatch ? inlineMatch[1].trim() : '';
    };

    return {
      orderId: extract('订单号'),
      orderType: extract('订单类型'),
      status: extract('状态'),
      tradeType: extract('交易类型'),
      customerCategory: extract('客户分类'),
      dealerName: extract('二级经销商名称'),
      dealerUid: extract('二级经销商UID'),
      customerType: extract('客户类型'),
      opportunityId: extract('商机ID'),
      paymentTime: extract('支付时间'),
      orderTime: extract('下单时间'),
      productName: extract('产品名称'),
      productCode: extract('产品code'),
      originalPriceCny: extract('订单原价(CNY)'),
      paidAmountCny: extract('实付金额(CNY)'),
      discount: extract('订单折扣'),
      payableAmountCny: extract('应付金额(实付+代金券)(CNY)'),
      couponAmountCny: extract('代金券金额(CNY)'),
    };
  });
}
|
||
|
||
/**
 * Scrape the customer detail fields from the current page.
 *
 * Text fields are located by label: first as "label, line break, value",
 * then as "label, optional colon, value on the same line". Labels are plain
 * text and are regex-escaped here; parentheses and the colon are accepted in
 * both ASCII and full-width form. The monthly amounts are read from two
 * slices of the page text (last month / current month) because both slices
 * reuse the same "预付费金额" / "后付费金额" labels. The department is taken
 * from the second cell of the first populated row of the table mentioning
 * "所属部门", falling back to label extraction.
 *
 * @param {object} page - Playwright page showing a customer detail view.
 * @returns {Promise<object>} Field name → extracted text ('' when absent);
 *   amounts have currency signs and thousands separators removed.
 */
async function extractCustomerDetail(page) {
  return page.evaluate(() => {
    const normalize = (value) =>
      String(value || '')
        .replace(/\u00a0/g, ' ')
        .trim();

    const text = normalize(document.body?.innerText || '').replace(/\r/g, '');

    // Escape a plain label for use in a RegExp, accepting "(" / ")" in either width.
    const toPattern = (label) =>
      label
        .replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
        .replace(/\\\(/g, '[(\\uFF08]')
        .replace(/\\\)/g, '[)\\uFF09]');

    const extract = (label, sourceText = text) => {
      const pattern = toPattern(label);

      const lineBreakMatch = sourceText.match(new RegExp(`${pattern}\\s*(?:\\n)+\\s*([^\\n]+)`));
      if (lineBreakMatch) return normalize(lineBreakMatch[1]);

      const inlineMatch = sourceText.match(new RegExp(`${pattern}\\s*[:\\uFF1A]?\\s*([^\\n]+)`));
      return inlineMatch ? normalize(inlineMatch[1]) : '';
    };

    // Drop yen signs and thousands separators (ASCII and full-width).
    const normalizeAmount = (value) => normalize(value).replace(/[\u00A5\uFFE5,\uFF0C]/g, '').trim();

    // Position and length of the first occurrence of `label` at or after `from`.
    const findLabel = (label, from = 0) => {
      const matcher = new RegExp(toPattern(label), 'g');
      matcher.lastIndex = from;
      const found = matcher.exec(text);
      return found ? { index: found.index, length: found[0].length } : null;
    };

    // Page text from `startLabel` up to (not including) `endLabel`, or to the end.
    const buildSection = (startLabel, endLabel = '') => {
      const start = findLabel(startLabel);
      if (!start) return '';
      const end = endLabel ? findLabel(endLabel, start.index + start.length) : null;
      return end ? text.slice(start.index, end.index) : text.slice(start.index);
    };

    const lastMonthSection = buildSection('上月应付总金额(CNY)', '本月应付总金额(CNY)');
    const currentMonthSection = buildSection('本月应付总金额(CNY)');

    const extractAmountFromSection = (sectionText, label) => normalizeAmount(extract(label, sectionText));

    let department = '';
    const table = Array.from(document.querySelectorAll('table')).find((node) =>
      (node.innerText || '').includes('所属部门'),
    );
    if (table) {
      for (const row of table.querySelectorAll('tbody tr')) {
        const cells = row.querySelectorAll('td');
        if (cells.length >= 2) {
          const value = normalize(cells[1]?.innerText || cells[1]?.textContent || '');
          if (value) {
            department = value;
            break;
          }
        }
      }
    }
    if (!department) {
      department = extract('所属部门');
    }

    return {
      customerAccount: extract('客户账号'),
      customerName: extract('客户名称'),
      customerType: extract('客户类型'),
      tradeMode: extract('交易模式'),
      customerSource: extract('客户来源'),
      realNameStatus: extract('实名状态'),
      email: extract('邮箱') || extract('Email') || extract('电子邮箱'),
      relationDate: extract('关联日期'),
      phone: extract('手机号') || extract('手机') || extract('联系电话') || extract('联系手机'),
      remark: extract('备注') || extract('客户备注'),
      paymentNoticeStatus: extract('代为支付告知状态'),
      department,
      lastMonthPayableTotalCny: extractAmountFromSection(lastMonthSection, '上月应付总金额(CNY)'),
      lastMonthPrepayCny: extractAmountFromSection(lastMonthSection, '预付费金额'),
      lastMonthPostpayCny: extractAmountFromSection(lastMonthSection, '后付费金额'),
      currentMonthPayableTotalCny: extractAmountFromSection(currentMonthSection, '本月应付总金额(CNY)'),
      currentMonthPrepayCny: extractAmountFromSection(currentMonthSection, '预付费金额'),
      currentMonthPostpayCny: extractAmountFromSection(currentMonthSection, '后付费金额'),
    };
  });
}
|