Files
aliyunApsSkill/aliyun-sync/aliyun-aps-sync/src/sync.js
2026-04-27 09:16:07 +08:00

1309 lines
45 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { chromium } from 'playwright';
import cron from 'node-cron';
import fs from 'node:fs';
import path from 'node:path';
import readline from 'node:readline';
import { execSync } from 'node:child_process';
import { config, datasets } from './config.js';
import { sendLoginAlert } from './notify.js';
import {
diffRecords,
loadCurrentState,
nowStamp,
saveCheckpoint,
saveDatasetRun,
saveDelta,
saveRunSummary,
withHash,
} from './storage.js';
/** Resolve after `ms` milliseconds; used for fixed pauses between browser actions. */
const sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });

// Module-level singletons: the shared browser context (assigned in getContext,
// cleared in closeContextIfNeeded) and the hotkey controller (assigned in
// getRuntimeController).
let _context = null;
let _runtimeController = null;

// Text fragments searched for in the page body to recognise a login/auth page.
const AUTH_PAGE_KEYWORDS = [
  'RAM 用户登录',
  '主账号登录',
  '钉钉扫码登录',
  '用户名',
  '下一步',
  '登录并使用 RAM',
];
/**
 * Close the shared browser context, if one is open.
 *
 * The module-level reference is cleared BEFORE `close()` is awaited. That way a
 * rejected close cannot leave a dead context behind for the next `getContext()`
 * call to reuse, and an overlapping call (e.g. a signal handler racing a
 * `finally` block) becomes a no-op instead of closing the same context twice.
 *
 * @returns {Promise<void>}
 * @throws whatever `context.close()` rejects with
 */
async function closeContextIfNeeded() {
  if (!_context) return;
  const context = _context;
  _context = null;
  await context.close();
}
/**
 * Return the process-wide hotkey controller, creating it on first call.
 *
 * The controller exposes:
 *  - `bind()`   – start listening for F7 (pause) / F8 (resume) / F9 (terminate)
 *                 on stdin; a no-op when stdin is not a TTY or already bound.
 *  - `unbind()` – stop listening and give the terminal back.
 *  - `waitIfPaused(label)` – resolves once not paused; throws if terminated.
 *  - `throwIfTerminated(label)` – throws if termination was requested.
 *
 * @returns {{bind: Function, unbind: Function, waitIfPaused: Function, throwIfTerminated: Function}}
 */
function getRuntimeController() {
  if (_runtimeController) return _runtimeController;
  let paused = false;
  let terminated = false;
  let keypressBound = false;

  const onKeypress = (_str, key = {}) => {
    if (key.ctrl && key.name === 'c') {
      // Raw mode stops the terminal from turning Ctrl+C into SIGINT, so the
      // signal is re-raised here. Without listeners, mimic the default action.
      if (process.listenerCount('SIGINT') > 0) {
        process.emit('SIGINT', 'SIGINT');
      } else {
        process.exit(130);
      }
      return;
    }
    if (key.name === 'f7') {
      if (!paused) {
        paused = true;
        console.log('[控制] 已暂停F7。按 F8 继续,按 F9 终止。');
      }
      return;
    }
    if (key.name === 'f8') {
      if (paused) {
        paused = false;
        console.log('[控制] 已继续F8。');
      }
      return;
    }
    if (key.name === 'f9') {
      terminated = true;
      // Un-pause so a waiter blocked in waitIfPaused notices the termination.
      paused = false;
      console.log('[控制] 已请求终止F9将在安全检查点停止。');
    }
  };

  const bind = () => {
    if (keypressBound || !process.stdin.isTTY) return;
    readline.emitKeypressEvents(process.stdin);
    if (typeof process.stdin.setRawMode === 'function') {
      process.stdin.setRawMode(true);
    }
    process.stdin.resume();
    process.stdin.on('keypress', onKeypress);
    keypressBound = true;
    console.log('[控制] 热键已启用F7 暂停 / F8 继续 / F9 终止');
  };

  const unbind = () => {
    if (!keypressBound) return;
    process.stdin.off('keypress', onKeypress);
    if (process.stdin.isTTY && typeof process.stdin.setRawMode === 'function') {
      process.stdin.setRawMode(false);
    }
    // Undo the resume() from bind(); a flowing stdin keeps the event loop alive.
    process.stdin.pause();
    keypressBound = false;
  };

  const throwIfTerminated = (label = '任务') => {
    if (terminated) {
      throw new Error(`[控制] 已终止:${label}`);
    }
  };

  const waitIfPaused = async (label = '任务') => {
    throwIfTerminated(label);
    while (paused) {
      await sleep(300);
      throwIfTerminated(label);
    }
  };

  _runtimeController = {
    bind,
    unbind,
    waitIfPaused,
    throwIfTerminated,
  };
  return _runtimeController;
}
/**
 * Cooperative checkpoint: throws if termination was requested, otherwise
 * waits while the run is paused.
 *
 * @param {string} label - text included in the termination error message
 */
async function runtimeCheckpoint(label) {
  const { throwIfTerminated, waitIfPaused } = getRuntimeController();
  throwIfTerminated(label);
  await waitIfPaused(label);
}
/**
 * Return the shared persistent browser context, launching Chrome on first use
 * and re-applying any saved cookies to the fresh context.
 */
async function getContext() {
  if (!_context) {
    const launchOptions = {
      channel: 'chrome',
      headless: config.headless,
      acceptDownloads: true,
      downloadsPath: config.downloadDir,
    };
    _context = await chromium.launchPersistentContext(config.userDataDir, launchOptions);
    await restoreStorageState(_context);
  }
  return _context;
}
/**
 * Best-effort: add the cookies from the saved storage-state file to `context`.
 * A missing file is silently ignored; any read/parse/add failure is logged as
 * a warning and swallowed.
 */
async function restoreStorageState(context) {
  const snapshotExists = fs.existsSync(config.storageStateFile);
  if (!snapshotExists) return;

  try {
    const rawSnapshot = fs.readFileSync(config.storageStateFile, 'utf-8');
    const state = JSON.parse(rawSnapshot);
    const hasCookies = Array.isArray(state.cookies) && state.cookies.length > 0;
    if (hasCookies) {
      await context.addCookies(state.cookies);
      console.log(`[storageState] 已恢复 ${state.cookies.length} 个 cookie`);
    }
  } catch (error) {
    console.warn(`[storageState] 恢复失败,继续使用 .browser profile: ${error.message}`);
  }
}
/** Write the context's storage state to `config.storageStateFile` and log the path. */
async function saveStorageState(context) {
  const targetPath = config.storageStateFile;
  await context.storageState({ path: targetPath });
  console.log(`[storageState] 已保存登录态快照: ${targetPath}`);
}
/**
 * Load the most recently modified `*.json` file under
 * `<dataDir>/checkpoints/bills`.
 *
 * @returns {object|null} the parsed checkpoint, or null when the directory or
 *   files are missing, the newest file is not valid JSON, or it does not parse
 *   to an object.
 */
function loadLatestBillsCheckpoint() {
  const checkpointDir = path.join(config.dataDir, 'checkpoints', 'bills');
  if (!fs.existsSync(checkpointDir)) return null;

  // Track the newest file in one pass; on equal mtimes the first one listed wins.
  let newest = null;
  for (const fileName of fs.readdirSync(checkpointDir)) {
    if (!fileName.endsWith('.json')) continue;
    const filePath = path.join(checkpointDir, fileName);
    const { mtimeMs } = fs.statSync(filePath);
    if (newest === null || mtimeMs > newest.mtimeMs) {
      newest = { fileName, filePath, mtimeMs };
    }
  }
  if (newest === null) return null;

  try {
    const parsed = JSON.parse(fs.readFileSync(newest.filePath, 'utf-8'));
    const isObject = Boolean(parsed) && typeof parsed === 'object';
    return isObject ? parsed : null;
  } catch (error) {
    console.warn(`[账单检查点] 读取失败,忽略断点续爬: ${error.message}`);
    return null;
  }
}
/**
 * Return up to the first 500 characters of the page's visible body text.
 * Yields '(空)' when the body text is empty and '(无法获取)' when the
 * evaluation is rejected.
 */
async function getPageBodyPreview(page) {
  const readBodyPreview = () => document.body?.innerText?.substring(0, 500) || '(空)';
  const fallbackOnFailure = () => '(无法获取)';
  return page.evaluate(readBodyPreview).catch(fallbackOnFailure);
}
/**
 * Whether `url` points at a sign-in page: an account/signin aliyun host, or a
 * URL containing `login.htm` or the `/#/signin` hash route.
 */
function isAuthUrl(url) {
  if (/account\.aliyun\.com|signin\.aliyun\.com/.test(url)) {
    return true;
  }
  const pathMarkers = ['login.htm', '/#/signin'];
  return pathMarkers.some((marker) => url.includes(marker));
}
/** Whether `text` contains at least one of the AUTH_PAGE_KEYWORDS fragments. */
function hasAuthKeywords(text) {
  for (const keyword of AUTH_PAGE_KEYWORDS) {
    if (text.includes(keyword)) {
      return true;
    }
  }
  return false;
}
/**
 * Snapshot the page's URL and body preview and classify it as an auth page
 * when either the URL or the body text looks like a login screen.
 *
 * @returns {Promise<{currentUrl: string, bodyText: string, isAuthPage: boolean}>}
 */
async function detectAuthRedirect(page) {
  const currentUrl = page.url();
  const bodyText = await getPageBodyPreview(page);
  const isAuthPage = isAuthUrl(currentUrl) || hasAuthKeywords(bodyText);
  return { currentUrl, bodyText, isAuthPage };
}
/**
 * Navigate to the dataset's page and wait until its heading text is visible.
 *
 * @param page - Playwright page
 * @param dataset - object providing `url` and `heading`
 * @param {number} [timeout=120000] - forwarded to waitUntilReady
 * @param {object} [options={}] - forwarded to waitUntilReady
 */
async function ensureDatasetAccessible(page, dataset, timeout = 120000, options = {}) {
  const { url, heading } = dataset;
  await page.goto(url, { waitUntil: 'domcontentloaded' });
  await waitUntilReady(page, heading, timeout, options);
}
/**
 * Interactive login flow.
 *
 * Opens the customers page, waits (up to 10 minutes) for the user to finish
 * signing in, verifies that the bills page is reachable too, then saves the
 * storage-state snapshot. The browser is always closed afterwards.
 *
 * The browser launch happens INSIDE the try block: hotkeys are bound (stdin in
 * raw mode) before launching, so a launch failure must still run the cleanup
 * that unbinds them and removes the signal handlers.
 *
 * @returns {Promise<void>}
 */
export async function login() {
  const runtimeController = getRuntimeController();

  const cleanupAndExit = async (signal) => {
    console.log(`[login] 收到 ${signal},正在保存登录态并关闭浏览器...`);
    try {
      await closeContextIfNeeded();
    } finally {
      // Exit even when closing the browser fails, otherwise the signal is lost.
      process.exit(130);
    }
  };
  const onSigint = () => {
    void cleanupAndExit('SIGINT');
  };
  const onSigterm = () => {
    void cleanupAndExit('SIGTERM');
  };

  runtimeController.bind();
  process.once('SIGINT', onSigint);
  process.once('SIGTERM', onSigterm);
  try {
    const context = await getContext();
    const page = context.pages()[0] || (await context.newPage());
    await page.goto(datasets.customers.url, { waitUntil: 'domcontentloaded' });
    console.log('请在打开的浏览器里完成阿里云伙伴中心登录。检测到进入“我的客户”和“账单查询”页面后,脚本会自动保存登录态并关闭浏览器。');
    await waitUntilReady(page, datasets.customers.heading, 10 * 60 * 1000, { allowInteractiveAuth: true });
    console.log('[login] 我的客户页验证通过,继续验证账单页登录态...');
    await ensureDatasetAccessible(page, datasets.bills, 60 * 1000, { allowInteractiveAuth: true });
    await sleep(1000);
    await saveStorageState(context);
    console.log('登录态已写入 .browser 目录,且已验证“我的客户”和“账单查询”页面可访问,后续可直接执行 npm run sync 或 npm run bills。');
  } finally {
    process.off('SIGINT', onSigint);
    process.off('SIGTERM', onSigterm);
    try {
      await closeContextIfNeeded();
    } finally {
      // Always restore the terminal, even if closing the browser throws.
      runtimeController.unbind();
    }
  }
}
/**
 * Run every dataset sync in order (customers, customer details, orders, order
 * details, bills) and persist a run summary.
 *
 * The browser launch happens inside the try block so that a launch failure
 * still unbinds the hotkeys that were bound just before it.
 *
 * @returns {Promise<object>} summary with `startedAt`, `finishedAt` and one
 *   entry per dataset under `datasets`
 */
export async function syncAll() {
  const runtimeController = getRuntimeController();
  runtimeController.bind();
  try {
    const context = await getContext();
    const summary = { startedAt: new Date().toISOString(), datasets: {} };
    const page = context.pages()[0] || (await context.newPage());
    summary.datasets.customers = await syncCustomers(page);
    summary.datasets.customerDetails = await syncCustomerDetails(page);
    summary.datasets.orders = await syncOrders(page);
    // After syncOrders finishes, read the orderId list from the freshly stored orders state.
    const latestOrders = loadCurrentState('orders', datasets.orders.uniqueKey);
    const orderIdsForDetail = collectValidOrderIds(latestOrders.records || []);
    summary.datasets.orderDetails = await syncOrderDetails(page, orderIdsForDetail);
    summary.datasets.bills = await syncBills(page);
    summary.finishedAt = new Date().toISOString();
    const stamp = nowStamp();
    saveRunSummary(stamp, summary);
    return summary;
  } finally {
    try {
      if (config.closeBrowser) {
        await closeContextIfNeeded();
      } else {
        console.log('浏览器保持运行');
      }
    } finally {
      // Always restore the terminal, even if closing the browser throws.
      runtimeController.unbind();
    }
  }
}
/**
 * Run only the bills sync and persist a run summary.
 *
 * The browser launch happens inside the try block so that a launch failure
 * still unbinds the hotkeys that were bound just before it.
 *
 * @param {object} [options={}] - forwarded to syncBills (e.g. `resume`)
 * @returns {Promise<object>} summary with `startedAt`, `finishedAt` and `datasets.bills`
 */
export async function syncBillsOnly(options = {}) {
  const runtimeController = getRuntimeController();
  runtimeController.bind();
  try {
    const context = await getContext();
    const summary = { startedAt: new Date().toISOString(), datasets: {} };
    const page = context.pages()[0] || (await context.newPage());
    summary.datasets.bills = await syncBills(page, options);
    summary.finishedAt = new Date().toISOString();
    const stamp = nowStamp();
    saveRunSummary(stamp, summary);
    return summary;
  } finally {
    try {
      if (config.closeBrowser) {
        await closeContextIfNeeded();
      } else {
        console.log('浏览器保持运行');
      }
    } finally {
      // Always restore the terminal, even if closing the browser throws.
      runtimeController.unbind();
    }
  }
}
/**
 * Register the cron job that runs a full sync followed by the database import
 * script.
 *
 * Runs never overlap: all syncs share one browser page, so a trigger that fires
 * while the previous run is still in progress is skipped with a warning.
 * Failures of the sync or of the import script are logged, never thrown.
 *
 * @returns {Promise<void>} resolves once the job is registered
 */
export async function scheduleSync() {
  console.log(`定时任务已启动: ${config.cron} (${config.timezone})`);
  let syncInProgress = false;
  cron.schedule(
    config.cron,
    async () => {
      if (syncInProgress) {
        console.warn(`[${new Date().toISOString()}] 上一轮同步仍在执行,跳过本次触发`);
        return;
      }
      syncInProgress = true;
      try {
        console.log(`[${new Date().toISOString()}] 开始执行同步`);
        const summary = await syncAll();
        console.log(`[${new Date().toISOString()}] 同步完成`, JSON.stringify(summary, null, 2));
        try {
          const scriptPath = path.resolve(config.rootDir, config.dbSyncScript);
          const incrementalFlag = config.fullSync ? '' : ' --incremental';
          console.log(`[入库] 执行 ${scriptPath}${incrementalFlag ? ' (增量模式)' : ''}`);
          // NOTE(review): scriptPath comes from config and is only wrapped in double
          // quotes; a path containing `"` or `$` would break this shell command.
          const output = execSync(`python "${scriptPath}"${incrementalFlag}`, {
            cwd: path.dirname(scriptPath),
            encoding: 'utf-8',
            timeout: 120000,
          });
          console.log(output);
        } catch (e) {
          console.error('[入库] 失败:', e.message);
        }
      } catch (error) {
        console.error(`[${new Date().toISOString()}] 同步失败`, error);
      } finally {
        syncInProgress = false;
      }
    },
    { timezone: config.timezone },
  );
}
/**
 * Scrape the paged customers table and persist it.
 *
 * @returns {Promise<object>} whatever persistDataset returns for the run
 */
async function syncCustomers(page) {
  await runtimeCheckpoint('同步客户');
  const { customers: dataset } = datasets;
  await page.goto(dataset.url, { waitUntil: 'domcontentloaded' });
  await waitUntilReady(page, dataset.heading);
  await trySetPageSize(page, dataset.pageSize);
  const scrapedRows = await scrapePagedTable(page, dataset, {});
  return persistDataset(dataset, scrapedRows, {});
}
async function syncCustomerDetails(page) {
await runtimeCheckpoint('同步客户详情');
const dataset = datasets.customerDetails;
const customersState = loadCurrentState('customers', datasets.customers.uniqueKey);
const allAccountIds = collectValidAccountIds(customersState.records || []);
if (allAccountIds.length === 0) {
console.log('[客户详情] 本地无有效客户 accountId跳过');
return persistDataset(dataset, [], {});
}
console.log(`[客户详情] 共 ${allAccountIds.length} 个客户需要获取详情`);
const allDetails = [];
const detailBaseUrl =
'https://aps.aliyun.com/?spm=5176.12818093.top-nav.ditem-fx.785716d0LKDpKT#/detail/my_customer/~/customer/';
for (let index = 0; index < allAccountIds.length; index += 1) {
await runtimeCheckpoint(`客户详情 ${index + 1}/${allAccountIds.length}`);
const accountId = allAccountIds[index];
console.log(`[客户详情] ${index + 1}/${allAccountIds.length} accountId=${accountId}`);
// 先跳 about:blank 再跳详情URL强制 SPA 完整重新加载)
await page.goto('about:blank');
await sleep(300);
await page.goto(`${detailBaseUrl}${accountId}`, { waitUntil: 'domcontentloaded' });
try {
await page.waitForFunction(
(text) => document.body && document.body.innerText.includes(text),
'详情',
{ timeout: 15000 },
);
await sleep(1000);
} catch {
console.warn(`[客户详情] ${accountId} 详情页加载超时,跳过`);
continue;
}
const detail = await extractCustomerDetail(page);
allDetails.push({ ...detail, __context: { accountId } });
}
return persistDataset(dataset, dedupeByHash(allDetails), {});
}
/**
 * Scrape the orders table once per date window and persist the de-duplicated
 * union. Full sync uses monthly windows from `config.orderStartDate`;
 * otherwise the incremental windows are used.
 *
 * @returns {Promise<object>} whatever persistDataset returns for the run
 */
async function syncOrders(page) {
  await runtimeCheckpoint('同步订单');
  const dataset = datasets.orders;
  const dateRanges = config.fullSync
    ? buildMonthlyDateWindows(config.orderStartDate)
    : buildIncrementalOrderWindows();

  const collected = [];
  for (const range of dateRanges) {
    await runtimeCheckpoint(`订单窗口 ${range.start} ~ ${range.end}`);
    await page.goto(dataset.url, { waitUntil: 'domcontentloaded' });
    await waitUntilReady(page, dataset.heading);
    await setDateRange(page, range.start, range.end);
    await clickQuery(page);
    await trySetPageSize(page, dataset.pageSize);
    const rangeRows = await scrapePagedTable(page, dataset, range);
    collected.push(...rangeRows);
  }
  return persistDataset(dataset, dedupeByHash(collected), {});
}
/**
 * Build the order query windows for incremental mode: monthly windows starting
 * at `config.incrementalOrderStartDate` when it is configured, otherwise a
 * single one-day window for yesterday (local time).
 */
function buildIncrementalOrderWindows() {
  const configuredStartDate = normalizeConfiguredDate(config.incrementalOrderStartDate);
  if (!configuredStartDate) {
    const reference = new Date();
    reference.setDate(reference.getDate() - 1);
    const yesterdayLabel = formatDate(reference);
    console.log(`[增量模式] 订单仅查询: ${yesterdayLabel}`);
    return [
      {
        windowStart: yesterdayLabel,
        windowEnd: yesterdayLabel,
        start: yesterdayLabel,
        end: yesterdayLabel,
      },
    ];
  }
  const monthlyWindows = buildMonthlyDateWindows(configuredStartDate);
  console.log(`[增量模式] 订单从指定日期开始查询: ${configuredStartDate}`);
  return monthlyWindows;
}
/**
 * Validate an optional `YYYY-MM-DD` configuration value.
 *
 * The calendar check round-trips the components through a UTC date instead of
 * relying on `new Date(string)`: V8 accepts any day from 01 to 31 for every
 * month and silently rolls over (2024-02-30 becomes 2024-03-01), which would
 * let an impossible date through.
 *
 * @param {*} value - raw configured value; nullish or blank means "not set"
 * @returns {string} the trimmed date string, or '' when not configured
 * @throws {Error} when the value is not `YYYY-MM-DD` or is not a real calendar date
 */
function normalizeConfiguredDate(value) {
  const normalized = String(value || '').trim();
  if (!normalized) {
    return '';
  }
  const match = /^(\d{4})-(\d{2})-(\d{2})$/.exec(normalized);
  if (!match) {
    throw new Error(`ALIYUN_APS_INCREMENTAL_ORDER_START_DATE 格式无效: ${normalized},期望 YYYY-MM-DD`);
  }
  const [year, month, day] = match.slice(1).map(Number);
  // setUTCFullYear avoids Date.UTC's remapping of years 0-99 to 1900-1999.
  const probe = new Date(0);
  probe.setUTCFullYear(year, month - 1, day);
  const isRealDate = probe.getUTCFullYear() === year
    && probe.getUTCMonth() === month - 1
    && probe.getUTCDate() === day;
  if (!isRealDate) {
    throw new Error(`ALIYUN_APS_INCREMENTAL_ORDER_START_DATE 不是有效日期: ${normalized}`);
  }
  return normalized;
}
/**
 * Scrape the bills table month by month and persist the normalized records.
 *
 * Month selection:
 *  - full sync: every month from `config.billStartMonth` up to now;
 *  - incremental: every month from the month of the latest consumption date in
 *    the database up to the current month (only the current month when the
 *    database has no date). Querying only the latest stored month would never
 *    reach a new month once the stored date is the end of a previous one.
 *    In incremental mode records are additionally filtered to those strictly
 *    after the latest stored consumption date.
 *
 * Resume: with `options.resume`, the newest bills checkpoint is loaded; months
 * before the checkpoint month are dropped, the checkpoint's records seed that
 * month, and scraping continues on the page after the checkpointed one.
 *
 * A checkpoint is written after every scraped page.
 *
 * @param page - Playwright page
 * @param {{resume?: boolean}} [options={}]
 * @returns {Promise<object>} whatever persistNormalizedDataset returns
 */
async function syncBills(page, options = {}) {
  await runtimeCheckpoint('同步账单');
  const dataset = datasets.bills;
  const { resume = false } = options;
  let months;
  let latestConsumptionDate = null;
  if (config.fullSync) {
    months = buildMonthList(config.billStartMonth);
  } else {
    latestConsumptionDate = getLatestBillConsumptionDate();
    const now = new Date();
    const currentMonth = `${now.getFullYear()}-${String(now.getMonth() + 1).padStart(2, '0')}`;
    const incrementalMonth = latestConsumptionDate?.slice(0, 7) || currentMonth;
    months = buildMonthList(incrementalMonth);
    if (months.length === 0) {
      // The stored date lies in a future month (e.g. clock skew): query that month only.
      months = [incrementalMonth];
    }
    console.log(`[增量模式] 账单仅查询: ${months.join(', ')}${latestConsumptionDate ? `, 数据库最新消费时间: ${latestConsumptionDate}` : ''}`);
  }
  const resumeCheckpoint = resume ? loadLatestBillsCheckpoint() : null;
  if (resumeCheckpoint?.month) {
    const resumeIndex = months.indexOf(resumeCheckpoint.month);
    if (resumeIndex >= 0) {
      months = months.slice(resumeIndex);
      console.log(`[账单续爬] 从 checkpoint 恢复: month=${resumeCheckpoint.month}, page=${resumeCheckpoint.pageNum || 1}, records=${(resumeCheckpoint.records || []).length}`);
    }
  }
  const allNormalizedRecords = [];
  for (const month of months) {
    await runtimeCheckpoint(`账单月份 ${month}`);
    await page.goto(dataset.url, { waitUntil: 'domcontentloaded' });
    await waitUntilReady(page, dataset.heading);
    await setMonthValue(page, month);
    await clickQuery(page);
    await trySetPageSize(page, dataset.pageSize);
    let monthNormalizedRecords = [];
    let resumeFromPage = 0;
    let shouldContinueScrape = true;
    if (resumeCheckpoint?.month === month) {
      // Seed this month with the checkpointed records and continue after the saved page.
      monthNormalizedRecords = Array.isArray(resumeCheckpoint.records) ? resumeCheckpoint.records : [];
      resumeFromPage = Number.parseInt(String(resumeCheckpoint.pageNum || 0), 10) || 0;
      if (resumeFromPage > 0) {
        const moved = await moveBillsToResumeStart(page, resumeFromPage);
        if (!moved) {
          console.log(`[账单续爬] checkpoint 已在最后一页,无需继续抓取 month=${month}`);
          shouldContinueScrape = false;
        }
      }
    }
    let rawRecords = [];
    if (shouldContinueScrape) {
      rawRecords = await scrapePagedTable(page, dataset, { month }, {
        onPage: async ({ pageNum, pageRows }) => {
          const normalizedPageRows = normalizeDatasetRecords(dataset, pageRows, { month });
          monthNormalizedRecords.push(...normalizedPageRows);
          let checkpointRecords = monthNormalizedRecords;
          if (latestConsumptionDate) {
            checkpointRecords = monthNormalizedRecords.filter((record) => isAfterLatestConsumptionDate(record, latestConsumptionDate));
          }
          await saveBillsCheckpoint(dataset, month, pageNum, checkpointRecords);
        },
      });
    }
    if (resumeFromPage === 0) {
      // Not resuming mid-month: the scraped rows are the complete month.
      monthNormalizedRecords = normalizeDatasetRecords(dataset, rawRecords, { month });
    }
    if (latestConsumptionDate) {
      const before = monthNormalizedRecords.length;
      monthNormalizedRecords = monthNormalizedRecords.filter((record) => isAfterLatestConsumptionDate(record, latestConsumptionDate));
      console.log(`[增量模式] 账单按消费时间过滤: ${before} -> ${monthNormalizedRecords.length}`);
    }
    allNormalizedRecords.push(...monthNormalizedRecords);
  }
  return persistNormalizedDataset(dataset, dedupeByHash(allNormalizedRecords));
}
/**
 * Persist a per-month bills checkpoint named `<month>-latest` containing the
 * de-duplicated records gathered so far and the last scraped page number.
 */
async function saveBillsCheckpoint(dataset, month, pageNum, normalizedRecords) {
  const records = dedupeByHash(normalizedRecords);
  const payload = {
    month,
    pageNum,
    savedAt: new Date().toISOString(),
    stats: { total: records.length },
    records,
  };
  saveCheckpoint(dataset.name, `${month}-latest`, payload);
  console.log(`[账单检查点] 已落盘: month=${month}, page=${pageNum}, records=${records.length}`);
}
/**
 * Normalize each raw record with `dataset.normalize` and attach its hash.
 * A record's own `__context` takes precedence over the shared `context`.
 */
function normalizeDatasetRecords(dataset, records, context) {
  const normalizeOne = (record) => {
    const effectiveContext = record.__context || context;
    const normalized = dataset.normalize(record, effectiveContext);
    return withHash(normalized);
  };
  return records.map((record) => normalizeOne(record));
}
/**
 * Position the bills table on the page AFTER the checkpointed one.
 *
 * @returns {Promise<boolean>} true when scraping can continue from the current
 *   page; false when the checkpointed page was the last one
 * @throws {Error} when the checkpointed page cannot be reached
 */
async function moveBillsToResumeStart(page, resumeFromPage) {
  if (resumeFromPage <= 0) return true;

  const onCheckpointPage = await jumpToPage(page, resumeFromPage);
  if (!onCheckpointPage) {
    throw new Error(`账单续爬失败:无法定位到 checkpoint 页码 ${resumeFromPage}`);
  }
  // The checkpointed page is already stored, so step past it.
  return gotoNextPage(page);
}
/**
 * Ask the database sync script for the latest stored bill consumption time.
 *
 * @returns {string|null} the `YYYY-MM-DD` prefix of the script's last non-blank
 *   output line, or null when that line does not start with a date or the
 *   script fails
 */
function getLatestBillConsumptionDate() {
  const scriptPath = path.resolve(config.rootDir, config.dbSyncScript);
  try {
    const stdout = execSync(`python "${scriptPath}" --latest-bill-consumption-time`, {
      cwd: path.dirname(scriptPath),
      encoding: 'utf-8',
      timeout: 120000,
    });
    const nonBlankLines = stdout
      .trim()
      .split(/\r?\n/)
      .map((line) => line.trim())
      .filter(Boolean);
    const lastLine = nonBlankLines.length > 0 ? nonBlankLines[nonBlankLines.length - 1] : '';
    if (/^\d{4}-\d{2}-\d{2}/.test(lastLine)) {
      return lastLine.slice(0, 10);
    }
    return null;
  } catch (error) {
    console.error('[增量模式] 查询数据库最新账单消费时间失败:', error.message);
    return null;
  }
}
/**
 * Whether the record's consumption date (field `消费时间`, falling back to
 * `consumeDate`) is strictly later than `latestConsumptionDate`. Records
 * without a `YYYY-MM-DD` date prefix are never considered newer.
 */
function isAfterLatestConsumptionDate(record, latestConsumptionDate) {
  const rawValue = record['消费时间'] || record.consumeDate || '';
  const consumeDate = String(rawValue).trim().slice(0, 10);
  const isDateShaped = /^\d{4}-\d{2}-\d{2}$/.test(consumeDate);
  // ISO-style dates compare correctly as plain strings.
  return isDateShaped ? consumeDate > latestConsumptionDate : false;
}
async function syncOrderDetails(page, cachedOrderIds) {
await runtimeCheckpoint('同步订单详情');
const dataset = datasets.orderDetails;
// 使用传入的 orderId 列表(在 syncOrders 覆盖 orders.json 之前缓存的)
const allOrderIds = cachedOrderIds || [];
if (allOrderIds.length === 0) {
console.log('[订单详情] 本地无订单数据,跳过');
return persistDataset(dataset, [], {});
}
console.log(`[订单详情] 共 ${allOrderIds.length} 个订单需要获取详情`);
const allDetails = [];
const detailBaseUrl = 'https://aps.aliyun.com/?spm=5176.12818093.top-nav.ditem-fx.785716d0LKDpKT#/detail/order/~/costCenter/order/detail/';
for (let index = 0; index < allOrderIds.length; index += 1) {
await runtimeCheckpoint(`订单详情 ${index + 1}/${allOrderIds.length}`);
const orderId = allOrderIds[index];
console.log(`[订单详情] ${index + 1}/${allOrderIds.length} orderId=${orderId}`);
// 先跳 about:blank 再跳详情URL强制 SPA 完整重新加载)
await page.goto('about:blank');
await sleep(300);
await page.goto(`${detailBaseUrl}${orderId}?projectId=`, { waitUntil: 'domcontentloaded' });
try {
await page.waitForFunction(
(text) => document.body && document.body.innerText.includes(text),
'订单详情',
{ timeout: 15000 },
);
await sleep(1000);
} catch {
console.warn(`[订单详情] ${orderId} 详情页加载超时,跳过`);
continue;
}
const detail = await extractOrderDetail(page);
if (!isValidOrderId(detail.orderId)) {
detail.orderId = orderId;
}
allDetails.push({ ...detail, __context: {} });
}
return persistDataset(dataset, dedupeByHash(allDetails), {});
}
/** Normalize raw records for `dataset` and persist the result. */
function persistDataset(dataset, records, context) {
  return persistNormalizedDataset(dataset, normalizeDatasetRecords(dataset, records, context));
}
/**
 * Diff already-normalized records against the stored state, then save the new
 * state and its delta.
 *
 * @returns {{stamp: *, stats: *}} the run stamp and the diff statistics
 */
function persistNormalizedDataset(dataset, normalizedRecords) {
  const { name, uniqueKey } = dataset;
  const previousState = loadCurrentState(name, uniqueKey);
  const nextState = diffRecords(previousState, normalizedRecords, uniqueKey);
  const stamp = saveDatasetRun(name, nextState);
  saveDelta(name, stamp, nextState.delta);
  const { stats } = nextState;
  return { stamp, stats };
}
/**
 * Wait until the page body contains `heading`, treating login/auth pages as errors
 * unless interactive authentication is allowed.
 *
 * @param page - Playwright page
 * @param {string} heading - text that must appear in the page body
 * @param {number} [timeout=120000] - how long to wait for the heading, in ms
 * @param {{allowInteractiveAuth?: boolean}} [options={}] - when
 *   `allowInteractiveAuth` is true, an auth page is tolerated so the user can
 *   sign in while this function keeps waiting
 * @throws {Error} when the page is (or stays on) a login/auth page, or the
 *   original wait error when the heading does not appear for another reason
 */
async function waitUntilReady(page, heading, timeout = 120000, options = {}) {
await runtimeCheckpoint(`等待页面 ${heading}`);
const { allowInteractiveAuth = false } = options;
await page.waitForLoadState('domcontentloaded');
console.log(`[waitUntilReady] 当前URL: ${page.url()}`);
console.log(`[waitUntilReady] 等待页面出现: "${heading}"`);
// Phase 1: check right away whether navigation landed on a login/auth page.
const initialState = await detectAuthRedirect(page);
if (initialState.isAuthPage) {
console.error(`[waitUntilReady] 检测到登录页/鉴权页: ${initialState.currentUrl}`);
console.error(`[waitUntilReady] 页面内容前500字: ${initialState.bodyText}`);
// The alert is sent only when the URL itself is an auth URL (not for keyword-only matches).
if (!allowInteractiveAuth && isAuthUrl(initialState.currentUrl)) {
try {
await sendLoginAlert(initialState.currentUrl);
} catch (notifyErr) {
// A failed notification must not mask the login error thrown below.
console.error('[通知] 发送登录提醒失败:', notifyErr.message);
}
}
if (!allowInteractiveAuth) {
throw new Error(`当前页面仍处于登录/鉴权页,无法进入「${heading}」。请重新执行 npm run login并确认该账号对该页面有访问权限。`);
}
console.log(`[waitUntilReady] 允许交互式登录,等待用户完成认证后进入「${heading}」...`);
}
// Phase 2: wait for the heading text to show up in the body.
try {
await page.waitForFunction(
(text) => document.body && document.body.innerText.includes(text),
heading,
{ timeout },
);
} catch (err) {
// On failure, print diagnostics and turn auth-page cases into explicit errors.
const { currentUrl, bodyText, isAuthPage } = await detectAuthRedirect(page);
console.error(`[waitUntilReady] 超时当前URL: ${currentUrl}`);
console.error(`[waitUntilReady] 页面内容前500字: ${bodyText}`);
if (isAuthPage && !allowInteractiveAuth) {
try {
await sendLoginAlert(currentUrl);
} catch (notifyErr) {
console.error('[通知] 发送登录提醒失败:', notifyErr.message);
}
throw new Error(`当前页面停留在登录/鉴权页,未能进入「${heading}」。请重新执行 npm run login并确认该账号对该页面有访问权限。`);
}
if (isAuthPage && allowInteractiveAuth) {
throw new Error(`交互式登录超时,仍未进入「${heading}」。请确认已在浏览器中完成 RAM/阿里云登录,并且当前账号有访问该页面的权限。`);
}
// Not an auth page: surface the original wait error unchanged.
throw err;
}
// Phase 3: the heading was found, but the page may still classify as an auth page.
const finalState = await detectAuthRedirect(page);
if (finalState.isAuthPage && !allowInteractiveAuth) {
throw new Error(`当前页面仍处于登录/鉴权页,未成功进入「${heading}」。请重新执行 npm run login并确认该账号对该页面有访问权限。`);
}
// Fixed pause after the heading is detected, before callers interact with the page.
await sleep(1500);
}
/**
 * Scrape every page of the currently displayed table, following the "next page" button.
 *
 * @param page - Playwright page showing the table
 * @param dataset - only `dataset.name` is read here (for checkpoint labels)
 * @param context - attached to every row as `__context`
 * @param {object} [options={}]
 * @param {Function} [options.onPage] - awaited after each scraped page with
 *   `{ pageData, pageNum, pageRows }`
 * @param {boolean} [options.skipInitialPage=false] - skip the first page seen
 *   without collecting it (no visible caller in this part of the file sets it)
 * @returns {Promise<object[]>} all collected rows, each carrying `__context`
 */
async function scrapePagedTable(page, dataset, context, options = {}) {
const { onPage, skipInitialPage = false } = options;
const pages = [];
// Keys of pages already scraped, used to detect a pager that stopped advancing.
const visited = new Set();
let shouldSkipCurrentPage = skipInitialPage;
while (true) {
await runtimeCheckpoint(`抓取 ${dataset.name} 分页`);
await waitForTableRows(page);
const pageData = await extractTable(page);
const pageNum = await currentPageNumber(page);
// The key is page number plus row count, not row content.
const pageKey = `${pageNum}-${pageData.rows.length}`;
console.log(`[抓取] 第${pageNum}页, ${pageData.rows.length}行, key="${pageKey}"`);
if (shouldSkipCurrentPage) {
console.log(`[抓取] 跳过 checkpoint 已保存页: ${pageNum}`);
shouldSkipCurrentPage = false;
const moved = await gotoNextPage(page);
if (!moved) {
console.log(`[抓取] checkpoint 已位于最后一页,停止`);
break;
}
continue;
}
if (visited.has(pageKey)) {
console.log(`[抓取] 重复页面key停止翻页`);
break;
}
visited.add(pageKey);
const pageRows = pageData.rows.map((row) => ({ ...row, __context: context }));
pages.push(...pageRows);
if (onPage) {
await onPage({ pageData, pageNum, pageRows });
}
const moved = await gotoNextPage(page);
if (!moved) {
console.log(`[抓取] 翻页失败或已到最后一页,停止`);
break;
}
}
console.log(`[抓取] 共采集 ${pages.length} 条记录`);
return pages;
}
/**
 * Throw when the page currently looks like a login/auth page.
 * Before throwing, the state is logged and a login alert is attempted; a
 * failing alert is logged and does not replace the thrown error.
 *
 * @param page - Playwright page
 * @param {string} label - describes the step being performed, used in messages
 * @throws {Error} when the session appears to have expired
 */
async function raiseIfSessionExpired(page, label) {
  const authState = await detectAuthRedirect(page);
  if (authState.isAuthPage) {
    console.error(`[鉴权] ${label} 时检测到登录页/鉴权页: ${authState.currentUrl}`);
    console.error(`[鉴权] 页面内容前500字: ${authState.bodyText}`);
    try {
      await sendLoginAlert(authState.currentUrl);
    } catch (notifyErr) {
      console.error('[通知] 发送登录提醒失败:', notifyErr.message);
    }
    throw new Error(`运行过程中登录态失效(${label})。请重新执行 npm run login 后再继续同步。`);
  }
}
/**
 * Extract the visible data table from the page as `{ headers, rows }`.
 *
 * Headers and body may live in different <table> elements, so they are located
 * independently: headers from the table with the most `thead th` cells, rows
 * from the table whose widest body row has the most `td` cells. Each row becomes
 * an object keyed by header text (`column_<n>` when a header is blank).
 *
 * @returns {Promise<{headers: string[], rows: object[]}>} empty arrays when no
 *   suitable table is found
 */
async function extractTable(page) {
// Everything inside this callback runs in the browser page, not in Node.
return page.evaluate(() => {
// Replace non-breaking spaces, tidy whitespace around line breaks, and trim.
const normalize = (value) =>
String(value || '')
.replace(/\u00a0/g, ' ')
.replace(/\s+\n/g, '\n')
.replace(/\n\s+/g, '\n')
.trim();
// Header source: tables with more than one header cell, widest first.
const headerTables = Array.from(document.querySelectorAll('table')).filter((table) => table.querySelectorAll('thead th').length > 1);
const headerTable = headerTables.sort((a, b) => b.querySelectorAll('thead th').length - a.querySelectorAll('thead th').length)[0];
if (!headerTable) return { headers: [], rows: [] };
const headers = Array.from(headerTable.querySelectorAll('thead th')).map((cell) => normalize(cell.textContent));
// Body source: tables with at least one body row, ranked by their widest row.
const bodyTables = Array.from(document.querySelectorAll('table')).filter((table) => table.querySelectorAll('tbody tr').length > 0);
const bodyTable = bodyTables.sort((a, b) => {
const aSize = Math.max(...Array.from(a.querySelectorAll('tbody tr')).map((row) => row.querySelectorAll('td').length), 0);
const bSize = Math.max(...Array.from(b.querySelectorAll('tbody tr')).map((row) => row.querySelectorAll('td').length), 0);
return bSize - aSize;
})[0];
if (!bodyTable) return { headers, rows: [] };
const rows = Array.from(bodyTable.querySelectorAll('tbody tr'))
.map((row) => Array.from(row.querySelectorAll('td')).map((cell) => normalize(cell.innerText || cell.textContent)))
// Drop rows in which every cell is empty.
.filter((cells) => cells.some(Boolean))
.map((cells) => {
const record = {};
// Cells are matched to headers by position; missing cells become ''.
headers.forEach((header, index) => {
record[header || `column_${index + 1}`] = cells[index] || '';
});
return record;
});
return { headers, rows };
});
}
/**
 * Wait (up to 120 s) until at least one table body row exists, then pause
 * briefly. On timeout, a session-expired error is raised in preference to the
 * raw wait error when the page turned into a login page.
 */
async function waitForTableRows(page) {
  await runtimeCheckpoint('等待表格加载');
  const hasBodyRows = () => document.querySelectorAll('table tbody tr').length > 0;
  try {
    await page.waitForFunction(hasBodyRows, null, { timeout: 120000 });
  } catch (error) {
    await raiseIfSessionExpired(page, '等待表格加载');
    throw error;
  }
  await sleep(800);
}
/**
 * Read the active page number from the pagination control.
 * Returns 1 when no active item exists or its text is not a positive number.
 */
async function currentPageNumber(page) {
  const activeItem = page.locator('.next-pagination-item.next-current');
  const matches = await activeItem.count();
  if (matches === 0) {
    return 1;
  }
  const label = await activeItem.first().innerText();
  const parsed = Number.parseInt(label.trim(), 10);
  return parsed || 1;
}
/**
 * Move the pagination control to `targetPage`.
 *
 * First tries each candidate "jump to page" input; if none lands on the target,
 * falls back to clicking "next" repeatedly.
 *
 * @returns {Promise<boolean>} true when the pager ends up on `targetPage`
 *   (always true for targets <= 1, without touching the page)
 */
async function jumpToPage(page, targetPage) {
if (targetPage <= 1) {
return true;
}
const current = await currentPageNumber(page);
if (current === targetPage) {
return true;
}
// Selectors tried in order for the jump input.
const jumpInputCandidates = [
'.next-pagination-jump-input input',
'input[aria-label*="页码"]',
'input[aria-label*="页"]',
];
for (const selector of jumpInputCandidates) {
const input = page.locator(selector).first();
if ((await input.count()) === 0) {
continue;
}
// Each UI step is best-effort; success is judged only by the page number afterwards.
await input.click().catch(() => null);
await sleep(100);
await page.keyboard.press('Control+A').catch(() => null);
await page.keyboard.type(String(targetPage), { delay: 20 }).catch(() => null);
await page.keyboard.press('Enter').catch(() => null);
await sleep(1500);
const afterJump = await currentPageNumber(page);
if (afterJump === targetPage) {
console.log(`[账单续爬] 已跳转到第 ${targetPage}`);
return true;
}
}
// Reached both when no input exists and when an input was found but the jump did not land.
console.warn(`[账单续爬] 未找到可用跳页输入框,尝试顺序翻到第 ${targetPage}`);
// Bounded fallback: at most targetPage + 5 "next" clicks.
let guard = 0;
while (guard < targetPage + 5) {
const currentPage = await currentPageNumber(page);
if (currentPage >= targetPage) {
// Overshooting the target counts as failure.
return currentPage === targetPage;
}
const moved = await gotoNextPage(page);
if (!moved) {
return false;
}
guard += 1;
}
return false;
}
/**
 * Click the pagination "next" button and report whether the page number changed.
 *
 * @returns {Promise<boolean>} false when the button is missing or disabled, or
 *   when the page number is unchanged after the click
 * @throws {Error} when the session expired or the page number moved backwards
 */
async function gotoNextPage(page) {
await runtimeCheckpoint('翻页');
const before = await currentPageNumber(page);
// Locate the "next page" button with a Playwright locator.
// NOTE(review): the locator is used without .first(); if a page renders more
// than one matching button, getAttribute/click may fail — confirm against the UI.
const nextBtn = page.locator('button.next-pagination-item.next-next');
if ((await nextBtn.count()) === 0) {
console.log('[翻页] 未找到下一页按钮');
return false;
}
const disabled = (await nextBtn.getAttribute('disabled')) != null;
if (disabled) {
console.log('[翻页] 下一页按钮已禁用');
return false;
}
// Use Playwright's click rather than a DOM click so React event handlers fire.
await nextBtn.click();
await sleep(2000);
await raiseIfSessionExpired(page, `翻页 ${before} -> next`);
const after = await currentPageNumber(page);
console.log(`[翻页] ${before} -> ${after}`);
// Going backwards is treated as a reset session; a jump back to page 1 gets its own message.
if (before > 1 && after === 1) {
throw new Error(`分页从第 ${before} 页异常回退到第 1 页,疑似登录态失效或页面会话已重置。请重新执行 npm run login 后再继续同步。`);
}
if (after < before) {
throw new Error(`分页从第 ${before} 页异常回退到第 ${after} 页,疑似登录态失效或页面状态被重置。请重新执行 npm run login 后再继续同步。`);
}
return before !== after;
}
/**
 * Best-effort: open the page-size selector and choose `pageSize`.
 * Returns silently when the selector or the option is not found; click failures
 * are ignored.
 */
async function trySetPageSize(page, pageSize) {
await runtimeCheckpoint(`设置每页 ${pageSize}`);
const input = page.locator('input[aria-label="请选择每页显示几条"]').first();
if ((await input.count()) === 0) return;
await input.click().catch(() => null);
await sleep(300);
// NOTE(review): `text=` matches any element containing this text and .last() picks
// the final match — presumably the dropdown option; confirm it cannot hit table content.
const option = page.locator(`text=${pageSize}`).last();
if ((await option.count()) === 0) {
// Close the opened dropdown before giving up.
await page.keyboard.press('Escape').catch(() => null);
return;
}
await option.click().catch(() => null);
await sleep(1200);
}
/**
 * Fill the order date-range picker and verify the inputs show the requested
 * values. On a mismatch the fill is retried once in the opposite field order;
 * the retry result is logged but not enforced.
 */
async function setDateRange(page, start, end) {
  await runtimeCheckpoint(`设置订单日期 ${start} ~ ${end}`);
  console.log(`[订单日期] 设置: ${start} ~ ${end}`);

  // Read back what the two range inputs currently display ('' when unreadable).
  const readDisplayedRange = async () => {
    const shownStart = await page.locator('input[placeholder="起始日期"]').inputValue().catch(() => '');
    const shownEnd = await page.locator('input[placeholder="结束日期"]').inputValue().catch(() => '');
    return { shownStart, shownEnd };
  };

  await _fillDateRange(page, start, end);
  const firstAttempt = await readDisplayedRange();
  const matches = firstAttempt.shownStart === start && firstAttempt.shownEnd === end;
  if (matches) {
    console.log(`[订单日期] 结果: "${firstAttempt.shownStart}" ~ "${firstAttempt.shownEnd}"`);
    return;
  }

  // Retry with the start field filled first.
  console.log(`[订单日期] 首次结果不对: "${firstAttempt.shownStart}" ~ "${firstAttempt.shownEnd}",反向重试`);
  await _fillDateRange(page, start, end, true);
  const secondAttempt = await readDisplayedRange();
  console.log(`[订单日期] 重试结果: "${secondAttempt.shownStart}" ~ "${secondAttempt.shownEnd}"`);
}
/**
 * Type `start` and `end` into the range-picker panel inputs and close the panel.
 *
 * @param page - Playwright page
 * @param {string} start - start date text to type
 * @param {string} end - end date text to type
 * @param {boolean} [startFirst=false] - fill the start field before the end
 *   field; by default the end field is filled first
 */
async function _fillDateRange(page, start, end, startFirst = false) {
  await runtimeCheckpoint('填写订单日期');
  // Clicking the end-date input opens the picker panel.
  await page.locator('input[placeholder="结束日期"]').click();
  await sleep(1000);

  const panelStartInput = page.locator('.next-range-picker-panel-input-start-date input');
  const panelEndInput = page.locator('.next-range-picker-panel-input-end-date input');

  // Focus a panel input, select its content and type the replacement text.
  const overwrite = async (input, text) => {
    await input.click();
    await sleep(100);
    await page.keyboard.press('Control+A');
    await page.keyboard.type(text, { delay: 30 });
    await sleep(300);
  };

  const startStep = [panelStartInput, start];
  const endStep = [panelEndInput, end];
  const steps = startFirst ? [startStep, endStep] : [endStep, startStep];
  for (const [input, text] of steps) {
    await overwrite(input, text);
  }

  // Confirm, then dismiss the panel: Enter, click at (0, 0), Escape.
  await page.keyboard.press('Enter');
  await sleep(500);
  await page.mouse.click(0, 0);
  await sleep(300);
  await page.keyboard.press('Escape');
  await sleep(300);
  await page
    .locator('.next-overlay-wrapper.opened')
    .waitFor({ state: 'hidden', timeout: 3000 })
    .catch(() => null);
  await sleep(300);
}
/**
 * Find the bill month input on the page and type `month` into it.
 *
 * The input is located with three heuristics, tried in order:
 *  1. the first input whose current value looks like YYYY-MM;
 *  2. the first input whose placeholder contains "月";
 *  3. the first input nested in an element whose class contains
 *     date-picker / month-picker / range-picker.
 *
 * @throws {Error} when no input matches (all inputs are dumped to the log first)
 */
async function setMonthValue(page, month) {
await runtimeCheckpoint(`设置账单月份 ${month}`);
// Pass 1: match by inputValue in YYYY-MM format, recording every input along the way.
const inputs = page.locator('input');
const total = await inputs.count();
const allValues = [];
for (let index = 0; index < total; index += 1) {
const input = inputs.nth(index);
const value = await input.inputValue().catch(() => '');
const placeholder = await input.getAttribute('placeholder').catch(() => '');
allValues.push({ index, value, placeholder });
if (/^\d{4}-\d{2}$/.test(value)) {
console.log(`[账单月份] 通过 value 匹配到 input[${index}], 设置: ${month}`);
await typeIntoDateInput(input, month, page);
return;
}
}
// Pass 2: no value matched, so match a month picker by its placeholder.
for (const item of allValues) {
if (item.placeholder && /月/.test(item.placeholder)) {
console.log(`[账单月份] 通过 placeholder 匹配到 input[${item.index}], 设置: ${month}`);
await typeIntoDateInput(inputs.nth(item.index), month, page);
return;
}
}
// Pass 3 (fallback): any input inside a date/month/range picker container,
// which excludes unrelated inputs such as search boxes.
for (const item of allValues) {
const input = inputs.nth(item.index);
const cls = await input.evaluate((el) => el.closest('[class*="date-picker"], [class*="month-picker"], [class*="range-picker"]')?.className || '').catch(() => '');
if (cls) {
console.log(`[账单月份] 通过父级 class 匹配到 input[${item.index}] (${cls}), 设置: ${month}`);
await typeIntoDateInput(input, month, page);
return;
}
}
// Nothing matched: dump what was seen to help diagnose a page structure change.
console.error('[DEBUG] 账单页面所有 input:', JSON.stringify(allValues, null, 2));
throw new Error('未识别到账单佣金月份输入框,请打开页面确认结构是否变化。');
}
/**
 * Enter a date value into an input using the keyboard.
 *
 * Sequence: remove `readonly`, focus, select all, type the value, press Tab to
 * move focus away, then press Escape as a fallback to close any picker panel.
 * The original author's intent is that Tab commits the value through blur
 * without opening the panel the way a click would.
 *
 * The final value is only checked and logged; a mismatch produces a warning,
 * not an error.
 *
 * @param locator - Playwright locator of the input
 * @param {string} value - text to type
 * @param page - Playwright page, used for keyboard input
 */
async function typeIntoDateInput(locator, value, page) {
await runtimeCheckpoint(`填写日期输入 ${value}`);
// Remove the readonly attribute so the input accepts typing.
await locator.evaluate((node) => node.removeAttribute('readonly'));
// Focus the input and select its current content.
await locator.focus();
await sleep(100);
await page.keyboard.press('Control+A');
await sleep(100);
// Type the new value character by character.
await page.keyboard.type(value, { delay: 30 });
await sleep(200);
// Tab moves focus away, intended to commit the value via blur and close the panel.
await page.keyboard.press('Tab');
await sleep(300);
// Escape as a fallback in case the panel is still open.
await page.keyboard.press('Escape');
await sleep(300);
// Verify what the input now shows.
const actual = await locator.inputValue().catch(() => '');
if (actual !== value) {
console.warn(`[WARN] typeIntoDateInput: 期望 "${value}",实际 "${actual}"`);
} else {
console.log(`[日期设置] 成功: "${value}"`);
}
}
/**
 * Click the first button whose text contains "查询" and wait for the page to react.
 *
 * @param {object} page - Playwright page showing the query form.
 * @returns {Promise<void>}
 */
async function clickQuery(page) {
  await runtimeCheckpoint('点击查询');
  await page.locator('button:has-text("查询")').first().click();
  // Fixed pause after the click before the caller continues.
  await sleep(1800);
}
/**
 * Build one date window per calendar month, from the month of `startDate`
 * through the current month.
 *
 * Each window starts on the 1st of its month (the day-of-month of `startDate`
 * is not used) and ends on the last day of that month, except that a window
 * end later than "now" is capped at today. `windowStart`/`start` and
 * `windowEnd`/`end` carry the same values.
 *
 * @param {string} startDate - Start date, expected in `YYYY-MM-DD` form.
 * @returns {Array<{windowStart: string, windowEnd: string, start: string, end: string}>}
 *   Windows in chronological order; empty when `startDate` cannot be parsed
 *   or lies in a future month.
 */
function buildMonthlyDateWindows(startDate) {
  // Same validity rule as before: the text must parse as a date at +08:00.
  const parsedStart = new Date(`${startDate}T00:00:00+08:00`);
  if (Number.isNaN(parsedStart.getTime())) return [];

  // Take year/month from the text itself. Reading them back from the parsed
  // instant with local-time getters shifts the start into the previous month
  // on hosts west of UTC+8 (e.g. 2024-01-01 became December 2023 under UTC).
  const isoParts = /^(\d{4})-(\d{2})-/.exec(String(startDate));
  const startYear = isoParts ? Number(isoParts[1]) : parsedStart.getFullYear();
  const startMonthIndex = isoParts ? Number(isoParts[2]) - 1 : parsedStart.getMonth();

  const today = new Date();
  const windows = [];
  const cursor = new Date(startYear, startMonthIndex, 1);
  while (cursor <= today) {
    // Day 0 of the next month is the last day of the cursor's month.
    const monthEnd = new Date(cursor.getFullYear(), cursor.getMonth() + 1, 0);
    const from = formatDate(cursor);
    const to = formatDate(monthEnd > today ? today : monthEnd);
    windows.push({
      windowStart: from,
      windowEnd: to,
      start: from,
      end: to,
    });
    cursor.setMonth(cursor.getMonth() + 1);
  }
  return windows;
}
/**
 * List every month from `startMonth` up to and including the current month.
 *
 * @param {string} startMonth - First month, as `YYYY-MM`.
 * @returns {string[]} Months formatted as `YYYY-MM` in chronological order;
 *   empty when the start is in the future or cannot be parsed into numbers.
 */
function buildMonthList(startMonth) {
  const [firstYear, firstMonthNumber] = startMonth.split('-').map((part) => Number(part));
  const now = new Date();
  const result = [];
  for (
    let pointer = new Date(firstYear, firstMonthNumber - 1, 1);
    pointer <= now;
    pointer.setMonth(pointer.getMonth() + 1)
  ) {
    const monthLabel = String(pointer.getMonth() + 1).padStart(2, '0');
    result.push(`${pointer.getFullYear()}-${monthLabel}`);
  }
  return result;
}
/**
 * Format a Date as `YYYY-MM-DD` using its local-time fields.
 *
 * @param {Date} date - Date to format.
 * @returns {string} Year, zero-padded month and zero-padded day joined by `-`.
 */
function formatDate(date) {
  const twoDigits = (num) => String(num).padStart(2, '0');
  const parts = [date.getFullYear(), twoDigits(date.getMonth() + 1), twoDigits(date.getDate())];
  return parts.join('-');
}
/**
 * Remove duplicate records, keeping the first occurrence of each.
 *
 * Two records count as duplicates when their `JSON.stringify` output is
 * identical, so objects with the same data but a different key order are
 * treated as distinct.
 *
 * @param {Array<object>} records - Records to filter; not modified.
 * @returns {Array<object>} New array holding the first occurrence of each record.
 */
function dedupeByHash(records) {
  const fingerprints = new Set();
  return records.filter((entry) => {
    // A Set only grows when the fingerprint has not been seen before.
    const sizeBefore = fingerprints.size;
    fingerprints.add(JSON.stringify(entry));
    return fingerprints.size > sizeBefore;
  });
}
/**
 * Collect the distinct, valid order IDs found in a list of records.
 *
 * Blank values and the "没有数据" placeholder are skipped silently; values
 * rejected by `isValidOrderId` are skipped with a log line.
 *
 * @param {Array<object>} records - Records carrying `orderId` or `订单号`.
 * @returns {string[]} Trimmed order IDs in first-seen order, without duplicates.
 */
function collectValidOrderIds(records) {
  // A Set keeps insertion order, so it handles both ordering and de-duplication.
  const uniqueIds = new Set();
  for (const row of records) {
    // Two field names are supported: the normalized `orderId` and the raw `订单号`.
    const candidate = String(row.orderId || row['订单号'] || '').trim();
    const isPlaceholder = candidate === '' || candidate.includes('没有数据');
    if (isPlaceholder) continue;
    if (isValidOrderId(candidate)) {
      uniqueIds.add(candidate);
    } else {
      console.log(`[订单详情] 跳过无效订单号: ${candidate}`);
    }
  }
  return [...uniqueIds];
}
/**
 * Collect the distinct, valid account IDs found in a list of records.
 *
 * Blank values and the "没有数据" placeholder are skipped silently; values
 * rejected by `isValidAccountId` are skipped with a log line.
 *
 * @param {Array<object>} records - Records carrying an `accountId` field.
 * @returns {string[]} Trimmed account IDs in first-seen order, without duplicates.
 */
function collectValidAccountIds(records) {
  const collected = new Set();
  for (const row of records) {
    const candidate = String(row.accountId || '').trim();
    const hasContent = candidate !== '' && !candidate.includes('没有数据');
    if (!hasContent) continue;
    if (isValidAccountId(candidate)) {
      // Adding an already-present ID is a no-op, which removes duplicates.
      collected.add(candidate);
    } else {
      console.log(`[客户详情] 跳过无效 accountId: ${candidate}`);
    }
  }
  return Array.from(collected);
}
/**
 * Check whether a value looks like a usable order ID.
 *
 * @param {*} orderId - Candidate value; falsy input counts as empty.
 * @returns {boolean} True when the trimmed text is non-empty and digits only.
 */
function isValidOrderId(orderId) {
  const candidate = String(orderId || '').trim();
  const hasRejectedMarker = candidate.includes('<27>');
  return candidate !== '' && !hasRejectedMarker && /^\d+$/.test(candidate);
}
/**
 * Check whether a value looks like a usable account ID.
 *
 * @param {*} accountId - Candidate value; falsy input counts as empty.
 * @returns {boolean} True when the trimmed text is non-empty and digits only.
 */
function isValidAccountId(accountId) {
  const text = String(accountId || '').trim();
  const isCandidate = text.length > 0 && !text.includes('<27>');
  return isCandidate ? /^\d+$/.test(text) : false;
}
/**
 * Scrape the order-detail fields from the visible text of the current page.
 *
 * For every field the label is searched in `document.body.innerText`. The
 * value is taken from the next non-empty line after the label or, failing
 * that, from the rest of the label's own line (an optional colon is skipped).
 * Fields whose label is not found come back as an empty string.
 *
 * @param {object} page - Playwright page showing an order detail view.
 * @returns {Promise<Record<string, string>>} Field name to trimmed value.
 */
async function extractOrderDetail(page) {
  // The callback is handed to page.evaluate, so it must stay self-contained:
  // every helper it needs is declared inside it.
  return page.evaluate(() => {
    const text = document.body?.innerText || '';

    // Labels are plain text. Escape every regex metacharacter so that
    // parentheses and `+` in a label are matched literally instead of forming
    // capture groups (which made the value group index wrong and the label
    // unmatchable). Parentheses accept both the ASCII and the full-width form.
    const labelToPattern = (label) =>
      label
        .replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
        .replace(/\\\(/g, '[(\\uFF08]')
        .replace(/\\\)/g, '[)\\uFF09]');

    const extract = (label) => {
      const labelPattern = labelToPattern(label);
      // Preferred layout: label on one line, value on a following line.
      const lineBreakMatch = text.match(
        new RegExp(`${labelPattern}\\s*(?:\\r?\\n)+\\s*([^\\r\\n]+)`),
      );
      if (lineBreakMatch) return lineBreakMatch[1].trim();
      // Fallback layout: "label: value" on one line (ASCII or full-width colon).
      const inlineMatch = text.match(
        new RegExp(`${labelPattern}\\s*[:\\uFF1A]?\\s*([^\\r\\n]+)`),
      );
      return inlineMatch ? inlineMatch[1].trim() : '';
    };

    return {
      orderId: extract('订单号'),
      orderType: extract('订单类型'),
      status: extract('状态'),
      tradeType: extract('交易类型'),
      customerCategory: extract('客户分类'),
      dealerName: extract('二级经销商名称'),
      dealerUid: extract('二级经销商UID'),
      customerType: extract('客户类型'),
      opportunityId: extract('商机ID'),
      paymentTime: extract('支付时间'),
      orderTime: extract('下单时间'),
      productName: extract('产品名称'),
      productCode: extract('产品code'),
      originalPriceCny: extract('订单原价(CNY)'),
      paidAmountCny: extract('实付金额(CNY)'),
      discount: extract('订单折扣'),
      payableAmountCny: extract('应付金额(实付+代金券)(CNY)'),
      couponAmountCny: extract('代金券金额(CNY)'),
    };
  });
}
/**
 * Scrape the customer-detail fields from the current page.
 *
 * Profile fields are read from the page text by label (value on the following
 * line, or on the label's own line). The two billing summaries are read from
 * the text slices that start at the "上月应付总金额CNY" / "本月应付总金额CNY"
 * headings, and the department is the first non-empty second cell in the table
 * that mentions "所属部门". Anything not found is returned as an empty string.
 *
 * @param {object} page - Playwright page showing a customer detail view.
 * @returns {Promise<Record<string, string>>} Field name to cleaned value.
 */
async function extractCustomerDetail(page) {
  // The callback is handed to page.evaluate, so every helper lives inside it.
  return page.evaluate(() => {
    // Convert non-breaking spaces to plain spaces and trim.
    const clean = (raw) => String(raw || '').replace(/\u00a0/g, ' ').trim();

    const pageText = clean(document.body?.innerText || '').replace(/\r/g, '');

    // Look a label up in `haystack`: next-line value first, same-line value second.
    const readField = (label, haystack = pageText) => {
      const onNextLine = haystack.match(new RegExp(`${label}\\s*(?:\\n)+\\s*([^\\n]+)`));
      if (onNextLine) return clean(onNextLine[1]);
      const onSameLine = haystack.match(new RegExp(`${label}\\s*[:]?\\s*([^\\n]+)`));
      return onSameLine ? clean(onSameLine[1]) : '';
    };

    // Amounts are returned without the currency sign and thousands separators.
    const readAmount = (section, label) =>
      clean(readField(label, section)).replace(/[¥,]/g, '').trim();

    // Slice the page text from one heading up to the next (or to the end).
    const sliceSection = (fromLabel, untilLabel = '') => {
      const from = pageText.indexOf(fromLabel);
      if (from < 0) return '';
      const until = untilLabel ? pageText.indexOf(untilLabel, from + fromLabel.length) : -1;
      return until > from ? pageText.slice(from, until) : pageText.slice(from);
    };

    // First non-empty second cell of the table that mentions the department header.
    const readDepartment = () => {
      const departmentTable = Array.from(document.querySelectorAll('table')).find((candidate) =>
        (candidate.innerText || '').includes('所属部门'),
      );
      if (!departmentTable) return '';
      for (const tableRow of departmentTable.querySelectorAll('tbody tr')) {
        const tableCells = tableRow.querySelectorAll('td');
        if (tableCells.length < 2) continue;
        const secondCell = tableCells[1];
        const cellText = clean(secondCell?.innerText || secondCell?.textContent || '');
        if (cellText) return cellText;
      }
      return '';
    };

    const previousMonth = sliceSection('上月应付总金额CNY', '本月应付总金额CNY');
    const thisMonth = sliceSection('本月应付总金额CNY');
    const department = readDepartment();

    return {
      customerAccount: readField('客户账号'),
      customerName: readField('客户名称'),
      customerType: readField('客户类型'),
      tradeMode: readField('交易模式'),
      customerSource: readField('客户来源'),
      realNameStatus: readField('实名状态'),
      email: readField('邮箱'),
      relationDate: readField('关联日期'),
      phone: readField('手机号'),
      remark: readField('备注'),
      paymentNoticeStatus: readField('代为支付告知状态'),
      department,
      lastMonthPayableTotalCny: readAmount(previousMonth, '上月应付总金额CNY'),
      lastMonthPrepayCny: readAmount(previousMonth, '预付费金额'),
      lastMonthPostpayCny: readAmount(previousMonth, '后付费金额'),
      currentMonthPayableTotalCny: readAmount(thisMonth, '本月应付总金额CNY'),
      currentMonthPrepayCny: readAmount(thisMonth, '预付费金额'),
      currentMonthPostpayCny: readAmount(thisMonth, '后付费金额'),
    };
  });
}