// Viewer metadata: 1309 lines · 45 KiB · JavaScript
import { chromium } from 'playwright';
|
||
import cron from 'node-cron';
|
||
import fs from 'node:fs';
|
||
import path from 'node:path';
|
||
import readline from 'node:readline';
|
||
import { execSync } from 'node:child_process';
|
||
import { config, datasets } from './config.js';
|
||
import { sendLoginAlert } from './notify.js';
|
||
import {
|
||
diffRecords,
|
||
loadCurrentState,
|
||
nowStamp,
|
||
saveCheckpoint,
|
||
saveDatasetRun,
|
||
saveDelta,
|
||
saveRunSummary,
|
||
withHash,
|
||
} from './storage.js';
|
||
|
||
/**
 * Resolve after roughly `ms` milliseconds. Used to pace UI interactions.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}

// Persistent browser context, created lazily by getContext() and shared afterwards.
let _context = null;

// Hotkey controller singleton, created lazily by getRuntimeController().
let _runtimeController = null;

// Text fragments that mark a page body as a login / authentication page.
const AUTH_PAGE_KEYWORDS = [
  'RAM 用户登录',
  '主账号登录',
  '钉钉扫码登录',
  '用户名',
  '下一步',
  '登录并使用 RAM',
];
|
||
|
||
/**
 * Close the shared browser context if one is open.
 *
 * The module-level reference is cleared BEFORE awaiting `close()`, so a failing
 * close cannot leave a dead context cached for the next getContext() call, and a
 * second concurrent caller does not try to close the same context again.
 *
 * @returns {Promise<void>} Rejects with whatever `context.close()` rejects with.
 */
async function closeContextIfNeeded() {
  const context = _context;
  if (!context) return;
  _context = null;
  await context.close();
}
|
||
|
||
/**
 * Return the process-wide hotkey controller, creating it on first use.
 *
 * The controller exposes:
 *  - bind():   on a TTY stdin, enable raw mode and listen for F7 (pause),
 *              F8 (resume), F9 (terminate) and Ctrl+C.
 *  - unbind(): remove the listener, leave raw mode and pause stdin again.
 *  - waitIfPaused(label):      resolves once not paused; throws if terminated.
 *  - throwIfTerminated(label): throws if termination was requested.
 *
 * @returns {{bind: Function, unbind: Function, waitIfPaused: Function, throwIfTerminated: Function}}
 */
function getRuntimeController() {
  if (_runtimeController) return _runtimeController;

  let paused = false;
  let terminated = false;
  let keypressBound = false;

  const throwIfTerminated = (label = '任务') => {
    if (terminated) {
      throw new Error(`[控制] 已终止:${label}`);
    }
  };

  const waitIfPaused = async (label = '任务') => {
    throwIfTerminated(label);
    while (paused) {
      await sleep(300);
      throwIfTerminated(label);
    }
  };

  const onKeypress = (_str, key = {}) => {
    if (key.ctrl && key.name === 'c') {
      // Raw mode disables the terminal's own Ctrl+C -> SIGINT translation, so
      // forward it: run registered SIGINT handlers, or fall back to the default
      // signal behaviour after restoring the terminal.
      if (process.listenerCount('SIGINT') > 0) {
        process.emit('SIGINT', 'SIGINT');
      } else {
        unbind();
        process.kill(process.pid, 'SIGINT');
      }
      return;
    }
    if (key.name === 'f7') {
      if (!paused) {
        paused = true;
        console.log('[控制] 已暂停(F7)。按 F8 继续,按 F9 终止。');
      }
      return;
    }
    if (key.name === 'f8') {
      if (paused) {
        paused = false;
        console.log('[控制] 已继续(F8)。');
      }
      return;
    }
    if (key.name === 'f9') {
      terminated = true;
      // Clear the pause so a paused waiter wakes up and observes the termination.
      paused = false;
      console.log('[控制] 已请求终止(F9),将在安全检查点停止。');
    }
  };

  const bind = () => {
    if (keypressBound || !process.stdin.isTTY) return;
    readline.emitKeypressEvents(process.stdin);
    if (typeof process.stdin.setRawMode === 'function') {
      process.stdin.setRawMode(true);
    }
    process.stdin.resume();
    process.stdin.on('keypress', onKeypress);
    keypressBound = true;
    console.log('[控制] 热键已启用:F7 暂停 / F8 继续 / F9 终止');
  };

  const unbind = () => {
    if (!keypressBound) return;
    process.stdin.off('keypress', onKeypress);
    if (process.stdin.isTTY && typeof process.stdin.setRawMode === 'function') {
      process.stdin.setRawMode(false);
    }
    // bind() resumed stdin; pause it again so the flowing stream does not keep
    // the process alive after the work is done.
    process.stdin.pause();
    keypressBound = false;
  };

  _runtimeController = {
    bind,
    unbind,
    waitIfPaused,
    throwIfTerminated,
  };

  return _runtimeController;
}
|
||
|
||
/**
 * Cooperative checkpoint: throws if termination was requested, otherwise waits
 * while the run is paused.
 *
 * @param {string} label - Text included in the termination error message.
 */
async function runtimeCheckpoint(label) {
  const { throwIfTerminated, waitIfPaused } = getRuntimeController();
  throwIfTerminated(label);
  await waitIfPaused(label);
}
|
||
|
||
/**
 * Return the shared persistent Chrome context, launching it on first use and
 * restoring saved cookies into it.
 *
 * @returns {Promise<object>} The Playwright browser context.
 */
async function getContext() {
  if (!_context) {
    const launchOptions = {
      channel: 'chrome',
      headless: config.headless,
      acceptDownloads: true,
      downloadsPath: config.downloadDir,
    };
    _context = await chromium.launchPersistentContext(config.userDataDir, launchOptions);
    await restoreStorageState(_context);
  }
  return _context;
}
|
||
|
||
/**
 * Best-effort: load cookies from the storage-state snapshot file into `context`.
 * A missing file is a no-op; read/parse/restore failures are logged and ignored.
 *
 * @param {object} context - Browser context exposing `addCookies`.
 */
async function restoreStorageState(context) {
  const snapshotExists = fs.existsSync(config.storageStateFile);
  if (!snapshotExists) return;

  try {
    const raw = fs.readFileSync(config.storageStateFile, 'utf-8');
    const state = JSON.parse(raw);
    const hasCookies = Array.isArray(state.cookies) && state.cookies.length > 0;
    if (hasCookies) {
      await context.addCookies(state.cookies);
      console.log(`[storageState] 已恢复 ${state.cookies.length} 个 cookie`);
    }
  } catch (error) {
    console.warn(`[storageState] 恢复失败,继续使用 .browser profile: ${error.message}`);
  }
}
|
||
|
||
/**
 * Write the context's storage state to the configured snapshot file.
 *
 * @param {object} context - Browser context exposing `storageState`.
 */
async function saveStorageState(context) {
  const target = { path: config.storageStateFile };
  await context.storageState(target);
  console.log(`[storageState] 已保存登录态快照: ${config.storageStateFile}`);
}
|
||
|
||
/**
 * Load the most recently modified `*.json` file under
 * `<dataDir>/checkpoints/bills`.
 *
 * @returns {object|null} The parsed checkpoint, or null when the directory or
 *   any candidate file is missing, the content is not an object, or it cannot
 *   be read/parsed.
 */
function loadLatestBillsCheckpoint() {
  const checkpointDir = path.join(config.dataDir, 'checkpoints', 'bills');
  if (!fs.existsSync(checkpointDir)) return null;

  // Track the newest file; on equal mtimes the earlier directory entry wins.
  let newest = null;
  for (const fileName of fs.readdirSync(checkpointDir)) {
    if (!fileName.endsWith('.json')) continue;
    const filePath = path.join(checkpointDir, fileName);
    const { mtimeMs } = fs.statSync(filePath);
    if (newest === null || mtimeMs > newest.mtimeMs) {
      newest = { fileName, filePath, mtimeMs };
    }
  }
  if (newest === null) return null;

  try {
    const parsed = JSON.parse(fs.readFileSync(newest.filePath, 'utf-8'));
    return parsed && typeof parsed === 'object' ? parsed : null;
  } catch (error) {
    console.warn(`[账单检查点] 读取失败,忽略断点续爬: ${error.message}`);
    return null;
  }
}
|
||
|
||
/**
 * Read the first 500 characters of the page body text.
 *
 * @param {object} page - Page exposing `evaluate`.
 * @returns {Promise<string>} The preview, '(空)' when the body text is empty,
 *   or '(无法获取)' when the evaluation rejects.
 */
async function getPageBodyPreview(page) {
  const preview = page.evaluate(() => document.body?.innerText?.substring(0, 500) || '(空)');
  return preview.catch(() => '(无法获取)');
}
|
||
|
||
/**
 * Tell whether a URL points at an Aliyun sign-in / account page.
 *
 * @param {string} url
 * @returns {boolean}
 */
function isAuthUrl(url) {
  if (/account\.aliyun\.com|signin\.aliyun\.com/.test(url)) {
    return true;
  }
  if (url.includes('login.htm')) {
    return true;
  }
  return url.includes('/#/signin');
}
|
||
|
||
/**
 * Tell whether `text` contains any of the AUTH_PAGE_KEYWORDS fragments.
 *
 * @param {string} text
 * @returns {boolean}
 */
function hasAuthKeywords(text) {
  for (const keyword of AUTH_PAGE_KEYWORDS) {
    if (text.includes(keyword)) {
      return true;
    }
  }
  return false;
}
|
||
|
||
/**
 * Inspect the page's current URL and body preview for signs of a login page.
 *
 * @param {object} page
 * @returns {Promise<{currentUrl: string, bodyText: string, isAuthPage: boolean}>}
 */
async function detectAuthRedirect(page) {
  const currentUrl = page.url();
  const bodyText = await getPageBodyPreview(page);
  const isAuthPage = isAuthUrl(currentUrl) || hasAuthKeywords(bodyText);
  return { currentUrl, bodyText, isAuthPage };
}
|
||
|
||
/**
 * Navigate to a dataset's page and wait until its heading text is visible.
 *
 * @param {object} page
 * @param {{url: string, heading: string}} dataset
 * @param {number} [timeout=120000] - Forwarded to waitUntilReady.
 * @param {object} [options={}] - Forwarded to waitUntilReady.
 */
async function ensureDatasetAccessible(page, dataset, timeout = 120000, options = {}) {
  const navigation = { waitUntil: 'domcontentloaded' };
  await page.goto(dataset.url, navigation);
  await waitUntilReady(page, dataset.heading, timeout, options);
}
|
||
|
||
/**
 * Interactive login flow: open the customers page, wait (up to 10 minutes) for
 * the user to sign in, verify the bills page is reachable too, then save the
 * storage-state snapshot and close the browser.
 *
 * The browser is launched inside the `try` so that a launch failure still
 * unbinds the hotkeys and removes the signal handlers.
 *
 * @returns {Promise<void>}
 */
export async function login() {
  const runtimeController = getRuntimeController();
  runtimeController.bind();

  const cleanupAndExit = async (signal) => {
    console.log(`[login] 收到 ${signal},正在保存登录态并关闭浏览器...`);
    await closeContextIfNeeded();
    process.exit(130);
  };

  const onSigint = () => {
    void cleanupAndExit('SIGINT');
  };
  const onSigterm = () => {
    void cleanupAndExit('SIGTERM');
  };

  process.once('SIGINT', onSigint);
  process.once('SIGTERM', onSigterm);

  try {
    const context = await getContext();
    const page = context.pages()[0] || (await context.newPage());
    await page.goto(datasets.customers.url, { waitUntil: 'domcontentloaded' });
    console.log('请在打开的浏览器里完成阿里云伙伴中心登录。检测到进入“我的客户”和“账单查询”页面后,脚本会自动保存登录态并关闭浏览器。');
    await waitUntilReady(page, datasets.customers.heading, 10 * 60 * 1000, { allowInteractiveAuth: true });
    console.log('[login] 我的客户页验证通过,继续验证账单页登录态...');
    await ensureDatasetAccessible(page, datasets.bills, 60 * 1000, { allowInteractiveAuth: true });
    await sleep(1000);
    await saveStorageState(context);
    console.log('登录态已写入 .browser 目录,且已验证“我的客户”和“账单查询”页面可访问,后续可直接执行 npm run sync 或 npm run bills。');
  } finally {
    process.off('SIGINT', onSigint);
    process.off('SIGTERM', onSigterm);
    await closeContextIfNeeded();
    runtimeController.unbind();
  }
}
|
||
|
||
/**
 * Run every dataset sync in order (customers, customer details, orders, order
 * details, bills) and save a run summary.
 *
 * The browser is launched inside the `try` so that a launch failure still
 * unbinds the hotkeys.
 *
 * @returns {Promise<object>} Summary with `startedAt`, `finishedAt` and one
 *   entry per dataset.
 */
export async function syncAll() {
  const runtimeController = getRuntimeController();
  runtimeController.bind();

  try {
    const context = await getContext();
    const summary = { startedAt: new Date().toISOString(), datasets: {} };
    const page = context.pages()[0] || (await context.newPage());

    summary.datasets.customers = await syncCustomers(page);
    summary.datasets.customerDetails = await syncCustomerDetails(page);
    summary.datasets.orders = await syncOrders(page);

    // Read the order ids from the stored orders state once syncOrders has finished.
    const latestOrders = loadCurrentState('orders', datasets.orders.uniqueKey);
    const orderIdsForDetail = collectValidOrderIds(latestOrders.records || []);

    summary.datasets.orderDetails = await syncOrderDetails(page, orderIdsForDetail);
    summary.datasets.bills = await syncBills(page);
    summary.finishedAt = new Date().toISOString();

    const stamp = nowStamp();
    saveRunSummary(stamp, summary);
    return summary;
  } finally {
    if (config.closeBrowser) {
      await closeContextIfNeeded();
    } else {
      console.log('浏览器保持运行');
    }
    runtimeController.unbind();
  }
}
|
||
|
||
/**
 * Run only the bills sync and save a run summary.
 *
 * The browser is launched inside the `try` so that a launch failure still
 * unbinds the hotkeys.
 *
 * @param {object} [options={}] - Forwarded to syncBills (e.g. `{ resume }`).
 * @returns {Promise<object>} Summary with `startedAt`, `finishedAt` and the
 *   bills dataset entry.
 */
export async function syncBillsOnly(options = {}) {
  const runtimeController = getRuntimeController();
  runtimeController.bind();

  try {
    const context = await getContext();
    const summary = { startedAt: new Date().toISOString(), datasets: {} };
    const page = context.pages()[0] || (await context.newPage());

    summary.datasets.bills = await syncBills(page, options);
    summary.finishedAt = new Date().toISOString();

    const stamp = nowStamp();
    saveRunSummary(stamp, summary);
    return summary;
  } finally {
    if (config.closeBrowser) {
      await closeContextIfNeeded();
    } else {
      console.log('浏览器保持运行');
    }
    runtimeController.unbind();
  }
}
|
||
|
||
/**
 * Register the cron job that runs syncAll() and then the database ingest script.
 *
 * Every run shares the single browser context, so a tick that fires while the
 * previous run is still in progress is skipped instead of running concurrently.
 * Failures are logged and never propagate out of the cron callback.
 *
 * @returns {Promise<void>}
 */
export async function scheduleSync() {
  console.log(`定时任务已启动: ${config.cron} (${config.timezone})`);

  let running = false;

  cron.schedule(
    config.cron,
    async () => {
      if (running) {
        console.warn(`[${new Date().toISOString()}] 上一次同步尚未结束,跳过本次触发`);
        return;
      }
      running = true;
      try {
        console.log(`[${new Date().toISOString()}] 开始执行同步`);
        const summary = await syncAll();
        console.log(`[${new Date().toISOString()}] 同步完成`, JSON.stringify(summary, null, 2));
        try {
          const scriptPath = path.resolve(config.rootDir, config.dbSyncScript);
          const incrementalFlag = config.fullSync ? '' : ' --incremental';
          console.log(`[入库] 执行 ${scriptPath}${incrementalFlag ? ' (增量模式)' : ''}`);
          const output = execSync(`python "${scriptPath}"${incrementalFlag}`, {
            cwd: path.dirname(scriptPath),
            encoding: 'utf-8',
            timeout: 120000,
          });
          console.log(output);
        } catch (e) {
          // An ingest failure is reported but does not mark the sync itself as failed.
          console.error('[入库] 失败:', e.message);
        }
      } catch (error) {
        console.error(`[${new Date().toISOString()}] 同步失败`, error);
      } finally {
        running = false;
      }
    },
    { timezone: config.timezone },
  );
}
|
||
|
||
/**
 * Scrape the paged customers table and persist it.
 *
 * @param {object} page
 * @returns {Promise<object>} Result of persistDataset.
 */
async function syncCustomers(page) {
  await runtimeCheckpoint('同步客户');
  const { customers } = datasets;
  await page.goto(customers.url, { waitUntil: 'domcontentloaded' });
  await waitUntilReady(page, customers.heading);
  await trySetPageSize(page, customers.pageSize);
  const rows = await scrapePagedTable(page, customers, {});
  return persistDataset(customers, rows, {});
}
|
||
|
||
/**
 * Visit the detail page of every locally stored customer and persist the
 * extracted details.
 *
 * A detail page that fails to show its marker text within 15 s is skipped,
 * unless the browser has been redirected to a sign-in URL. In that case the run
 * aborts through raiseIfSessionExpired instead of timing out on every remaining
 * customer.
 *
 * @param {object} page
 * @returns {Promise<object>} Result of persistDataset.
 */
async function syncCustomerDetails(page) {
  await runtimeCheckpoint('同步客户详情');
  const dataset = datasets.customerDetails;
  const customersState = loadCurrentState('customers', datasets.customers.uniqueKey);
  const allAccountIds = collectValidAccountIds(customersState.records || []);

  if (allAccountIds.length === 0) {
    console.log('[客户详情] 本地无有效客户 accountId,跳过');
    return persistDataset(dataset, [], {});
  }

  console.log(`[客户详情] 共 ${allAccountIds.length} 个客户需要获取详情`);
  const allDetails = [];
  const detailBaseUrl =
    'https://aps.aliyun.com/?spm=5176.12818093.top-nav.ditem-fx.785716d0LKDpKT#/detail/my_customer/~/customer/';

  for (let index = 0; index < allAccountIds.length; index += 1) {
    await runtimeCheckpoint(`客户详情 ${index + 1}/${allAccountIds.length}`);
    const accountId = allAccountIds[index];
    console.log(`[客户详情] ${index + 1}/${allAccountIds.length} accountId=${accountId}`);

    // Go through about:blank first so the SPA performs a full reload for the detail URL.
    await page.goto('about:blank');
    await sleep(300);
    await page.goto(`${detailBaseUrl}${accountId}`, { waitUntil: 'domcontentloaded' });

    try {
      await page.waitForFunction(
        (text) => document.body && document.body.innerText.includes(text),
        '详情',
        { timeout: 15000 },
      );
      await sleep(1000);
    } catch {
      // Only the URL is checked here: the keyword heuristic could match text on
      // a legitimate detail page.
      if (isAuthUrl(page.url())) {
        await raiseIfSessionExpired(page, `客户详情 accountId=${accountId}`);
      }
      console.warn(`[客户详情] ${accountId} 详情页加载超时,跳过`);
      continue;
    }

    const detail = await extractCustomerDetail(page);
    allDetails.push({ ...detail, __context: { accountId } });
  }

  return persistDataset(dataset, dedupeByHash(allDetails), {});
}
|
||
|
||
/**
 * Scrape orders one date window at a time and persist the de-duplicated result.
 * Full sync covers monthly windows from the configured start date; otherwise
 * the incremental windows are used.
 *
 * @param {object} page
 * @returns {Promise<object>} Result of persistDataset.
 */
async function syncOrders(page) {
  await runtimeCheckpoint('同步订单');
  const dataset = datasets.orders;
  const windows = config.fullSync
    ? buildMonthlyDateWindows(config.orderStartDate)
    : buildIncrementalOrderWindows();

  const collected = [];
  for (const dateWindow of windows) {
    await runtimeCheckpoint(`订单窗口 ${dateWindow.start} ~ ${dateWindow.end}`);
    await page.goto(dataset.url, { waitUntil: 'domcontentloaded' });
    await waitUntilReady(page, dataset.heading);
    await setDateRange(page, dateWindow.start, dateWindow.end);
    await clickQuery(page);
    await trySetPageSize(page, dataset.pageSize);
    const windowRecords = await scrapePagedTable(page, dataset, dateWindow);
    collected.push(...windowRecords);
  }

  return persistDataset(dataset, dedupeByHash(collected), {});
}
|
||
|
||
/**
 * Build the order date windows for incremental mode: monthly windows from the
 * configured incremental start date when one is set, otherwise a single window
 * covering yesterday.
 *
 * @returns {Array<object>} Windows carrying at least `start` and `end`.
 */
function buildIncrementalOrderWindows() {
  const configuredStartDate = normalizeConfiguredDate(config.incrementalOrderStartDate);

  if (!configuredStartDate) {
    const day = new Date();
    day.setDate(day.getDate() - 1);
    const dateStr = formatDate(day);
    console.log(`[增量模式] 订单仅查询: ${dateStr}`);
    return [{ windowStart: dateStr, windowEnd: dateStr, start: dateStr, end: dateStr }];
  }

  const windows = buildMonthlyDateWindows(configuredStartDate);
  console.log(`[增量模式] 订单从指定日期开始查询: ${configuredStartDate}`);
  return windows;
}
|
||
|
||
/**
 * Validate an optional `YYYY-MM-DD` configuration value.
 *
 * The date is checked with a calendar round-trip rather than `Date` parsing,
 * because V8 accepts impossible days such as `2024-02-30` and silently rolls
 * them into the following month.
 *
 * @param {unknown} value - Raw configured value; nullish/blank means "not set".
 * @returns {string} The trimmed date string, or '' when not set.
 * @throws {Error} When the value is not `YYYY-MM-DD` or is not a real calendar date.
 */
function normalizeConfiguredDate(value) {
  const normalized = String(value || '').trim();
  if (!normalized) {
    return '';
  }
  if (!/^\d{4}-\d{2}-\d{2}$/.test(normalized)) {
    throw new Error(`ALIYUN_APS_INCREMENTAL_ORDER_START_DATE 格式无效: ${normalized},期望 YYYY-MM-DD`);
  }

  const [year, month, day] = normalized.split('-').map((part) => Number.parseInt(part, 10));
  // setUTCFullYear avoids the 0-99 -> 19xx remapping done by Date.UTC.
  const probe = new Date(0);
  probe.setUTCFullYear(year, month - 1, day);
  const isRealDate = probe.getUTCFullYear() === year
    && probe.getUTCMonth() === month - 1
    && probe.getUTCDate() === day;
  if (!isRealDate) {
    throw new Error(`ALIYUN_APS_INCREMENTAL_ORDER_START_DATE 不是有效日期: ${normalized}`);
  }

  return normalized;
}
|
||
|
||
/**
 * Scrape bills month by month, writing a checkpoint after every page, then
 * persist the de-duplicated result.
 *
 * Full sync covers every month from the configured start month. Incremental
 * mode queries the month of the latest consumption date known to the database
 * (or the current month) and keeps only records newer than that date. With
 * `options.resume`, scraping continues after the page recorded in the latest
 * bills checkpoint.
 *
 * @param {object} page
 * @param {{resume?: boolean}} [options={}]
 * @returns {Promise<object>} Result of persistNormalizedDataset.
 */
async function syncBills(page, options = {}) {
  await runtimeCheckpoint('同步账单');
  const dataset = datasets.bills;
  const { resume = false } = options;
  let latestConsumptionDate = null;
  let months;

  if (config.fullSync) {
    months = buildMonthList(config.billStartMonth);
  } else {
    latestConsumptionDate = getLatestBillConsumptionDate();
    let incrementalMonth = latestConsumptionDate?.slice(0, 7);
    if (!incrementalMonth) {
      const now = new Date();
      incrementalMonth = `${now.getFullYear()}-${String(now.getMonth() + 1).padStart(2, '0')}`;
    }
    months = [incrementalMonth];
    const latestNote = latestConsumptionDate ? `, 数据库最新消费时间: ${latestConsumptionDate}` : '';
    console.log(`[增量模式] 账单仅查询: ${incrementalMonth}${latestNote}`);
  }

  // Keeps only records newer than the latest stored date; returns the input untouched in full-sync mode.
  const dropAlreadyStored = (records) => (latestConsumptionDate
    ? records.filter((record) => isAfterLatestConsumptionDate(record, latestConsumptionDate))
    : records);

  const resumeCheckpoint = resume ? loadLatestBillsCheckpoint() : null;
  if (resumeCheckpoint?.month) {
    const resumeIndex = months.indexOf(resumeCheckpoint.month);
    if (resumeIndex >= 0) {
      months = months.slice(resumeIndex);
      console.log(`[账单续爬] 从 checkpoint 恢复: month=${resumeCheckpoint.month}, page=${resumeCheckpoint.pageNum || 1}, records=${(resumeCheckpoint.records || []).length}`);
    }
  }

  const collected = [];

  for (const month of months) {
    await runtimeCheckpoint(`账单月份 ${month}`);
    await page.goto(dataset.url, { waitUntil: 'domcontentloaded' });
    await waitUntilReady(page, dataset.heading);
    await setMonthValue(page, month);
    await clickQuery(page);
    await trySetPageSize(page, dataset.pageSize);

    let monthRecords = [];
    let resumeFromPage = 0;
    let scrapeNeeded = true;
    if (resumeCheckpoint?.month === month) {
      // Seed with the records already saved in the checkpoint for this month.
      monthRecords = Array.isArray(resumeCheckpoint.records) ? resumeCheckpoint.records : [];
      resumeFromPage = Number.parseInt(String(resumeCheckpoint.pageNum || 0), 10) || 0;
      if (resumeFromPage > 0 && !(await moveBillsToResumeStart(page, resumeFromPage))) {
        console.log(`[账单续爬] checkpoint 已在最后一页,无需继续抓取 month=${month}`);
        scrapeNeeded = false;
      }
    }

    // After each scraped page: accumulate its normalized rows and write a checkpoint.
    const checkpointPage = async ({ pageNum, pageRows }) => {
      monthRecords.push(...normalizeDatasetRecords(dataset, pageRows, { month }));
      await saveBillsCheckpoint(dataset, month, pageNum, dropAlreadyStored(monthRecords));
    };

    const rawRecords = scrapeNeeded
      ? await scrapePagedTable(page, dataset, { month }, { onPage: checkpointPage })
      : [];

    // Without a resume offset the month is rebuilt from the raw rows.
    if (resumeFromPage === 0) {
      monthRecords = normalizeDatasetRecords(dataset, rawRecords, { month });
    }

    if (latestConsumptionDate) {
      const before = monthRecords.length;
      monthRecords = dropAlreadyStored(monthRecords);
      console.log(`[增量模式] 账单按消费时间过滤: ${before} -> ${monthRecords.length}`);
    }
    collected.push(...monthRecords);
  }

  return persistNormalizedDataset(dataset, dedupeByHash(collected));
}
|
||
|
||
/**
 * Write the `<month>-latest` checkpoint for the bills scrape.
 *
 * @param {{name: string}} dataset
 * @param {string} month
 * @param {number} pageNum - Last page included in the checkpoint.
 * @param {Array<object>} normalizedRecords - De-duplicated by hash before saving.
 */
async function saveBillsCheckpoint(dataset, month, pageNum, normalizedRecords) {
  const records = dedupeByHash(normalizedRecords);
  const payload = {
    month,
    pageNum,
    savedAt: new Date().toISOString(),
    stats: { total: records.length },
    records,
  };
  saveCheckpoint(dataset.name, `${month}-latest`, payload);
  console.log(`[账单检查点] 已落盘: month=${month}, page=${pageNum}, records=${records.length}`);
}
|
||
|
||
/**
 * Normalize raw records with `dataset.normalize` and attach a hash to each.
 * A record's own `__context` takes precedence over the fallback `context`.
 *
 * @param {{normalize: Function}} dataset
 * @param {Array<object>} records
 * @param {object} context - Fallback context for records without `__context`.
 * @returns {Array<object>}
 */
function normalizeDatasetRecords(dataset, records, context) {
  const normalized = [];
  for (const record of records) {
    const effectiveContext = record.__context || context;
    normalized.push(withHash(dataset.normalize(record, effectiveContext)));
  }
  return normalized;
}
|
||
|
||
/**
 * Position the bills table on the page AFTER the checkpointed one.
 *
 * @param {object} page
 * @param {number} resumeFromPage - Page recorded in the checkpoint.
 * @returns {Promise<boolean>} True when there is a page to continue from;
 *   false when the checkpointed page has no next page.
 * @throws {Error} When the checkpointed page cannot be reached.
 */
async function moveBillsToResumeStart(page, resumeFromPage) {
  if (resumeFromPage <= 0) return true;

  if (!(await jumpToPage(page, resumeFromPage))) {
    throw new Error(`账单续爬失败:无法定位到 checkpoint 页码 ${resumeFromPage}`);
  }

  return gotoNextPage(page);
}
|
||
|
||
/**
 * Run the database sync script with `--latest-bill-consumption-time` and read
 * the date from the last non-empty line of its output.
 *
 * @returns {string|null} `YYYY-MM-DD`, or null when the script fails or its
 *   last line does not start with a date.
 */
function getLatestBillConsumptionDate() {
  const scriptPath = path.resolve(config.rootDir, config.dbSyncScript);
  try {
    const stdout = execSync(`python "${scriptPath}" --latest-bill-consumption-time`, {
      cwd: path.dirname(scriptPath),
      encoding: 'utf-8',
      timeout: 120000,
    }).trim();
    const nonEmptyLines = stdout
      .split(/\r?\n/)
      .map((line) => line.trim())
      .filter(Boolean);
    const lastLine = nonEmptyLines.at(-1) || '';
    if (!/^\d{4}-\d{2}-\d{2}/.test(lastLine)) {
      return null;
    }
    return lastLine.slice(0, 10);
  } catch (error) {
    console.error('[增量模式] 查询数据库最新账单消费时间失败:', error.message);
    return null;
  }
}
|
||
|
||
/**
 * Tell whether a bill record's consumption day is strictly after
 * `latestConsumptionDate`. Records without a `YYYY-MM-DD` prefix in
 * `消费时间` / `consumeDate` are treated as not newer.
 *
 * @param {object} record
 * @param {string} latestConsumptionDate - `YYYY-MM-DD`.
 * @returns {boolean}
 */
function isAfterLatestConsumptionDate(record, latestConsumptionDate) {
  const rawValue = record['消费时间'] || record.consumeDate || '';
  const consumeDay = String(rawValue).trim().slice(0, 10);
  const looksLikeDate = /^\d{4}-\d{2}-\d{2}$/.test(consumeDay);
  // ISO dates order correctly under plain string comparison.
  return looksLikeDate && consumeDay > latestConsumptionDate;
}
|
||
|
||
/**
 * Visit the detail page of every given order and persist the extracted details.
 *
 * A detail page that fails to show its marker text within 15 s is skipped,
 * unless the browser has been redirected to a sign-in URL. In that case the run
 * aborts through raiseIfSessionExpired instead of timing out on every remaining
 * order.
 *
 * @param {object} page
 * @param {Array<string>} cachedOrderIds - Order ids supplied by the caller.
 * @returns {Promise<object>} Result of persistDataset.
 */
async function syncOrderDetails(page, cachedOrderIds) {
  await runtimeCheckpoint('同步订单详情');
  const dataset = datasets.orderDetails;

  // Use the caller-supplied order id list.
  const allOrderIds = cachedOrderIds || [];

  if (allOrderIds.length === 0) {
    console.log('[订单详情] 本地无订单数据,跳过');
    return persistDataset(dataset, [], {});
  }

  console.log(`[订单详情] 共 ${allOrderIds.length} 个订单需要获取详情`);
  const allDetails = [];
  const detailBaseUrl = 'https://aps.aliyun.com/?spm=5176.12818093.top-nav.ditem-fx.785716d0LKDpKT#/detail/order/~/costCenter/order/detail/';

  for (let index = 0; index < allOrderIds.length; index += 1) {
    await runtimeCheckpoint(`订单详情 ${index + 1}/${allOrderIds.length}`);
    const orderId = allOrderIds[index];
    console.log(`[订单详情] ${index + 1}/${allOrderIds.length} orderId=${orderId}`);

    // Go through about:blank first so the SPA performs a full reload for the detail URL.
    await page.goto('about:blank');
    await sleep(300);
    await page.goto(`${detailBaseUrl}${orderId}?projectId=`, { waitUntil: 'domcontentloaded' });

    try {
      await page.waitForFunction(
        (text) => document.body && document.body.innerText.includes(text),
        '订单详情',
        { timeout: 15000 },
      );
      await sleep(1000);
    } catch {
      // Only the URL is checked here: the keyword heuristic could match text on
      // a legitimate detail page.
      if (isAuthUrl(page.url())) {
        await raiseIfSessionExpired(page, `订单详情 orderId=${orderId}`);
      }
      console.warn(`[订单详情] ${orderId} 详情页加载超时,跳过`);
      continue;
    }

    const detail = await extractOrderDetail(page);
    // Fall back to the id used for navigation when the page did not yield a valid one.
    if (!isValidOrderId(detail.orderId)) {
      detail.orderId = orderId;
    }
    allDetails.push({ ...detail, __context: {} });
  }

  return persistDataset(dataset, dedupeByHash(allDetails), {});
}
|
||
|
||
/**
 * Normalize raw records and persist them as the dataset's new state.
 *
 * @param {object} dataset
 * @param {Array<object>} records - Raw scraped records.
 * @param {object} context - Fallback normalization context.
 * @returns {{stamp: *, stats: *}}
 */
function persistDataset(dataset, records, context) {
  return persistNormalizedDataset(dataset, normalizeDatasetRecords(dataset, records, context));
}
|
||
|
||
/**
 * Diff already-normalized records against the stored state, then save the new
 * state and its delta.
 *
 * @param {{name: string, uniqueKey: string}} dataset
 * @param {Array<object>} normalizedRecords
 * @returns {{stamp: *, stats: *}} Run stamp and the diff statistics.
 */
function persistNormalizedDataset(dataset, normalizedRecords) {
  const { name, uniqueKey } = dataset;
  const previousState = loadCurrentState(name, uniqueKey);
  const nextState = diffRecords(previousState, normalizedRecords, uniqueKey);
  const stamp = saveDatasetRun(name, nextState);
  saveDelta(name, stamp, nextState.delta);
  const { stats } = nextState;
  return { stamp, stats };
}
|
||
|
||
/**
 * Wait until the page body contains `heading`, failing fast when the page is a
 * login / auth page.
 *
 * With `options.allowInteractiveAuth` the function keeps waiting on a login
 * page so the user can sign in; without it, a login page triggers a
 * best-effort login alert and an error.
 *
 * @param {object} page
 * @param {string} heading - Text that marks the target page as loaded.
 * @param {number} [timeout=120000] - Timeout for the heading wait, in ms.
 * @param {{allowInteractiveAuth?: boolean}} [options={}]
 * @throws {Error} On a login page (non-interactive), on interactive login
 *   timeout, or with the original wait error otherwise.
 */
async function waitUntilReady(page, heading, timeout = 120000, options = {}) {
  await runtimeCheckpoint(`等待页面 ${heading}`);
  const { allowInteractiveAuth = false } = options;

  // Best-effort notification: a failing alert is logged, never thrown.
  const notifyLoginRequired = async (url) => {
    try {
      await sendLoginAlert(url);
    } catch (notifyErr) {
      console.error('[通知] 发送登录提醒失败:', notifyErr.message);
    }
  };

  await page.waitForLoadState('domcontentloaded');
  console.log(`[waitUntilReady] 当前URL: ${page.url()}`);
  console.log(`[waitUntilReady] 等待页面出现: "${heading}"`);

  const initialState = await detectAuthRedirect(page);
  if (initialState.isAuthPage) {
    console.error(`[waitUntilReady] 检测到登录页/鉴权页: ${initialState.currentUrl}`);
    console.error(`[waitUntilReady] 页面内容前500字: ${initialState.bodyText}`);
    if (!allowInteractiveAuth) {
      // Here the alert is only sent for a URL match, not for a keyword-only match.
      if (isAuthUrl(initialState.currentUrl)) {
        await notifyLoginRequired(initialState.currentUrl);
      }
      throw new Error(`当前页面仍处于登录/鉴权页,无法进入「${heading}」。请重新执行 npm run login,并确认该账号对该页面有访问权限。`);
    }
    console.log(`[waitUntilReady] 允许交互式登录,等待用户完成认证后进入「${heading}」...`);
  }

  try {
    await page.waitForFunction(
      (text) => document.body && document.body.innerText.includes(text),
      heading,
      { timeout },
    );
  } catch (err) {
    // Print diagnostics before deciding which error to raise.
    const { currentUrl, bodyText, isAuthPage } = await detectAuthRedirect(page);
    console.error(`[waitUntilReady] 超时!当前URL: ${currentUrl}`);
    console.error(`[waitUntilReady] 页面内容前500字: ${bodyText}`);
    if (!isAuthPage) {
      throw err;
    }
    if (allowInteractiveAuth) {
      throw new Error(`交互式登录超时,仍未进入「${heading}」。请确认已在浏览器中完成 RAM/阿里云登录,并且当前账号有访问该页面的权限。`);
    }
    await notifyLoginRequired(currentUrl);
    throw new Error(`当前页面停留在登录/鉴权页,未能进入「${heading}」。请重新执行 npm run login,并确认该账号对该页面有访问权限。`);
  }

  const finalState = await detectAuthRedirect(page);
  if (finalState.isAuthPage && !allowInteractiveAuth) {
    throw new Error(`当前页面仍处于登录/鉴权页,未成功进入「${heading}」。请重新执行 npm run login,并确认该账号对该页面有访问权限。`);
  }
  await sleep(1500);
}
|
||
|
||
/**
 * Walk a paginated table from the current page to the last one and collect
 * every row, tagged with `__context`.
 *
 * Stops when there is no next page or when a `<pageNum>-<rowCount>` key repeats.
 *
 * @param {object} page
 * @param {{name: string}} dataset
 * @param {object} context - Attached to every row as `__context`.
 * @param {{onPage?: Function, skipInitialPage?: boolean}} [options={}]
 *   `onPage({ pageData, pageNum, pageRows })` is awaited after each collected
 *   page; `skipInitialPage` skips the page the table is currently on.
 * @returns {Promise<Array<object>>}
 */
async function scrapePagedTable(page, dataset, context, options = {}) {
  const { onPage, skipInitialPage = false } = options;
  const collected = [];
  const seenPageKeys = new Set();
  let skipPending = skipInitialPage;

  for (;;) {
    await runtimeCheckpoint(`抓取 ${dataset.name} 分页`);
    await waitForTableRows(page);
    const pageData = await extractTable(page);
    const pageNum = await currentPageNumber(page);
    const pageKey = `${pageNum}-${pageData.rows.length}`;
    console.log(`[抓取] 第${pageNum}页, ${pageData.rows.length}行, key="${pageKey}"`);

    if (skipPending) {
      console.log(`[抓取] 跳过 checkpoint 已保存页: ${pageNum}`);
      skipPending = false;
      if (await gotoNextPage(page)) {
        continue;
      }
      console.log(`[抓取] checkpoint 已位于最后一页,停止`);
      break;
    }

    if (seenPageKeys.has(pageKey)) {
      console.log(`[抓取] 重复页面key,停止翻页`);
      break;
    }
    seenPageKeys.add(pageKey);

    const pageRows = pageData.rows.map((row) => ({ ...row, __context: context }));
    collected.push(...pageRows);
    if (onPage) {
      await onPage({ pageData, pageNum, pageRows });
    }

    if (!(await gotoNextPage(page))) {
      console.log(`[抓取] 翻页失败或已到最后一页,停止`);
      break;
    }
  }

  console.log(`[抓取] 共采集 ${collected.length} 条记录`);
  return collected;
}
|
||
|
||
/**
 * Throw when the page currently shows a login / auth page, after logging
 * diagnostics and sending a best-effort login alert. Returns normally otherwise.
 *
 * @param {object} page
 * @param {string} label - Describes the step being performed; used in messages.
 * @throws {Error} When the session appears to have expired.
 */
async function raiseIfSessionExpired(page, label) {
  const state = await detectAuthRedirect(page);
  if (state.isAuthPage) {
    console.error(`[鉴权] ${label} 时检测到登录页/鉴权页: ${state.currentUrl}`);
    console.error(`[鉴权] 页面内容前500字: ${state.bodyText}`);
    try {
      await sendLoginAlert(state.currentUrl);
    } catch (notifyErr) {
      console.error('[通知] 发送登录提醒失败:', notifyErr.message);
    }
    throw new Error(`运行过程中登录态失效(${label})。请重新执行 npm run login 后再继续同步。`);
  }
}
|
||
|
||
/**
 * Extract the main data table of the page as header names plus row objects.
 *
 * Headers come from the table with the most `thead th` cells (at least two);
 * rows come from the table whose widest `tbody tr` has the most `td` cells.
 * Rows without any non-empty cell are dropped; unnamed headers become
 * `column_<n>`.
 *
 * @param {object} page
 * @returns {Promise<{headers: string[], rows: Array<object>}>}
 */
async function extractTable(page) {
  // The callback runs in the browser, so it must not reference outer variables.
  return page.evaluate(() => {
    const cleanText = (value) =>
      String(value || '')
        .replace(/\u00a0/g, ' ')
        .replace(/\s+\n/g, '\n')
        .replace(/\n\s+/g, '\n')
        .trim();

    // First element with the strictly highest score, or undefined for an empty list.
    const pickLargest = (items, score) => {
      let best;
      let bestScore = -Infinity;
      for (const item of items) {
        const current = score(item);
        if (best === undefined || current > bestScore) {
          best = item;
          bestScore = current;
        }
      }
      return best;
    };

    const tables = Array.from(document.querySelectorAll('table'));
    const headerCount = (table) => table.querySelectorAll('thead th').length;
    const widestRow = (table) =>
      Math.max(...Array.from(table.querySelectorAll('tbody tr')).map((row) => row.querySelectorAll('td').length), 0);

    const headerTable = pickLargest(tables.filter((table) => headerCount(table) > 1), headerCount);
    if (!headerTable) return { headers: [], rows: [] };

    const headers = Array.from(headerTable.querySelectorAll('thead th')).map((cell) => cleanText(cell.textContent));

    const bodyTable = pickLargest(
      tables.filter((table) => table.querySelectorAll('tbody tr').length > 0),
      widestRow,
    );
    if (!bodyTable) return { headers, rows: [] };

    const rows = [];
    for (const row of Array.from(bodyTable.querySelectorAll('tbody tr'))) {
      const cells = Array.from(row.querySelectorAll('td')).map((cell) => cleanText(cell.innerText || cell.textContent));
      if (!cells.some(Boolean)) continue;
      const record = {};
      headers.forEach((header, index) => {
        record[header || `column_${index + 1}`] = cells[index] || '';
      });
      rows.push(record);
    }

    return { headers, rows };
  });
}
|
||
|
||
/**
 * Wait (up to 120 s) until at least one `table tbody tr` exists, then pause
 * briefly. On failure, an expired session is reported first; otherwise the
 * original error is rethrown.
 *
 * @param {object} page
 */
async function waitForTableRows(page) {
  await runtimeCheckpoint('等待表格加载');
  const hasRows = () => document.querySelectorAll('table tbody tr').length > 0;
  try {
    await page.waitForFunction(hasRows, null, { timeout: 120000 });
  } catch (error) {
    await raiseIfSessionExpired(page, '等待表格加载');
    throw error;
  }
  await sleep(800);
}
|
||
|
||
/**
 * Read the active page number from the pagination control.
 *
 * @param {object} page
 * @returns {Promise<number>} The parsed number; 1 when there is no active item
 *   or its text does not parse to a non-zero integer.
 */
async function currentPageNumber(page) {
  const activeItem = page.locator('.next-pagination-item.next-current');
  const matches = await activeItem.count();
  if (matches === 0) {
    return 1;
  }
  const label = await activeItem.first().innerText();
  const parsed = Number.parseInt(label.trim(), 10);
  return parsed || 1;
}
|
||
|
||
/**
 * Move the pagination to `targetPage`: first by typing into a jump input, then
 * by stepping forward page by page.
 *
 * @param {object} page
 * @param {number} targetPage
 * @returns {Promise<boolean>} True when the table ends up on `targetPage`
 *   (targets <= 1 are reported as reached without any action).
 */
async function jumpToPage(page, targetPage) {
  if (targetPage <= 1) return true;
  if ((await currentPageNumber(page)) === targetPage) return true;

  const jumpInputSelectors = [
    '.next-pagination-jump-input input',
    'input[aria-label*="页码"]',
    'input[aria-label*="页"]',
  ];

  for (const selector of jumpInputSelectors) {
    const input = page.locator(selector).first();
    if ((await input.count()) > 0) {
      // Every interaction is best-effort; success is judged by the resulting page number.
      await input.click().catch(() => null);
      await sleep(100);
      await page.keyboard.press('Control+A').catch(() => null);
      await page.keyboard.type(String(targetPage), { delay: 20 }).catch(() => null);
      await page.keyboard.press('Enter').catch(() => null);
      await sleep(1500);
      if ((await currentPageNumber(page)) === targetPage) {
        console.log(`[账单续爬] 已跳转到第 ${targetPage} 页`);
        return true;
      }
    }
  }

  console.warn(`[账单续爬] 未找到可用跳页输入框,尝试顺序翻到第 ${targetPage} 页`);
  // Bounded forward walk: at most targetPage + 5 steps.
  for (let step = 0; step < targetPage + 5; step += 1) {
    const currentPage = await currentPageNumber(page);
    if (currentPage >= targetPage) {
      return currentPage === targetPage;
    }
    if (!(await gotoNextPage(page))) {
      return false;
    }
  }
  return false;
}
|
||
|
||
/**
 * Click the pagination "next" button.
 *
 * @param {object} page
 * @returns {Promise<boolean>} True when the active page number changed; false
 *   when the button is missing or disabled, or the number did not change.
 * @throws {Error} When the session expired or the page number moved backwards.
 */
async function gotoNextPage(page) {
  await runtimeCheckpoint('翻页');
  const before = await currentPageNumber(page);

  // Locate the "next page" button with a Playwright locator.
  const nextButton = page.locator('button.next-pagination-item.next-next');
  const buttonCount = await nextButton.count();
  if (buttonCount === 0) {
    console.log('[翻页] 未找到下一页按钮');
    return false;
  }

  const disabledAttr = await nextButton.getAttribute('disabled');
  if (disabledAttr != null) {
    console.log('[翻页] 下一页按钮已禁用');
    return false;
  }

  // Use Playwright's click (not a DOM click) so React event handlers fire.
  await nextButton.click();
  await sleep(2000);
  await raiseIfSessionExpired(page, `翻页 ${before} -> next`);

  const after = await currentPageNumber(page);
  console.log(`[翻页] ${before} -> ${after}`);

  if (before > 1 && after === 1) {
    throw new Error(`分页从第 ${before} 页异常回退到第 1 页,疑似登录态失效或页面会话已重置。请重新执行 npm run login 后再继续同步。`);
  }
  if (after < before) {
    throw new Error(`分页从第 ${before} 页异常回退到第 ${after} 页,疑似登录态失效或页面状态被重置。请重新执行 npm run login 后再继续同步。`);
  }

  return before !== after;
}
|
||
|
||
/**
 * Best-effort: open the page-size selector and pick `pageSize`. Does nothing
 * when the selector is absent; closes the dropdown when the option is absent.
 *
 * @param {object} page
 * @param {number|string} pageSize
 */
async function trySetPageSize(page, pageSize) {
  await runtimeCheckpoint(`设置每页 ${pageSize}`);
  const selector = page.locator('input[aria-label="请选择每页显示几条"]').first();
  const selectorCount = await selector.count();
  if (selectorCount === 0) return;

  await selector.click().catch(() => null);
  await sleep(300);

  const choice = page.locator(`text=${pageSize}`).last();
  const choiceCount = await choice.count();
  if (choiceCount === 0) {
    await page.keyboard.press('Escape').catch(() => null);
    return;
  }

  await choice.click().catch(() => null);
  await sleep(1200);
}
|
||
|
||
/**
 * Fill the order date-range filter and verify what the inputs ended up showing.
 * If the first attempt does not stick, the fill is retried once with the
 * opposite field order (start first, then end). The retry result is only
 * logged; a mismatch does not throw.
 *
 * @param {import('playwright').Page} page - Page containing the range picker.
 * @param {string} start - Expected text of the start-date input.
 * @param {string} end - Expected text of the end-date input.
 * @returns {Promise<void>}
 */
async function setDateRange(page, start, end) {
  await runtimeCheckpoint(`设置订单日期 ${start} ~ ${end}`);
  console.log(`[订单日期] 设置: ${start} ~ ${end}`);

  // Read one of the outer inputs; a read failure counts as an empty string.
  const readInput = (placeholder) =>
    page.locator(`input[placeholder="${placeholder}"]`).inputValue().catch(() => '');
  const readRange = async () => {
    const shownStart = await readInput('起始日期');
    const shownEnd = await readInput('结束日期');
    return [shownStart, shownEnd];
  };

  await _fillDateRange(page, start, end);

  // Verify the first attempt.
  const [firstStart, firstEnd] = await readRange();
  if (firstStart === start && firstEnd === end) {
    console.log(`[订单日期] 结果: "${firstStart}" ~ "${firstEnd}"`);
    return;
  }

  // Wrong result: retry in the opposite order (start first, then end).
  console.log(`[订单日期] 首次结果不对: "${firstStart}" ~ "${firstEnd}",反向重试`);
  await _fillDateRange(page, start, end, true);
  const [retryStart, retryEnd] = await readRange();
  console.log(`[订单日期] 重试结果: "${retryStart}" ~ "${retryEnd}"`);
}
|
||
|
||
/**
 * Open the range-picker panel and type both dates into its panel inputs.
 *
 * @param {import('playwright').Page} page - Page containing the range picker.
 * @param {string} start - Text to type into the panel's start-date input.
 * @param {string} end - Text to type into the panel's end-date input.
 * @param {boolean} [startFirst=false] - When true the start date is typed
 *   first; by default the end date is typed first.
 * @returns {Promise<void>}
 */
async function _fillDateRange(page, start, end, startFirst = false) {
  await runtimeCheckpoint('填写订单日期');

  // Clicking the outer end-date input opens the picker panel.
  const opener = page.locator('input[placeholder="结束日期"]');
  await opener.click();
  await sleep(1000);

  const startField = {
    input: page.locator('.next-range-picker-panel-input-start-date input'),
    text: start,
  };
  const endField = {
    input: page.locator('.next-range-picker-panel-input-end-date input'),
    text: end,
  };
  const fillOrder = startFirst ? [startField, endField] : [endField, startField];

  for (const { input, text } of fillOrder) {
    await input.click();
    await sleep(100);
    // Select the existing content so typing replaces it.
    await page.keyboard.press('Control+A');
    await page.keyboard.type(text, { delay: 30 });
    await sleep(300);
  }

  // Confirm, then dismiss the panel: Enter, click the page corner, Escape.
  await page.keyboard.press('Enter');
  await sleep(500);
  await page.mouse.click(0, 0);
  await sleep(300);
  await page.keyboard.press('Escape');
  await sleep(300);
  // Wait (at most 3 s) for the overlay to close; a timeout is ignored.
  await page
    .locator('.next-overlay-wrapper.opened')
    .waitFor({ state: 'hidden', timeout: 3000 })
    .catch(() => null);
  await sleep(300);
}
|
||
|
||
/**
 * Set the billing month by typing into whichever `<input>` looks like the
 * month picker. Three strategies are tried in order:
 *   1. an input whose current value has the form YYYY-MM;
 *   2. an input whose placeholder contains "月";
 *   3. an input nested inside an element whose class mentions
 *      date-picker / month-picker / range-picker.
 *
 * @param {import('playwright').Page} page - Bill page.
 * @param {string} month - Month text to type.
 * @returns {Promise<void>}
 * @throws {Error} When no input matches any strategy.
 */
async function setMonthValue(page, month) {
  await runtimeCheckpoint(`设置账单月份 ${month}`);

  const inputs = page.locator('input');
  const inputCount = await inputs.count();
  const snapshot = [];

  // Strategy 1: match on the current value (YYYY-MM), recording every input
  // seen along the way for the later strategies and for the debug dump.
  for (let i = 0; i < inputCount; i += 1) {
    const candidate = inputs.nth(i);
    const value = await candidate.inputValue().catch(() => '');
    const placeholder = await candidate.getAttribute('placeholder').catch(() => '');
    snapshot.push({ index: i, value, placeholder });

    if (/^\d{4}-\d{2}$/.test(value)) {
      console.log(`[账单月份] 通过 value 匹配到 input[${i}], 设置: ${month}`);
      await typeIntoDateInput(candidate, month, page);
      return;
    }
  }

  // Strategy 2: values were empty, so match on a month-like placeholder.
  const byPlaceholder = snapshot.find(
    (entry) => entry.placeholder && /月/.test(entry.placeholder),
  );
  if (byPlaceholder) {
    console.log(`[账单月份] 通过 placeholder 匹配到 input[${byPlaceholder.index}], 设置: ${month}`);
    await typeIntoDateInput(inputs.nth(byPlaceholder.index), month, page);
    return;
  }

  // Strategy 3 (fallback): any input inside a picker-like ancestor, which
  // excludes search boxes and similar.
  for (const { index } of snapshot) {
    const candidate = inputs.nth(index);
    const pickerClass = await candidate
      .evaluate((el) => el.closest('[class*="date-picker"], [class*="month-picker"], [class*="range-picker"]')?.className || '')
      .catch(() => '');
    if (!pickerClass) continue;

    console.log(`[账单月份] 通过父级 class 匹配到 input[${index}] (${pickerClass}), 设置: ${month}`);
    await typeIntoDateInput(candidate, month, page);
    return;
  }

  console.error('[DEBUG] 账单页面所有 input:', JSON.stringify(snapshot, null, 2));
  throw new Error('未识别到账单佣金月份输入框,请打开页面确认结构是否变化。');
}
|
||
|
||
/**
 * Enter a date value into an input using the keyboard.
 *
 * Strategy: focus, select all, type quickly, then press Tab to move focus
 * away. The intent is that the blur commits the value without opening the
 * picker panel the way a click would, and that Tab closes the panel if it
 * did open. The outcome is verified and logged; a mismatch only warns.
 *
 * @param {import('playwright').Locator} locator - Target input.
 * @param {string} value - Text to type.
 * @param {import('playwright').Page} page - Page owning the keyboard.
 * @returns {Promise<void>}
 */
async function typeIntoDateInput(locator, value, page) {
  await runtimeCheckpoint(`填写日期输入 ${value}`);

  // Drop the readonly attribute so the input accepts typed text.
  await locator.evaluate((node) => node.removeAttribute('readonly'));

  // Focus the input and select its current content.
  await locator.focus();
  await sleep(100);
  await page.keyboard.press('Control+A');
  await sleep(100);

  // Type the new value character by character.
  await page.keyboard.type(value, { delay: 30 });
  await sleep(200);

  // Tab away to blur the input, then Escape in case a panel is still open.
  for (const key of ['Tab', 'Escape']) {
    await page.keyboard.press(key);
    await sleep(300);
  }

  // Verify what the input now holds.
  const shown = await locator.inputValue().catch(() => '');
  if (shown === value) {
    console.log(`[日期设置] 成功: "${value}"`);
    return;
  }
  console.warn(`[WARN] typeIntoDateInput: 期望 "${value}",实际 "${shown}"`);
}
|
||
|
||
/**
 * Click the first button whose text contains "查询" and wait a fixed 1.8 s.
 *
 * @param {import('playwright').Page} page - Page containing the query button.
 * @returns {Promise<void>}
 */
async function clickQuery(page) {
  await runtimeCheckpoint('点击查询');
  await page.locator('button:has-text("查询")').first().click();
  await sleep(1800);
}
|
||
|
||
/**
 * Build one date window per calendar month, from the month containing
 * `startDate` up to and including the month containing `now`.
 *
 * Every window starts on the first day of its month (the day part of
 * `startDate` is not used) and ends on the last day of that month, except
 * that the final window is capped at `now`.
 *
 * The year and month are read directly from the `YYYY-MM-DD` text. Parsing
 * the text as a `+08:00` instant and reading it back with local-time getters
 * shifts a first-of-month start into the previous month on hosts whose time
 * zone is west of UTC+8.
 *
 * @param {string} startDate - Start date as `YYYY-MM-DD`.
 * @param {Date} [now=new Date()] - Upper bound; anything that is not a valid
 *   Date falls back to the current time.
 * @returns {Array<{windowStart: string, windowEnd: string, start: string, end: string}>}
 *   Windows in chronological order; empty when `startDate` cannot be parsed
 *   or lies after `now`.
 */
function buildMonthlyDateWindows(startDate, now = new Date()) {
  const upperBound = now instanceof Date && !Number.isNaN(now.getTime()) ? now : new Date();
  const [year, month] = String(startDate).split('-').map(Number);
  const windows = [];

  // An unparsable start yields an Invalid Date, which fails the comparison
  // below, so the loop is skipped and an empty list is returned.
  const cursor = new Date(year, month - 1, 1);
  while (cursor <= upperBound) {
    // Day 0 of the following month is the last day of the cursor's month.
    const monthEnd = new Date(cursor.getFullYear(), cursor.getMonth() + 1, 0);
    const from = formatDate(cursor);
    const to = formatDate(monthEnd > upperBound ? upperBound : monthEnd);
    windows.push({
      windowStart: from,
      windowEnd: to,
      start: from,
      end: to,
    });
    cursor.setMonth(cursor.getMonth() + 1);
  }

  return windows;
}
|
||
|
||
/**
 * List every month from `startMonth` through the current month, inclusive.
 *
 * @param {string} startMonth - First month as `YYYY-MM`.
 * @returns {string[]} Month keys (`YYYY-MM`) in chronological order; empty
 *   when `startMonth` is in the future.
 */
function buildMonthList(startMonth) {
  const [startYear, startMonthNumber] = startMonth.split('-').map(Number);
  const pad2 = (n) => String(n).padStart(2, '0');
  const result = [];

  // Walk first-of-month dates until the cursor passes the current time.
  const cursor = new Date(startYear, startMonthNumber - 1, 1);
  const now = new Date();
  for (; cursor <= now; cursor.setMonth(cursor.getMonth() + 1)) {
    result.push(`${cursor.getFullYear()}-${pad2(cursor.getMonth() + 1)}`);
  }

  return result;
}
|
||
|
||
/**
 * Format a Date as `YYYY-MM-DD` using its local-time fields.
 *
 * @param {Date} date - Date to format.
 * @returns {string} Date text with zero-padded month and day.
 */
function formatDate(date) {
  const pad2 = (n) => String(n).padStart(2, '0');
  return [date.getFullYear(), pad2(date.getMonth() + 1), pad2(date.getDate())].join('-');
}
|
||
|
||
/**
 * Drop duplicate records, keeping the first occurrence of each.
 * Two records are duplicates when their `JSON.stringify` output is equal.
 *
 * @param {Array<object>} records - Records to de-duplicate.
 * @returns {Array<object>} New array holding the first record for each key,
 *   in original order.
 */
function dedupeByHash(records) {
  // A Map keeps insertion order, so its values are the first occurrences.
  const firstByKey = new Map();
  records.forEach((record) => {
    const key = JSON.stringify(record);
    if (!firstByKey.has(key)) {
      firstByKey.set(key, record);
    }
  });
  return [...firstByKey.values()];
}
|
||
|
||
/**
 * Collect the distinct, valid order IDs from a list of records.
 *
 * @param {Array<object>} records - Records carrying the ID under `orderId`
 *   (normalized field name) or `订单号` (raw field name).
 * @returns {string[]} Trimmed IDs in first-seen order, without duplicates.
 *   Blank values and "no data" placeholder rows are skipped silently; values
 *   rejected by `isValidOrderId` are skipped with a log line.
 */
function collectValidOrderIds(records) {
  // A Set de-duplicates while preserving first-seen order.
  const uniqueIds = new Set();

  for (const record of records) {
    const candidate = String(record.orderId || record['订单号'] || '').trim();

    const isPlaceholder = candidate === '' || candidate.includes('没有数据');
    if (isPlaceholder) continue;

    if (!isValidOrderId(candidate)) {
      console.log(`[订单详情] 跳过无效订单号: ${candidate}`);
      continue;
    }

    uniqueIds.add(candidate);
  }

  return [...uniqueIds];
}
|
||
|
||
/**
 * Collect the distinct, valid account IDs from a list of records.
 *
 * @param {Array<object>} records - Records carrying the ID under `accountId`.
 * @returns {string[]} Trimmed IDs in first-seen order, without duplicates.
 *   Blank values and "no data" placeholder rows are skipped silently; values
 *   rejected by `isValidAccountId` are skipped with a log line.
 */
function collectValidAccountIds(records) {
  // A Set de-duplicates while preserving first-seen order.
  const uniqueIds = new Set();

  for (const record of records) {
    const candidate = String(record.accountId || '').trim();

    const isPlaceholder = candidate === '' || candidate.includes('没有数据');
    if (isPlaceholder) continue;

    if (!isValidAccountId(candidate)) {
      console.log(`[客户详情] 跳过无效 accountId: ${candidate}`);
      continue;
    }

    uniqueIds.add(candidate);
  }

  return [...uniqueIds];
}
|
||
|
||
/**
 * Check whether a value is a usable order ID: after trimming, one or more
 * ASCII digits and nothing else.
 *
 * The digit-only pattern already rejects empty strings and any text holding
 * garbled or replacement characters, so no separate checks are needed.
 *
 * @param {unknown} orderId - Candidate ID; falsy values count as empty.
 * @returns {boolean} `true` when the trimmed value consists only of digits.
 */
function isValidOrderId(orderId) {
  return /^\d+$/.test(String(orderId || '').trim());
}
|
||
|
||
/**
 * Check whether a value is a usable account ID: after trimming, one or more
 * ASCII digits and nothing else.
 *
 * The digit-only pattern already rejects empty strings and any text holding
 * garbled or replacement characters, so no separate checks are needed.
 *
 * @param {unknown} accountId - Candidate ID; falsy values count as empty.
 * @returns {boolean} `true` when the trimmed value consists only of digits.
 */
function isValidAccountId(accountId) {
  return /^\d+$/.test(String(accountId || '').trim());
}
|
||
|
||
/**
 * Scrape the order-detail fields from the visible text of the current page.
 *
 * Each field is found by its on-page label: the value is the first non-empty
 * line after the label, or otherwise the rest of the label's own line
 * (optionally after a colon). A label that is not found yields ''.
 *
 * Labels are plain text and are regex-escaped automatically. Hand-escaped
 * labels are error-prone: an unescaped "(...)" becomes a capture group, so
 * the label fragment is returned instead of the value. Parentheses and the
 * colon match in both ASCII and fullwidth form.
 *
 * @param {import('playwright').Page} page - Page showing one order's detail.
 * @returns {Promise<Record<string, string>>} Field name to extracted text.
 */
async function extractOrderDetail(page) {
  // Everything used by the callback must be defined inside it: Playwright
  // serializes the function and runs it in the browser.
  return page.evaluate(() => {
    const text = document.body?.innerText || '';

    // Turn a literal label into a regex fragment, width-insensitive for
    // parentheses and escaped for every other metacharacter.
    const toPattern = (label) =>
      label.replace(/[.*+?^${}()|[\]\\\uFF08\uFF09]/g, (ch) => {
        if (ch === '(' || ch === '\uFF08') return '[(\\uFF08]';
        if (ch === ')' || ch === '\uFF09') return '[)\\uFF09]';
        return `\\${ch}`;
      });

    const extract = (label) => {
      const head = toPattern(label);

      // Preferred layout: label on one line, value on a following line.
      const lineBreakMatch = text.match(new RegExp(`${head}\\s*(?:\\r?\\n)+\\s*([^\\r\\n]+)`));
      if (lineBreakMatch) return lineBreakMatch[1].trim();

      // Fallback layout: "label: value" on the same line.
      const inlineMatch = text.match(new RegExp(`${head}\\s*[:\\uFF1A]?\\s*([^\\r\\n]+)`));
      return inlineMatch ? inlineMatch[1].trim() : '';
    };

    return {
      orderId: extract('订单号'),
      orderType: extract('订单类型'),
      status: extract('状态'),
      tradeType: extract('交易类型'),
      customerCategory: extract('客户分类'),
      dealerName: extract('二级经销商名称'),
      dealerUid: extract('二级经销商UID'),
      customerType: extract('客户类型'),
      opportunityId: extract('商机ID'),
      paymentTime: extract('支付时间'),
      orderTime: extract('下单时间'),
      productName: extract('产品名称'),
      productCode: extract('产品code'),
      originalPriceCny: extract('订单原价(CNY)'),
      paidAmountCny: extract('实付金额(CNY)'),
      discount: extract('订单折扣'),
      payableAmountCny: extract('应付金额(实付+代金券)(CNY)'),
      couponAmountCny: extract('代金券金额(CNY)'),
    };
  });
}
|
||
|
||
/**
 * Scrape the customer-detail fields from the current page.
 *
 * Text fields are found by their on-page label: the value is the first
 * non-empty line after the label, or otherwise the rest of the label's own
 * line (optionally after a colon). A label that is not found yields ''.
 * The monthly amounts are read from two slices of the page text ("last
 * month" and "this month") so the repeated prepay/postpay labels resolve to
 * the right month. The department comes from the table mentioning "所属部门".
 *
 * Labels are plain text and are regex-escaped automatically. Unescaped,
 * "(CNY)" would act as a capture group and the pattern would never match the
 * literal label, leaving the amounts empty. Parentheses and the colon match
 * in both ASCII and fullwidth form.
 *
 * @param {import('playwright').Page} page - Page showing one customer's detail.
 * @returns {Promise<Record<string, string>>} Field name to extracted text;
 *   amount fields have currency signs and thousands separators removed.
 */
async function extractCustomerDetail(page) {
  // Everything used by the callback must be defined inside it: Playwright
  // serializes the function and runs it in the browser.
  return page.evaluate(() => {
    // Replace non-breaking spaces with plain spaces and trim.
    const normalize = (value) =>
      String(value || '')
        .replace(/\u00a0/g, ' ')
        .trim();

    const text = normalize(document.body?.innerText || '').replace(/\r/g, '');

    // Turn a literal label into a regex fragment, width-insensitive for
    // parentheses and escaped for every other metacharacter.
    const toPattern = (label) =>
      label.replace(/[.*+?^${}()|[\]\\\uFF08\uFF09]/g, (ch) => {
        if (ch === '(' || ch === '\uFF08') return '[(\\uFF08]';
        if (ch === ')' || ch === '\uFF09') return '[)\\uFF09]';
        return `\\${ch}`;
      });

    const extract = (label, sourceText = text) => {
      const head = toPattern(label);

      // Preferred layout: label on one line, value on a following line.
      const lineBreakMatch = sourceText.match(new RegExp(`${head}\\s*(?:\\n)+\\s*([^\\n]+)`));
      if (lineBreakMatch) return normalize(lineBreakMatch[1]);

      // Fallback layout: "label: value" on the same line.
      const inlineMatch = sourceText.match(new RegExp(`${head}\\s*[:\\uFF1A]?\\s*([^\\n]+)`));
      return inlineMatch ? normalize(inlineMatch[1]) : '';
    };

    // Strip yen signs and thousands separators (ASCII and fullwidth forms).
    const normalizeAmount = (value) =>
      normalize(value).replace(/[\u00A5\uFFE5,\uFF0C]/g, '').trim();

    // Find a label in the page text at or after `from`.
    const findLabel = (label, from = 0) => {
      const hit = new RegExp(toPattern(label)).exec(text.slice(from));
      return hit ? { index: from + hit.index, length: hit[0].length } : null;
    };

    // Slice the text from `startLabel` up to `endLabel`, or to the end of the
    // text when there is no end label or it does not occur after the start.
    const buildSection = (startLabel, endLabel = '') => {
      const startHit = findLabel(startLabel);
      if (!startHit) return '';
      const endHit = endLabel
        ? findLabel(endLabel, startHit.index + startHit.length)
        : null;
      return endHit ? text.slice(startHit.index, endHit.index) : text.slice(startHit.index);
    };

    const lastMonthSection = buildSection('上月应付总金额(CNY)', '本月应付总金额(CNY)');
    const currentMonthSection = buildSection('本月应付总金额(CNY)');

    const extractAmountFromSection = (sectionText, label) =>
      normalizeAmount(extract(label, sectionText));

    // Department: second cell of the first body row that has a non-empty
    // second cell, in the first table that mentions the department header.
    let department = '';
    const table = Array.from(document.querySelectorAll('table')).find((node) =>
      (node.innerText || '').includes('所属部门'),
    );
    if (table) {
      for (const row of table.querySelectorAll('tbody tr')) {
        const cells = row.querySelectorAll('td');
        if (cells.length < 2) continue;
        const value = normalize(cells[1]?.innerText || cells[1]?.textContent || '');
        if (value) {
          department = value;
          break;
        }
      }
    }

    return {
      customerAccount: extract('客户账号'),
      customerName: extract('客户名称'),
      customerType: extract('客户类型'),
      tradeMode: extract('交易模式'),
      customerSource: extract('客户来源'),
      realNameStatus: extract('实名状态'),
      email: extract('邮箱'),
      relationDate: extract('关联日期'),
      phone: extract('手机号'),
      remark: extract('备注'),
      paymentNoticeStatus: extract('代为支付告知状态'),
      department,
      lastMonthPayableTotalCny: extractAmountFromSection(lastMonthSection, '上月应付总金额(CNY)'),
      lastMonthPrepayCny: extractAmountFromSection(lastMonthSection, '预付费金额'),
      lastMonthPostpayCny: extractAmountFromSection(lastMonthSection, '后付费金额'),
      currentMonthPayableTotalCny: extractAmountFromSection(currentMonthSection, '本月应付总金额(CNY)'),
      currentMonthPrepayCny: extractAmountFromSection(currentMonthSection, '预付费金额'),
      currentMonthPostpayCny: extractAmountFromSection(currentMonthSection, '后付费金额'),
    };
  });
}
|