aps更新

This commit is contained in:
ray
2026-06-18 13:03:21 +08:00
parent 4af439a6c9
commit 9bb5ab35af
13 changed files with 2623 additions and 104 deletions

View File

@@ -7,6 +7,8 @@ import { config, datasets } from './config.js';
import { sendLoginAlert, sendRuntimeErrorAlert } from './notify.js';
import {
closeDbPool,
getExistingMessageIds,
getExistingMessageFingerprints,
getLatestBillConsumptionTimeFromDb,
getLatestMessageTimeFromDb,
getLatestOrderTimeFromDb,
@@ -30,6 +32,7 @@ import {
} from './storage.js';
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
const scheduleEventFile = path.join(config.dataDir, 'runs', 'schedule-events.jsonl');
let _context = null;
let _runtimeController = null;
@@ -37,6 +40,15 @@ let _browser = null;
let _isAttachedBrowser = false;
const runningJobs = new Set();
function recordScheduleEvent(payload) {
try {
fs.mkdirSync(path.dirname(scheduleEventFile), { recursive: true });
fs.appendFileSync(scheduleEventFile, `${JSON.stringify({ at: new Date().toISOString(), ...payload })}\n`, 'utf8');
} catch (error) {
console.warn(`[schedule-event] 写入失败: ${error.message}`);
}
}
const AUTH_PAGE_KEYWORDS = [
'RAM 用户登录',
'主账号登录',
@@ -318,6 +330,12 @@ function subtractDays(dateValue, days) {
return next;
}
function subtractMonths(dateValue, months) {
const next = new Date(dateValue);
next.setMonth(next.getMonth() - months);
return next;
}
function randomIntBetween(min, max) {
return Math.floor(Math.random() * (max - min + 1)) + min;
}
@@ -505,7 +523,7 @@ async function captureErrorArtifacts(page, metadata = {}) {
fs.writeFileSync(jsonPath, JSON.stringify(payload, null, 2));
let screenshotSaved = false;
if (page) {
if (page && !page.isClosed?.()) {
try {
await page.screenshot({ path: screenshotPath, fullPage: true, timeout: 5000, animations: 'disabled' });
screenshotSaved = true;
@@ -679,7 +697,9 @@ export async function syncBillsOnly(options = {}) {
await reportRuntimeError(error, page, { label: 'syncBillsOnly', dataset: 'bills', mode: options.incremental ? 'incremental' : 'full' });
throw error;
} finally {
if (config.closeBrowser) {
if (options.keepBrowserOpen === true) {
console.log('浏览器保持运行schedule bills');
} else if (config.closeBrowser) {
await closeContextIfNeeded();
} else {
console.log('浏览器保持运行');
@@ -755,13 +775,28 @@ export async function syncMessagesOnly(options = {}) {
}
export async function scheduleSync() {
console.log(`定时任务已启动: normal=${config.cron}, hot=${config.hotCron} (${config.timezone})`);
console.log(`定时任务已启动: bills=${config.cron}, hot=${config.hotCron} (${config.timezone})`);
setInterval(() => {
console.log(`[${new Date().toISOString()}] 定时守护存活中: bills=${config.cron}, hot=${config.hotCron}, mode=${config.scheduleMode}`);
}, 60 * 1000);
cron.schedule(
config.cron,
async () => {
if (config.scheduleMode === 'hot') {
return;
return runLockedJob('schedule-shared', async () => {
try {
recordScheduleEvent({ track: 'bills', status: 'started', mode: 'bills-incremental' });
console.log(`[${new Date().toISOString()}] 开始执行账单定时同步 mode=bills-incremental`);
const summary = await syncBillsOnly({ incremental: true, keepBrowserOpen: true });
recordScheduleEvent({ track: 'bills', status: 'completed', mode: 'bills-incremental', summary });
console.log(`[${new Date().toISOString()}] 账单定时同步完成`, JSON.stringify(summary, null, 2));
} catch (error) {
recordScheduleEvent({ track: 'bills', status: 'failed', mode: 'bills-incremental', error: error.message });
console.error(`[${new Date().toISOString()}] 账单定时同步失败`, error);
}
});
}
try {
console.log(`[${new Date().toISOString()}] 开始执行同步 mode=${config.scheduleMode}`);
const summary = config.scheduleMode === 'full'
@@ -781,13 +816,22 @@ export async function scheduleSync() {
if (config.scheduleMode !== 'hot') {
return;
}
try {
console.log(`[${new Date().toISOString()}] 开始执行高频同步 mode=hot`);
const summary = await syncHot();
console.log(`[${new Date().toISOString()}] 高频同步完成`, JSON.stringify(summary, null, 2));
} catch (error) {
console.error(`[${new Date().toISOString()}] 高频同步失败`, error);
}
return runLockedJob('schedule-shared', async () => {
try {
recordScheduleEvent({ track: 'hot', status: 'started', mode: 'hot' });
console.log(`[${new Date().toISOString()}] 开始执行高频同步 mode=hot`);
const summary = await syncHot({ keepBrowserOpen: true });
if (summary?.skipped) {
recordScheduleEvent({ track: 'hot', status: 'skipped', mode: 'hot', reason: summary.reason || 'already_running' });
} else {
recordScheduleEvent({ track: 'hot', status: 'completed', mode: 'hot', summary });
}
console.log(`[${new Date().toISOString()}] 高频同步完成`, JSON.stringify(summary, null, 2));
} catch (error) {
recordScheduleEvent({ track: 'hot', status: 'failed', mode: 'hot', error: error.message });
console.error(`[${new Date().toISOString()}] 高频同步失败`, error);
}
});
},
{ timezone: config.timezone },
);
@@ -826,7 +870,9 @@ export async function syncHot(options = {}) {
await reportRuntimeError(error, page, { label: 'syncHot', dataset: 'hot', mode: 'hot' });
throw error;
} finally {
if (config.closeBrowser) {
if (options.keepBrowserOpen === true) {
console.log('浏览器保持运行schedule hot');
} else if (config.closeBrowser) {
await closeContextIfNeeded();
} else {
console.log('浏览器保持运行');
@@ -1274,6 +1320,7 @@ async function syncMessages(page, options = {}) {
await runtimeCheckpoint('同步消息');
const dataset = datasets.messages;
const { incremental = false, resume = false, hot = false } = options;
const fullSyncWatermark = !incremental && !hot ? subtractMonths(new Date(), 3) : null;
await page.goto(dataset.url, { waitUntil: 'domcontentloaded' });
await waitUntilReady(page, dataset.heading);
await trySetPageSize(page, dataset.pageSize);
@@ -1283,17 +1330,36 @@ async function syncMessages(page, options = {}) {
let shouldContinueScrape = true;
let allNormalizedRecords = Array.isArray(resumeCheckpoint?.records) ? resumeCheckpoint.records : [];
const shouldStopForFullSyncPage = (pageRows) => {
if (!fullSyncWatermark) {
return false;
}
const normalizedPageRows = normalizeDatasetRecords(dataset, pageRows, {});
const pageTimeStats = getMessagePageTimeStats(normalizedPageRows);
console.log(`[全量模式] 当前页时间范围: parsed=${pageTimeStats.parsed}/${pageTimeStats.total}, earliest=${pageTimeStats.earliest || 'N/A'}, latest=${pageTimeStats.latest || 'N/A'}, watermark=${formatDateTime(fullSyncWatermark)}`);
return normalizedPageRows.length > 0
&& normalizedPageRows.every((record) => !isAfterLatestMessageTime(record, fullSyncWatermark));
};
if (resumeFromPage > 0) {
console.log(`[消息续爬] 从 checkpoint 恢复: page=${resumeFromPage}, records=${allNormalizedRecords.length}`);
const moved = await moveMessagesToResumeStart(page, resumeFromPage);
if (!moved) {
console.log('[消息续爬] checkpoint 已在最后一页,无需继续抓取');
shouldContinueScrape = false;
} else if (fullSyncWatermark) {
await waitForTableRows(page);
const resumedPageData = await extractTable(page);
if (shouldStopForFullSyncPage(resumedPageData.rows)) {
console.log(`[全量模式] 当前续爬页已超出近三个月范围,停止继续抓取: page=${resumeFromPage + 1}, watermark=${formatDateTime(fullSyncWatermark)}`);
shouldContinueScrape = false;
}
}
}
let records = [];
let hotWatermark = null;
let stopByExistingPage = false;
if (hot && hasDbConfig()) {
const latestMessageTime = await getLatestMessageTimeFromDb();
const latest = parseDbDateTime(latestMessageTime);
@@ -1304,11 +1370,39 @@ async function syncMessages(page, options = {}) {
if (shouldContinueScrape) {
records = await scrapePagedTable(page, dataset, {}, {
onPage: async ({ pageNum, pageRows }) => {
const normalizedPageRows = normalizeDatasetRecords(dataset, pageRows, {});
allNormalizedRecords.push(...normalizedPageRows);
if (hasDbConfig()) {
await upsertMessages(normalizedPageRows);
const detailedPageRows = await enrichMessageRowsWithDetails(page, pageRows, pageNum);
const normalizedPageRows = normalizeDatasetRecords(dataset, detailedPageRows, {});
const filteredPageRows = fullSyncWatermark
? normalizedPageRows.filter((record) => isAfterLatestMessageTime(record, fullSyncWatermark))
: normalizedPageRows;
let pageRowsToPersist = filteredPageRows;
if (hasDbConfig() && filteredPageRows.length > 0) {
const pageMsgIds = filteredPageRows.map((record) => record.msgId).filter(Boolean);
const existingIds = await getExistingMessageIds(pageMsgIds);
const fingerprintCandidates = filteredPageRows
.map((record) => String(record.receivedAt || record.gmtModified || record.gmtCreated || '').trim())
.filter(Boolean);
const existingFingerprintRows = await getExistingMessageFingerprints(fingerprintCandidates);
const existingFingerprints = new Set(
existingFingerprintRows.map((row) => buildMessageFingerprint({ title: row.title, receivedAt: row.received_at, orderNo: row.order_no })),
);
stopByExistingPage = filteredPageRows.length > 0 && filteredPageRows.every((record) => {
if (record.msgId) {
return existingIds.has(record.msgId);
}
return existingFingerprints.has(buildMessageFingerprint(record));
});
pageRowsToPersist = filteredPageRows.filter((record) => {
if (record.msgId) {
return !existingIds.has(record.msgId);
}
return !existingFingerprints.has(buildMessageFingerprint(record));
});
if (pageRowsToPersist.length > 0) {
await upsertMessages(pageRowsToPersist);
}
}
allNormalizedRecords.push(...pageRowsToPersist);
await saveMessagesCheckpoint(dataset, pageNum, allNormalizedRecords);
},
skipInitialPage: resumeFromPage > 0,
@@ -1317,13 +1411,22 @@ async function syncMessages(page, options = {}) {
if (pageNum >= config.hotMessageMaxPagesPerRun) {
return true;
}
if (stopByExistingPage) {
return true;
}
if (!hotWatermark) {
return false;
}
const normalizedPageRows = normalizeDatasetRecords(dataset, pageRows, {});
const detailedPageRows = await enrichMessageRowsWithDetails(page, pageRows, pageNum);
const normalizedPageRows = normalizeDatasetRecords(dataset, detailedPageRows, {});
return normalizedPageRows.length > 0
&& normalizedPageRows.every((record) => !isAfterLatestMessageTime(record, hotWatermark));
}
: fullSyncWatermark
? async ({ pageNum, pageRows }) => {
const detailedPageRows = await enrichMessageRowsWithDetails(page, pageRows, pageNum);
return stopByExistingPage || shouldStopForFullSyncPage(detailedPageRows);
}
: undefined,
});
}
@@ -1331,6 +1434,11 @@ async function syncMessages(page, options = {}) {
if (resumeFromPage === 0) {
allNormalizedRecords = normalizeDatasetRecords(dataset, records, {});
}
if (fullSyncWatermark) {
const before = allNormalizedRecords.length;
allNormalizedRecords = allNormalizedRecords.filter((record) => isAfterLatestMessageTime(record, fullSyncWatermark));
console.log(`[全量模式] 消息仅保留近三个月: ${before} -> ${allNormalizedRecords.length} (watermark=${formatDateTime(fullSyncWatermark)})`);
}
if ((incremental || hot) && hasDbConfig()) {
try {
const latestMessageTime = await getLatestMessageTimeFromDb();
@@ -1350,7 +1458,8 @@ async function syncMessages(page, options = {}) {
}
}
return persistDataset(dataset, dedupeByHash(allNormalizedRecords), {});
const previousState = loadCurrentState(dataset.name, dataset.uniqueKey);
return persistNormalizedDataset(dataset, dedupeByHash([...(previousState.records || []), ...allNormalizedRecords]));
}
async function saveMessagesCheckpoint(dataset, pageNum, normalizedRecords) {
@@ -1629,15 +1738,53 @@ function isAfterLatestConsumptionDate(record, latestConsumptionDate) {
function isAfterLatestMessageTime(record, watermarkDate) {
const value = String(record['消息修改时间'] || record['修改时间'] || record.gmtModified || record['消息创建时间'] || record['创建时间'] || record.gmtCreated || '').trim();
if (!value) {
return true;
return false;
}
const parsed = parseDbDateTime(value);
if (!parsed) {
return true;
return false;
}
return parsed >= watermarkDate;
}
function extractMessageTime(record) {
const value = String(record['消息修改时间'] || record['修改时间'] || record.gmtModified || record['消息创建时间'] || record['创建时间'] || record.gmtCreated || '').trim();
if (!value) {
return null;
}
return parseDbDateTime(value);
}
function getMessagePageTimeStats(records) {
const parsedTimes = records
.map((record) => extractMessageTime(record))
.filter(Boolean)
.sort((a, b) => a.getTime() - b.getTime());
if (parsedTimes.length === 0) {
return {
total: records.length,
parsed: 0,
earliest: '',
latest: '',
};
}
return {
total: records.length,
parsed: parsedTimes.length,
earliest: formatDateTime(parsedTimes[0]),
latest: formatDateTime(parsedTimes[parsedTimes.length - 1]),
};
}
function buildMessageFingerprint(record) {
const title = String(record.title || record.detailTitle || record.column_1 || '').trim();
const receivedAt = String(record.receivedAt || record.gmtModified || record.gmtCreated || record.column_2 || '').trim();
const orderNo = String(record.orderNo || record.refundOrderNo || '').trim();
return `${title}__${receivedAt}__${orderNo}`;
}
async function syncOrderDetails(page, cachedOrderIds, options = {}) {
await runtimeCheckpoint('同步订单详情');
const dataset = datasets.orderDetails;
@@ -1806,6 +1953,20 @@ async function scrapePagedTable(page, dataset, context, options = {}) {
const visited = new Set();
let shouldSkipCurrentPage = skipInitialPage;
const describeStopReason = (reason) => {
if (!reason) {
return 'unknown';
}
const details = [];
if (reason.beforePage != null) {
details.push(`before=${reason.beforePage}`);
}
if (reason.afterPage != null) {
details.push(`after=${reason.afterPage}`);
}
return details.length > 0 ? `${reason.code} (${details.join(', ')})` : reason.code;
};
while (true) {
await runtimeCheckpoint(`抓取 ${dataset.name} 分页`);
await waitForTableRows(page);
@@ -1816,9 +1977,9 @@ async function scrapePagedTable(page, dataset, context, options = {}) {
if (shouldSkipCurrentPage) {
console.log(`[抓取] 跳过 checkpoint 已保存页: ${pageNum}`);
shouldSkipCurrentPage = false;
const moved = await gotoNextPage(page);
const { moved, reason } = await gotoNextPage(page);
if (!moved) {
console.log(`[抓取] checkpoint 已位于最后一页,停止`);
console.log(`[抓取] checkpoint 已停止续爬: ${describeStopReason(reason)}`);
break;
}
continue;
@@ -1839,9 +2000,9 @@ async function scrapePagedTable(page, dataset, context, options = {}) {
break;
}
const moved = await gotoNextPage(page);
const { moved, reason } = await gotoNextPage(page);
if (!moved) {
console.log(`[抓取] 翻页失败或已到最后一页,停止`);
console.log(`[抓取] 停止翻页: ${describeStopReason(reason)}`);
break;
}
}
@@ -1875,39 +2036,101 @@ async function extractTable(page) {
.replace(/\n\s+/g, '\n')
.trim();
const headerTables = Array.from(document.querySelectorAll('table')).filter((table) => table.querySelectorAll('thead th').length > 1);
const headerTable = headerTables.sort((a, b) => b.querySelectorAll('thead th').length - a.querySelectorAll('thead th').length)[0];
if (!headerTable) return { headers: [], rows: [] };
const headers = Array.from(headerTable.querySelectorAll('thead th')).map((cell) => normalize(cell.textContent));
const bodyTables = Array.from(document.querySelectorAll('table')).filter((table) => table.querySelectorAll('tbody tr').length > 0);
const bodyTable = bodyTables.sort((a, b) => {
const aSize = Math.max(...Array.from(a.querySelectorAll('tbody tr')).map((row) => row.querySelectorAll('td').length), 0);
const bSize = Math.max(...Array.from(b.querySelectorAll('tbody tr')).map((row) => row.querySelectorAll('td').length), 0);
return bSize - aSize;
})[0];
if (!bodyTable) return { headers, rows: [] };
const rows = Array.from(bodyTable.querySelectorAll('tbody tr'))
.map((row) => Array.from(row.querySelectorAll('td')).map((cell) => normalize(cell.innerText || cell.textContent)))
const toRecords = (headers, rows) => rows
.map((cells) => cells.map((cell) => normalize(cell)))
.filter((cells) => cells.some(Boolean))
.map((cells) => {
const record = {};
headers.forEach((header, index) => {
const keys = headers.length ? headers : cells.map((_, index) => `column_${index + 1}`);
keys.forEach((header, index) => {
record[header || `column_${index + 1}`] = cells[index] || '';
});
return record;
});
return { headers, rows };
const extractFromNativeTables = () => {
const headerTables = Array.from(document.querySelectorAll('table')).filter((table) => table.querySelectorAll('thead th').length > 1);
const headerTable = headerTables.sort((a, b) => b.querySelectorAll('thead th').length - a.querySelectorAll('thead th').length)[0];
const headers = headerTable
? Array.from(headerTable.querySelectorAll('thead th')).map((cell) => normalize(cell.textContent))
: [];
const bodyTables = Array.from(document.querySelectorAll('table')).filter((table) => table.querySelectorAll('tbody tr').length > 0);
const bodyTable = bodyTables.sort((a, b) => {
const aSize = Math.max(...Array.from(a.querySelectorAll('tbody tr')).map((row) => row.querySelectorAll('td').length), 0);
const bSize = Math.max(...Array.from(b.querySelectorAll('tbody tr')).map((row) => row.querySelectorAll('td').length), 0);
return bSize - aSize;
})[0];
if (!bodyTable) {
return { headers, rows: [] };
}
const rows = Array.from(bodyTable.querySelectorAll('tbody tr'))
.map((row) => Array.from(row.querySelectorAll('td')).map((cell) => normalize(cell.innerText || cell.textContent)));
return { headers, rows: toRecords(headers, rows) };
};
const extractFromNextTable = () => {
const container = document.querySelector('.next-table, .next-table-inner, [class*="next-table"]');
if (!container) {
return { headers: [], rows: [] };
}
const headers = Array.from(container.querySelectorAll('.next-table-header .next-table-cell, .next-table-header th, [role="columnheader"]'))
.map((cell) => normalize(cell.innerText || cell.textContent))
.filter(Boolean);
const rowCandidates = Array.from(container.querySelectorAll('.next-table-body .next-table-row, .next-table-row, [role="row"]'));
const rows = rowCandidates
.map((row) => {
const cells = Array.from(row.querySelectorAll('.next-table-cell, [role="gridcell"], [role="cell"], td'))
.map((cell) => normalize(cell.innerText || cell.textContent));
return cells;
})
.filter((cells) => cells.length > 0 && cells.some(Boolean));
return { headers, rows: toRecords(headers, rows) };
};
const nativeResult = extractFromNativeTables();
if (nativeResult.rows.length > 0) {
return nativeResult;
}
const nextTableResult = extractFromNextTable();
if (nextTableResult.rows.length > 0) {
return nextTableResult;
}
return nextTableResult.headers.length > 0 ? nextTableResult : nativeResult;
});
}
function isTargetClosedError(error) {
const message = String(error?.message || error || '');
return message.includes('Target page, context or browser has been closed');
}
function assertPageAvailable(page, label) {
if (!page || page.isClosed?.()) {
throw new Error(`页面在${label}前已被关闭。请检查是否手动关闭了浏览器,或浏览器是否异常退出,然后重新执行同步。`);
}
}
async function waitForTableRows(page) {
await runtimeCheckpoint('等待表格加载');
assertPageAvailable(page, '等待表格加载');
try {
await page.waitForFunction(() => document.querySelectorAll('table tbody tr').length > 0, null, { timeout: 120000 });
await page.waitForFunction(() => {
const nativeRows = document.querySelectorAll('table tbody tr').length;
const nextRows = document.querySelectorAll('.next-table-body .next-table-row, .next-table-row, [role="row"]').length;
const emptyState = document.querySelector('.next-table-empty, .next-empty, [class*="empty"], [class*="no-data"]');
return nativeRows > 0 || nextRows > 0 || Boolean(emptyState);
}, null, { timeout: 120000 });
} catch (error) {
if (isTargetClosedError(error)) {
throw new Error('等待消息表格加载时,浏览器页面已被关闭。请勿手动关闭浏览器窗口,并检查浏览器是否异常退出后重试。');
}
await raiseIfSessionExpired(page, '等待表格加载');
throw error;
}
@@ -1915,12 +2138,21 @@ async function waitForTableRows(page) {
}
async function currentPageNumber(page) {
const active = page.locator('.next-pagination-item.next-current');
if ((await active.count()) === 0) return 1;
return Number.parseInt((await active.first().innerText()).trim(), 10) || 1;
assertPageAvailable(page, '读取当前页码');
try {
const active = page.locator('.next-pagination-item.next-current');
if ((await active.count()) === 0) return 1;
return Number.parseInt((await active.first().innerText()).trim(), 10) || 1;
} catch (error) {
if (isTargetClosedError(error)) {
throw new Error('读取分页页码时,浏览器页面已被关闭。请勿手动关闭浏览器窗口,并检查浏览器是否异常退出后重试。');
}
throw error;
}
}
async function jumpToPage(page, targetPage) {
async function jumpToPage(page, targetPage, options = {}) {
const { allowSequentialFallback = true } = options;
if (targetPage <= 1) {
return true;
}
@@ -1949,19 +2181,24 @@ async function jumpToPage(page, targetPage) {
await sleep(1500);
const afterJump = await currentPageNumber(page);
if (afterJump === targetPage) {
console.log(`[账单续爬] 已跳转到第 ${targetPage}`);
console.log(`[跳页] 已跳转到第 ${targetPage}`);
return true;
}
}
console.warn(`[账单续爬] 未找到可用跳页输入框,尝试顺序翻到第 ${targetPage}`);
if (!allowSequentialFallback) {
console.warn(`[跳页] 未找到可用跳页输入框,且当前模式禁止顺序兜底: target=${targetPage}`);
return false;
}
console.warn(`[跳页] 未找到可用跳页输入框,尝试顺序翻到第 ${targetPage}`);
let guard = 0;
while (guard < targetPage + 5) {
const currentPage = await currentPageNumber(page);
if (currentPage >= targetPage) {
return currentPage === targetPage;
}
const moved = await gotoNextPage(page);
const { moved } = await gotoNextPage(page);
if (!moved) {
return false;
}
@@ -1972,38 +2209,71 @@ async function jumpToPage(page, targetPage) {
async function gotoNextPage(page) {
await runtimeCheckpoint('翻页');
assertPageAvailable(page, '翻页');
const before = await currentPageNumber(page);
// 用 Playwright locator 定位"下一页"按钮
const nextBtn = page.locator('button.next-pagination-item.next-next');
if ((await nextBtn.count()) === 0) {
console.log('[翻页] 未找到下一页按钮');
return false;
try {
// 用 Playwright locator 定位"下一页"按钮
const nextBtn = page.locator('button.next-pagination-item.next-next');
if ((await nextBtn.count()) === 0) {
return {
moved: false,
reason: { code: 'next_button_missing', beforePage: before },
};
}
const disabled = (await nextBtn.getAttribute('disabled')) != null;
if (disabled) {
return {
moved: false,
reason: { code: 'next_button_disabled', beforePage: before },
};
}
// 用 Playwright click而非 DOM click确保 React 事件正常触发
await nextBtn.click();
await sleep(2000);
await raiseIfSessionExpired(page, `翻页 ${before} -> next`);
const after = await currentPageNumber(page);
console.log(`[翻页] ${before} -> ${after}`);
if (before > 1 && after === 1) {
throw new Error(`分页从第 ${before} 页异常回退到第 1 页,疑似登录态失效或页面会话已重置。请重新执行 npm run login 后再继续同步。`);
}
if (after < before) {
throw new Error(`分页从第 ${before} 页异常回退到第 ${after} 页,疑似登录态失效或页面状态被重置。请重新执行 npm run login 后再继续同步。`);
}
if (before === after) {
const fallbackTarget = before + 1;
console.warn(`[翻页] next 点击后页码未推进,尝试跳页到 ${fallbackTarget}`);
const jumped = await jumpToPage(page, fallbackTarget, { allowSequentialFallback: false });
if (jumped) {
const afterJump = await currentPageNumber(page);
console.log(`[翻页] fallback jump ${before} -> ${afterJump}`);
return {
moved: true,
reason: { code: 'advanced_via_jump', beforePage: before, afterPage: afterJump },
};
}
return {
moved: false,
reason: { code: 'page_number_not_advanced', beforePage: before, afterPage: after },
};
}
return {
moved: true,
reason: { code: 'advanced', beforePage: before, afterPage: after },
};
} catch (error) {
if (isTargetClosedError(error)) {
throw new Error(`翻页到下一页时,浏览器页面在第 ${before} 页之后被关闭。请勿手动关闭浏览器窗口,并检查浏览器是否异常退出后重试。`);
}
throw error;
}
const disabled = (await nextBtn.getAttribute('disabled')) != null;
if (disabled) {
console.log('[翻页] 下一页按钮已禁用');
return false;
}
// 用 Playwright click而非 DOM click确保 React 事件正常触发
await nextBtn.click();
await sleep(2000);
await raiseIfSessionExpired(page, `翻页 ${before} -> next`);
const after = await currentPageNumber(page);
console.log(`[翻页] ${before} -> ${after}`);
if (before > 1 && after === 1) {
throw new Error(`分页从第 ${before} 页异常回退到第 1 页,疑似登录态失效或页面会话已重置。请重新执行 npm run login 后再继续同步。`);
}
if (after < before) {
throw new Error(`分页从第 ${before} 页异常回退到第 ${after} 页,疑似登录态失效或页面状态被重置。请重新执行 npm run login 后再继续同步。`);
}
return before !== after;
}
async function trySetPageSize(page, pageSize) {
@@ -2455,6 +2725,217 @@ async function waitForStableOrderList(page) {
await waitForTableRows(page).catch(() => null);
}
async function clickMessageDetailButton(page, rowText, rowIndex) {
const clicked = await page.evaluate(({ rowTextValue, rowIndexValue }) => {
const normalize = (value) => String(value || '').replace(/\s+/g, ' ').trim();
const target = normalize(rowTextValue);
const rows = Array.from(document.querySelectorAll('.next-table-row, table tbody tr, [role="row"]'))
.filter((row) => normalize(row.innerText || row.textContent || ''));
const row = rows[rowIndexValue];
if (!row) {
return { clicked: false, reason: 'row_not_found', rowCount: rows.length };
}
const rowTextActual = normalize(row.innerText || row.textContent || '');
const clickableNodes = Array.from(row.querySelectorAll('button, a, [role="button"], .next-btn-text'));
const preferred = clickableNodes.find((node) => {
const text = normalize(node.innerText || node.textContent || '');
return text && rowTextActual.includes(text);
}) || clickableNodes[0];
if (!preferred) {
return { clicked: false, reason: 'clickable_node_not_found', rowTextActual };
}
preferred.scrollIntoView({ block: 'center', inline: 'center', behavior: 'instant' });
preferred.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: true }));
return {
clicked: true,
buttonText: normalize(preferred.innerText || preferred.textContent || ''),
rowTextActual,
matched: rowTextActual.includes(target),
};
}, { rowTextValue: rowText, rowIndexValue: rowIndex }).catch(() => ({ clicked: false, reason: 'evaluate_failed' }));
if (clicked.clicked) {
await sleep(1200);
}
return clicked;
}
async function waitForMessageDetailDrawer(page) {
await page.waitForFunction(() => {
const header = document.querySelector('.next-drawer-header');
const body = document.querySelector('.next-drawer-body');
return !!header && !!body && String(header.textContent || '').includes('消息详情');
}, null, { timeout: 15000 });
await sleep(600);
}
async function extractMessageDetail(page) {
return page.evaluate(() => {
const normalize = (value) => String(value || '').replace(/\u00a0/g, ' ').trim();
const header = normalize(document.querySelector('.next-drawer-header')?.innerText || '');
const body = normalize(document.querySelector('.next-drawer-body')?.innerText || '');
const lines = body.split(/\r?\n/).map((line) => normalize(line)).filter(Boolean);
const firstLine = lines[0] || '';
const extract = (label) => {
const line = lines.find((item) => item.startsWith(`${label}:`) || item.startsWith(`${label}`));
if (!line) return '';
return normalize(line.replace(`${label}:`, '').replace(`${label}`, ''));
};
const match = (pattern) => {
const matched = body.match(pattern);
return matched?.[1] ? normalize(matched[1]) : '';
};
const accountIdMatches = Array.from(body.matchAll(/账号ID[:]?(\d{6,})/g)).map((item) => normalize(item[1])).filter(Boolean);
const classification = (() => {
if (/退款/.test(header) || /退款/.test(body)) return 'refund';
if (/释放预警/.test(header) || /预计于【.*】释放/.test(body)) return 'release_warning';
if (/释放通知/.test(header) || /已释放/.test(body)) return 'release_notice';
if (/未支付提醒/.test(header) || /未支付/.test(body)) return 'unpaid_reminder';
if (/取消通知/.test(header) || /取消了一笔未支付订单/.test(body)) return 'order_cancel';
if (/余额-预警通知/.test(header) || /账户现金余额/.test(body)) return 'balance_warning';
if (/关联成功/.test(header) || /关联关系已完成建立/.test(body)) return 'association_success';
if (/注册成功/.test(header) || /受邀注册UID/.test(body)) return 'registration_success';
if (/变更已超期/.test(header) || /变更申请已超期/.test(body)) return 'change_overdue';
return 'general';
})();
const detailContent = lines.filter((line) => !/^(接收时间|客户账号|订单号|退款订单号|订单金额|退款金额|客户下单时间|退款时间|受邀注册UID)[:]/.test(line));
return {
detailTitle: firstLine || header,
detailContent: body,
receivedAt: extract('接收时间'),
customerName: extract('客户账号'),
customerNo: extract('客户账号') || match(/贵司的代付(?:关联)?客户【[^/]+\/(\d{6,})】/) || match(/受邀注册UID[:]?(\d{6,})/) || accountIdMatches[0] || '',
orderNo: extract('订单号') || extract('退款订单号'),
orderAmount: extract('订单金额'),
customerOrderTime: extract('客户下单时间'),
refundOrderNo: extract('退款订单号'),
refundAmount: extract('退款金额'),
refundTime: extract('退款时间'),
invitedRegisterUid: extract('受邀注册UID') || match(/受邀注册UID[:]?(\d{6,})/),
accountIds: accountIdMatches.join(','),
messageClassification: classification,
status: '未读',
title: firstLine || header,
content: detailContent.join('\n'),
};
});
}
async function closeMessageDetailDrawer(page) {
const closeButton = page.locator('.next-drawer-close, .next-dialog-close, .next-icon-close').first();
if ((await closeButton.count()) > 0) {
await closeButton.click().catch(() => null);
} else {
await page.keyboard.press('Escape').catch(() => null);
}
await page.waitForFunction(() => !document.querySelector('.next-drawer-header'), null, { timeout: 10000 }).catch(() => null);
await sleep(400);
}
async function fetchMessageApiRows(page, pageNum, pageSize) {
return page.evaluate(async ({ currentPage, currentPageSize }) => {
const response = await fetch(`/api/taskapi/msgbox/queryUserMsg.json?lv2CategoryId=0&pageNo=${currentPage}&pageSize=${currentPageSize}`, {
credentials: 'include',
});
const payload = await response.json();
return Array.isArray(payload?.data?.list) ? payload.data.list : [];
}, { currentPage: pageNum, currentPageSize: pageSize }).catch(() => []);
}
function stripHtmlTags(value) {
return String(value || '')
.replace(/<br\s*\/?>(\r?\n)?/gi, '\n')
.replace(/<\/div>/gi, '\n')
.replace(/<\/p>/gi, '\n')
.replace(/<[^>]+>/g, '')
.replace(/&nbsp;/gi, ' ')
.replace(/\r/g, '')
.split('\n')
.map((line) => line.trim())
.filter(Boolean)
.join('\n');
}
function mapApiMessageRecord(record) {
if (!record || typeof record !== 'object') {
return null;
}
return {
msgId: String(record.id || '').trim(),
title: String(record.title || '').trim(),
detailTitle: String(record.title || '').trim(),
detailContent: stripHtmlTags(record.htmlContent || record.content || ''),
content: stripHtmlTags(record.htmlContent || record.content || ''),
fromApp: String(record.fromApp || '').trim(),
bizCode: String(record.bizCode || '').trim(),
msgChannel: String(record.msgChannel || '').trim(),
categoryName: String(record.categoryName || '').trim(),
categoryId: String(record.lv3CategoryId || '').trim(),
lv1CategoryId: String(record.lv1CategoryId || '').trim(),
lv2CategoryId: String(record.lv2CategoryId || '').trim(),
lv3CategoryId: String(record.lv3CategoryId || '').trim(),
gmtCreated: record.createDate ? formatDateTime(new Date(record.createDate)) : '',
gmtModified: record.updateDate ? formatDateTime(new Date(record.updateDate)) : '',
status: Number(record.isRead) === 1 ? '已读' : '未读',
};
}
async function enrichMessageRowsWithDetails(page, pageRows, pageNum) {
const enrichedRows = [];
let detailSuccess = 0;
let detailFailed = 0;
const apiRows = await fetchMessageApiRows(page, pageNum, datasets.messages.pageSize);
for (let index = 0; index < pageRows.length; index += 1) {
const row = pageRows[index];
const rowText = String(row['消息标题'] || row['标题'] || row.title || row.column_1 || '').trim();
if (!rowText) {
enrichedRows.push(row);
continue;
}
const apiDetail = mapApiMessageRecord(apiRows[index]);
if (apiDetail?.msgId) {
detailSuccess += 1;
enrichedRows.push({ ...row, ...apiDetail });
continue;
}
const clicked = await clickMessageDetailButton(page, rowText, index);
if (!clicked.clicked) {
detailFailed += 1;
console.warn(`[消息详情] 打开失败: pageRow=${index + 1}, title="${rowText}", reason=${clicked.reason || 'unknown'}`);
enrichedRows.push(row);
continue;
}
try {
await waitForMessageDetailDrawer(page);
const detail = await extractMessageDetail(page);
detailSuccess += 1;
enrichedRows.push({ ...row, ...detail });
} catch (error) {
detailFailed += 1;
console.warn(`[消息详情] 提取失败: pageRow=${index + 1}, title="${rowText}", error=${error.message}`);
enrichedRows.push(row);
} finally {
await closeMessageDetailDrawer(page);
await waitForTableRows(page).catch(() => null);
}
}
console.log(`[消息详情] 本页详情提取: success=${detailSuccess}, failed=${detailFailed}, total=${pageRows.length}`);
return enrichedRows;
}
async function restoreOrderWindow(page, windowStart, windowEnd) {
await waitUntilReady(page, datasets.orders.heading).catch(() => null);
await setDateRange(page, windowStart, windowEnd);
@@ -2587,6 +3068,9 @@ async function extractCustomerDetail(page) {
}
}
}
if (!department) {
department = extract('所属部门');
}
return {
customerAccount: extract('客户账号'),
@@ -2595,10 +3079,10 @@ async function extractCustomerDetail(page) {
tradeMode: extract('交易模式'),
customerSource: extract('客户来源'),
realNameStatus: extract('实名状态'),
email: extract('邮箱'),
email: extract('邮箱') || extract('Email') || extract('电子邮箱'),
relationDate: extract('关联日期'),
phone: extract('手机号'),
remark: extract('备注'),
phone: extract('手机号') || extract('手机') || extract('联系电话') || extract('联系手机'),
remark: extract('备注') || extract('客户备注'),
paymentNoticeStatus: extract('代为支付告知状态'),
department,
lastMonthPayableTotalCny: extractAmountFromSection(lastMonthSection, '上月应付总金额CNY'),