diff --git a/aliyun-sync/aliyun-aps-sync/src/config.js b/aliyun-sync/aliyun-aps-sync/src/config.js index 7c11828..674cfe3 100644 --- a/aliyun-sync/aliyun-aps-sync/src/config.js +++ b/aliyun-sync/aliyun-aps-sync/src/config.js @@ -107,6 +107,7 @@ export const datasets = { uniqueKey: (record) => record.orderId || record.__hash, normalize: (record, context) => ({ orderId: record['订单号'] || '', + listPageNum: context.pageNum || '', customerAccount: (record['客户账号'] || '').replace(/\s+/g, ''), customerCategory: record['客户分类'] || '', orderType: record['订单类型'] || '', diff --git a/aliyun-sync/aliyun-aps-sync/src/sync.js b/aliyun-sync/aliyun-aps-sync/src/sync.js index e0e889c..c52c78a 100644 --- a/aliyun-sync/aliyun-aps-sync/src/sync.js +++ b/aliyun-sync/aliyun-aps-sync/src/sync.js @@ -1337,31 +1337,45 @@ async function syncOrderDetails(page, cachedOrderIds, options = {}) { const dataset = datasets.orderDetails; const resumeCheckpoint = options.resume ? loadLatestOrderDetailsCheckpoint() : null; - // 使用传入的 orderId 列表(在 syncOrders 覆盖 orders.json 之前缓存的) - const allOrderIds = cachedOrderIds || []; + const ordersState = loadCurrentState('orders', datasets.orders.uniqueKey); + const orderTargets = collectOrderDetailTargets(ordersState.records || [], cachedOrderIds || []); - if (allOrderIds.length === 0) { - console.log('[订单详情] 本地无订单数据,跳过'); + if (orderTargets.length === 0) { + console.log('[订单详情] 本地无订单定位数据,跳过'); return persistDataset(dataset, [], {}); } - console.log(`[订单详情] 共 ${allOrderIds.length} 个订单需要获取详情`); + console.log(`[订单详情] 共 ${orderTargets.length} 个订单需要获取详情`); const allDetails = Array.isArray(resumeCheckpoint?.records) ? resumeCheckpoint.records : []; const startIndex = Number.parseInt(String(resumeCheckpoint?.currentIndex || 0), 10) || 0; if (startIndex > 0) { console.log(`[订单详情续爬] 从 checkpoint 恢复: index=${startIndex}, records=${allDetails.length}`); } - const detailBaseUrl = 'https://aps.aliyun.com/?spm=5176.12818093.top-nav.ditem-fx.785716d0LKDpKT#/detail/order/~/costCenter/order/detail/'; + let currentListPage = 0; - for (let index = startIndex; index < allOrderIds.length; index += 1) { - await runtimeCheckpoint(`订单详情 ${index + 1}/${allOrderIds.length}`); - const orderId = allOrderIds[index]; - console.log(`[订单详情] ${index + 1}/${allOrderIds.length} orderId=${orderId}`); + await page.goto(datasets.orders.url, { waitUntil: 'domcontentloaded' }); + await waitUntilReady(page, datasets.orders.heading); + await trySetPageSize(page, datasets.orders.pageSize); - // 先跳 about:blank 再跳详情URL(强制 SPA 完整重新加载) - await page.goto('about:blank'); - await sleep(300); - await page.goto(`${detailBaseUrl}${orderId}?projectId=`, { waitUntil: 'domcontentloaded' }); + for (let index = startIndex; index < orderTargets.length; index += 1) { + await runtimeCheckpoint(`订单详情 ${index + 1}/${orderTargets.length}`); + const target = orderTargets[index]; + console.log(`[订单详情] ${index + 1}/${orderTargets.length} orderId=${target.orderId} page=${target.pageNum}`); + + if (target.pageNum > 0 && currentListPage !== target.pageNum) { + const reached = await jumpToOrderPage(page, target.pageNum); + if (!reached) { + console.warn(`[订单详情] 无法跳到第 ${target.pageNum} 页,跳过 ${target.orderId}`); + continue; + } + currentListPage = target.pageNum; + } + + const clicked = await clickOrderDetailFromListWithRetry(page, target); + if (!clicked) { + console.warn(`[订单详情] 列表中未找到 orderId=${target.orderId},跳过`); + continue; + } try { await page.waitForFunction( @@ -1371,13 +1385,15 @@ async function syncOrderDetails(page, cachedOrderIds, options = {}) { ); await sleep(1000); } catch { - console.warn(`[订单详情] ${orderId} 详情页加载超时,跳过`); + console.warn(`[订单详情] ${target.orderId} 详情页加载超时,跳过`); + await page.goBack({ waitUntil: 'domcontentloaded' }).catch(() => null); + await recoverOrderListState(page, currentListPage).catch(() => null); continue; } const detail = await extractOrderDetail(page); if (!isValidOrderId(detail.orderId)) { - detail.orderId = orderId; + detail.orderId = target.orderId; } allDetails.push({ ...detail, __context: {} }); await saveOrderDetailsCheckpoint(dataset, index + 1, allDetails); @@ -1385,6 +1401,10 @@ async function syncOrderDetails(page, cachedOrderIds, options = {}) { const normalizedDetail = normalizeDatasetRecords(dataset, [{ ...detail, __context: {} }], {}); await upsertOrderDetails(normalizedDetail); } + + await page.goBack({ waitUntil: 'domcontentloaded' }).catch(() => null); + await recoverOrderListState(page, currentListPage).catch(() => null); + currentListPage = target.pageNum; } return persistDataset(dataset, dedupeByHash(allDetails), {}); @@ -1958,6 +1978,28 @@ function collectCustomerDetailTargets(records) { return targets.sort((a, b) => a.pageNum - b.pageNum); } +function collectOrderDetailTargets(records, cachedOrderIds = []) { + const allowSet = new Set((cachedOrderIds || []).map((value) => String(value || '').trim()).filter(Boolean)); + const targets = []; + const seen = new Set(); + for (const record of records) { + const orderId = String(record.orderId || '').trim(); + const pageNum = Number.parseInt(String(record.listPageNum || 0), 10) || 0; + if (!orderId || !isValidOrderId(orderId) || pageNum <= 0) { + continue; + } + if (allowSet.size > 0 && !allowSet.has(orderId)) { + continue; + } + if (seen.has(orderId)) { + continue; + } + seen.add(orderId); + targets.push({ orderId, pageNum }); + } + return targets.sort((a, b) => a.pageNum - b.pageNum); +} + async function clickCustomerDetailFromList(page, target) { const clicked = await page.evaluate(({ accountId, loginName }) => { const normalize = (value) => String(value || '').replace(/\s+/g, '').trim(); @@ -2006,6 +2048,54 @@ async function clickCustomerDetailFromListWithRetry(page, target) { return false; } +async function clickOrderDetailFromList(page, target) { + const clicked = await page.evaluate(({ orderId }) => { + const normalize = (value) => String(value || '').replace(/\s+/g, '').trim(); + const rows = Array.from(document.querySelectorAll('table tbody tr')); + const targetRow = rows.find((row) => { + const text = normalize(row.innerText || row.textContent || ''); + return text.includes(orderId); + }); + if (!targetRow) { + return false; + } + + const detailButton = Array.from(targetRow.querySelectorAll('button, a, span')) + .find((node) => /详情/.test(String(node.textContent || '').trim())); + if (!detailButton) { + return false; + } + + detailButton.scrollIntoView({ block: 'center', inline: 'center', behavior: 'instant' }); + detailButton.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: true })); + return true; + }, target).catch(() => false); + + if (clicked) { + await sleep(1200); + } + return clicked; +} + +async function clickOrderDetailFromListWithRetry(page, target) { + const attempts = [target.pageNum, Math.max(1, target.pageNum - 1), target.pageNum + 1]; + for (const pageNum of attempts) { + if (pageNum > 0 && pageNum !== target.pageNum) { + const reached = await jumpToOrderPage(page, pageNum); + if (!reached) { + continue; + } + await waitForStableOrderList(page); + } + + const clicked = await clickOrderDetailFromList(page, target); + if (clicked) { + return true; + } + } + return false; +} + async function jumpToCustomerPage(page, pageNum) { const reached = await jumpToPage(page, pageNum); if (reached) { @@ -2014,12 +2104,26 @@ async function jumpToCustomerPage(page, pageNum) { return reached; } +async function jumpToOrderPage(page, pageNum) { + const reached = await jumpToPage(page, pageNum); + if (reached) { + console.log(`[订单详情] 已跳转到第 ${pageNum} 页`); + } + return reached; +} + async function waitForStableCustomerList(page) { await waitForTableRows(page).catch(() => null); await sleep(600); await waitForTableRows(page).catch(() => null); } +async function waitForStableOrderList(page) { + await waitForTableRows(page).catch(() => null); + await sleep(600); + await waitForTableRows(page).catch(() => null); +} + async function recoverCustomerListState(page, pageNum) { await waitUntilReady(page, datasets.customers.heading).catch(() => null); await trySetPageSize(page, datasets.customers.pageSize).catch(() => null); @@ -2029,6 +2133,15 @@ async function recoverCustomerListState(page, pageNum) { } } +async function recoverOrderListState(page, pageNum) { + await waitUntilReady(page, datasets.orders.heading).catch(() => null); + await trySetPageSize(page, datasets.orders.pageSize).catch(() => null); + if (pageNum > 0) { + await jumpToOrderPage(page, pageNum).catch(() => null); + await waitForStableOrderList(page).catch(() => null); + } +} + function isValidOrderId(orderId) { const value = String(orderId || '').trim(); if (!value) return false;