订单详情调整抓取方式
This commit is contained in:
@@ -107,6 +107,7 @@ export const datasets = {
|
||||
uniqueKey: (record) => record.orderId || record.__hash,
|
||||
normalize: (record, context) => ({
|
||||
orderId: record['订单号'] || '',
|
||||
listPageNum: context.pageNum || '',
|
||||
customerAccount: (record['客户账号'] || '').replace(/\s+/g, ''),
|
||||
customerCategory: record['客户分类'] || '',
|
||||
orderType: record['订单类型'] || '',
|
||||
|
||||
@@ -1337,31 +1337,45 @@ async function syncOrderDetails(page, cachedOrderIds, options = {}) {
|
||||
const dataset = datasets.orderDetails;
|
||||
const resumeCheckpoint = options.resume ? loadLatestOrderDetailsCheckpoint() : null;
|
||||
|
||||
// 使用传入的 orderId 列表(在 syncOrders 覆盖 orders.json 之前缓存的)
|
||||
const allOrderIds = cachedOrderIds || [];
|
||||
const ordersState = loadCurrentState('orders', datasets.orders.uniqueKey);
|
||||
const orderTargets = collectOrderDetailTargets(ordersState.records || [], cachedOrderIds || []);
|
||||
|
||||
if (allOrderIds.length === 0) {
|
||||
console.log('[订单详情] 本地无订单数据,跳过');
|
||||
if (orderTargets.length === 0) {
|
||||
console.log('[订单详情] 本地无订单定位数据,跳过');
|
||||
return persistDataset(dataset, [], {});
|
||||
}
|
||||
|
||||
console.log(`[订单详情] 共 ${allOrderIds.length} 个订单需要获取详情`);
|
||||
console.log(`[订单详情] 共 ${orderTargets.length} 个订单需要获取详情`);
|
||||
const allDetails = Array.isArray(resumeCheckpoint?.records) ? resumeCheckpoint.records : [];
|
||||
const startIndex = Number.parseInt(String(resumeCheckpoint?.currentIndex || 0), 10) || 0;
|
||||
if (startIndex > 0) {
|
||||
console.log(`[订单详情续爬] 从 checkpoint 恢复: index=${startIndex}, records=${allDetails.length}`);
|
||||
}
|
||||
const detailBaseUrl = 'https://aps.aliyun.com/?spm=5176.12818093.top-nav.ditem-fx.785716d0LKDpKT#/detail/order/~/costCenter/order/detail/';
|
||||
let currentListPage = 0;
|
||||
|
||||
for (let index = startIndex; index < allOrderIds.length; index += 1) {
|
||||
await runtimeCheckpoint(`订单详情 ${index + 1}/${allOrderIds.length}`);
|
||||
const orderId = allOrderIds[index];
|
||||
console.log(`[订单详情] ${index + 1}/${allOrderIds.length} orderId=${orderId}`);
|
||||
await page.goto(datasets.orders.url, { waitUntil: 'domcontentloaded' });
|
||||
await waitUntilReady(page, datasets.orders.heading);
|
||||
await trySetPageSize(page, datasets.orders.pageSize);
|
||||
|
||||
// 先跳 about:blank 再跳详情URL(强制 SPA 完整重新加载)
|
||||
await page.goto('about:blank');
|
||||
await sleep(300);
|
||||
await page.goto(`${detailBaseUrl}${orderId}?projectId=`, { waitUntil: 'domcontentloaded' });
|
||||
for (let index = startIndex; index < orderTargets.length; index += 1) {
|
||||
await runtimeCheckpoint(`订单详情 ${index + 1}/${orderTargets.length}`);
|
||||
const target = orderTargets[index];
|
||||
console.log(`[订单详情] ${index + 1}/${orderTargets.length} orderId=${target.orderId} page=${target.pageNum}`);
|
||||
|
||||
if (target.pageNum > 0 && currentListPage !== target.pageNum) {
|
||||
const reached = await jumpToOrderPage(page, target.pageNum);
|
||||
if (!reached) {
|
||||
console.warn(`[订单详情] 无法跳到第 ${target.pageNum} 页,跳过 ${target.orderId}`);
|
||||
continue;
|
||||
}
|
||||
currentListPage = target.pageNum;
|
||||
}
|
||||
|
||||
const clicked = await clickOrderDetailFromListWithRetry(page, target);
|
||||
if (!clicked) {
|
||||
console.warn(`[订单详情] 列表中未找到 orderId=${target.orderId},跳过`);
|
||||
continue;
|
||||
}
|
||||
|
||||
try {
|
||||
await page.waitForFunction(
|
||||
@@ -1371,13 +1385,15 @@ async function syncOrderDetails(page, cachedOrderIds, options = {}) {
|
||||
);
|
||||
await sleep(1000);
|
||||
} catch {
|
||||
console.warn(`[订单详情] ${orderId} 详情页加载超时,跳过`);
|
||||
console.warn(`[订单详情] ${target.orderId} 详情页加载超时,跳过`);
|
||||
await page.goBack({ waitUntil: 'domcontentloaded' }).catch(() => null);
|
||||
await recoverOrderListState(page, currentListPage).catch(() => null);
|
||||
continue;
|
||||
}
|
||||
|
||||
const detail = await extractOrderDetail(page);
|
||||
if (!isValidOrderId(detail.orderId)) {
|
||||
detail.orderId = orderId;
|
||||
detail.orderId = target.orderId;
|
||||
}
|
||||
allDetails.push({ ...detail, __context: {} });
|
||||
await saveOrderDetailsCheckpoint(dataset, index + 1, allDetails);
|
||||
@@ -1385,6 +1401,10 @@ async function syncOrderDetails(page, cachedOrderIds, options = {}) {
|
||||
const normalizedDetail = normalizeDatasetRecords(dataset, [{ ...detail, __context: {} }], {});
|
||||
await upsertOrderDetails(normalizedDetail);
|
||||
}
|
||||
|
||||
await page.goBack({ waitUntil: 'domcontentloaded' }).catch(() => null);
|
||||
await recoverOrderListState(page, currentListPage).catch(() => null);
|
||||
currentListPage = target.pageNum;
|
||||
}
|
||||
|
||||
return persistDataset(dataset, dedupeByHash(allDetails), {});
|
||||
@@ -1958,6 +1978,28 @@ function collectCustomerDetailTargets(records) {
|
||||
return targets.sort((a, b) => a.pageNum - b.pageNum);
|
||||
}
|
||||
|
||||
/**
 * Builds the list of order-detail targets from stored order-list records.
 *
 * A record becomes a target only when its `orderId` passes `isValidOrderId`
 * and its `listPageNum` parses to a positive integer. Each order ID is emitted
 * once (the first occurrence wins) and the result is sorted by page number,
 * ascending.
 *
 * @param {Iterable<object>|null|undefined} records - Order records carrying
 *   `orderId` and `listPageNum`. A null/undefined value is treated as an empty
 *   list, and null/undefined entries are skipped instead of throwing.
 * @param {Array<string|number>} [cachedOrderIds=[]] - Optional allow list.
 *   When it contains at least one non-blank ID, only those orders are kept;
 *   when it is empty, no allow-list filtering is applied.
 * @returns {Array<{orderId: string, pageNum: number}>} Targets sorted by `pageNum`.
 */
function collectOrderDetailTargets(records, cachedOrderIds = []) {
  const allowSet = new Set(
    (cachedOrderIds || []).map((value) => String(value || '').trim()).filter(Boolean),
  );
  const targets = [];
  const seen = new Set();

  // Tolerate a missing record list so a caller without state gets [] rather than a TypeError.
  for (const record of records ?? []) {
    const orderId = String(record?.orderId || '').trim();
    const pageNum = Number.parseInt(String(record?.listPageNum || 0), 10) || 0;
    if (!orderId || !isValidOrderId(orderId) || pageNum <= 0) {
      continue;
    }
    // An empty allow list means "no restriction", not "allow nothing".
    if (allowSet.size > 0 && !allowSet.has(orderId)) {
      continue;
    }
    if (seen.has(orderId)) {
      continue;
    }
    seen.add(orderId);
    targets.push({ orderId, pageNum });
  }

  return targets.sort((a, b) => a.pageNum - b.pageNum);
}
|
||||
|
||||
async function clickCustomerDetailFromList(page, target) {
|
||||
const clicked = await page.evaluate(({ accountId, loginName }) => {
|
||||
const normalize = (value) => String(value || '').replace(/\s+/g, '').trim();
|
||||
@@ -2006,6 +2048,54 @@ async function clickCustomerDetailFromListWithRetry(page, target) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Finds the row for `target.orderId` in the currently rendered order table and
 * dispatches a click on that row's "详情" (detail) control.
 *
 * Row lookup prefers a row that has a cell whose whitespace-stripped text is
 * exactly the order ID, so a short ID cannot select a row whose ID merely
 * contains it. If no cell matches exactly, it falls back to a substring match
 * on the whole row text. A blank order ID never matches (a substring match on
 * an empty string would otherwise select the first row).
 *
 * @param {object} page - Page object exposing `evaluate(fn, arg)`; the callback
 *   runs in the page context and must not reference outer variables.
 * @param {{orderId: string}} target - Order to open.
 * @returns {Promise<boolean>} `true` when a click was dispatched, `false` when
 *   the row or its detail control was not found or the evaluation failed.
 */
async function clickOrderDetailFromList(page, target) {
  let clicked = false;
  try {
    clicked = await page.evaluate(({ orderId }) => {
      const normalize = (value) => String(value || '').replace(/\s+/g, '').trim();
      const wantedId = normalize(orderId);
      if (!wantedId) {
        return false;
      }

      const textOf = (node) => normalize(node.innerText || node.textContent || '');
      const rows = Array.from(document.querySelectorAll('table tbody tr'));
      const hasExactCell = (row) => Array.from(row.querySelectorAll('td, th'))
        .some((cell) => textOf(cell) === wantedId);

      // Exact cell match first; substring match keeps the previous behaviour as a fallback.
      const targetRow = rows.find(hasExactCell)
        || rows.find((row) => textOf(row).includes(wantedId));
      if (!targetRow) {
        return false;
      }

      const detailButton = Array.from(targetRow.querySelectorAll('button, a, span'))
        .find((node) => /详情/.test(String(node.textContent || '').trim()));
      if (!detailButton) {
        return false;
      }

      detailButton.scrollIntoView({ block: 'center', inline: 'center', behavior: 'instant' });
      detailButton.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: true }));
      return true;
    }, target);
  } catch (error) {
    // Best effort: a failed evaluation counts as "not clicked", but leave a trace in the log.
    console.warn(`[订单详情] 点击详情按钮失败 orderId=${target?.orderId}: ${error?.message || error}`);
    clicked = false;
  }

  if (clicked) {
    // Pause after the click before the caller inspects the page.
    await sleep(1200);
  }
  return clicked;
}
|
||||
|
||||
/**
 * Tries to open the detail view for `target`, first on the page the list is
 * currently showing and then on the pages directly before and after
 * `target.pageNum`.
 *
 * The caller is expected to have positioned the list on `target.pageNum`
 * before calling. When the order is not found anywhere, the list is moved back
 * to `target.pageNum` before returning, because the caller skips its own page
 * jump when its tracked page already equals the next target's page; without
 * the restore, later orders on the same page would be searched on a
 * neighbouring page.
 *
 * @param {object} page - Page object passed through to the list helpers.
 * @param {{orderId: string, pageNum: number}} target - Order to open and the
 *   list page it was recorded on.
 * @returns {Promise<boolean>} `true` once a detail click was dispatched,
 *   otherwise `false`.
 */
async function clickOrderDetailFromListWithRetry(page, target) {
  const homePageNum = target.pageNum;

  // First attempt: the page the list is already on (no navigation).
  if (await clickOrderDetailFromList(page, target)) {
    return true;
  }

  // Neighbouring pages, without duplicates, page 0, or the home page itself.
  const neighbourPages = [...new Set([homePageNum - 1, homePageNum + 1])]
    .filter((pageNum) => pageNum > 0 && pageNum !== homePageNum);

  let navigated = false;
  for (const pageNum of neighbourPages) {
    // Even a failed jump may have moved the list, so mark it before trying.
    navigated = true;
    const reached = await jumpToOrderPage(page, pageNum);
    if (!reached) {
      continue;
    }
    await waitForStableOrderList(page);

    if (await clickOrderDetailFromList(page, target)) {
      return true;
    }
  }

  if (navigated && homePageNum > 0) {
    // Best effort: put the list back where the caller believes it is.
    try {
      const restored = await jumpToOrderPage(page, homePageNum);
      if (restored) {
        await waitForStableOrderList(page);
      }
    } catch (error) {
      console.warn(`[订单详情] 无法回到第 ${homePageNum} 页: ${error?.message || error}`);
    }
  }
  return false;
}
|
||||
|
||||
async function jumpToCustomerPage(page, pageNum) {
|
||||
const reached = await jumpToPage(page, pageNum);
|
||||
if (reached) {
|
||||
@@ -2014,12 +2104,26 @@ async function jumpToCustomerPage(page, pageNum) {
|
||||
return reached;
|
||||
}
|
||||
|
||||
/**
 * Moves the order list to the given page via `jumpToPage` and logs a
 * confirmation line when that helper reports success.
 *
 * @param {object} page - Page object forwarded to `jumpToPage`.
 * @param {number} pageNum - List page to jump to.
 * @returns {Promise<*>} Whatever `jumpToPage` resolved with (truthy on success).
 */
async function jumpToOrderPage(page, pageNum) {
  const outcome = await jumpToPage(page, pageNum);
  if (!outcome) {
    return outcome;
  }

  console.log(`[订单详情] 已跳转到第 ${pageNum} 页`);
  return outcome;
}
|
||||
|
||||
/**
 * Waits for table rows, pauses 600 ms, then waits for table rows again.
 * A rejection from `waitForTableRows` is ignored on both passes.
 * Presumably the second pass lets a re-rendering customer list settle.
 *
 * @param {object} page - Page object forwarded to `waitForTableRows`.
 * @returns {Promise<void>}
 */
async function waitForStableCustomerList(page) {
  const ignoreFailure = () => null;

  await waitForTableRows(page).catch(ignoreFailure);
  await sleep(600);
  await waitForTableRows(page).catch(ignoreFailure);
}
|
||||
|
||||
/**
 * Waits for table rows, pauses 600 ms, then waits for table rows again.
 * A rejection from `waitForTableRows` is ignored on both passes.
 * Presumably the second pass lets a re-rendering order list settle.
 *
 * @param {object} page - Page object forwarded to `waitForTableRows`.
 * @returns {Promise<void>}
 */
async function waitForStableOrderList(page) {
  const rowsOrNothing = () => waitForTableRows(page).catch(() => null);

  await rowsOrNothing();
  await sleep(600);
  await rowsOrNothing();
}
|
||||
|
||||
async function recoverCustomerListState(page, pageNum) {
|
||||
await waitUntilReady(page, datasets.customers.heading).catch(() => null);
|
||||
await trySetPageSize(page, datasets.customers.pageSize).catch(() => null);
|
||||
@@ -2029,6 +2133,15 @@ async function recoverCustomerListState(page, pageNum) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Best-effort restore of the order list view: waits for the orders heading,
 * re-applies the configured page size and, when `pageNum` is positive, jumps
 * back to that page and waits for the table to settle. Every step ignores its
 * own rejection, so one failed step does not stop the following ones.
 *
 * @param {object} page - Page object forwarded to the list helpers.
 * @param {number} pageNum - List page to return to; values that are not
 *   greater than 0 skip the jump.
 * @returns {Promise<void>}
 */
async function recoverOrderListState(page, pageNum) {
  const ignore = () => null;

  await waitUntilReady(page, datasets.orders.heading).catch(ignore);
  await trySetPageSize(page, datasets.orders.pageSize).catch(ignore);

  if (!(pageNum > 0)) {
    return;
  }

  await jumpToOrderPage(page, pageNum).catch(ignore);
  await waitForStableOrderList(page).catch(ignore);
}
|
||||
|
||||
function isValidOrderId(orderId) {
|
||||
const value = String(orderId || '').trim();
|
||||
if (!value) return false;
|
||||
|
||||
Reference in New Issue
Block a user