python切换node的版本

This commit is contained in:
ray
2026-04-28 10:05:38 +08:00
parent a008d416a3
commit 552d840f3f
10 changed files with 1524 additions and 344 deletions

View File

@@ -3,9 +3,21 @@ import cron from 'node-cron';
import fs from 'node:fs';
import path from 'node:path';
import readline from 'node:readline';
import { execSync } from 'node:child_process';
import { config, datasets } from './config.js';
import { sendLoginAlert } from './notify.js';
import { sendLoginAlert, sendRuntimeErrorAlert } from './notify.js';
import {
closeDbPool,
getLatestBillConsumptionTimeFromDb,
getLatestMessageTimeFromDb,
getLatestOrderTimeFromDb,
hasDbConfig,
upsertBills,
upsertCustomerDetails,
upsertCustomers,
upsertMessages,
upsertOrderDetails,
upsertOrders,
} from './db.js';
import {
diffRecords,
loadCurrentState,
@@ -121,14 +133,56 @@ async function runtimeCheckpoint(label) {
await controller.waitIfPaused(label);
}
/**
 * Removes stale browser-profile lock entries from `config.userDataDir` so a
 * persistent browser context can be launched after an unclean shutdown.
 *
 * An entry is considered stale when its own mtime is at least 10 minutes old;
 * younger entries are kept and logged as active. Failures while inspecting or
 * removing one entry are logged as warnings and do not stop the loop.
 */
function clearStaleBrowserProfileLocks() {
  const lockFiles = ['SingletonLock', 'SingletonCookie', 'SingletonSocket'];
  const now = Date.now();
  const staleMs = 10 * 60 * 1000;

  for (const fileName of lockFiles) {
    const filePath = path.join(config.userDataDir, fileName);
    try {
      // Use lstat rather than existsSync/statSync: those follow symlinks, so a
      // lock entry that is a dangling symlink would be reported as missing and
      // never cleaned up. lstat inspects the link itself.
      // NOTE(review): Chromium on Linux/macOS is understood to create these
      // entries as symlinks whose target is not a real file — confirm.
      const stat = fs.lstatSync(filePath, { throwIfNoEntry: false });
      if (!stat) {
        continue;
      }

      const ageMs = now - stat.mtimeMs;
      if (ageMs < staleMs) {
        console.log(`[浏览器锁] 检测到活跃锁文件,保留: ${fileName}`);
        continue;
      }

      // rmSync unlinks the entry itself (it does not follow a symlink).
      fs.rmSync(filePath, { force: true });
      console.log(`[浏览器锁] 已清理陈旧锁文件: ${fileName}`);
    } catch (error) {
      console.warn(`[浏览器锁] 清理 ${fileName} 失败: ${error.message}`);
    }
  }
}
async function getContext() {
if (_context) return _context;
_context = await chromium.launchPersistentContext(config.userDataDir, {
channel: 'chrome',
clearStaleBrowserProfileLocks();
const launchOptions = {
headless: config.headless,
acceptDownloads: true,
downloadsPath: config.downloadDir,
});
};
if (config.browserChannel) {
launchOptions.channel = config.browserChannel;
}
if (config.browserExecutablePath) {
launchOptions.executablePath = config.browserExecutablePath;
}
try {
_context = await chromium.launchPersistentContext(config.userDataDir, launchOptions);
} catch (error) {
const browserHint = config.browserExecutablePath
? `executablePath=${config.browserExecutablePath}`
: config.browserChannel
? `channel=${config.browserChannel}`
: 'bundled-chromium';
throw new Error(`浏览器启动失败(${browserHint})。请确认没有其他浏览器占用 .browser 目录,或删除 .browser 后重新执行 npm run login。原始错误: ${error.message}`);
}
await restoreStorageState(_context);
return _context;
}
@@ -185,6 +239,91 @@ function loadLatestBillsCheckpoint() {
}
}
/**
 * Returns a new Date that lies `days` calendar days before `dateValue`.
 * The input is copied through the Date constructor and is never mutated.
 *
 * @param {Date|string|number} dateValue - Anything accepted by `new Date()`.
 * @param {number} days - Number of days to go back (local-time day arithmetic).
 * @returns {Date} The shifted copy.
 */
function subtractDays(dateValue, days) {
  const shifted = new Date(dateValue);
  const targetDayOfMonth = shifted.getDate() - days;
  // setDate rolls over month/year boundaries for out-of-range day numbers.
  shifted.setDate(targetDayOfMonth);
  return shifted;
}
/**
 * Parses a database date/time value into a Date.
 *
 * Strings have their first space replaced by `T` (so `YYYY-MM-DD HH:mm:ss`
 * becomes an ISO-like local date-time) before being handed to `new Date()`.
 * Date instances are accepted as well and returned as a copy; previously they
 * were stringified, mangled by the space replacement, and rejected as invalid.
 *
 * @param {string|Date|null|undefined} value - Raw value to parse.
 * @returns {Date|null} The parsed date, or null for empty/unparseable input.
 */
function parseDbDateTime(value) {
  if (value instanceof Date) {
    // Copy so callers can never mutate the value they passed in.
    return Number.isNaN(value.getTime()) ? null : new Date(value.getTime());
  }

  const normalized = String(value || '').trim();
  if (!normalized) {
    return null;
  }

  const parsed = new Date(normalized.replace(' ', 'T'));
  return Number.isNaN(parsed.getTime()) ? null : parsed;
}
/**
 * Formats a Date as `<formatDate(date)> HH:mm:ss` using the local-time
 * hour, minute and second, each zero-padded to two digits.
 *
 * @param {Date} date - The date to format.
 * @returns {string} The date part from `formatDate` followed by the time.
 */
function formatDateTime(date) {
  const datePart = formatDate(date);
  const twoDigits = (component) => String(component).padStart(2, '0');
  const hours = twoDigits(date.getHours());
  const minutes = twoDigits(date.getMinutes());
  const seconds = twoDigits(date.getSeconds());
  return `${datePart} ${hours}:${minutes}:${seconds}`;
}
/**
 * Wraps one start/end pair into a one-element list of date-window objects.
 * The same values are exposed under both the `windowStart`/`windowEnd` and
 * the `start`/`end` keys.
 *
 * @param {string} startDate - Window start.
 * @param {string} endDate - Window end.
 * @returns {Array<{windowStart: string, windowEnd: string, start: string, end: string}>}
 */
function buildSingleDateWindow(startDate, endDate) {
  const singleWindow = {
    windowStart: startDate,
    windowEnd: endDate,
    start: startDate,
    end: endDate,
  };
  return [singleWindow];
}
/**
 * Writes diagnostic artifacts for a failed run under
 * `<config.errorDir>/<metadata.dataset || 'general'>/`:
 * a `<stamp>.json` file describing the failure and, when a page is given and
 * the screenshot succeeds, a full-page `<stamp>.png`.
 *
 * @param {object|null} page - Page to read the URL from and screenshot; may be null.
 * @param {object} [metadata] - Extra fields copied into the JSON payload.
 *   `metadata.error` (if present) supplies the stack trace and `metadata.dataset`
 *   selects the sub-directory.
 * @returns {Promise<{jsonPath: string, screenshotPath: string}>} Paths of the
 *   written files; `screenshotPath` is '' when no screenshot was saved.
 */
async function captureErrorArtifacts(page, metadata = {}) {
  const stamp = nowStamp();
  const artifactDir = path.join(config.errorDir, metadata.dataset || 'general');
  fs.mkdirSync(artifactDir, { recursive: true });

  const jsonPath = path.join(artifactDir, `${stamp}.json`);
  const screenshotPath = path.join(artifactDir, `${stamp}.png`);

  // Keep the raw error out of the spread: an Error has no enumerable own
  // properties by default, so JSON.stringify would emit it as a useless `{}`.
  const { error, ...details } = metadata;
  const payload = {
    ...details,
    capturedAt: new Date().toISOString(),
    pageUrl: page?.url?.() || '',
    stack: error?.stack || metadata.errorMessage || '',
  };
  if (error instanceof Error) {
    payload.error = { name: error.name, message: error.message };
  } else if (error !== undefined) {
    payload.error = error;
  }
  fs.writeFileSync(jsonPath, JSON.stringify(payload, null, 2));

  let screenshotSaved = false;
  if (page) {
    try {
      await page.screenshot({ path: screenshotPath, fullPage: true });
      screenshotSaved = true;
    } catch (screenshotError) {
      // Best effort: the JSON artifact is still returned without a screenshot.
      console.error('[错误截图] 保存失败:', screenshotError.message);
    }
  }

  return {
    jsonPath,
    screenshotPath: screenshotSaved ? screenshotPath : '',
  };
}
/**
 * Captures error artifacts for a failed sync and sends a runtime-error alert
 * with the JSON file (and the screenshot, when one was saved) attached.
 *
 * This function is best-effort: it is awaited from `catch` blocks right before
 * the original error is rethrown, so any failure while capturing artifacts or
 * sending the alert is logged here instead of being thrown — otherwise it
 * would replace the error that is actually being reported.
 *
 * @param {Error|*} error - The error that aborted the run.
 * @param {object|null} page - Page used for the URL/screenshot; may be null.
 * @param {object} [metadata] - `label`, `dataset` and `mode` describe the run.
 * @returns {Promise<void>} Always resolves.
 */
async function reportRuntimeError(error, page, metadata = {}) {
  // Tolerate non-Error throwables (e.g. `throw undefined`) without crashing.
  const errorMessage = error?.message ?? String(error);

  try {
    const artifacts = await captureErrorArtifacts(page, {
      ...metadata,
      errorMessage,
      error,
    });

    const subject = `[APS同步异常] ${metadata.label || metadata.dataset || 'sync'} failed`;
    const text = [
      `时间: ${new Date().toISOString()}`,
      `任务: ${metadata.label || ''}`,
      `数据集: ${metadata.dataset || ''}`,
      `模式: ${metadata.mode || ''}`,
      `URL: ${page?.url?.() || ''}`,
      `错误: ${errorMessage}`,
      `JSON: ${artifacts.jsonPath}`,
      artifacts.screenshotPath ? `截图: ${artifacts.screenshotPath}` : '截图: 保存失败',
    ].join('\n');

    const attachments = [{ filename: path.basename(artifacts.jsonPath), path: artifacts.jsonPath }];
    if (artifacts.screenshotPath) {
      attachments.push({ filename: path.basename(artifacts.screenshotPath), path: artifacts.screenshotPath });
    }

    await sendRuntimeErrorAlert({ subject, text, attachments });
  } catch (reportError) {
    console.error('[错误上报] 上报失败:', reportError?.message ?? reportError);
  }
}
async function getPageBodyPreview(page) {
return page
.evaluate(() => document.body?.innerText?.substring(0, 500) || '(空)')
@@ -258,32 +397,41 @@ export async function syncAll() {
const runtimeController = getRuntimeController();
runtimeController.bind();
const context = await getContext();
let page = null;
try {
const summary = { startedAt: new Date().toISOString(), datasets: {} };
const page = context.pages()[0] || (await context.newPage());
page = context.pages()[0] || (await context.newPage());
summary.datasets.customers = await syncCustomers(page);
summary.datasets.customerDetails = await syncCustomerDetails(page);
summary.datasets.orders = await syncOrders(page);
if (config.fullSync) {
summary.datasets.customers = await syncCustomers(page);
summary.datasets.customerDetails = await syncCustomerDetails(page);
}
summary.datasets.orders = await syncOrders(page, { incremental: !config.fullSync });
// syncOrders 完成后,从最新的 orders.json 读取 orderId 列表
const latestOrders = loadCurrentState('orders', datasets.orders.uniqueKey);
const orderIdsForDetail = collectValidOrderIds(latestOrders.records || []);
summary.datasets.orderDetails = await syncOrderDetails(page, orderIdsForDetail);
summary.datasets.bills = await syncBills(page);
summary.datasets.bills = await syncBills(page, { incremental: !config.fullSync });
summary.datasets.messages = await syncMessages(page, { incremental: !config.fullSync });
summary.finishedAt = new Date().toISOString();
const stamp = nowStamp();
saveRunSummary(stamp, summary);
return summary;
} catch (error) {
await reportRuntimeError(error, page, { label: 'syncAll', dataset: 'all', mode: config.fullSync ? 'full' : 'incremental' });
throw error;
} finally {
if (config.closeBrowser) {
await closeContextIfNeeded();
} else {
console.log('浏览器保持运行');
}
await closeDbPool();
runtimeController.unbind();
}
}
@@ -292,10 +440,11 @@ export async function syncBillsOnly(options = {}) {
const runtimeController = getRuntimeController();
runtimeController.bind();
const context = await getContext();
let page = null;
try {
const summary = { startedAt: new Date().toISOString(), datasets: {} };
const page = context.pages()[0] || (await context.newPage());
page = context.pages()[0] || (await context.newPage());
summary.datasets.bills = await syncBills(page, options);
summary.finishedAt = new Date().toISOString();
@@ -303,12 +452,45 @@ export async function syncBillsOnly(options = {}) {
const stamp = nowStamp();
saveRunSummary(stamp, summary);
return summary;
} catch (error) {
await reportRuntimeError(error, page, { label: 'syncBillsOnly', dataset: 'bills', mode: options.incremental ? 'incremental' : 'full' });
throw error;
} finally {
if (config.closeBrowser) {
await closeContextIfNeeded();
} else {
console.log('浏览器保持运行');
}
await closeDbPool();
runtimeController.unbind();
}
}
/**
 * Runs only the messages sync and saves a run summary.
 *
 * On failure the error is reported through `reportRuntimeError` and rethrown.
 * Cleanup is layered so each step runs even if an earlier one throws: the
 * browser context is closed when `config.closeBrowser` is set, then the DB
 * pool is closed, and the runtime controller is always unbound — including
 * when obtaining the browser context itself fails.
 *
 * @param {object} [options] - Passed through to `syncMessages`;
 *   `options.incremental` also labels the mode in the error report.
 * @returns {Promise<object>} Summary with `startedAt`, `finishedAt` and `datasets.messages`.
 */
export async function syncMessagesOnly(options = {}) {
  const runtimeController = getRuntimeController();
  runtimeController.bind();

  try {
    // Inside the outer try so a browser launch failure still unbinds the controller.
    const context = await getContext();
    let page = null;

    try {
      const summary = { startedAt: new Date().toISOString(), datasets: {} };
      page = context.pages()[0] || (await context.newPage());

      summary.datasets.messages = await syncMessages(page, options);
      summary.finishedAt = new Date().toISOString();

      const stamp = nowStamp();
      saveRunSummary(stamp, summary);
      return summary;
    } catch (error) {
      await reportRuntimeError(error, page, { label: 'syncMessagesOnly', dataset: 'messages', mode: options.incremental ? 'incremental' : 'full' });
      throw error;
    } finally {
      try {
        if (config.closeBrowser) {
          await closeContextIfNeeded();
        } else {
          console.log('浏览器保持运行');
        }
      } finally {
        // Release DB connections even if closing the browser threw.
        await closeDbPool();
      }
    }
  } finally {
    runtimeController.unbind();
  }
}
@@ -319,22 +501,11 @@ export async function scheduleSync() {
config.cron,
async () => {
try {
console.log(`[${new Date().toISOString()}] 开始执行同步`);
const summary = await syncAll();
console.log(`[${new Date().toISOString()}] 开始执行同步 mode=${config.scheduleMode}`);
const summary = config.scheduleMode === 'full'
? await syncAll()
: await syncAllIncremental();
console.log(`[${new Date().toISOString()}] 同步完成`, JSON.stringify(summary, null, 2));
try {
const scriptPath = path.resolve(config.rootDir, config.dbSyncScript);
const incrementalFlag = config.fullSync ? '' : ' --incremental';
console.log(`[入库] 执行 ${scriptPath}${incrementalFlag ? ' (增量模式)' : ''}`);
const output = execSync(`python "${scriptPath}"${incrementalFlag}`, {
cwd: path.dirname(scriptPath),
encoding: 'utf-8',
timeout: 120000,
});
console.log(output);
} catch (e) {
console.error('[入库] 失败:', e.message);
}
} catch (error) {
console.error(`[${new Date().toISOString()}] 同步失败`, error);
}
@@ -343,6 +514,40 @@ export async function scheduleSync() {
);
}
/**
 * Runs the incremental sync pipeline: orders, order details (for the order ids
 * found in the current `orders` state), bills and messages, then saves a run
 * summary tagged `mode: 'incremental'`.
 *
 * On failure the error is reported through `reportRuntimeError` and rethrown.
 * Cleanup is layered so each step runs even if an earlier one throws: the
 * browser context is closed when `config.closeBrowser` is set, then the DB
 * pool is closed, and the runtime controller is always unbound — including
 * when obtaining the browser context itself fails.
 *
 * @returns {Promise<object>} Summary with `startedAt`, `finishedAt`, `mode` and per-dataset results.
 */
export async function syncAllIncremental() {
  const runtimeController = getRuntimeController();
  runtimeController.bind();

  try {
    // Inside the outer try so a browser launch failure still unbinds the controller.
    const context = await getContext();
    let page = null;

    try {
      const summary = { startedAt: new Date().toISOString(), mode: 'incremental', datasets: {} };
      page = context.pages()[0] || (await context.newPage());

      summary.datasets.orders = await syncOrders(page, { incremental: true });

      // Order ids are re-read from the stored orders state after syncOrders has run.
      const latestOrders = loadCurrentState('orders', datasets.orders.uniqueKey);
      const orderIdsForDetail = collectValidOrderIds(latestOrders.records || []);
      summary.datasets.orderDetails = await syncOrderDetails(page, orderIdsForDetail);

      summary.datasets.bills = await syncBills(page, { incremental: true });
      summary.datasets.messages = await syncMessages(page, { incremental: true });
      summary.finishedAt = new Date().toISOString();

      const stamp = nowStamp();
      saveRunSummary(stamp, summary);
      return summary;
    } catch (error) {
      await reportRuntimeError(error, page, { label: 'syncAllIncremental', dataset: 'incremental', mode: 'incremental' });
      throw error;
    } finally {
      try {
        if (config.closeBrowser) {
          await closeContextIfNeeded();
        } else {
          console.log('浏览器保持运行');
        }
      } finally {
        // Release DB connections even if closing the browser threw.
        await closeDbPool();
      }
    }
  } finally {
    runtimeController.unbind();
  }
}
async function syncCustomers(page) {
await runtimeCheckpoint('同步客户');
const dataset = datasets.customers;
@@ -350,6 +555,10 @@ async function syncCustomers(page) {
await waitUntilReady(page, dataset.heading);
await trySetPageSize(page, dataset.pageSize);
const records = await scrapePagedTable(page, dataset, {});
if (hasDbConfig()) {
const normalizedRecords = dedupeByHash(normalizeDatasetRecords(dataset, records, {}));
await upsertCustomers(normalizedRecords);
}
return persistDataset(dataset, records, {});
}
@@ -393,20 +602,25 @@ async function syncCustomerDetails(page) {
const detail = await extractCustomerDetail(page);
allDetails.push({ ...detail, __context: { accountId } });
if (hasDbConfig()) {
const normalizedDetail = normalizeDatasetRecords(dataset, [{ ...detail, __context: { accountId } }], {});
await upsertCustomerDetails(normalizedDetail);
}
}
return persistDataset(dataset, dedupeByHash(allDetails), {});
}
async function syncOrders(page) {
async function syncOrders(page, options = {}) {
await runtimeCheckpoint('同步订单');
const dataset = datasets.orders;
const { incremental = false } = options;
let windows;
if (config.fullSync) {
if (!incremental) {
windows = buildMonthlyDateWindows(config.orderStartDate);
} else {
windows = buildIncrementalOrderWindows();
windows = await buildIncrementalOrderWindows();
}
const allRecords = [];
@@ -420,12 +634,16 @@ async function syncOrders(page) {
await trySetPageSize(page, dataset.pageSize);
const records = await scrapePagedTable(page, dataset, window);
allRecords.push(...records);
if (hasDbConfig()) {
const normalizedWindowRecords = dedupeByHash(normalizeDatasetRecords(dataset, records, window));
await upsertOrders(normalizedWindowRecords);
}
}
return persistDataset(dataset, dedupeByHash(allRecords), {});
}
function buildIncrementalOrderWindows() {
async function buildIncrementalOrderWindows() {
const configuredStartDate = normalizeConfiguredDate(config.incrementalOrderStartDate);
if (configuredStartDate) {
const windows = buildMonthlyDateWindows(configuredStartDate);
@@ -433,11 +651,27 @@ function buildIncrementalOrderWindows() {
return windows;
}
const yesterday = new Date();
yesterday.setDate(yesterday.getDate() - 1);
const dateStr = formatDate(yesterday);
console.log(`[增量模式] 订单仅查询: ${dateStr}`);
return [{ windowStart: dateStr, windowEnd: dateStr, start: dateStr, end: dateStr }];
if (!hasDbConfig()) {
const yesterday = new Date();
yesterday.setDate(yesterday.getDate() - 1);
const dateStr = formatDate(yesterday);
console.log(`[增量模式] 未配置数据库,订单仅查询: ${dateStr}`);
return buildSingleDateWindow(dateStr, dateStr);
}
const latestOrderTime = await getLatestOrderTimeFromDb();
const runAt = new Date();
const parsed = parseDbDateTime(latestOrderTime);
if (!parsed) {
const dateStr = formatDate(runAt);
console.log(`[增量模式] 数据库无订单水位,订单仅查询当天: ${dateStr}`);
return buildSingleDateWindow(dateStr, dateStr);
}
const startDate = formatDate(subtractDays(parsed, config.orderIncrementalOverlapDays));
const endDate = formatDate(runAt);
console.log(`[增量模式] 订单窗口: ${startDate} ~ ${endDate} (db_last=${latestOrderTime}, overlap=${config.orderIncrementalOverlapDays}d)`);
return buildSingleDateWindow(startDate, endDate);
}
function normalizeConfiguredDate(value) {
@@ -460,18 +694,20 @@ function normalizeConfiguredDate(value) {
async function syncBills(page, options = {}) {
await runtimeCheckpoint('同步账单');
const dataset = datasets.bills;
const { resume = false } = options;
const { resume = false, incremental = false } = options;
let months;
let latestConsumptionDate = null;
if (config.fullSync) {
if (!incremental) {
months = buildMonthList(config.billStartMonth);
} else {
latestConsumptionDate = getLatestBillConsumptionDate();
const incrementalMonth = latestConsumptionDate?.slice(0, 7)
|| `${new Date().getFullYear()}-${String(new Date().getMonth() + 1).padStart(2, '0')}`;
months = [incrementalMonth];
console.log(`[增量模式] 账单仅查询: ${incrementalMonth}${latestConsumptionDate ? `, 数据库最新消费时间: ${latestConsumptionDate}` : ''}`);
latestConsumptionDate = await getLatestBillConsumptionDate();
const startDate = latestConsumptionDate ? latestConsumptionDate.slice(0, 10) : formatDate(new Date());
const endDate = formatDate(new Date());
const startMonth = startDate.slice(0, 7);
const endMonth = endDate.slice(0, 7);
months = buildMonthList(startMonth).filter((month) => month <= endMonth);
console.log(`[增量模式] 账单窗口: ${startDate} ~ ${endDate}${latestConsumptionDate ? `, 数据库最新消费时间: ${latestConsumptionDate}` : ''}`);
}
const resumeCheckpoint = resume ? loadLatestBillsCheckpoint() : null;
@@ -514,6 +750,9 @@ async function syncBills(page, options = {}) {
onPage: async ({ pageNum, pageRows }) => {
const normalizedPageRows = normalizeDatasetRecords(dataset, pageRows, { month });
monthNormalizedRecords.push(...normalizedPageRows);
if (hasDbConfig()) {
await upsertBills(normalizedPageRows);
}
let checkpointRecords = monthNormalizedRecords;
if (latestConsumptionDate) {
checkpointRecords = monthNormalizedRecords.filter((record) => isAfterLatestConsumptionDate(record, latestConsumptionDate));
@@ -538,6 +777,42 @@ async function syncBills(page, options = {}) {
return persistNormalizedDataset(dataset, dedupeByHash(allNormalizedRecords));
}
/**
 * Scrapes the messages table and persists it as the `messages` dataset.
 *
 * When a database is configured, every scraped page is normalized and upserted
 * as it arrives. In incremental mode (with a database) the persisted record
 * set is additionally filtered to records at or after a watermark: the latest
 * message time stored in the database minus
 * `config.messageIncrementalOverlapDays`.
 *
 * The watermark is read BEFORE scraping. The scrape upserts the freshly
 * scraped rows, so reading it afterwards would presumably yield this run's
 * own newest message time rather than the previous run's, and older new
 * messages could be filtered out.
 * NOTE(review): assumes getLatestMessageTimeFromDb reads the same table that
 * upsertMessages writes — confirm against db.js.
 *
 * @param {object} page - Browser page used for navigation and scraping.
 * @param {object} [options]
 * @param {boolean} [options.incremental=false] - Enable watermark filtering.
 * @returns {Promise<*>} Whatever `persistDataset` returns.
 */
async function syncMessages(page, options = {}) {
  await runtimeCheckpoint('同步消息');
  const dataset = datasets.messages;
  const { incremental = false } = options;
  const dbEnabled = hasDbConfig();

  let latestMessageTime = null;
  let watermark = null;
  if (incremental && dbEnabled) {
    try {
      latestMessageTime = await getLatestMessageTimeFromDb();
      const latest = latestMessageTime ? parseDbDateTime(latestMessageTime) : null;
      if (latest) {
        watermark = subtractDays(latest, config.messageIncrementalOverlapDays);
      }
    } catch (error) {
      // Best effort: without a watermark all scraped records are persisted.
      console.error('[增量模式] 查询数据库最新消息时间失败:', error.message);
    }
  }

  await page.goto(dataset.url, { waitUntil: 'domcontentloaded' });
  await waitUntilReady(page, dataset.heading);
  await trySetPageSize(page, dataset.pageSize);

  let records = await scrapePagedTable(page, dataset, {}, {
    onPage: dbEnabled
      ? async ({ pageRows }) => {
        const normalizedPageRows = normalizeDatasetRecords(dataset, pageRows, {});
        await upsertMessages(normalizedPageRows);
      }
      : undefined,
  });

  if (watermark) {
    const before = records.length;
    records = records.filter((record) => isAfterLatestMessageTime(record, watermark));
    console.log(`[增量模式] 消息按时间过滤: ${before} -> ${records.length} (db_last=${latestMessageTime}, overlap=${config.messageIncrementalOverlapDays}d)`);
  }

  return persistDataset(dataset, dedupeByHash(records), {});
}
async function saveBillsCheckpoint(dataset, month, pageNum, normalizedRecords) {
const normalized = dedupeByHash(normalizedRecords);
const checkpointName = `${month}-latest`;
@@ -571,16 +846,22 @@ async function moveBillsToResumeStart(page, resumeFromPage) {
return moved;
}
function getLatestBillConsumptionDate() {
const scriptPath = path.resolve(config.rootDir, config.dbSyncScript);
async function getLatestBillConsumptionDate() {
if (!hasDbConfig()) {
console.warn('[增量模式] 未配置数据库连接,无法读取账单水位,回退到当前日期');
return null;
}
try {
const output = execSync(`python "${scriptPath}" --latest-bill-consumption-time`, {
cwd: path.dirname(scriptPath),
encoding: 'utf-8',
timeout: 120000,
}).trim();
const latest = output.split(/\r?\n/).map((line) => line.trim()).filter(Boolean).at(-1) || '';
return /^\d{4}-\d{2}-\d{2}/.test(latest) ? latest.slice(0, 10) : null;
const latest = await getLatestBillConsumptionTimeFromDb();
if (!latest || !/^\d{4}-\d{2}-\d{2}/.test(latest)) {
return null;
}
const parsed = parseDbDateTime(latest);
if (!parsed) {
return latest.slice(0, 10);
}
return formatDate(subtractDays(parsed, config.billIncrementalOverlapDays));
} catch (error) {
console.error('[增量模式] 查询数据库最新账单消费时间失败:', error.message);
return null;
@@ -595,6 +876,18 @@ function isAfterLatestConsumptionDate(record, latestConsumptionDate) {
return consumeDate > latestConsumptionDate;
}
/**
 * Decides whether a message record should be kept for a given watermark.
 *
 * The record's timestamp is the first truthy value among its modification
 * and creation time fields (checked in the order listed below). Records with
 * no usable or parseable timestamp are kept.
 *
 * @param {object} record - Scraped message record.
 * @param {Date} watermarkDate - Lower bound (inclusive).
 * @returns {boolean} True when the record is at/after the watermark or has no parseable time.
 */
function isAfterLatestMessageTime(record, watermarkDate) {
  const timeFields = ['消息修改时间', '修改时间', 'gmtModified', '消息创建时间', '创建时间', 'gmtCreated'];

  let rawTime = '';
  for (const field of timeFields) {
    const candidate = record[field];
    if (candidate) {
      rawTime = candidate;
      break;
    }
  }

  const timeText = String(rawTime).trim();
  const parsedTime = timeText ? parseDbDateTime(timeText) : null;
  return !parsedTime || parsedTime >= watermarkDate;
}
async function syncOrderDetails(page, cachedOrderIds) {
await runtimeCheckpoint('同步订单详情');
const dataset = datasets.orderDetails;
@@ -638,6 +931,10 @@ async function syncOrderDetails(page, cachedOrderIds) {
detail.orderId = orderId;
}
allDetails.push({ ...detail, __context: {} });
if (hasDbConfig()) {
const normalizedDetail = normalizeDatasetRecords(dataset, [{ ...detail, __context: {} }], {});
await upsertOrderDetails(normalizedDetail);
}
}
return persistDataset(dataset, dedupeByHash(allDetails), {});