"""Long-running scheduler: watches for new JSON files and syncs them to MySQL.

Modes (selected with ``--mode``):
    watch   Run an initial sync, then poll for new/changed JSON files
            (default).
    cron    One-shot sync, meant to be called by system cron/launchd. Exits
            with status 1 when at least one file failed to sync.
    daemon  Currently handled exactly like ``watch`` (initial sync + polling).
"""

import time
import sys
import signal
import argparse
import logging
from pathlib import Path
from datetime import datetime
from typing import Optional

from aps_db_sync import APSSyncer, DB_CONFIG, JSON_DIR

LOG_FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
logger = logging.getLogger("aps_scheduler")

# Polling period used by SyncScheduler.run_watch(); overridable via --interval.
WATCH_INTERVAL_SECONDS = 30
# One "<stem>.synced" marker file per successfully synced JSON lives here.
PROCESSED_MARKER_DIR = JSON_DIR / ".aps_sync_processed"

# Longest single sleep while waiting between polls. time.sleep() is resumed
# after a signal handler returns (PEP 475), so the wait is sliced to keep
# shutdown latency bounded by this value instead of by the full interval.
_SLEEP_SLICE_SECONDS = 1.0


def _update_watch_interval(value: int):
    """Override the module-wide polling interval (in seconds)."""
    global WATCH_INTERVAL_SECONDS
    WATCH_INTERVAL_SECONDS = value


def _positive_int(text: str) -> int:
    """argparse ``type=`` converter that accepts only integers >= 1."""
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid int value: {text!r}"
        ) from None
    if value < 1:
        raise argparse.ArgumentTypeError("must be a positive integer")
    return value


class SyncScheduler:
    """Find JSON exports in ``JSON_DIR`` and push them through ``APSSyncer``.

    A file counts as processed when its marker in ``PROCESSED_MARKER_DIR``
    exists and is at least as new as the JSON file itself, so a rewritten
    JSON file is picked up again.
    """

    def __init__(self, db_config: Optional[dict] = None):
        """Create the marker directory and install SIGINT/SIGTERM handlers.

        Args:
            db_config: Connection settings handed to ``APSSyncer``; falls back
                to ``DB_CONFIG`` when omitted or empty.
        """
        self.db_config = db_config or DB_CONFIG
        # Cleared by the signal handler to stop run_watch().
        self.running = True
        # Number of files that failed during the most recent run_once().
        self.last_failed = 0
        # parents=True so a not-yet-created JSON_DIR does not abort start-up.
        PROCESSED_MARKER_DIR.mkdir(parents=True, exist_ok=True)
        signal.signal(signal.SIGINT, self._shutdown)
        signal.signal(signal.SIGTERM, self._shutdown)

    def _shutdown(self, signum, frame):
        """Signal handler: request a graceful stop of the watch loop."""
        logger.info("Shutdown signal received, stopping...")
        self.running = False

    def _marker_path(self, json_path: Path) -> Path:
        """Return the marker file path that corresponds to ``json_path``."""
        return PROCESSED_MARKER_DIR / f"{json_path.stem}.synced"

    def _is_processed(self, json_path: Path) -> bool:
        """Return True when the marker exists and is not older than the JSON.

        Raises:
            FileNotFoundError: if ``json_path`` itself no longer exists.
        """
        try:
            marker_mtime = self._marker_path(json_path).stat().st_mtime
        except FileNotFoundError:
            # No marker yet -> never synced.
            return False
        return marker_mtime >= json_path.stat().st_mtime

    def _mark_processed(self, json_path: Path):
        """Write/refresh the marker; its mtime records the sync time."""
        marker = self._marker_path(json_path)
        marker.write_text(datetime.now().isoformat())

    def find_unprocessed_files(self) -> list[Path]:
        """Return matching JSON files still to be synced, oldest mtime first.

        Files that disappear between the directory scan and the ``stat`` call
        are skipped instead of aborting the scan.
        """
        pattern = "aps_aliyun_customers_with_bills_*.json"
        pending = []
        for path in JSON_DIR.glob(pattern):
            try:
                mtime = path.stat().st_mtime
                if self._is_processed(path):
                    continue
            except FileNotFoundError:
                # Removed/renamed while we were scanning; nothing to sync.
                continue
            pending.append((mtime, path))
        # Sort on mtime only (stable), so ties keep glob order.
        pending.sort(key=lambda item: item[0])
        return [path for _, path in pending]

    def sync_file(self, json_path: Path) -> bool:
        """Sync one JSON file and mark it processed.

        Returns:
            True on success; False if syncing or writing the marker raised
            (the error is logged with its traceback, not propagated).
        """
        logger.info("Syncing: %s", json_path.name)
        try:
            syncer = APSSyncer(db_config=self.db_config)
            syncer.sync_from_json(str(json_path))
            self._mark_processed(json_path)
            return True
        except Exception as e:
            # Boundary handler: one bad file must not stop the scheduler.
            logger.exception("Sync failed for %s: %s", json_path.name, e)
            return False

    def run_once(self):
        """Sync every unprocessed file once.

        Returns:
            The number of files synced successfully. The number of failures
            is stored in ``self.last_failed``.
        """
        self.last_failed = 0
        unprocessed = self.find_unprocessed_files()
        if not unprocessed:
            logger.info("No unprocessed JSON files found")
            return 0
        count = 0
        for f in unprocessed:
            if self.sync_file(f):
                count += 1
        self.last_failed = len(unprocessed) - count
        logger.info("Processed %d/%d files", count, len(unprocessed))
        return count

    def _wait(self, seconds: float) -> None:
        """Sleep up to ``seconds``, returning early once a stop is requested."""
        deadline = time.monotonic() + seconds
        while self.running:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                return
            time.sleep(min(remaining, _SLEEP_SLICE_SECONDS))

    def run_watch(self):
        """Run an initial sync, then poll until a shutdown signal arrives."""
        logger.info("Watching %s for new JSON files (interval=%ds)",
                    JSON_DIR, WATCH_INTERVAL_SECONDS)
        self.run_once()
        while self.running:
            self._wait(WATCH_INTERVAL_SECONDS)
            if not self.running:
                break
            for f in self.find_unprocessed_files():
                if not self.running:
                    break
                self.sync_file(f)
        logger.info("Watcher stopped")


def main():
    """Parse CLI arguments and run the scheduler in the requested mode."""
    parser = argparse.ArgumentParser(description="APS Sync Scheduler")
    parser.add_argument("--mode", choices=["watch", "cron", "daemon"],
                        default="watch",
                        help="watch=file watcher, cron=one-shot, "
                             "daemon=watch with initial sync")
    parser.add_argument("--host", default=DB_CONFIG["host"])
    parser.add_argument("--port", type=int, default=DB_CONFIG["port"])
    parser.add_argument("--user", default=DB_CONFIG["user"])
    parser.add_argument("--password", default=DB_CONFIG["password"])
    parser.add_argument("--database", default=DB_CONFIG["database"])
    parser.add_argument("--interval", type=_positive_int,
                        default=WATCH_INTERVAL_SECONDS,
                        help="Watch interval in seconds")
    args = parser.parse_args()

    _update_watch_interval(args.interval)

    config = {
        "host": args.host,
        "port": args.port,
        "user": args.user,
        "password": args.password,
        "database": args.database,
        "charset": "utf8mb4",
    }

    scheduler = SyncScheduler(db_config=config)

    if args.mode == "cron":
        scheduler.run_once()
        # Non-zero exit lets cron/launchd notice that some file did not sync.
        sys.exit(1 if scheduler.last_failed else 0)
    else:
        scheduler.run_watch()


if __name__ == "__main__":
    main()