Spaces:
Sleeping
Sleeping
| """Report ProBas index build progress. | |
| Run this in a second terminal while `app.py` is building: | |
| python check_progress.py | |
| It reads the status file the app writes after every checkpoint wave under | |
| indexes/probas_rag/ and prints how many records are embedded, the throughput, | |
| and the ETA. The numbers update each time a wave completes (every | |
| PROBAS_CHECKPOINT_EVERY waves), which is also the point a restart resumes from. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import time | |
| from pathlib import Path | |
# Relative directory that main() scans for the finished bundle (bundle_*.json)
# and the per-checkpoint status files (status_v*_*.json).
CACHE_DIR = Path("indexes", "probas_rag")
def format_duration(seconds: float | None) -> str:
    """Render a duration in seconds as a compact ``1h02m03s``-style string.

    Args:
        seconds: Duration in seconds. ``None`` or a non-finite float
            (``inf`` / ``nan``) means the duration is not known.

    Returns:
        ``"unknown"`` when no usable value is given. Otherwise
        ``"<h>h<mm>m<ss>s"`` when at least one hour, ``"<m>m<ss>s"`` when at
        least one minute, else ``"<s>s"``. Negative values are clamped to
        zero and fractional seconds are truncated.
    """
    import math  # function-local: `math` is not imported at module level

    # json.loads accepts Infinity/NaN, so a status file may carry them.
    # int(inf) raises OverflowError and max(0, nan) silently yields 0, so
    # neither can be formatted meaningfully.
    if seconds is None or not math.isfinite(seconds):
        return "unknown"

    whole = int(max(0, seconds))
    hours, remainder = divmod(whole, 3600)
    minutes, secs = divmod(remainder, 60)
    if hours:
        return f"{hours}h{minutes:02d}m{secs:02d}s"
    if minutes:
        return f"{minutes}m{secs:02d}s"
    return f"{secs}s"
def main(cache_dir: Path | None = None) -> None:
    """Print a one-shot snapshot of the index build progress.

    Looks in the cache directory for a finished bundle (``bundle_*.json``);
    if one exists the build is reported as complete. Otherwise the most
    recently modified ``status_v*_*.json`` file is read and its fields are
    printed, together with how long ago the file was last modified.

    Args:
        cache_dir: Directory to inspect. Defaults to the module-level
            ``CACHE_DIR`` when omitted.
    """
    root = CACHE_DIR if cache_dir is None else Path(cache_dir)

    if any(root.glob("bundle_*.json")):
        print("Build COMPLETE — finished index bundle is on disk.")
        return

    status_files = list(root.glob("status_v*_*.json"))
    if not status_files:
        print("No progress yet. The status file appears after the first wave completes.")
        return

    # This script runs while another process rewrites the status file, so the
    # file may vanish or be half-written between the glob and the read.
    # Report that instead of dying with a traceback.
    try:
        latest = max(status_files, key=lambda p: p.stat().st_mtime)
        status = json.loads(latest.read_text(encoding="utf-8"))
        age = time.time() - latest.stat().st_mtime
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Status file could not be read ({exc}); try again in a moment.")
        return

    if not isinstance(status, dict):
        print("Status file does not contain a JSON object; try again in a moment.")
        return

    print(f"State: {status.get('state', '?')}")
    print(f"Progress: {status.get('completed', '?')}/{status.get('total', '?')} "
          f"({status.get('percent', '?')}%)")
    print(f"Rate: {status.get('rate_per_sec', '?')} rec/s")
    print(f"ETA: {format_duration(status.get('eta_seconds'))}")
    print(f"Model: {status.get('embedding_model', '?')}")
    print(f"Updated: {age:.0f}s ago")
# Script entry point: print the progress report only when this file is run
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()