playbook/antigravity-awesome-skills/skills/junta-leiloeiros/scripts/export.py

138 lines
4.4 KiB
Python

"""
Exportação de dados de leiloeiros para diferentes formatos.
Uso:
python scripts/export.py --format json
python scripts/export.py --format csv
python scripts/export.py --format jsonl
python scripts/export.py --format parquet # requer pandas + pyarrow
python scripts/export.py --format all # exporta todos os formatos
python scripts/export.py --format csv --estado SP
python scripts/export.py --output /caminho/personalizado/
"""
from __future__ import annotations
import argparse
import csv
import json
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import List, Optional
sys.path.insert(0, str(Path(__file__).parent))
from db import Database
OUTPUT_DIR = Path(__file__).parent.parent / "data" / "exports"
def export_json(records: list, output_dir: Path, suffix: str = "") -> Path:
output_dir.mkdir(parents=True, exist_ok=True)
ts = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
path = output_dir / f"leiloeiros{suffix}_{ts}.json"
with open(path, "w", encoding="utf-8") as f:
json.dump(
{"exported_at": datetime.now(timezone.utc).isoformat(), "total": len(records), "data": records},
f,
ensure_ascii=False,
indent=2,
)
print(f"[JSON] {len(records)} registros → {path}")
return path
def export_jsonl(records: list, output_dir: Path, suffix: str = "") -> Path:
output_dir.mkdir(parents=True, exist_ok=True)
ts = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
path = output_dir / f"leiloeiros{suffix}_{ts}.jsonl"
with open(path, "w", encoding="utf-8") as f:
for rec in records:
f.write(json.dumps(rec, ensure_ascii=False) + "\n")
print(f"[JSONL] {len(records)} registros → {path}")
return path
def export_csv(records: list, output_dir: Path, suffix: str = "") -> Path:
if not records:
print("[CSV] Nenhum registro para exportar.")
return None
output_dir.mkdir(parents=True, exist_ok=True)
ts = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
path = output_dir / f"leiloeiros{suffix}_{ts}.csv"
with open(path, "w", newline="", encoding="utf-8-sig") as f:
writer = csv.DictWriter(f, fieldnames=list(records[0].keys()), extrasaction="ignore")
writer.writeheader()
writer.writerows(records)
print(f"[CSV] {len(records)} registros → {path}")
return path
def export_parquet(records: list, output_dir: Path, suffix: str = "") -> Optional[Path]:
try:
import pandas as pd
except ImportError:
print("[PARQUET] pandas não instalado. Execute: pip install pandas pyarrow")
return None
output_dir.mkdir(parents=True, exist_ok=True)
ts = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
path = output_dir / f"leiloeiros{suffix}_{ts}.parquet"
df = pd.DataFrame(records)
df.to_parquet(path, index=False, engine="pyarrow")
print(f"[PARQUET] {len(records)} registros → {path}")
return path
def main():
parser = argparse.ArgumentParser(description="Exporta dados de leiloeiros")
parser.add_argument(
"--format", choices=["json", "jsonl", "csv", "parquet", "all"],
default="csv", help="Formato de exportação (default: csv)"
)
parser.add_argument(
"--estado", nargs="*", metavar="UF",
help="Filtrar por estado(s) (ex: SP RJ)"
)
parser.add_argument(
"--output", default=str(OUTPUT_DIR),
help=f"Diretório de saída (default: {OUTPUT_DIR})"
)
args = parser.parse_args()
db = Database()
db.init()
output_dir = Path(args.output)
estados = [e.upper() for e in args.estado] if args.estado else None
if estados:
all_records = []
for uf in estados:
all_records.extend(db.get_by_estado(uf))
suffix = "_" + "_".join(estados)
else:
all_records = db.get_all()
suffix = ""
if not all_records:
print("Banco vazio. Execute run_all.py primeiro.")
sys.exit(0)
fmt = args.format
if fmt in ("json", "all"):
export_json(all_records, output_dir, suffix)
if fmt in ("jsonl", "all"):
export_jsonl(all_records, output_dir, suffix)
if fmt in ("csv", "all"):
export_csv(all_records, output_dir, suffix)
if fmt in ("parquet", "all"):
export_parquet(all_records, output_dir, suffix)
if __name__ == "__main__":
main()