Доведен до ума и стабилизирован модуль ZFS
This commit is contained in:
@@ -10,6 +10,9 @@ import re
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
# Результат batch-операции: (успешно, список ошибок)
|
||||
# ошибка = (server/dataset_key, сообщение) — например "gwo2.mps.cln.su/containers/smb2"
|
||||
|
||||
from .logger import get_logger
|
||||
from .protocols import SSHProtocol
|
||||
from .ssh_base import SSHBase
|
||||
@@ -130,14 +133,18 @@ def backup_server(
|
||||
server: Dict[str, Any],
|
||||
ssh_defaults: Dict[str, Any],
|
||||
log: logging.Logger,
|
||||
) -> bool:
|
||||
collect_errors: bool = False,
|
||||
) -> Tuple[int, List[Tuple[str, str]]]:
|
||||
"""
|
||||
Выполнить бэкап для одного сервера: снапшоты, send/recv, очистка.
|
||||
Конфиг v1.1: server['pools'] — список пулов с source_pool, datasets, target_pool.
|
||||
Возвращает (успешно, список_ошибок), где ошибка = (ключ "server/dataset", сообщение).
|
||||
"""
|
||||
name = server["name"]
|
||||
pools = server["pools"]
|
||||
retention_days = int(server.get("retention_days", 30))
|
||||
success_count = 0
|
||||
errors: List[Tuple[str, str]] = []
|
||||
|
||||
port = ssh_defaults.get("port", 22)
|
||||
username = ssh_defaults.get("username", "root")
|
||||
@@ -148,27 +155,35 @@ def backup_server(
|
||||
log.info("Сервер %s: дата снапшотов %s", name, date_str)
|
||||
|
||||
ssh = SSHBase(hostname=name, port=port, username=username, pkey_file=pkey_file, host_keys=host_keys)
|
||||
ssh.connect()
|
||||
|
||||
pool_counts: Dict[str, int] = {}
|
||||
total_datasets = 0
|
||||
try:
|
||||
ssh.connect()
|
||||
except Exception as e:
|
||||
for pool_config in pools:
|
||||
for dataset in pool_config["datasets"]:
|
||||
errors.append((f"{name}/{dataset}", str(e)))
|
||||
return 0, errors
|
||||
|
||||
try:
|
||||
for pool_config in pools:
|
||||
source_pool = pool_config["source_pool"]
|
||||
datasets = pool_config["datasets"]
|
||||
target_pool = pool_config["target_pool"]
|
||||
pool_counts[source_pool] = len(datasets)
|
||||
total_datasets += len(datasets)
|
||||
|
||||
for dataset in datasets:
|
||||
full_dataset = f"{source_pool}/{dataset}"
|
||||
key = f"{name}/{dataset}"
|
||||
|
||||
def do_snapshot(fd=full_dataset, d=date_str):
|
||||
if not _create_snapshot(ssh, fd, d, log):
|
||||
raise RuntimeError(f"Snapshot {fd}@{d} failed")
|
||||
|
||||
_run_with_retry(log, name, f"snapshot {full_dataset}", do_snapshot)
|
||||
try:
|
||||
_run_with_retry(log, name, f"snapshot {full_dataset}", do_snapshot)
|
||||
except Exception as e:
|
||||
errors.append((key, str(e)))
|
||||
if not collect_errors:
|
||||
raise
|
||||
continue
|
||||
|
||||
for pool_config in pools:
|
||||
source_pool = pool_config["source_pool"]
|
||||
@@ -178,6 +193,7 @@ def backup_server(
|
||||
for dataset in datasets:
|
||||
full_dataset = f"{source_pool}/{dataset}"
|
||||
target_dataset = f"{target_pool}/{dataset}"
|
||||
key = f"{name}/{dataset}"
|
||||
|
||||
def do_send_recv(fd=full_dataset, td=target_dataset, d=date_str):
|
||||
prev_date = _get_previous_snapshot_date(ssh, td)
|
||||
@@ -188,14 +204,20 @@ def backup_server(
|
||||
if not _replicate_snapshot(ssh, fd, td, d, prev_date, log):
|
||||
raise RuntimeError(f"Replicate {fd}@{d} → {td} failed")
|
||||
|
||||
_run_with_retry(log, name, f"send/recv {full_dataset}", do_send_recv)
|
||||
try:
|
||||
_run_with_retry(log, name, f"send/recv {full_dataset}", do_send_recv)
|
||||
success_count += 1
|
||||
except Exception as e:
|
||||
errors.append((key, str(e)))
|
||||
if not collect_errors:
|
||||
raise
|
||||
continue
|
||||
|
||||
cutoff = datetime.now() - timedelta(days=retention_days)
|
||||
for pool_config in pools:
|
||||
source_pool = pool_config["source_pool"]
|
||||
datasets = pool_config["datasets"]
|
||||
target_pool = pool_config["target_pool"]
|
||||
|
||||
for dataset in datasets:
|
||||
for pool, ds in [(source_pool, dataset), (target_pool, dataset)]:
|
||||
full_ds = f"{pool}/{ds}"
|
||||
@@ -218,9 +240,9 @@ def backup_server(
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
parts = ", ".join(f"{p}:{c}" for p, c in sorted(pool_counts.items()))
|
||||
log.info("✅ %s: %s datasets backed up (%s)", name, total_datasets, parts)
|
||||
return True
|
||||
if not collect_errors:
|
||||
success_count = sum(len(pc["datasets"]) for pc in pools)
|
||||
return success_count, errors
|
||||
|
||||
finally:
|
||||
ssh.close()
|
||||
|
||||
Reference in New Issue
Block a user