Логи и конфиги переработаны, добавлен модуль ZFS и ведется работа с телеграм-ботом
This commit is contained in:
@@ -7,7 +7,16 @@
|
||||
from .ssh_base import SSHBase
|
||||
from .postgresql import PostgreSQLOperations
|
||||
from .c1_cluster import C1ClusterOperations
|
||||
from .ssh import ssh
|
||||
from .ssh import SSHClient, ssh
|
||||
from .protocols import SSHProtocol, SSHOperationsBase
|
||||
|
||||
__all__ = ['SSHBase', 'PostgreSQLOperations', 'C1ClusterOperations', 'ssh']
|
||||
__all__ = [
|
||||
'SSHBase',
|
||||
'SSHClient',
|
||||
'ssh',
|
||||
'PostgreSQLOperations',
|
||||
'C1ClusterOperations',
|
||||
'SSHProtocol',
|
||||
'SSHOperationsBase',
|
||||
]
|
||||
|
||||
|
||||
@@ -6,27 +6,30 @@
|
||||
import logging
|
||||
from typing import Optional, List, Dict
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
from .logger import get_logger
|
||||
from .protocols import SSHOperationsBase
|
||||
|
||||
logger = get_logger("c1_cluster")
|
||||
|
||||
|
||||
class C1ClusterOperations:
|
||||
class C1ClusterOperations(SSHOperationsBase):
|
||||
"""
|
||||
Класс для операций с кластером 1С через SSH
|
||||
Класс для операций с кластером 1С через SSH.
|
||||
Реализует контракт операционного модуля (ssh: SSHProtocol).
|
||||
"""
|
||||
|
||||
def __init__(self, ssh_client, srv_1c: str = "", c1_claster_user: str = "", c1_claster_pass: str = ""):
|
||||
"""
|
||||
Инициализация модуля 1С кластера
|
||||
|
||||
Args:
|
||||
ssh_client: Экземпляр SSHBase для выполнения команд
|
||||
ssh_client: Экземпляр, реализующий SSHProtocol (SSHBase, SSHClient).
|
||||
srv_1c: Имя LXC контейнера с 1С
|
||||
c1_claster_user: Пользователь кластера 1С
|
||||
c1_claster_pass: Пароль кластера 1С
|
||||
"""
|
||||
self.ssh = ssh_client
|
||||
self.srv_1c = srv_1c
|
||||
self.c1_claster_user = c1_claster_user
|
||||
self.c1_claster_pass = c1_claster_pass
|
||||
super().__init__(ssh_client)
|
||||
self.srv_1c: str = srv_1c
|
||||
self.c1_claster_user: str = c1_claster_user
|
||||
self.c1_claster_pass: str = c1_claster_pass
|
||||
|
||||
def set_srv_1c(self, srv_1c: str) -> None:
|
||||
"""
|
||||
|
||||
283
modules/logger.py
Normal file
283
modules/logger.py
Normal file
@@ -0,0 +1,283 @@
|
||||
#!/usr/bin/python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Единый логгер для проекта SSH Client (PRD v1.8).
|
||||
Поддержка файла, консоли и Telegram через TelegramHandler.
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
import queue
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
# Имя корневого логгера проекта
|
||||
ROOT_LOGGER_NAME = "ssh_client"
|
||||
|
||||
# Общий формат логов
|
||||
LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||
LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
|
||||
|
||||
# Кастомный уровень STATUS (между INFO и WARNING)
|
||||
STATUS_LEVEL = 25
|
||||
logging.addLevelName(STATUS_LEVEL, "STATUS")
|
||||
|
||||
|
||||
def status(self, message: str, *args, **kwargs) -> None:
|
||||
if self.isEnabledFor(STATUS_LEVEL):
|
||||
self._log(STATUS_LEVEL, message, args, **kwargs)
|
||||
|
||||
|
||||
logging.Logger.status = status # type: ignore
|
||||
|
||||
_initialized = False
|
||||
|
||||
|
||||
class TelegramHandler(logging.Handler):
|
||||
"""
|
||||
Отправляет логи в Telegram через очередь (асинхронно).
|
||||
Retry при ошибках, разбивка длинных сообщений, /start для верификации.
|
||||
"""
|
||||
|
||||
def __init__(self, config: Dict[str, Any]) -> None:
|
||||
super().__init__()
|
||||
self._config = config
|
||||
self._queue: queue.Queue = queue.Queue(maxsize=1000)
|
||||
self._bot_token = config.get("bot_token", "")
|
||||
self._chat_id = str(config.get("chat_id", ""))
|
||||
self._allowed_users: List[Any] = config.get("allowed_users", [])
|
||||
self._allowed_levels: List[str] = [str(x).upper() for x in config.get("allowed_levels", ["ERROR"])]
|
||||
retry = config.get("retry", {})
|
||||
self._retry_attempts = retry.get("attempts", 3)
|
||||
self._retry_delay = retry.get("delay", 5)
|
||||
opts = config.get("options", {})
|
||||
self._max_length = opts.get("max_message_length", 4096)
|
||||
self._parse_mode = opts.get("parse_mode", "HTML")
|
||||
self._disable_notification = opts.get("disable_notification", False)
|
||||
self._enable_commands = config.get("enable_commands", False)
|
||||
self._stop = threading.Event()
|
||||
self._worker = threading.Thread(target=self._process_queue, daemon=True)
|
||||
self._worker.start()
|
||||
if self._enable_commands:
|
||||
self._poll_thread = threading.Thread(target=self._poll_commands, daemon=True)
|
||||
self._poll_thread.start()
|
||||
|
||||
def _level_allowed(self, record: logging.LogRecord) -> bool:
|
||||
levelname = getattr(record, "levelname", record.levelname)
|
||||
return str(levelname).upper() in self._allowed_levels
|
||||
|
||||
def emit(self, record: logging.LogRecord) -> None:
|
||||
if not self._bot_token or not self._chat_id:
|
||||
return
|
||||
if not self._level_allowed(record):
|
||||
return
|
||||
try:
|
||||
msg = self.format(record)
|
||||
# Экранируем HTML при parse_mode=HTML
|
||||
if self._parse_mode == "HTML":
|
||||
msg = msg.replace("&", "&").replace("<", "<").replace(">", ">")
|
||||
self._queue.put_nowait(msg)
|
||||
except queue.Full:
|
||||
sys.stderr.write(f"[TelegramHandler] Очередь переполнена, сообщение пропущено\n")
|
||||
except Exception:
|
||||
self.handleError(record)
|
||||
|
||||
def _process_queue(self) -> None:
|
||||
while not self._stop.is_set():
|
||||
try:
|
||||
msg = self._queue.get(timeout=0.5)
|
||||
if msg:
|
||||
self._send_with_retry(msg)
|
||||
except queue.Empty:
|
||||
continue
|
||||
except Exception as e:
|
||||
sys.stderr.write(f"[TelegramHandler] Ошибка отправки: {e}\n")
|
||||
|
||||
def _split_message(self, text: str) -> List[str]:
|
||||
if len(text) <= self._max_length:
|
||||
return [text]
|
||||
parts = []
|
||||
while text:
|
||||
chunk = text[: self._max_length]
|
||||
idx = chunk.rfind("\n")
|
||||
if idx > self._max_length // 2:
|
||||
chunk, text = chunk[: idx + 1], text[idx + 1 :]
|
||||
else:
|
||||
text = text[self._max_length :]
|
||||
parts.append(chunk)
|
||||
return parts
|
||||
|
||||
def _send_with_retry(self, text: str) -> None:
|
||||
parts = self._split_message(text)
|
||||
for part in parts:
|
||||
for attempt in range(1, self._retry_attempts + 1):
|
||||
try:
|
||||
self._send_to_telegram(part)
|
||||
break
|
||||
except Exception as e:
|
||||
if attempt == self._retry_attempts:
|
||||
sys.stderr.write(f"[TelegramHandler] Не удалось отправить после {attempt} попыток: {e}\n")
|
||||
return
|
||||
time.sleep(self._retry_delay)
|
||||
|
||||
def _send_to_telegram(self, text: str) -> None:
|
||||
import requests
|
||||
|
||||
url = f"https://api.telegram.org/bot{self._bot_token}/sendMessage"
|
||||
data = {
|
||||
"chat_id": self._chat_id,
|
||||
"text": text,
|
||||
"parse_mode": self._parse_mode,
|
||||
"disable_notification": self._disable_notification,
|
||||
}
|
||||
resp = requests.post(url, data=data, timeout=10)
|
||||
resp.raise_for_status()
|
||||
j = resp.json()
|
||||
if not j.get("ok"):
|
||||
raise RuntimeError(j.get("description", "Unknown Telegram API error"))
|
||||
|
||||
def _poll_commands(self) -> None:
|
||||
import requests
|
||||
|
||||
url = f"https://api.telegram.org/bot{self._bot_token}/getUpdates"
|
||||
offset = 0
|
||||
while not self._stop.is_set():
|
||||
try:
|
||||
r = requests.get(url, params={"offset": offset, "timeout": 30}, timeout=35)
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
if not data.get("ok"):
|
||||
time.sleep(5)
|
||||
continue
|
||||
for upd in data.get("result", []):
|
||||
offset = upd["update_id"] + 1
|
||||
msg = upd.get("message", {})
|
||||
text = msg.get("text", "").strip()
|
||||
if text != "/start":
|
||||
continue
|
||||
user = msg.get("from", {})
|
||||
user_id = user.get("id")
|
||||
username = user.get("username", "")
|
||||
chat_id = msg.get("chat", {}).get("id")
|
||||
if self._authenticate_user(user_id, username):
|
||||
reply = "✅ Доступ разрешён. Вы будете получать уведомления."
|
||||
else:
|
||||
reply = "❌ Доступ запрещён."
|
||||
self._send_reply(chat_id, reply)
|
||||
except Exception as e:
|
||||
sys.stderr.write(f"[TelegramHandler] Polling error: {e}\n")
|
||||
time.sleep(5)
|
||||
|
||||
def _send_reply(self, chat_id: Any, text: str) -> None:
|
||||
import requests
|
||||
|
||||
url = f"https://api.telegram.org/bot{self._bot_token}/sendMessage"
|
||||
requests.post(url, data={"chat_id": chat_id, "text": text}, timeout=10)
|
||||
|
||||
def _authenticate_user(self, user_id: Optional[int], username: str) -> bool:
|
||||
for u in self._allowed_users:
|
||||
if isinstance(u, int) and u == user_id:
|
||||
return True
|
||||
if isinstance(u, str) and (u == username or str(u) == str(user_id)):
|
||||
return True
|
||||
return False
|
||||
|
||||
def close(self) -> None:
|
||||
self._stop.set()
|
||||
super().close()
|
||||
|
||||
|
||||
def _parse_level(level: str) -> int:
|
||||
m = {
|
||||
"DEBUG": logging.DEBUG,
|
||||
"INFO": logging.INFO,
|
||||
"STATUS": STATUS_LEVEL,
|
||||
"WARNING": logging.WARNING,
|
||||
"ERROR": logging.ERROR,
|
||||
"CRITICAL": logging.CRITICAL,
|
||||
}
|
||||
return m.get(str(level).upper(), logging.INFO)
|
||||
|
||||
|
||||
def setup_root_logger(
|
||||
level: Union[int, str] = logging.INFO,
|
||||
log_file: Optional[str] = None,
|
||||
script_dir: Optional[str] = None,
|
||||
telegram_config: Optional[Dict[str, Any]] = None,
|
||||
) -> logging.Logger:
|
||||
"""
|
||||
Настроить корневой логгер проекта.
|
||||
|
||||
Args:
|
||||
level: Уровень логирования (int или "INFO", "DEBUG" и т.д.)
|
||||
log_file: Путь к файлу логов (опционально)
|
||||
script_dir: Директория скрипта для fallback (опционально)
|
||||
telegram_config: Настройки Telegram (если None — Telegram отключён)
|
||||
|
||||
Returns:
|
||||
Настроенный логгер
|
||||
"""
|
||||
global _initialized
|
||||
logger = logging.getLogger(ROOT_LOGGER_NAME)
|
||||
formatter = logging.Formatter(LOG_FORMAT, datefmt=LOG_DATE_FORMAT)
|
||||
|
||||
if isinstance(level, str):
|
||||
level = _parse_level(level)
|
||||
|
||||
if not _initialized:
|
||||
logger.setLevel(level)
|
||||
ch = logging.StreamHandler()
|
||||
ch.setFormatter(formatter)
|
||||
logger.addHandler(ch)
|
||||
_initialized = True
|
||||
|
||||
if log_file:
|
||||
has_file = any(
|
||||
isinstance(h, logging.FileHandler)
|
||||
and getattr(h, "baseFilename", "").endswith(os.path.basename(log_file))
|
||||
for h in logger.handlers
|
||||
)
|
||||
if not has_file:
|
||||
try:
|
||||
log_path = log_file
|
||||
if script_dir and not os.path.isabs(log_path):
|
||||
log_path = os.path.normpath(os.path.join(script_dir, log_file))
|
||||
fh = logging.FileHandler(log_path, encoding="utf-8")
|
||||
fh.setFormatter(formatter)
|
||||
logger.addHandler(fh)
|
||||
except OSError:
|
||||
if script_dir:
|
||||
fallback = os.path.join(script_dir, "logs", "zfs_backup.log")
|
||||
try:
|
||||
os.makedirs(os.path.dirname(fallback), exist_ok=True)
|
||||
fh = logging.FileHandler(fallback, encoding="utf-8")
|
||||
fh.setFormatter(formatter)
|
||||
logger.addHandler(fh)
|
||||
logger.warning("Не удалось писать в %s, используется %s", log_file, fallback)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
if telegram_config and telegram_config.get("enabled"):
|
||||
if not any(isinstance(h, TelegramHandler) for h in logger.handlers):
|
||||
try:
|
||||
th = TelegramHandler(telegram_config)
|
||||
th.setLevel(_parse_level(telegram_config.get("log_level", "INFO")))
|
||||
th.setFormatter(formatter)
|
||||
logger.addHandler(th)
|
||||
except Exception as e:
|
||||
sys.stderr.write(f"[logger] Не удалось инициализировать TelegramHandler: {e}\n")
|
||||
|
||||
return logger
|
||||
|
||||
|
||||
def get_logger(name: str) -> logging.Logger:
|
||||
"""
|
||||
Получить логгер для модуля.
|
||||
"""
|
||||
root = logging.getLogger(ROOT_LOGGER_NAME)
|
||||
if not root.handlers:
|
||||
setup_root_logger()
|
||||
if name.startswith("ssh_client"):
|
||||
return logging.getLogger(name)
|
||||
return logging.getLogger(f"{ROOT_LOGGER_NAME}.{name}")
|
||||
@@ -7,21 +7,24 @@ import logging
|
||||
from datetime import datetime
|
||||
from typing import Optional, List, Dict, Union
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
from .logger import get_logger
|
||||
from .protocols import SSHOperationsBase
|
||||
|
||||
logger = get_logger("postgresql")
|
||||
|
||||
|
||||
class PostgreSQLOperations:
|
||||
class PostgreSQLOperations(SSHOperationsBase):
|
||||
"""
|
||||
Класс для операций с PostgreSQL через SSH
|
||||
Класс для операций с PostgreSQL через SSH.
|
||||
Реализует контракт операционного модуля (ssh: SSHProtocol).
|
||||
"""
|
||||
def __init__(self, ssh_client):
|
||||
|
||||
def __init__(self, ssh_client) -> None:
|
||||
"""
|
||||
Инициализация модуля PostgreSQL
|
||||
|
||||
Args:
|
||||
ssh_client: Экземпляр SSHBase для выполнения команд
|
||||
ssh_client: Экземпляр, реализующий SSHProtocol (SSHBase, SSHClient).
|
||||
"""
|
||||
self.ssh = ssh_client
|
||||
super().__init__(ssh_client)
|
||||
|
||||
def bases_list(self, srv_pgsql: str) -> List[str]:
|
||||
"""
|
||||
|
||||
52
modules/protocols.py
Normal file
52
modules/protocols.py
Normal file
@@ -0,0 +1,52 @@
|
||||
#!/usr/bin/python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Протоколы и интерфейсы для модулей SSH Client.
|
||||
Обеспечивают единый контракт для SSH-операций и операционных модулей.
|
||||
"""
|
||||
from typing import List, Protocol, runtime_checkable
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class SSHProtocol(Protocol):
|
||||
"""
|
||||
Протокол SSH-клиента: connect, cmd, close.
|
||||
Реализуется SSHBase и SSHClient.
|
||||
"""
|
||||
|
||||
def connect(self) -> None:
|
||||
"""Подключение к удалённому серверу."""
|
||||
...
|
||||
|
||||
def cmd(
|
||||
self,
|
||||
command: str,
|
||||
sleep: float = 0.1,
|
||||
out_to_print: bool = False,
|
||||
suppress_warnings: bool = False,
|
||||
) -> List[str]:
|
||||
"""
|
||||
Выполнение команды на удалённом сервере.
|
||||
|
||||
Returns:
|
||||
[stdout, stderr]
|
||||
"""
|
||||
...
|
||||
|
||||
def close(self) -> None:
|
||||
"""Закрытие соединения."""
|
||||
...
|
||||
|
||||
|
||||
class SSHOperationsBase:
|
||||
"""
|
||||
Базовый класс для операционных модулей (PostgreSQL, 1C, ZFS и т.д.).
|
||||
Ожидает ssh_client, реализующий SSHProtocol.
|
||||
"""
|
||||
|
||||
def __init__(self, ssh_client: SSHProtocol) -> None:
|
||||
"""
|
||||
Args:
|
||||
ssh_client: Экземпляр, реализующий SSHProtocol (SSHBase, SSHClient).
|
||||
"""
|
||||
self.ssh: SSHProtocol = ssh_client
|
||||
@@ -9,10 +9,11 @@ from .postgresql import PostgreSQLOperations
|
||||
from .c1_cluster import C1ClusterOperations
|
||||
|
||||
|
||||
class ssh(SSHBase, PostgreSQLOperations, C1ClusterOperations):
|
||||
class SSHClient(SSHBase, PostgreSQLOperations, C1ClusterOperations):
|
||||
"""
|
||||
Класс SSH клиента с поддержкой PostgreSQL и 1С кластера
|
||||
|
||||
SSH-клиент с поддержкой PostgreSQL и кластера 1С.
|
||||
Рекомендуемый класс для использования (PEP8: PascalCase).
|
||||
|
||||
Наследует функциональность от:
|
||||
- SSHBase: базовые SSH операции (connect, cmd, close)
|
||||
- PostgreSQLOperations: операции с PostgreSQL
|
||||
@@ -47,3 +48,7 @@ class ssh(SSHBase, PostgreSQLOperations, C1ClusterOperations):
|
||||
"""
|
||||
self.set_srv_1c(srv_1c)
|
||||
self.set_cluster_credentials(c1_claster_user, c1_claster_pass)
|
||||
|
||||
|
||||
# Совместимость: alias для обратной совместимости (legacy)
|
||||
ssh = SSHClient
|
||||
|
||||
@@ -9,17 +9,9 @@ import time
|
||||
import logging
|
||||
from typing import List
|
||||
|
||||
# Настройка логирования
|
||||
logger = logging.getLogger(__name__)
|
||||
if not logger.handlers:
|
||||
handler = logging.StreamHandler()
|
||||
formatter = logging.Formatter(
|
||||
'%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
datefmt='%Y-%m-%d %H:%M:%S'
|
||||
)
|
||||
handler.setFormatter(formatter)
|
||||
logger.addHandler(handler)
|
||||
logger.setLevel(logging.INFO)
|
||||
from .logger import get_logger
|
||||
|
||||
logger = get_logger("ssh_base")
|
||||
|
||||
# Ожидаемые «ошибки» ZFS при бэкапе — не логируем WARNING при suppress_warnings=True
|
||||
KNOWN_ZFS_ERRORS = (
|
||||
|
||||
226
modules/zfs_backup_ops.py
Normal file
226
modules/zfs_backup_ops.py
Normal file
@@ -0,0 +1,226 @@
|
||||
#!/usr/bin/python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Операционный модуль ZFS Backup.
|
||||
Реализует логику снапшотов, репликации и очистки для удалённых ZFS-пулов.
|
||||
Используется точкой входа zfs_backup.py (cron/CLI).
|
||||
"""
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from .logger import get_logger
|
||||
from .protocols import SSHProtocol
|
||||
from .ssh_base import SSHBase
|
||||
|
||||
logger = get_logger("zfs_backup")
|
||||
|
||||
MAX_RETRIES = 3
|
||||
SNAPSHOT_DATE_FMT = "%d-%m-%Y" # dd-mm-yyyy
|
||||
|
||||
|
||||
def _execute(ssh: SSHProtocol, command: str, log: logging.Logger) -> Tuple[str, str]:
|
||||
"""Выполнить команду по SSH. При ошибке — исключение."""
|
||||
stdout, stderr = ssh.cmd(command)
|
||||
if stderr and stderr.strip():
|
||||
raise RuntimeError(f"Команда завершилась с ошибкой: {stderr.strip()}")
|
||||
return stdout, stderr
|
||||
|
||||
|
||||
def _create_snapshot(
|
||||
ssh: SSHProtocol,
|
||||
full_dataset: str,
|
||||
date_str: str,
|
||||
log: logging.Logger,
|
||||
) -> bool:
|
||||
"""Создать ZFS-снапшот. При 'dataset already exists' — успех (skip)."""
|
||||
cmd = f"/usr/sbin/zfs snapshot {full_dataset}@{date_str}"
|
||||
stdout, stderr = ssh.cmd(cmd, suppress_warnings=True)
|
||||
stderr_lower = (stderr or "").lower()
|
||||
|
||||
if "dataset already exists" in stderr_lower:
|
||||
pool_name = full_dataset.split("/")[0]
|
||||
snapshot_name = "/".join(full_dataset.split("/")[1:])
|
||||
log.info(
|
||||
"В пуле %s снимок %s@%s уже существует, действие пропущено",
|
||||
pool_name,
|
||||
snapshot_name,
|
||||
date_str,
|
||||
)
|
||||
return True
|
||||
|
||||
if stderr and stderr.strip():
|
||||
log.error("❌ Создание снимка failed: %s", stderr.strip())
|
||||
return False
|
||||
|
||||
log.info("✅ Снимок %s@%s создан", full_dataset, date_str)
|
||||
return True
|
||||
|
||||
|
||||
def _replicate_snapshot(
|
||||
ssh: SSHProtocol,
|
||||
source_dataset: str,
|
||||
target_dataset: str,
|
||||
date_str: str,
|
||||
prev_date: Optional[str],
|
||||
log: logging.Logger,
|
||||
) -> bool:
|
||||
"""Репликация снапшота. При 'not an earlier snapshot' — успех (skip)."""
|
||||
if prev_date:
|
||||
send_cmd = f"/usr/sbin/zfs send -i @{prev_date} {source_dataset}@{date_str}"
|
||||
else:
|
||||
send_cmd = f"/usr/sbin/zfs send {source_dataset}@{date_str}"
|
||||
full_cmd = f"{send_cmd} | /usr/sbin/zfs recv -F {target_dataset}"
|
||||
stdout, stderr = ssh.cmd(full_cmd, suppress_warnings=True)
|
||||
|
||||
if stderr and "not an earlier snapshot from the same fs" in stderr:
|
||||
pool_name = target_dataset.split("/")[0]
|
||||
dataset_name = "/".join(target_dataset.split("/")[1:])
|
||||
log.info("В пуле %s снимок %s@%s уже существует", pool_name, dataset_name, date_str)
|
||||
return True
|
||||
|
||||
if stderr and stderr.strip():
|
||||
log.error("❌ Репликация failed: %s", stderr.strip())
|
||||
return False
|
||||
|
||||
log.info("✅ Репликация %s → %s", source_dataset, target_dataset)
|
||||
return True
|
||||
|
||||
|
||||
def _get_previous_snapshot_date(ssh: SSHProtocol, target_dataset: str) -> Optional[str]:
|
||||
"""Дата последнего снапшота target_dataset (dd-mm-yyyy) или None."""
|
||||
cmd = f"/usr/sbin/zfs list -t snapshot -H -o name {target_dataset} 2>/dev/null || true"
|
||||
stdout, _ = ssh.cmd(cmd)
|
||||
lines = [s.strip() for s in stdout.splitlines() if s.strip()]
|
||||
pattern = re.compile(r"@(\d{2}-\d{2}-\d{4})$")
|
||||
dates: List[str] = []
|
||||
for line in lines:
|
||||
m = pattern.search(line)
|
||||
if m:
|
||||
dates.append(m.group(1))
|
||||
if not dates:
|
||||
return None
|
||||
dates.sort(key=lambda s: datetime.strptime(s, "%d-%m-%Y"))
|
||||
return dates[-1]
|
||||
|
||||
|
||||
def _run_with_retry(
|
||||
log: logging.Logger,
|
||||
server_name: str,
|
||||
operation_name: str,
|
||||
func,
|
||||
*args,
|
||||
**kwargs,
|
||||
) -> Any:
|
||||
"""Выполнить операцию до MAX_RETRIES раз при ошибке."""
|
||||
last_exc = None
|
||||
for attempt in range(1, MAX_RETRIES + 1):
|
||||
try:
|
||||
return func(*args, **kwargs)
|
||||
except Exception as e:
|
||||
last_exc = e
|
||||
log.warning("❌ %s: failed %s (retry %s/%s): %s", server_name, operation_name, attempt, MAX_RETRIES, e)
|
||||
if attempt == MAX_RETRIES:
|
||||
raise
|
||||
raise last_exc
|
||||
|
||||
|
||||
def backup_server(
|
||||
server: Dict[str, Any],
|
||||
ssh_defaults: Dict[str, Any],
|
||||
log: logging.Logger,
|
||||
) -> bool:
|
||||
"""
|
||||
Выполнить бэкап для одного сервера: снапшоты, send/recv, очистка.
|
||||
Конфиг v1.1: server['pools'] — список пулов с source_pool, datasets, target_pool.
|
||||
"""
|
||||
name = server["name"]
|
||||
pools = server["pools"]
|
||||
retention_days = int(server.get("retention_days", 30))
|
||||
|
||||
port = ssh_defaults.get("port", 22)
|
||||
username = ssh_defaults.get("username", "root")
|
||||
pkey_file = ssh_defaults.get("pkey_file", "/root/.ssh/id_rsa")
|
||||
host_keys = ssh_defaults.get("host_keys", "~/.ssh/known_hosts")
|
||||
|
||||
date_str = datetime.now().strftime(SNAPSHOT_DATE_FMT)
|
||||
log.info("Сервер %s: дата снапшотов %s", name, date_str)
|
||||
|
||||
ssh = SSHBase(hostname=name, port=port, username=username, pkey_file=pkey_file, host_keys=host_keys)
|
||||
ssh.connect()
|
||||
|
||||
pool_counts: Dict[str, int] = {}
|
||||
total_datasets = 0
|
||||
|
||||
try:
|
||||
for pool_config in pools:
|
||||
source_pool = pool_config["source_pool"]
|
||||
datasets = pool_config["datasets"]
|
||||
target_pool = pool_config["target_pool"]
|
||||
pool_counts[source_pool] = len(datasets)
|
||||
total_datasets += len(datasets)
|
||||
|
||||
for dataset in datasets:
|
||||
full_dataset = f"{source_pool}/{dataset}"
|
||||
|
||||
def do_snapshot(fd=full_dataset, d=date_str):
|
||||
if not _create_snapshot(ssh, fd, d, log):
|
||||
raise RuntimeError(f"Snapshot {fd}@{d} failed")
|
||||
|
||||
_run_with_retry(log, name, f"snapshot {full_dataset}", do_snapshot)
|
||||
|
||||
for pool_config in pools:
|
||||
source_pool = pool_config["source_pool"]
|
||||
datasets = pool_config["datasets"]
|
||||
target_pool = pool_config["target_pool"]
|
||||
|
||||
for dataset in datasets:
|
||||
full_dataset = f"{source_pool}/{dataset}"
|
||||
target_dataset = f"{target_pool}/{dataset}"
|
||||
|
||||
def do_send_recv(fd=full_dataset, td=target_dataset, d=date_str):
|
||||
prev_date = _get_previous_snapshot_date(ssh, td)
|
||||
if prev_date:
|
||||
log.info("Инкрементальная передача %s (от %s)", fd, prev_date)
|
||||
else:
|
||||
log.info("Полная передача %s", fd)
|
||||
if not _replicate_snapshot(ssh, fd, td, d, prev_date, log):
|
||||
raise RuntimeError(f"Replicate {fd}@{d} → {td} failed")
|
||||
|
||||
_run_with_retry(log, name, f"send/recv {full_dataset}", do_send_recv)
|
||||
|
||||
cutoff = datetime.now() - timedelta(days=retention_days)
|
||||
for pool_config in pools:
|
||||
source_pool = pool_config["source_pool"]
|
||||
datasets = pool_config["datasets"]
|
||||
target_pool = pool_config["target_pool"]
|
||||
|
||||
for dataset in datasets:
|
||||
for pool, ds in [(source_pool, dataset), (target_pool, dataset)]:
|
||||
full_ds = f"{pool}/{ds}"
|
||||
cmd = f"/usr/sbin/zfs list -t snapshot -H -o name {full_ds} 2>/dev/null || true"
|
||||
stdout, _ = ssh.cmd(cmd)
|
||||
pattern = re.compile(r"@(\d{2}-\d{2}-\d{4})$")
|
||||
for line in stdout.splitlines():
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
m = pattern.search(line)
|
||||
if not m:
|
||||
continue
|
||||
try:
|
||||
snap_date = datetime.strptime(m.group(1), "%d-%m-%Y")
|
||||
if snap_date < cutoff:
|
||||
destroy_cmd = f"/usr/sbin/zfs destroy {line}"
|
||||
_execute(ssh, destroy_cmd, log)
|
||||
log.info("Удалён старый снапшот: %s", line)
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
parts = ", ".join(f"{p}:{c}" for p, c in sorted(pool_counts.items()))
|
||||
log.info("✅ %s: %s datasets backed up (%s)", name, total_datasets, parts)
|
||||
return True
|
||||
|
||||
finally:
|
||||
ssh.close()
|
||||
Reference in New Issue
Block a user