Compare commits

..

3 commits

Author SHA1 Message Date
Chebart
86385261fe added docker-compose file 2025-10-25 16:19:27 +03:00
Chebart
c1603b5e12 fix bugs discussed at the meeting 2025-10-25 16:17:17 +03:00
Chebart
9632552cff added files to master-node related to custom modules 2025-10-23 13:30:24 +03:00
27 changed files with 708 additions and 63 deletions

View file

@ -0,0 +1,9 @@
Перед запуском убедитесь, что вы задали токены SWARM_TOKEN_MANAGER и SWARM_TOKEN_WORKER в вашем окружении:
```sh
export SWARM_TOKEN_MANAGER=your-manager-token-here
export SWARM_TOKEN_WORKER=your-worker-token-here
```
Запустите плейбук командой:
```sh
ansible-playbook -i inventory.yml site.yml
```

View file

@ -0,0 +1,4 @@
# Swarm deployment variables.
# The join tokens are read from the controller's environment — export
# SWARM_TOKEN_MANAGER / SWARM_TOKEN_WORKER before running the playbook
# (see the repository README).
docker_version: '20.10'
swarm_token_manager: "{{ lookup('env', 'SWARM_TOKEN_MANAGER') }}"
swarm_token_worker: "{{ lookup('env', 'SWARM_TOKEN_WORKER') }}"
# Advertise address for `docker swarm init`, resolved from the inventory.
manager_ip: '{{ hostvars["manager1"]["ansible_host"] }}'

View file

@ -0,0 +1,11 @@
# Swarm inventory: one manager node and two workers.
# Replace the placeholder addresses with the real host IPs.
all:
  hosts:
    manager1:
      ansible_host: x.x.x.x
    worker1:
      ansible_host: y.y.y.y
    worker2:
      ansible_host: z.z.z.z
  vars:
    ansible_user: root
    ansible_become: true

View file

@ -0,0 +1,17 @@
---
# Common bootstrap: refresh apt metadata and install the prerequisites
# needed before the Docker apt repository can be added.
- name: Update apt cache
  become: true
  apt:
    update_cache: true

- name: Install necessary packages for Docker installation
  become: true
  apt:
    # Passing the whole list to apt in one transaction is faster and more
    # atomic than looping item-by-item (each loop iteration is its own
    # apt invocation).
    name:
      - apt-transport-https
      - ca-certificates
      - curl
      - gnupg-agent
      - software-properties-common
    state: present

View file

@ -0,0 +1,25 @@
---
# Install Docker CE from the official Docker apt repository and ensure the
# daemon is enabled and running.
- name: Add Docker official repository key
  become: true
  apt_key:
    url: https://download.docker.com/linux/debian/gpg
    state: present

- name: Add Docker stable repository
  become: true
  apt_repository:
    repo: deb [arch=amd64] https://download.docker.com/linux/debian {{ ansible_distribution_release }} stable
    state: present

- name: Install Docker CE
  become: true
  apt:
    name: "docker-ce={{ docker_version }}*"
    # BUG fix: `state: latest` contradicts the explicit version pin above
    # and could silently upgrade past it; `present` honours the pin.
    state: present

- name: Start and enable Docker service
  become: true
  systemd:
    name: docker
    enabled: true
    state: started

View file

@ -0,0 +1,24 @@
---
# Form the Swarm cluster: initialize it on the manager, then join every
# other host as a worker using the pre-shared worker token from group_vars.
- block:
    - name: Initialize the Docker Swarm on Manager node
      become: true
      command: >
        docker swarm init
        --advertise-addr {{ manager_ip }}
      register: result
      # Re-running `swarm init` on a node that is already a swarm member
      # exits non-zero; treat that case as "ok, unchanged" so the play is
      # idempotent instead of failing on the second run.
      failed_when: result.rc != 0 and 'already part of a swarm' not in result.stderr
      changed_when: result.rc == 0
      when: inventory_hostname == 'manager1'

    - debug:
        msg: "Swarm initialized successfully on {{ manager_ip }}"
      when: inventory_hostname == 'manager1' and result is success

- block:
    - name: Join Worker nodes to Swarm cluster
      become: true
      # BUG fix: the original piped an undefined `join_command` fact into
      # the join call — that fact was only set on manager1, so templating
      # failed on every worker. The pre-shared worker token plus the
      # manager address is all `docker swarm join` needs.
      command: >
        docker swarm join
        --token {{ swarm_token_worker }}
        {{ manager_ip }}:2377
      register: join_result
      failed_when: join_result.rc != 0 and 'already part of a swarm' not in join_result.stderr
      changed_when: join_result.rc == 0
      when: inventory_hostname != 'manager1'

View file

@ -0,0 +1,13 @@
---
# Entry point for the Swarm deployment: prepare every host (common role),
# install Docker, then form the Swarm cluster.
- name: Setup Docker Swarm Cluster
  hosts: all
  gather_facts: true
  become: true
  pre_tasks:
    # Shared bootstrap (apt cache refresh + prerequisites) runs before roles.
    - include_role:
        name: common
  roles:
    - role: docker
    - role: swarm

View file

@ -0,0 +1,7 @@
# Version pins and network ranges for the Kubernetes deployment.
kube_version: v1.25.3
docker_version: 20.10.21
etcd_version: 3.5.6
cni_plugin_version: v3.24.0  # presumably the Calico release — TODO confirm
helm_version: v3.11.3
# CIDRs handed to kubeadm: pod overlay network and the service ClusterIP range.
pod_network_cidr: 192.168.0.0/16
service_cluster_ip_range: 10.96.0.0/12

View file

@ -0,0 +1,12 @@
# Kubernetes inventory: the control plane under `masters`, the worker
# nodes under `workers`. Replace the placeholder addresses with real IPs.
all:
  children:
    masters:
      hosts:
        master1:
          ansible_host: x.x.x.x
    workers:
      hosts:
        worker1:
          ansible_host: a.a.a.a
        worker2:
          ansible_host: b.b.b.b

View file

@ -0,0 +1,6 @@
---
# Deploys the Kafka and Zookeeper StatefulSets onto the cluster
# via the `kafka` role (template + kubectl apply).
- name: Deploy Kafka & Zookeeper clusters
  hosts: all
  become: true
  roles:
    - kafka

View file

@ -0,0 +1,6 @@
---
# Installs Docker + kubeadm/kubelet/kubectl and bootstraps the cluster
# (control plane on master1) via the `k8s` role.
- name: Install Kubernetes components
  hosts: all
  become: true
  roles:
    - k8s

View file

@ -0,0 +1,6 @@
---
# OS-level preparation (package upgrade, swap, kernel modules) that must
# run on every node before kubeadm.
- name: Prepare servers for Kubernetes
  hosts: all
  become: true
  roles:
    - prepare

View file

@ -0,0 +1,8 @@
---
# Installs the Calico CNI by applying the upstream manifest.
- name: Download Calico manifest
  get_url:
    # NOTE(review): unpinned URL — this tracks whatever the docs site
    # currently serves, which may not match kube_version/cni_plugin_version;
    # consider pinning to a versioned manifest URL.
    url: "https://docs.projectcalico.org/manifests/calico.yaml"
    dest: /tmp/calico.yaml
- name: Apply Calico configuration
  # Assumes kubectl is configured on the target host (kubeconfig is copied
  # by the k8s role).
  command: kubectl apply -f /tmp/calico.yaml

View file

@ -0,0 +1,52 @@
---
# Install the container runtime and Kubernetes packages, bootstrap the
# control plane on master1, and distribute the admin kubeconfig.
- name: Install Docker
  apt:
    # BUG fix: the generic `package` module has no `version` parameter, so
    # the original pin was rejected at runtime. Express the pin through
    # apt's name=version syntax instead.
    name: "docker.io={{ docker_version }}*"
    state: present

- name: Install Kubelet/Kubectl/Kubeadm
  apt:
    # NOTE(review): kube_version carries a leading 'v' (v1.25.3) while apt
    # package versions do not — confirm the variable's format.
    name:
      - "kubelet={{ kube_version }}-00"
      - "kubectl={{ kube_version }}-00"
      - "kubeadm={{ kube_version }}-00"
    state: present

- name: Pull images for Kubernetes
  command: kubeadm config images pull --kubernetes-version "{{ kube_version }}"
  # Pre-pulling never changes cluster state from Ansible's point of view.
  changed_when: false

- name: Initiate Kubernetes cluster on Master node
  command: >-
    kubeadm init
    --apiserver-advertise-address={{ ansible_default_ipv4.address }}
    --pod-network-cidr={{ pod_network_cidr }}
    --ignore-preflight-errors=Swap
  args:
    # BUG fix: the old `changed_when` matched a string kubeadm never prints.
    # `creates` both reports change correctly and makes the task idempotent:
    # admin.conf existing means the control plane is already bootstrapped.
    creates: /etc/kubernetes/admin.conf
  register: output
  run_once: true
  delegate_to: master1

- name: Copy admin credentials from master
  fetch:
    src: "/etc/kubernetes/admin.conf"
    dest: "./admin.conf"
    flat: true
  run_once: true
  delegate_to: master1

- name: Create .kube directory if not exists
  file:
    path: ~/.kube
    state: directory
    mode: '0755'

- name: Copy kubeconfig to local machine
  copy:
    src: ./admin.conf
    dest: ~/.kube/config
    owner: "{{ ansible_user_id }}"
    group: "{{ ansible_user_gid }}"
    mode: '0644'

- name: Deploy Calico network plugin
  import_role:
    name: calico

View file

@ -0,0 +1,14 @@
---
# Render the Kafka/Zookeeper Kubernetes manifests from Jinja templates,
# then apply them with kubectl.
- name: Render Kafka deployment YAML files
  template:
    src: "{{ item }}.j2"
    dest: "/tmp/{{ item }}.yaml"
  loop:
    - kafka
    - zookeeper
- name: Deploy Kafka using Kubernetes resources
  # Assumes kubectl is usable on the target host — TODO confirm this role
  # only runs where a kubeconfig is present.
  command: kubectl apply -f /tmp/{{ item }}.yaml
  loop:
    - kafka
    - zookeeper

View file

@ -0,0 +1,35 @@
# Kafka StatefulSet + NodePort Service (rendered by the kafka role).
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: kafka
spec:
  # BUG fix: spec.serviceName is a required StatefulSet field; without it
  # the API server rejects the manifest. Ideally this should reference a
  # dedicated headless Service — TODO confirm.
  serviceName: kafka-service
  replicas: 2
  selector:
    matchLabels:
      app: kafka
  template:
    metadata:
      labels:
        app: kafka
    spec:
      containers:
        - name: kafka
          # NOTE(review): `kafka:latest` is not an official Docker Hub image
          # name — confirm the intended registry/image.
          image: kafka:latest
          env:
            - name: KAFKA_ZOOKEEPER_CONNECT
              value: zookeeper-headless:2181
          ports:
            - containerPort: 9092
---
apiVersion: v1
kind: Service
metadata:
  name: kafka-service
spec:
  type: NodePort
  selector:
    app: kafka
  ports:
    - protocol: TCP
      port: 9092
      targetPort: 9092

View file

@ -0,0 +1,31 @@
# Zookeeper StatefulSet + headless Service (rendered by the kafka role).
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: zookeeper
spec:
  # BUG fix: spec.serviceName is a required StatefulSet field; point it at
  # the headless service defined below, which governs pod DNS identities.
  serviceName: zookeeper-headless
  # NOTE(review): an even-sized Zookeeper ensemble cannot form a proper
  # quorum; 1 or 3 replicas is conventional — confirm 2 is intended.
  replicas: 2
  selector:
    matchLabels:
      app: zookeeper
  template:
    metadata:
      labels:
        app: zookeeper
    spec:
      containers:
        - name: zookeeper
          # NOTE(review): `zoo:latest` is not an official image name — confirm.
          image: zoo:latest
          ports:
            - containerPort: 2181
---
apiVersion: v1
kind: Service
metadata:
  name: zookeeper-headless
spec:
  clusterIP: None
  selector:
    app: zookeeper
  ports:
    - port: 2181
      targetPort: 2181

View file

@ -0,0 +1,34 @@
---
# Prepare a host for kubeadm: up-to-date packages, swap disabled, and the
# kernel modules container networking needs.
- name: Ensure OS packages are up-to-date
  apt:
    upgrade: dist
    update_cache: true

- name: Turn swap off now
  # BUG fix: the original only set vm.swappiness=0, which does NOT disable
  # swap — kubelet refuses to start while swap is active.
  command: swapoff -a
  when: ansible_swaptotal_mb | default(0) | int > 0

- name: Keep swap disabled across reboots
  replace:
    path: /etc/fstab
    regexp: '^([^#].*\s+swap\s+.*)$'
    replace: '# \1'

- name: Minimize swappiness
  sysctl:
    name: vm.swappiness
    value: '0'
    state: present
    reload: true

- name: Enable kernel modules required by Kubernetes
  modprobe:
    name: "{{ item }}"
    state: present
  loop:
    - overlay
    - br_netfilter

- name: Load kernel modules permanently
  # Persists the modules via /etc/modules-load.d so they survive a reboot.
  # (The original task name claimed this loaded iptables rules — it does not.)
  lineinfile:
    path: /etc/modules-load.d/kubernetes.conf
    create: true
    regexp: '^{{ item }}$'
    line: '{{ item }}'
  loop:
    - overlay
    - br_netfilter

- name: Set SELinux in permissive mode
  # NOTE(review): SELinux is usually absent on Debian/Ubuntu hosts (apt is
  # used above) and this module needs the libselinux python bindings —
  # confirm this task is required for the target OS.
  selinux:
    state: permissive

View file

@ -1,30 +1,8 @@
FROM python:3.12-slim FROM python:3.12-slim-bookworm
RUN apt-get update
RUN apt-get install -y --no-install-recommends \
build-essential \
cmake \
pkg-config \
git \
libsm6 \
libxext6 \
ffmpeg \
    libgtk2.0-dev \
libavcodec-dev \
libavformat-dev \
libswscale-dev
COPY ./docker_requirements.txt /requirements.txt COPY ./docker_requirements.txt /requirements.txt
RUN pip install -r /requirements.txt RUN pip install -r /requirements.txt
COPY . /app COPY . /app
WORKDIR /app WORKDIR /app
ENTRYPOINT ["python"] ENTRYPOINT ["python"]
CMD ["src/app.py"] CMD ["src/app.py"]
# ENTRYPOINT ["sh", "src/entrypoint.sh"] # WSGI - gunicorn prod run # ENTRYPOINT ["sh", "src/entrypoint.sh"] # WSGI - gunicorn prod run

View file

@ -1,10 +0,0 @@
Build master-node image (on Windows)
```shell
docker buildx build -t master-node .
```
Run master-node
```shell
docker run -p 5010:5010 master-node
```

View file

@ -0,0 +1,13 @@
services:
  master-node:
    container_name: master_node_container
    image: master_node_image
    build:
      context: ./
      dockerfile: Dockerfile
    volumes:
      - ./src:/app/src
      # The host Docker socket is mounted so the app can manage containers.
      - /var/run/docker.sock:/var/run/docker.sock
    networks:
      - defaults
    restart: unless-stopped

# BUG fix: the service referenced the `defaults` network, but the file never
# declared it, so `docker compose up` failed with an undefined-network error.
networks:
  defaults:
    driver: bridge

View file

@ -5,11 +5,11 @@ requests-oauthlib == 1.3.0
Flask == 3.0.2 Flask == 3.0.2
Flask-HTTPAuth == 4.8.0 Flask-HTTPAuth == 4.8.0
Flask-APScheduler == 1.13.1 Flask-APScheduler == 1.13.1
numpy == 1.26.2
pandas == 1.4.2 pandas == 1.4.2
Werkzeug == 3.0.1 Werkzeug == 3.0.1
paramiko == 3.4.0 paramiko == 3.4.0
tabulate == 0.9.0 tabulate == 0.9.0
psycopg2-binary == 2.9.9 psycopg2-binary == 2.9.9
pydantic==2.6.3 pydantic==2.6.3
PyYAML == 6.0
websockets==12.0 websockets==12.0

View file

@ -1,4 +1,5 @@
import os import os
import requests
import configparser import configparser
import datetime import datetime
@ -7,7 +8,7 @@ from flask import (
flash, flash,
request, request,
redirect, redirect,
url_for, url_for,
render_template, render_template,
jsonify, jsonify,
make_response, make_response,
@ -18,62 +19,76 @@ from pydantic import ValidationError
from werkzeug.utils import secure_filename from werkzeug.utils import secure_filename
from werkzeug.security import generate_password_hash, check_password_hash from werkzeug.security import generate_password_hash, check_password_hash
from logger import LoggerFactory
from cluster import Cluster
from cluster_state import AutoState
from custom_modules import reset_to_initial_state, create_custom_containers, stop_all_custom_containers
# --------------------------
# Проинициализируем константы
# --------------------------
ABS_PATH = os.path.dirname(os.path.realpath(__file__)) ABS_PATH = os.path.dirname(os.path.realpath(__file__))
MAX_CONTENT_PATH = 1000000000 MAX_CONTENT_PATH = 1000000000
CONFIG = os.path.join(ABS_PATH, "config.ini") CONFIG = os.path.join(ABS_PATH, "config.ini")
SCHEDULER_API_ENABLED = True SCHEDULER_API_ENABLED = True
SHOW_LOG = True SHOW_LOG = True
from logger import LoggerFactory
LoggerFactory.setting( LoggerFactory.setting(
log_level=os.getenv("LOG_LEVEL", "INFO"), log_level=os.getenv("LOG_LEVEL", "INFO"),
log_format="[%(asctime)s] %(levelname)s (%(name)s - %(funcName)s): %(message)s", log_format="[%(asctime)s] %(levelname)s (%(name)s - %(funcName)s): %(message)s",
show=True, show=True,
) )
from cluster import Cluster
from cluster_state import AutoState
app = Flask(__name__, template_folder=os.path.join(ABS_PATH, "templates"))
app.config["MAX_CONTENT_PATH"] = MAX_CONTENT_PATH
app.config["SCHEDULER_API_ENABLED"] = SCHEDULER_API_ENABLED
app.config["SESSION_TYPE"] = "filesystem"
auth = HTTPBasicAuth() auth = HTTPBasicAuth()
scheduler = APScheduler() scheduler = APScheduler()
app_logger = LoggerFactory.get_logger("APP") app_logger = LoggerFactory.get_logger("APP")
AUTO = AutoState(debug=False) AUTO = AutoState(debug=False)
# @scheduler.task("interval", id="cluster_state", seconds=30, misfire_grace_time=900)
# def cluster_state():
# AUTO.check_cluster_state()
# # app_logger.debug("Finished with auto cluster state")
def get_config() -> dict: def get_config() -> dict:
config = configparser.ConfigParser() config = configparser.ConfigParser()
config.read(CONFIG) config.read(CONFIG)
auth_data = dict(config.items("API")) auth_data = dict(config.items("API"))
return auth_data return auth_data
USER = get_config()["user"] USER = get_config()["user"]
PASSWORD = get_config()["password"] PASSWORD = get_config()["password"]
users = { users = {
USER: generate_password_hash(PASSWORD), USER: generate_password_hash(PASSWORD),
} }
@auth.verify_password @auth.verify_password
def verify_password(username, password): def verify_password(username, password):
if username in users and check_password_hash(users.get(username), password): if username in users and check_password_hash(users.get(username), password):
return username return username
# --------------------------
# Создадим приложение
# --------------------------
app = Flask(__name__, template_folder=os.path.join(ABS_PATH, "templates"))
app.config["MAX_CONTENT_PATH"] = MAX_CONTENT_PATH
app.config["SCHEDULER_API_ENABLED"] = SCHEDULER_API_ENABLED
app.config["SESSION_TYPE"] = "filesystem"
# --------------------------
# Настроим хуки и ручки приложения
# --------------------------
@app.before_first_request
def startup():
# Приведем систему в начальное состояние
# TODO: запуск плейбука, который проверит кодовую базу
# желательно, чтобы он скопировал код и указал путь к корневой директории
# в переменной окружения ML_PATH
reset_to_initial_state()
app_logger.info("master-node запущена!")
@app.teardown_appcontext
def shutdown(exception=None):
# Остановим все контейнеры
stop_all_custom_containers()
app_logger.info("master-node остановлена!")
# curl -u <user>:<pass> -d "playbook=ping_workers&args=hosts=workers" -v http://localhost:5010/api/v1.0/run_ansible # curl -u <user>:<pass> -d "playbook=ping_workers&args=hosts=workers" -v http://localhost:5010/api/v1.0/run_ansible
@app.route("/api/v1.0/run_ansible", methods=["POST"]) @app.route("/api/v1.0/run_ansible", methods=["POST"])
@ -100,8 +115,69 @@ def run_ansible():
app_logger.error(data) app_logger.error(data)
return make_response(jsonify(data), 400) return make_response(jsonify(data), 400)
@app.route('/api/v1.0/interact_with_custom_modules', methods=['POST'])
@auth.login_required
def interact_with_custom_modules():
# Получим данные запроса
data = request.get_json()
# TODO: настроить адрес бекенда
back_url = "https://api.statanly.com:8443"
def run_app(): try:
# TODO: получим токен авторизации
token = requests.post(
f"{back_url}/api/auth/login",
data = {"username": "admin@eatom.ru", "password": "admin"}
).json()['access_token']
# выполним необходимую операция с кастомными модулями
if data["request_type"] == "get_all_modules":
response = requests.get(
f"{back_url}/api/custom-modules",
headers = {"Authorization": f"Bearer {token}"}
)
elif data["request_type"] == "change_status":
response = requests.patch(
f"{back_url}/api/custom-modules/{data['module_id']}",
json = {"status": data["status"]}
)
data = {
"response": response.json(),
"message": "",
"code": "SUCCESS"
}
return make_response(jsonify(data), 200)
except Exception as e:
data = {
"response": "",
"message": f"Cannot interact with custom modules: {e}",
"code": "FAILED"
}
return make_response(jsonify(data), 400)
# --------------------------
# Методы, которыми управляет scheduler
# --------------------------
#@scheduler.task(
# "interval",
# id="cluster_state",
# seconds=30,
# misfire_grace_time=900
#)
#def cluster_state():
# AUTO.check_cluster_state()
# app_logger.debug("Finished with auto cluster state")
scheduler.task(
"interval",
id="cluster_state",
seconds=30,
misfire_grace_time=900
)(create_custom_containers)
if __name__ == "__main__":
port = int(os.environ.get("PORT", 5010)) port = int(os.environ.get("PORT", 5010))
app_logger.info(ABS_PATH) app_logger.info(ABS_PATH)
app.secret_key = get_config()["key"] app.secret_key = get_config()["key"]
@ -112,7 +188,3 @@ def run_app():
app.run(debug=True, host="0.0.0.0", port=port) app.run(debug=True, host="0.0.0.0", port=port)
elif mode == "prod": elif mode == "prod":
app.run(port=port) app.run(port=port)
if __name__ == "__main__":
run_app()

View file

@ -0,0 +1 @@
# Public API of the custom_modules package: container lifecycle management.
from .manager import reset_to_initial_state, create_custom_containers, stop_all_custom_containers

View file

@ -0,0 +1,58 @@
import os
def load_env_files(
    files: list
) -> dict:
    """Extract KEY=VALUE environment variables from dotenv-style files.

    Blank lines, comment lines (starting with ``#``) and — fixed here —
    lines without an ``=`` separator are skipped (the original raised
    ValueError on such lines). Keys from later files override earlier ones.

    Args:
        files: paths of the dotenv files to read, in order.

    Returns:
        dict mapping variable names to their (string) values.
    """
    env = {}
    for file in files:
        with open(file) as f:
            for line in f:
                line = line.strip()
                # Skip blanks, comments and malformed lines.
                if not line or line.startswith("#") or "=" not in line:
                    continue
                key, value = line.split("=", 1)
                env[key] = value
    return env
def get_env_vars():
    """Collect env vars from the ML code base and derive container settings.

    Reads the dotenv files under ``$ML_PATH`` and builds the image
    build arguments and runtime environment for custom-module containers.

    Returns:
        tuple: (all_envs, build_args, run_envs).

    Raises:
        RuntimeError: if the ML_PATH environment variable is not set.
    """
    # The ML code base location must be supplied by the environment.
    try:
        ml_path = os.environ["ML_PATH"]
    except KeyError:
        raise RuntimeError("Не указан путь к кодовой базе мл модулей")
    # Gather variables from the known dotenv files.
    env_files = [f"{ml_path}/.env", f"{ml_path}/docker/.env.dev", f"{ml_path}/docker/.env.compose_vars"]
    all_envs = load_env_files(env_files)
    # Build-time arguments for the custom-module image ("tag" is filled in
    # per-module by the caller).
    build_args = {
        "path": f"{ml_path}/src_ml/",
        "dockerfile": "docker/custom_modules_api/Dockerfile",
        "tag": "",
        "buildargs": {
            "CUDA_VERSION": all_envs["CUDA_VERSION"],
            "TORCH_VERSION": all_envs["TORCH_VERSION"]
        }
    }
    # Runtime environment for the container.
    # BUG fix: the original used `all_envs['KEY', default]`, which indexes
    # the dict with a *tuple* and always raises KeyError — dict.get(key,
    # default) is what was intended. Likewise, optional keys accessed with
    # [] followed by `or default` crashed when the key was absent.
    run_envs = {
        "API_HOST": '0.0.0.0',
        "API_PORT": 8000,
        "PROCESSED_INSTANCE_LOG_PERIOD": all_envs.get('PROCESSED_INSTANCE_LOG_PERIOD') or 60,
        "ALL_BOOTSTRAP_SERVERS": f"{all_envs['INTERNAL_BROKER_NAME']}:9090",
        "VIDEO_FRAMES_BOOTSTRAP_SERVERS": f"{all_envs['INTERNAL_BROKER_NAME']}:9090",
        "VIDEO_FRAMES_SCHEMA_REGISTRY_URL": f"http://{all_envs['INTERNAL_SCHEMA_REGISTRY_NAME']}:8081",
        "ML_RESULT_BOOTSTRAP_SERVERS": f"{all_envs['INTERNAL_BROKER_NAME']}:9090",
        "ML_RESULT_SCHEMA_REGISTRY_URL": f"http://{all_envs['INTERNAL_SCHEMA_REGISTRY_NAME']}:8081",
        # Settings-database (Redis) connection.
        "REMOTE_SETTINGS_REDIS_SERVER_HOST": all_envs['ML_SETTINGS_REDIS_NAME'],
        "REMOTE_SETTINGS_REDIS_SERVER_PORT": all_envs.get('ML_SETTINGS_REDIS_PORT') or 6379,
        "REMOTE_SETTINGS_REDIS_SERVER_PASSWORD": all_envs.get('ML_SETTINGS_REDIS_PASSWORD', 'password123'),
        "REMOTE_SETTINGS_MODEL_CONTEXT_PREFIX": 'models_',
        # MinIO access for model weights.
        "WEIGHTS_MINIO_SERVERS": f"{all_envs['FRAME_MINIO_HOST']}:{all_envs['FRAME_MINIO_PORT']}",
        "WEIGHTS_MINIO_USER": all_envs.get('MINIO_ROOT_USER', 'minioadmin'),
        "WEIGHTS_MINIO_PASSWORD": all_envs.get('MINIO_ROOT_PASSWORD', 'minioadmin')
    }
    return all_envs, build_args, run_envs

View file

@ -0,0 +1,124 @@
from multiprocessing import Process
import os
import requests
from .manager_methods import start_container, stop_container
from .get_env_vars import get_env_vars
from ..logger import LoggerFactory
manager_logger = LoggerFactory.get_logger("CustomModuleManager")
def get_all_modules():
    """Query the local master-node API for the list of custom modules.

    Returns:
        The decoded JSON payload, or None when the API answers non-200.
    """
    # BUG fix: the endpoint reads the payload via request.get_json(), so it
    # must be sent as JSON — form-encoded `data=` arrives there as None.
    response = requests.post(
        f"http://localhost:{int(os.environ.get('PORT', 5010))}/api/v1.0/interact_with_custom_modules",
        json = {"request_type": "get_all_modules", "module_id": None, "status": None}
    )
    if response.status_code != 200:
        # BUG fix: a requests.Response is not subscriptable — the original
        # `response['message']` raised TypeError on every error path.
        manager_logger.warning(f"Не удалось получить информацию о кастомных модулях. Код: {response.status_code}")
        return
    return response.json()
def reset_to_initial_state():
    """Bring the system to its initial state: stop the container of every
    known custom module, one child process per module."""
    modules = get_all_modules()
    if not modules:
        return
    # Resolve environment configuration; bail out quietly if unavailable.
    try:
        all_envs, _, _ = get_env_vars()
    except Exception as e:
        manager_logger.warning(f"Не удалось получить переменные окружения: {e}")
        return
    # Each module record carries (id, title, is_SIZ, status, model).
    workers = []
    for module in modules:
        target_name = f"{all_envs['COMPOSE_PROJECT_NAME']}_custom_module{module['id']}"
        # Stop each container in its own process so slow docker calls overlap.
        worker = Process(target=stop_container, args=(module['id'], target_name))
        worker.start()
        workers.append(worker)
    # Wait for every child to finish.
    for worker in workers:
        worker.join()
def create_custom_containers():
    """Scheduled entry point: build and start a container for every custom
    module that is currently stopped or not yet created and has model
    weights assigned."""
    # Fetch the current list of custom modules from the backend API.
    custom_modules = get_all_modules()
    if not custom_modules:
        return
    # Resolve environment configuration for image build and container run.
    try:
        all_envs, build_args, run_envs = get_env_vars()
    except Exception as e:
        manager_logger.warning(f"Не удалось получить переменные окружения: {e}")
        return
    # Each module record carries (id, title, is_SIZ, status, model).
    processed_modules = []
    for module in custom_modules:
        if module["status"] in ["остановлен", "не создан"] and module["model"]["weights"].strip():
            # Derive the per-module image and container names.
            image_name = f"statanly/{all_envs['PROJECT_NAME']}/ml/custom_module{module['id']}/{all_envs['BUILD_NAME']}:latest"
            container_name = f"{all_envs['COMPOSE_PROJECT_NAME']}_custom_module{module['id']}"
            # NOTE(review): build_args is mutated on each iteration and the
            # process is started before the next mutation, so each child
            # should observe its own tag — confirm under the spawn start method.
            build_args["tag"] = image_name
            # Launch the build/run in a separate process.
            p = Process(target=start_container,
                args=(
                    module['id'],
                    image_name,
                    build_args,
                    container_name,
                    all_envs["GLOBAL_NET_NAME"],
                    run_envs,
                )
            )
            p.start()
            processed_modules.append(p)
    # Wait for all started worker processes.
    for p in processed_modules:
        p.join()
def stop_all_custom_containers():
    """Stop the container of every custom module whose status is running
    ('работает'), one child process per module."""
    modules = get_all_modules()
    if not modules:
        return
    # Resolve environment configuration; bail out quietly if unavailable.
    try:
        all_envs, _, _ = get_env_vars()
    except Exception as e:
        manager_logger.warning(f"Не удалось получить переменные окружения: {e}")
        return
    # Each module record carries (id, title, is_SIZ, status, model).
    workers = []
    for module in modules:
        if module["status"] not in ["работает"]:
            continue
        target_name = f"{all_envs['COMPOSE_PROJECT_NAME']}_custom_module{module['id']}"
        # Stop each container in its own process so slow docker calls overlap.
        worker = Process(target=stop_container, args=(module['id'], target_name))
        worker.start()
        workers.append(worker)
    # Wait for every child to finish.
    for worker in workers:
        worker.join()

View file

@ -0,0 +1,95 @@
import requests
import docker
import os
from ..logger import LoggerFactory
manager_logger = LoggerFactory.get_logger("CustomModuleManager")
def change_module_status(
    module_id: int,
    status: str
):
    """Report a module's new status to the master-node API (fire-and-forget).

    Args:
        module_id: backend identifier of the custom module.
        status: new status string (e.g. "работает", "остановлен").
    """
    # Two fixes: the endpoint reads request.get_json(), so the payload must
    # be sent as JSON, and it looks up data['module_id'] — the original sent
    # the key as 'model_id', so the status change never reached the module.
    _ = requests.post(
        f"http://localhost:{int(os.environ.get('PORT', 5010))}/api/v1.0/interact_with_custom_modules",
        json = {"request_type": "change_status", "module_id": module_id, "status": status}
    )
def build_image(
    client: docker.DockerClient,
    path: str,
    dockerfile: str,
    tag: str,
    buildargs: dict
):
    """Build a Docker image and forward every build-log line to the logger.

    Args:
        client: connected Docker client.
        path: build context directory.
        dockerfile: Dockerfile path relative to the context.
        tag: tag to assign to the built image.
        buildargs: build-time --build-arg values.
    """
    _, build_log = client.images.build(
        path=path,
        dockerfile=dockerfile,
        tag=tag,
        buildargs=buildargs,
        decode=True,
    )
    # The decoded log is a stream of dicts; only 'stream' entries carry text.
    for entry in build_log:
        if 'stream' in entry:
            manager_logger.info(entry['stream'].strip())
def start_container(
    module_id: int,
    image_name: str,
    build_args: dict,
    container_name: str,
    network_name: str,
    env_vars: dict,
    gpus = True,
    detach = True
):
    """Build a module image and run it as a container, reporting status
    transitions back to the master-node API.

    Args:
        module_id: backend identifier of the custom module.
        image_name: tag for the image being built/run.
        build_args: keyword arguments for build_image().
        container_name: name to assign to the running container.
        network_name: name of the Docker network to attach to.
        env_vars: container environment variables.
        gpus: request all available GPUs when True.
        detach: run the container in the background when True.
    """
    # Connect to the local Docker daemon.
    client = docker.from_env()
    # Build the image; the module is optimistically marked running and the
    # status rolled back to "не создан" if the build fails.
    try:
        change_module_status(module_id, status = "работает")
        build_image(client, **build_args)
        manager_logger.info(f"Контейнер '{container_name}' успешно собран")
    except Exception as e:
        change_module_status(module_id, status = "не создан")
        manager_logger.error(f"Ошибка при сборке контейнера: {e}")
        return
    # Request all available GPUs when enabled.
    device_requests = [docker.types.DeviceRequest(count=-1, capabilities=[['gpu']])] if gpus else None
    # Run the container.
    try:
        container = client.containers.run(
            image = image_name,
            name = container_name,
            # BUG fix: docker-py's containers.run expects the network *name*
            # (str); passing the Network object from client.networks.get()
            # is not a valid value for this parameter.
            network = network_name,
            environment = env_vars,
            device_requests=device_requests,
            detach = detach,
        )
        manager_logger.info(f"Контейнер '{container_name}' с ID: {container.short_id} запустился")
    except Exception as e:
        change_module_status(module_id, status = "остановлен")
        manager_logger.error(f"Ошибка при запуске контейнера: {e}")
def stop_container(
    module_id: int,
    name: str
):
    """Stop and remove the named Docker container, then mark the module as
    stopped via the master-node API. Missing containers are logged, not raised."""
    # Connect to the local Docker daemon.
    docker_client = docker.from_env()
    try:
        target = docker_client.containers.get(name)
        target.stop()
        target.remove()
        manager_logger.info(f"Контейнер '{name}' был остановлен")
        change_module_status(module_id, status = "остановлен")
    except docker.errors.NotFound:
        # Already gone — nothing to stop.
        manager_logger.warning(f"Контейнер '{name}' не найден")