first commit
This commit is contained in:
121
config/config.yaml
Normal file
121
config/config.yaml
Normal file
@@ -0,0 +1,121 @@
|
||||
# ServerGuard 配置文件
|
||||
|
||||
# Application settings
app:
  name: "ServerGuard"
  version: "1.0.0"
  description: "服务器硬件健康诊断系统"
|
||||
|
||||
# Logging settings
logging:
  level: INFO  # DEBUG, INFO, WARNING, ERROR
  file: "/var/log/serverguard.log"
  max_size_mb: 100
  backup_count: 5
  console_output: true
|
||||
|
||||
# Report settings
report:
  default_format: "text"  # text, json, csv, html
  output_directory: "./reports"
  include_timestamp: true
  max_report_size_mb: 10
|
||||
|
||||
# Detection module settings
# NOTE(review): indentation is missing in this copy, so which of the following
# sections are children of `modules` cannot be read from the text; confirm
# against the original file.
|
||||
modules:
|
||||
# CPU check settings
# NOTE(review): indentation is missing in this copy; whether check_mce belongs
# directly under cpu or under stress_test cannot be determined here; confirm.
|
||||
cpu:
|
||||
enabled: true
|
||||
temperature_warning: 85 # Temperature warning threshold (degrees Celsius)
|
||||
temperature_critical: 95 # Temperature critical threshold (degrees Celsius)
|
||||
stress_test:
|
||||
duration_seconds: 300 # Stress test duration
|
||||
check_mce: true # Check for MCE errors
|
||||
|
||||
# Memory check settings
# NOTE(review): indentation is missing in this copy. The second `enabled` key
# presumably belongs to memtester (a flat mapping would duplicate the key), but
# the placement of stress_test and check_ecc cannot be determined here; confirm.
|
||||
memory:
|
||||
enabled: true
|
||||
memtester:
|
||||
enabled: true
|
||||
memory_percent: 70 # Percentage of available memory to use for the test
|
||||
stress_test:
|
||||
duration_seconds: 300
|
||||
check_ecc: true # Check for ECC errors
|
||||
|
||||
# Storage check settings
# NOTE(review): indentation is missing in this copy; the keys below presumably
# nest under storage (itself presumably under modules); confirm.
|
||||
storage:
|
||||
enabled: true
|
||||
smart_check: true
|
||||
check_reallocated_sectors: true
|
||||
reallocated_threshold: 1 # Reallocated-sector warning threshold
|
||||
temperature_warning: 60 # Disk temperature warning threshold
|
||||
temperature_critical: 70 # Disk temperature critical threshold
|
||||
run_io_test: false # Whether to run the I/O performance test (time-consuming)
|
||||
io_test_size_mb: 100
|
||||
check_raid: true # Check RAID status
|
||||
|
||||
# Sensor check settings
# NOTE(review): indentation is missing in this copy; the keys below presumably
# nest under sensors (itself presumably under modules); confirm.
|
||||
sensors:
|
||||
enabled: true
|
||||
lm_sensors: true
|
||||
ipmi: true
|
||||
check_fans: true
|
||||
fan_min_rpm: 500 # Minimum fan speed warning threshold
|
||||
voltage_tolerance: 0.1 # Voltage deviation tolerance (as a ratio)
|
||||
|
||||
# GPU check settings
# NOTE(review): indentation is missing in this copy; the keys below presumably
# nest under gpu (itself presumably under modules); confirm.
|
||||
gpu:
|
||||
enabled: true
|
||||
check_nvidia: true
|
||||
check_amd: true
|
||||
check_intel: true
|
||||
temperature_warning: 85
|
||||
|
||||
# Log analysis settings
# NOTE(review): indentation is missing in this copy; the keys below presumably
# nest under log_analyzer (itself presumably under modules); confirm.
|
||||
log_analyzer:
|
||||
enabled: true
|
||||
check_dmesg: true
|
||||
check_journalctl: true
|
||||
max_lines: 5000
|
||||
lookback_days: 7 # Analyze logs from the last N days
|
||||
|
||||
# Alert settings
# NOTE(review): nesting was reconstructed from a copy that had lost its
# indentation. The repeated `enabled` key requires webhook to be its own
# mapping; the grouping of the mail fields under smtp should be confirmed.
alerts:
  enabled: false
  smtp:
    host: ""
    port: 587
    username: ""
    # NOTE(review): avoid committing a real password to version control.
    password: ""
    use_tls: true
    from_address: "serverguard@example.com"
    to_addresses: []

  webhook:
    enabled: false
    url: ""
    headers: {}
|
||||
|
||||
# Alert thresholds
# NOTE(review): indentation is missing in this copy; whether thresholds is a
# top-level key or a child of alerts cannot be determined here; confirm.
|
||||
thresholds:
|
||||
cpu_temperature: 85
|
||||
memory_usage_percent: 90
|
||||
disk_usage_percent: 90
|
||||
hardware_error_count: 1
|
||||
|
||||
# Stress test settings (full diagnostic mode)
# NOTE(review): nesting was reconstructed from a copy that had lost its
# indentation; the repeated enabled/workers/timeout_seconds keys require the
# three sub-mappings below.
stress_test:
  cpu:
    enabled: true
    workers: 0  # 0 means use all cores
    timeout_seconds: 300

  memory:
    enabled: true
    workers: 4
    timeout_seconds: 300

  io:
    enabled: false  # I/O stress testing can be dangerous; disabled by default
    workers: 4
    timeout_seconds: 300
|
||||
Reference in New Issue
Block a user