4.5.3 系统服务管理 #
现代 Linux 系统主要使用 systemd 作为系统和服务管理器。systemd 提供了强大的服务管理功能,包括服务启动、停止、重启、状态监控、日志管理等。本节将详细介绍如何将 Go 程序集成到 systemd 中,创建专业的系统服务。
systemd 基础概念 #
systemd 单元类型 #
systemd 使用单元(Unit)来管理系统资源,常见的单元类型包括:
package main
import (
"fmt"
"os/exec"
"strings"
)
// SystemdUnitType describes one kind of systemd unit for display purposes.
type SystemdUnitType struct {
	// Name is the unit type label, Extension the unit file suffix, and
	// Description a short explanation of what the type is used for.
	Name, Extension, Description string
	// Examples lists sample unit names of this type.
	Examples []string
}
// main prints a catalogue of common systemd unit types and then a short
// summary of the services currently running on this machine.
func main() {
	catalog := []SystemdUnitType{
		{"Service", ".service", "系统服务和应用程序", []string{"nginx.service", "mysql.service", "sshd.service"}},
		{"Socket", ".socket", "套接字激活的服务", []string{"docker.socket", "systemd-journald.socket"}},
		{"Target", ".target", "单元组,类似于运行级别", []string{"multi-user.target", "graphical.target"}},
		{"Timer", ".timer", "定时器,类似于 cron", []string{"logrotate.timer", "systemd-tmpfiles-clean.timer"}},
		{"Mount", ".mount", "文件系统挂载点", []string{"tmp.mount", "home.mount"}},
		{"Path", ".path", "路径监控", []string{"systemd-ask-password-console.path"}},
	}

	fmt.Println("=== systemd 单元类型 ===")
	for i := range catalog {
		entry := &catalog[i]
		examples := strings.Join(entry.Examples, ", ")
		fmt.Printf("\n%s (%s):\n", entry.Name, entry.Extension)
		fmt.Printf(" 描述: %s\n", entry.Description)
		fmt.Printf(" 示例: %s\n", examples)
	}

	// Finish with a live view of what is running right now.
	fmt.Println("\n=== 当前系统服务状态 ===")
	showSystemdStatus()
}
// showSystemdStatus prints the unit name and sub-state of up to five running
// services as reported by `systemctl list-units`.
//
// A trailing "more services" marker is printed only when at least one
// further service exists beyond the five that were shown. If systemctl cannot
// be executed, the error is printed and the function returns.
func showSystemdStatus() {
	const maxShown = 5

	cmd := exec.Command("systemctl", "list-units", "--type=service", "--state=running", "--no-pager", "--no-legend")
	output, err := cmd.Output()
	if err != nil {
		fmt.Printf("获取服务列表失败: %v\n", err)
		return
	}

	shown := 0
	for _, line := range strings.Split(string(output), "\n") {
		// Lines with fewer than four columns (including blank lines) carry no
		// usable unit/state pair and are skipped.
		fields := strings.Fields(line)
		if len(fields) < 4 {
			continue
		}
		if shown == maxShown {
			// A sixth valid entry exists, so the list really was truncated.
			fmt.Println(" ... (更多服务)")
			break
		}
		fmt.Printf(" %s - %s\n", fields[0], fields[3])
		shown++
	}
}
systemd 服务状态 #
systemd 服务具有多种状态,了解这些状态对于服务管理很重要:
package main
import (
"fmt"
"os/exec"
"strings"
)
// ServiceState is one row of the service-state reference table: the state
// name as shown by systemctl, a description, and the colour used to display it.
type ServiceState struct {
	Name, Description, Color string
}
// main prints the reference table of systemd service states and then queries
// the live state of a few well-known services.
func main() {
	states := []ServiceState{
		{Name: "active (running)", Description: "服务正在运行", Color: "绿色"},
		{Name: "active (exited)", Description: "服务已成功执行并退出", Color: "绿色"},
		{Name: "active (waiting)", Description: "服务正在等待事件", Color: "绿色"},
		{Name: "inactive (dead)", Description: "服务未运行", Color: "白色"},
		{Name: "activating (start)", Description: "服务正在启动", Color: "黄色"},
		{Name: "deactivating (stop)", Description: "服务正在停止", Color: "黄色"},
		{Name: "failed", Description: "服务启动失败", Color: "红色"},
		{Name: "maintenance", Description: "服务处于维护模式", Color: "红色"},
	}

	fmt.Println("=== systemd 服务状态 ===")
	for i := 0; i < len(states); i++ {
		row := states[i]
		fmt.Printf("%-25s: %s (%s)\n", row.Name, row.Description, row.Color)
	}

	// Show how the same information is queried for real services.
	fmt.Println("\n=== 服务状态查询示例 ===")
	demonstrateServiceStatus()
}
// demonstrateServiceStatus prints the active state and the boot-time
// enablement of a fixed list of services, one line per service.
//
// systemctl reports states such as "inactive" or "disabled" on stdout while
// exiting non-zero, so the printed value is taken from the output whenever
// there is any. Only when the command yields no output at all (for example
// because systemctl is not installed) is "unknown" shown instead of a blank.
func demonstrateServiceStatus() {
	// query runs `systemctl <verb> <unit>` and returns its trimmed output.
	query := func(verb, unit string) string {
		// The exit status is deliberately not treated as fatal: see above.
		out, _ := exec.Command("systemctl", verb, unit).Output()
		if result := strings.TrimSpace(string(out)); result != "" {
			return result
		}
		return "unknown"
	}

	services := []string{"sshd", "systemd-journald", "dbus"}
	for _, service := range services {
		status := query("is-active", service)
		enabled := query("is-enabled", service)
		fmt.Printf("服务: %-20s 状态: %-10s 开机启动: %s\n", service, status, enabled)
	}
}
创建 systemd 服务 #
基本服务配置 #
让我们创建一个完整的 Go 应用程序并将其配置为 systemd 服务:
// main.go - Go 应用程序
package main
import (
"context"
"encoding/json"
"fmt"
"log"
"net/http"
"os"
"os/signal"
"syscall"
"time"
)
// Config holds the service settings decoded from the JSON configuration file.
type Config struct {
	// Port is the TCP port the HTTP server listens on.
	Port int `json:"port"`
	// LogLevel is the configured logging level name.
	LogLevel string `json:"log_level"`
	// DataDir is the directory configured for service data.
	DataDir string `json:"data_dir"`
}
// Application bundles everything the running service needs.
type Application struct {
	// config is the configuration loaded at construction time.
	config *Config
	// server is the HTTP server; it is assigned by Start.
	server *http.Server
	// logger receives all service log output.
	logger *log.Logger
}
// NewApplication loads the configuration from configFile and returns an
// Application that logs to standard output with the "[myservice] " prefix.
// Any error from loading the configuration is returned unchanged.
func NewApplication(configFile string) (*Application, error) {
	cfg, err := loadConfig(configFile)
	if err != nil {
		return nil, err
	}

	app := &Application{config: cfg}
	app.logger = log.New(os.Stdout, "[myservice] ", log.LstdFlags)
	return app, nil
}
// loadConfig reads configFile, decodes it as JSON into a Config and fills in
// defaults for any field left at its zero value (port 8080, log level "INFO",
// data directory /var/lib/myservice).
//
// Errors are wrapped with the file name so the caller can tell which file
// failed and why; the underlying error stays reachable via errors.Is/As.
// A port outside 0-65535 is rejected here instead of failing later when the
// server tries to listen.
func loadConfig(configFile string) (*Config, error) {
	data, err := os.ReadFile(configFile)
	if err != nil {
		return nil, fmt.Errorf("读取配置文件 %q 失败: %w", configFile, err)
	}

	var config Config
	if err := json.Unmarshal(data, &config); err != nil {
		return nil, fmt.Errorf("解析配置文件 %q 失败: %w", configFile, err)
	}
	if config.Port < 0 || config.Port > 65535 {
		return nil, fmt.Errorf("配置文件 %q 中的端口无效: %d", configFile, config.Port)
	}

	// Apply defaults for anything the file left unset.
	if config.Port == 0 {
		config.Port = 8080
	}
	if config.LogLevel == "" {
		config.LogLevel = "INFO"
	}
	if config.DataDir == "" {
		config.DataDir = "/var/lib/myservice"
	}
	return &config, nil
}
// Start builds the HTTP server, begins serving in a background goroutine and
// then blocks until waitForSignal returns, passing on its result.
//
// If the listener fails for any reason other than a regular shutdown, the
// background goroutine logs the error and terminates the process.
func (app *Application) Start() error {
	app.logger.Printf("启动服务,端口: %d", app.config.Port)

	mux := http.NewServeMux()
	mux.HandleFunc("/", app.handleRoot)
	mux.HandleFunc("/health", app.handleHealth)
	mux.HandleFunc("/status", app.handleStatus)

	app.server = &http.Server{
		Addr:    fmt.Sprintf(":%d", app.config.Port),
		Handler: mux,
		// Bound the time a client may take to send its request headers so
		// stalled connections cannot hold server resources indefinitely.
		ReadHeaderTimeout: 10 * time.Second,
	}

	go func() {
		if err := app.server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			app.logger.Fatalf("服务器启动失败: %v", err)
		}
	}()
	app.logger.Println("服务启动成功")

	return app.waitForSignal()
}
// handleRoot answers with a short banner and the current local time.
func (app *Application) handleRoot(w http.ResponseWriter, r *http.Request) {
	now := time.Now().Format("2006-01-02 15:04:05")
	fmt.Fprintf(w, "MyService is running!\nTime: %s\n", now)
}
// handleHealth is the health-check endpoint: it always replies 200 with the
// body "OK".
func (app *Application) handleHealth(w http.ResponseWriter, r *http.Request) {
	w.WriteHeader(http.StatusOK)
	fmt.Fprint(w, "OK")
}
// handleStatus replies with a JSON document describing the running service:
// name, version, uptime since process start, process ID and the active
// configuration. A failure to write the response is logged rather than
// silently dropped.
func (app *Application) handleStatus(w http.ResponseWriter, r *http.Request) {
	status := map[string]interface{}{
		"service": "myservice",
		"version": "1.0.0",
		"uptime":  time.Since(startTime).String(),
		"pid":     os.Getpid(),
		"config":  app.config,
	}

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(status); err != nil {
		app.logger.Printf("写入状态响应失败: %v", err)
	}
}
// waitForSignal blocks until SIGINT or SIGTERM arrives, then performs a
// graceful shutdown and returns its result. SIGHUP is only logged as a
// configuration-reload request; the loop then keeps waiting.
func (app *Application) waitForSignal() error {
	incoming := make(chan os.Signal, 1)
	signal.Notify(incoming, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP)

	for {
		received := <-incoming
		app.logger.Printf("接收到信号: %s", received)

		if received == syscall.SIGHUP {
			app.logger.Println("重新加载配置...")
			// Configuration reload logic can be added here.
			continue
		}
		if received == syscall.SIGINT || received == syscall.SIGTERM {
			app.logger.Println("开始优雅关闭...")
			return app.shutdown()
		}
	}
}
// shutdown stops the HTTP server gracefully, allowing up to 30 seconds for
// it to finish. It returns the error from the server's Shutdown call, if any.
func (app *Application) shutdown() error {
	deadline, release := context.WithTimeout(context.Background(), 30*time.Second)
	defer release()

	app.logger.Println("关闭 HTTP 服务器...")
	err := app.server.Shutdown(deadline)
	if err == nil {
		app.logger.Println("服务已关闭")
		return nil
	}

	app.logger.Printf("服务器关闭失败: %v", err)
	return err
}
// startTime records when the process was initialised; the status endpoint
// uses it to report uptime.
var startTime = time.Now()

// main expects the configuration file path as the first argument, builds the
// application from it and runs it until shutdown.
func main() {
	args := os.Args
	if len(args) < 2 {
		fmt.Printf("用法: %s <config-file>\n", args[0])
		os.Exit(1)
	}

	app, err := NewApplication(args[1])
	if err != nil {
		log.Fatalf("创建应用程序失败: %v", err)
	}

	err = app.Start()
	if err != nil {
		log.Fatalf("应用程序运行失败: %v", err)
	}
}
systemd 服务单元文件 #
创建 systemd 服务单元文件:
# /etc/systemd/system/myservice.service
# Basic unit: runs the myservice binary under a dedicated account, restarts
# it whenever it exits, and sends its output to the journal.
[Unit]
Description=My Go Service
Documentation=https://github.com/mycompany/myservice
After=network.target
Wants=network.target
[Service]
Type=simple
User=myservice
Group=myservice
WorkingDirectory=/opt/myservice
# The only argument is the path to the JSON configuration file.
ExecStart=/opt/myservice/bin/myservice /etc/myservice/config.json
# Reload is requested by sending SIGHUP to the main process.
ExecReload=/bin/kill -HUP $MAINPID
Restart=always
RestartSec=5
StandardOutput=journal
StandardError=journal
SyslogIdentifier=myservice
# Security hardening
NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=/var/lib/myservice /var/log/myservice
# Resource limits
LimitNOFILE=65536
LimitNPROC=4096
# Environment variables
Environment=GO_ENV=production
Environment=LOG_LEVEL=info
[Install]
WantedBy=multi-user.target
服务安装脚本 #
创建服务安装和管理脚本:
#!/bin/bash
# install-service.sh
# Abort the script as soon as any command fails.
set -e
# Service identity and installation layout used by every function below.
SERVICE_NAME="myservice"
SERVICE_USER="myservice"
SERVICE_GROUP="myservice"
INSTALL_DIR="/opt/myservice"
CONFIG_DIR="/etc/myservice"
DATA_DIR="/var/lib/myservice"
LOG_DIR="/var/log/myservice"
# ANSI colour escape sequences used by the log helpers
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Logging helpers: each prints its first argument to stdout behind a
# coloured severity tag.
log_info() {
    printf '%b\n' "${GREEN}[INFO]${NC} $1"
}

log_warn() {
    printf '%b\n' "${YELLOW}[WARN]${NC} $1"
}

log_error() {
    printf '%b\n' "${RED}[ERROR]${NC} $1"
}
# Exit with status 1 unless the script is running as root.
check_root() {
    (( EUID == 0 )) && return 0
    log_error "此脚本需要 root 权限运行"
    exit 1
}
# Ensure the service group and the service user both exist.
#
# The group is created explicitly because later steps run
# `chown "$SERVICE_USER:$SERVICE_GROUP"`, which fails if the group is missing
# (not every useradd configuration creates a matching group, and the two
# names are configured independently). The user is a system account without
# a home directory or login shell.
create_user() {
    if ! getent group "$SERVICE_GROUP" &>/dev/null; then
        log_info "创建用户组: $SERVICE_GROUP"
        groupadd --system "$SERVICE_GROUP"
    fi

    if id "$SERVICE_USER" &>/dev/null; then
        log_info "用户 $SERVICE_USER 已存在"
        return 0
    fi

    log_info "创建用户: $SERVICE_USER"
    useradd --system --gid "$SERVICE_GROUP" --no-create-home --shell /bin/false "$SERVICE_USER"
}
# Create the install, config, data and log directories, hand the install,
# data and log trees to the service account, and set directory modes.
create_directories() {
    log_info "创建目录结构..."
    mkdir -p "$INSTALL_DIR/bin" "$CONFIG_DIR" "$DATA_DIR" "$LOG_DIR"

    # Ownership: everything except the config directory belongs to the service.
    local owned_dir
    for owned_dir in "$INSTALL_DIR" "$DATA_DIR" "$LOG_DIR"; do
        chown -R "$SERVICE_USER:$SERVICE_GROUP" "$owned_dir"
    done

    # Modes: install/config are world-readable, data/log are group-only.
    chmod 755 "$INSTALL_DIR" "$CONFIG_DIR"
    chmod 750 "$DATA_DIR" "$LOG_DIR"
}
# Copy the myservice binary from the current directory into the install
# tree, make it executable and give it to the service account. Exits with
# status 1 when the binary is not present.
install_binary() {
    local source_file="myservice"
    local target_file="$INSTALL_DIR/bin/myservice"

    [[ -f "$source_file" ]] || {
        log_error "找不到二进制文件 myservice"
        exit 1
    }

    log_info "安装二进制文件..."
    cp "$source_file" "$target_file"
    chmod 755 "$target_file"
    chown "$SERVICE_USER:$SERVICE_GROUP" "$target_file"
}
# Write the default JSON configuration, owned by root and world-readable.
#
# An existing configuration file is left untouched so that re-running the
# installer (for example to upgrade the binary) does not discard local
# changes.
install_config() {
    local config_file="$CONFIG_DIR/config.json"

    if [[ -f "$config_file" ]]; then
        log_warn "配置文件已存在,保留现有配置: $config_file"
        return 0
    fi

    log_info "安装配置文件..."
    cat > "$config_file" << EOF
{
  "port": 8080,
  "log_level": "info",
  "data_dir": "$DATA_DIR"
}
EOF
    chmod 644 "$config_file"
    chown root:root "$config_file"
}
# Write the systemd unit file for the service and reload systemd.
# The heredoc below is expanded by the shell: $SERVICE_* and $*_DIR are
# substituted at install time, while \$MAINPID is escaped so that the literal
# text $MAINPID ends up in the unit file.
install_systemd_service() {
log_info "安装 systemd 服务文件..."
cat > "/etc/systemd/system/$SERVICE_NAME.service" << EOF
[Unit]
Description=My Go Service
Documentation=https://github.com/mycompany/myservice
After=network.target
Wants=network.target
[Service]
Type=simple
User=$SERVICE_USER
Group=$SERVICE_GROUP
WorkingDirectory=$INSTALL_DIR
ExecStart=$INSTALL_DIR/bin/myservice $CONFIG_DIR/config.json
ExecReload=/bin/kill -HUP \$MAINPID
Restart=always
RestartSec=5
StandardOutput=journal
StandardError=journal
SyslogIdentifier=$SERVICE_NAME
# 安全设置
NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=$DATA_DIR $LOG_DIR
# 资源限制
LimitNOFILE=65536
LimitNPROC=4096
# 环境变量
Environment=GO_ENV=production
Environment=LOG_LEVEL=info
[Install]
WantedBy=multi-user.target
EOF
# Make systemd pick up the new unit file
systemctl daemon-reload
}
# Enable the service at boot, start it, and verify after two seconds that it
# is active. Exits with status 1 when the service did not come up.
#
# `systemctl status` returns non-zero for a unit that is not running; because
# the script runs under `set -e`, that status is neutralised with `|| true`
# so the diagnostic output never changes the script's own exit path.
enable_service() {
    log_info "启用服务..."
    systemctl enable "$SERVICE_NAME"

    log_info "启动服务..."
    systemctl start "$SERVICE_NAME"

    # Give the process a moment to either settle or crash.
    sleep 2

    if systemctl is-active --quiet "$SERVICE_NAME"; then
        log_info "服务启动成功"
        systemctl status "$SERVICE_NAME" --no-pager || true
        return 0
    fi

    log_error "服务启动失败"
    systemctl status "$SERVICE_NAME" --no-pager || true
    exit 1
}
# Remove the service completely: stop and disable it, delete the unit file,
# delete the install/config/data/log directories, and remove the service user.
uninstall_service() {
    log_info "卸载服务..."
    local unit_file="/etc/systemd/system/$SERVICE_NAME.service"

    # Stop and disable only when there is something to stop or disable.
    if systemctl is-active --quiet "$SERVICE_NAME"; then
        systemctl stop "$SERVICE_NAME"
    fi
    if systemctl is-enabled --quiet "$SERVICE_NAME"; then
        systemctl disable "$SERVICE_NAME"
    fi

    rm -f "$unit_file"
    systemctl daemon-reload

    # Remove every directory the installer created.
    local doomed_dir
    for doomed_dir in "$INSTALL_DIR" "$CONFIG_DIR" "$DATA_DIR" "$LOG_DIR"; do
        rm -rf "$doomed_dir"
    done

    if id "$SERVICE_USER" &>/dev/null; then
        userdel "$SERVICE_USER"
    fi

    log_info "服务卸载完成"
}
# Print the systemd status of the service followed by its last ten journal
# lines.
#
# `systemctl status` exits non-zero for a stopped or failed unit; under
# `set -e` that would end the script before the logs are printed, which is
# exactly when they are most useful, so the status is neutralised.
show_status() {
    echo "=== 服务状态 ==="
    systemctl status "$SERVICE_NAME" --no-pager || true
    echo -e "\n=== 服务日志 (最近10行) ==="
    journalctl -u "$SERVICE_NAME" -n 10 --no-pager || true
}
# Entry point: dispatch on the first argument (install, uninstall or status;
# default install). Any other value prints usage and exits with status 1.
main() {
    local action="${1:-install}"
    local step

    case "$action" in
        install)
            # Installation steps, run strictly in this order.
            for step in check_root create_user create_directories \
                install_binary install_config install_systemd_service enable_service; do
                "$step"
            done
            log_info "安装完成!"
            ;;
        uninstall)
            check_root
            uninstall_service
            ;;
        status)
            show_status
            ;;
        *)
            echo "用法: $0 {install|uninstall|status}"
            exit 1
            ;;
    esac
}

main "$@"
高级服务配置 #
服务依赖和启动顺序 #
# /etc/systemd/system/myservice.service
# Advanced unit: declares ordering/dependency relations, readiness
# notification, restart policy, sandboxing and resource limits.
[Unit]
Description=My Go Service with Dependencies
Documentation=https://github.com/mycompany/myservice
# Network dependencies
After=network.target network-online.target
Wants=network-online.target
# Database dependencies
After=mysql.service postgresql.service
Wants=mysql.service
# Redis dependency
After=redis.service
Requires=redis.service
# Order this service before multi-user.target
Before=multi-user.target
# Start rate limiting: at most 3 starts within 60 seconds. These settings
# belong in [Unit]; StartLimitInterval= in [Service] is the legacy spelling.
StartLimitIntervalSec=60
StartLimitBurst=3
[Service]
Type=notify
User=myservice
Group=myservice
WorkingDirectory=/opt/myservice
# Start command
ExecStart=/opt/myservice/bin/myservice /etc/myservice/config.json
# Pre-start configuration check
ExecStartPre=/opt/myservice/bin/myservice --check-config /etc/myservice/config.json
# Reload configuration
ExecReload=/bin/kill -HUP $MAINPID
# Stop command
ExecStop=/bin/kill -TERM $MAINPID
# Restart policy
Restart=always
RestartSec=10
# Timeouts
TimeoutStartSec=60
TimeoutStopSec=30
# Output
StandardOutput=journal
StandardError=journal
SyslogIdentifier=myservice
# Security hardening
NoNewPrivileges=true
PrivateTmp=true
PrivateDevices=true
ProtectSystem=strict
ProtectHome=true
ProtectKernelTunables=true
ProtectControlGroups=true
RestrictRealtime=true
RestrictSUIDSGID=true
# Networking
PrivateNetwork=false
RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX
# File system access
ReadWritePaths=/var/lib/myservice /var/log/myservice /tmp
ReadOnlyPaths=/etc/myservice
# Resource limits
LimitNOFILE=65536
LimitNPROC=4096
LimitCORE=0
# Environment variables
Environment=GO_ENV=production
Environment=LOG_LEVEL=info
EnvironmentFile=-/etc/myservice/environment
[Install]
WantedBy=multi-user.target
Also=myservice-backup.timer
支持 systemd 通知的 Go 服务 #
package main
import (
	"context"
	"encoding/json"
	"fmt"
	"log"
	"net"
	"net/http"
	"os"
	"os/signal"
	"runtime"
	"syscall"
	"time"

	"github.com/coreos/go-systemd/v22/daemon"
	"github.com/coreos/go-systemd/v22/journal"
)
// SystemdService is an HTTP service that integrates with systemd's
// readiness and watchdog notifications.
type SystemdService struct {
	// server is the HTTP server; it is assigned by Start.
	server *http.Server
	// logger writes the plain-text copy of every log message.
	logger *log.Logger
	// startTime is when the service value was created; used for uptime.
	startTime time.Time
	// watchdog drives periodic keep-alive notifications; it stays nil when
	// the watchdog is not enabled.
	watchdog *time.Ticker
}
// NewSystemdService returns a service whose logger writes unprefixed,
// unstamped lines to standard output and whose start time is now.
func NewSystemdService() *SystemdService {
	svc := new(SystemdService)
	svc.logger = log.New(os.Stdout, "", 0)
	svc.startTime = time.Now()
	return svc
}
// Start binds the listening socket, serves HTTP in the background, tells
// systemd the service is ready, starts the watchdog and then blocks until
// waitForSignal returns, passing on its result.
//
// The socket is bound before READY is sent, so a port conflict is returned
// as an error (wrapping the underlying cause) instead of being reported
// after systemd already considers the service started.
func (s *SystemdService) Start() error {
	s.logInfo("服务启动中...")

	mux := http.NewServeMux()
	mux.HandleFunc("/", s.handleRoot)
	mux.HandleFunc("/health", s.handleHealth)
	mux.HandleFunc("/metrics", s.handleMetrics)

	s.server = &http.Server{
		Addr:    ":8080",
		Handler: mux,
		// Bound the time a client may take to send its request headers so
		// stalled connections cannot hold server resources indefinitely.
		ReadHeaderTimeout: 10 * time.Second,
	}

	listener, err := net.Listen("tcp", s.server.Addr)
	if err != nil {
		return fmt.Errorf("监听端口失败: %w", err)
	}

	go func() {
		if err := s.server.Serve(listener); err != nil && err != http.ErrServerClosed {
			s.logError("服务器运行失败: %v", err)
		}
	}()

	// Tell systemd the service is ready. supported is false when the process
	// was not started with a notification socket.
	if supported, err := daemon.SdNotify(false, daemon.SdNotifyReady); err != nil {
		s.logError("通知 systemd 失败: %v", err)
	} else if supported {
		s.logInfo("已通知 systemd 服务就绪")
	}

	s.startWatchdog()
	s.logInfo("服务启动完成,监听端口 %s", s.server.Addr)

	return s.waitForSignal()
}
// startWatchdog starts a background loop that sends keep-alive notifications
// to systemd at half the configured watchdog interval, but only while
// isHealthy reports true. When the service is unhealthy no keep-alive is
// sent, leaving it to systemd to act on the missed deadline.
//
// Nothing is started when the watchdog interval cannot be read or is zero
// (watchdog not enabled).
func (s *SystemdService) startWatchdog() {
	interval, err := daemon.SdWatchdogEnabled(false)
	if err != nil {
		s.logError("获取 watchdog 间隔失败: %v", err)
		return
	}
	if interval == 0 {
		s.logInfo("Watchdog 未启用")
		return
	}

	s.logInfo("启动 watchdog,间隔: %v", interval)

	// Notify twice per interval so a single late tick does not trip the
	// watchdog.
	s.watchdog = time.NewTicker(interval / 2)

	go func() {
		for range s.watchdog.C {
			if !s.isHealthy() {
				// Deliberately send nothing so systemd notices the gap.
				s.logError("服务健康检查失败")
				continue
			}
			if _, err := daemon.SdNotify(false, daemon.SdNotifyWatchdog); err != nil {
				s.logError("发送 watchdog 通知失败: %v", err)
			}
		}
	}()
}
// isHealthy reports whether the service's own /health endpoint answers with
// HTTP 200 within five seconds. More specific checks (database connections,
// key components) can be added here.
func (s *SystemdService) isHealthy() bool {
	probe := http.Client{Timeout: 5 * time.Second}

	resp, err := probe.Get("http://localhost:8080/health")
	if err == nil {
		defer resp.Body.Close()
		return resp.StatusCode == http.StatusOK
	}
	return false
}
// handleRoot answers with a short banner and the time elapsed since the
// service value was created.
func (s *SystemdService) handleRoot(w http.ResponseWriter, r *http.Request) {
	uptime := time.Since(s.startTime)
	fmt.Fprintf(w, "SystemdService is running!\nUptime: %v\n", uptime)
}
// handleHealth is the health-check endpoint: it always replies 200 with the
// body "OK".
func (s *SystemdService) handleHealth(w http.ResponseWriter, r *http.Request) {
	w.WriteHeader(http.StatusOK)
	fmt.Fprint(w, "OK")
}
// handleMetrics replies with a JSON document containing the uptime in
// seconds, the process ID and the current number of goroutines. A failure to
// write the response is logged rather than silently dropped.
//
// This handler needs the encoding/json and runtime packages in the file's
// import block.
func (s *SystemdService) handleMetrics(w http.ResponseWriter, r *http.Request) {
	metrics := map[string]interface{}{
		"uptime_seconds": time.Since(s.startTime).Seconds(),
		"pid":            os.Getpid(),
		"goroutines":     runtime.NumGoroutine(),
	}

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(metrics); err != nil {
		s.logError("写入指标响应失败: %v", err)
	}
}
// waitForSignal blocks until SIGINT or SIGTERM arrives, then performs a
// graceful shutdown and returns its result. SIGHUP triggers a configuration
// reload, after which the loop keeps waiting.
func (s *SystemdService) waitForSignal() error {
	incoming := make(chan os.Signal, 1)
	signal.Notify(incoming, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP)

	for {
		received := <-incoming
		s.logInfo("接收到信号: %s", received)

		if received == syscall.SIGHUP {
			s.logInfo("重新加载配置...")
			s.reloadConfig()
			continue
		}
		if received == syscall.SIGINT || received == syscall.SIGTERM {
			s.logInfo("开始优雅关闭...")
			return s.shutdown()
		}
	}
}
// shutdown tells systemd the service is stopping, halts the watchdog ticker
// if one is running, and stops the HTTP server gracefully with a 30-second
// limit. It returns the error from the server's Shutdown call, if any.
func (s *SystemdService) shutdown() error {
	daemon.SdNotify(false, daemon.SdNotifyStopping)

	if ticker := s.watchdog; ticker != nil {
		ticker.Stop()
	}

	deadline, release := context.WithTimeout(context.Background(), 30*time.Second)
	defer release()

	s.logInfo("关闭 HTTP 服务器...")
	err := s.server.Shutdown(deadline)
	if err == nil {
		s.logInfo("服务已关闭")
		return nil
	}

	s.logError("服务器关闭失败: %v", err)
	return err
}
// reloadConfig performs a configuration reload bracketed by systemd
// notifications: RELOADING before the work starts and READY once it is done.
// Failures to deliver either notification are logged.
func (s *SystemdService) reloadConfig() {
	s.logInfo("重新加载配置...")

	if _, err := daemon.SdNotify(false, daemon.SdNotifyReloading); err != nil {
		s.logError("通知 systemd 开始重载失败: %v", err)
	}

	// The actual reload logic belongs here, between the two notifications.
	time.Sleep(1 * time.Second) // stands in for the time a real reload takes

	if _, err := daemon.SdNotify(false, daemon.SdNotifyReady); err != nil {
		s.logError("通知 systemd 重载完成失败: %v", err)
	}
}
// 日志方法
// logInfo formats a message and records it at info priority.
//
// The message is always offered to the systemd journal. It is additionally
// written to the plain logger unless stdout is itself connected to the
// journal (systemd sets JOURNAL_STREAM in that case), which would store the
// same message twice. If the journal rejects the message, the plain logger
// is used as a fallback so nothing is lost.
func (s *SystemdService) logInfo(format string, args ...interface{}) {
	msg := fmt.Sprintf(format, args...)

	err := journal.Send(msg, journal.PriInfo, nil)
	if err != nil || os.Getenv("JOURNAL_STREAM") == "" {
		s.logger.Printf("[INFO] %s", msg)
	}
}
// logError formats a message and records it at error priority.
//
// The message is always offered to the systemd journal. It is additionally
// written to the plain logger unless stdout is itself connected to the
// journal (systemd sets JOURNAL_STREAM in that case), which would store the
// same message twice. If the journal rejects the message, the plain logger
// is used as a fallback so nothing is lost.
func (s *SystemdService) logError(format string, args ...interface{}) {
	msg := fmt.Sprintf(format, args...)

	err := journal.Send(msg, journal.PriErr, nil)
	if err != nil || os.Getenv("JOURNAL_STREAM") == "" {
		s.logger.Printf("[ERROR] %s", msg)
	}
}
// main creates the service and runs it until shutdown, exiting with a fatal
// log message when Start reports an error.
func main() {
	err := NewSystemdService().Start()
	if err != nil {
		log.Fatalf("服务启动失败: %v", err)
	}
}
带 Watchdog 的服务配置 #
# /etc/systemd/system/myservice.service
# Unit with watchdog supervision: the service must send a keep-alive at least
# every WatchdogSec seconds.
[Unit]
Description=My Go Service with Watchdog
After=network.target
# Start rate limiting: at most 3 starts within 60 seconds. These settings
# belong in [Unit]; StartLimitInterval= in [Service] is the legacy spelling.
StartLimitIntervalSec=60
StartLimitBurst=3
[Service]
Type=notify
User=myservice
Group=myservice
WorkingDirectory=/opt/myservice
ExecStart=/opt/myservice/bin/myservice
Restart=always
RestartSec=10
# Watchdog configuration
WatchdogSec=30
NotifyAccess=main
[Install]
WantedBy=multi-user.target
服务监控和日志 #
日志管理 #
package main
import (
"fmt"
"log/syslog"
"os"
"os/exec"
"strings"
)
// LogManager writes messages to the system log on behalf of one service and
// reads that service's entries back from the journal.
type LogManager struct {
	// serviceName is used both as the syslog tag and as the unit name for
	// journalctl/systemctl queries.
	serviceName string
	// syslogger is the open connection to the system log.
	syslogger *syslog.Writer
}
// NewLogManager opens a system log connection tagged with serviceName
// (daemon facility, info priority by default) and wraps it in a LogManager.
// The error from opening the connection is returned unchanged.
func NewLogManager(serviceName string) (*LogManager, error) {
	writer, err := syslog.New(syslog.LOG_INFO|syslog.LOG_DAEMON, serviceName)
	if err != nil {
		return nil, err
	}

	manager := &LogManager{serviceName: serviceName, syslogger: writer}
	return manager, nil
}
// LogInfo records message at info severity. The write error is discarded.
func (lm *LogManager) LogInfo(message string) {
	_ = lm.syslogger.Info(message)
}

// LogError records message at error severity. The write error is discarded.
func (lm *LogManager) LogError(message string) {
	_ = lm.syslogger.Err(message)
}

// LogWarning records message at warning severity. The write error is
// discarded.
func (lm *LogManager) LogWarning(message string) {
	_ = lm.syslogger.Warning(message)
}
// GetLogs returns the most recent journal entries for the service, one
// element per line, as printed by `journalctl -u <service> -n <lines>`.
//
// The trailing newline of the command output is removed first, so the result
// does not end with an empty element; empty output yields a nil slice.
// Command failures are wrapped with the service name.
func (lm *LogManager) GetLogs(lines int) ([]string, error) {
	cmd := exec.Command("journalctl", "-u", lm.serviceName, "-n", fmt.Sprintf("%d", lines), "--no-pager")
	output, err := cmd.Output()
	if err != nil {
		return nil, fmt.Errorf("读取 %s 的日志失败: %w", lm.serviceName, err)
	}

	text := strings.TrimRight(string(output), "\n")
	if text == "" {
		return nil, nil
	}
	return strings.Split(text, "\n"), nil
}
// FollowLogs streams the service's journal to this process's stdout and
// stderr via `journalctl -f`, blocking until that command exits, and returns
// its result.
func (lm *LogManager) FollowLogs() error {
	follower := exec.Command("journalctl", "-u", lm.serviceName, "-f")
	follower.Stdout, follower.Stderr = os.Stdout, os.Stderr
	return follower.Run()
}
// RotateLogs asks the service to rotate its logs by sending it the USR1
// signal through `systemctl kill`. Failures are wrapped with the service
// name.
//
// NOTE(review): this only works if the target service installs a USR1
// handler. The sample services in this document register handlers for
// INT/TERM/HUP only, and an unhandled USR1 presumably terminates a Go
// process. Confirm the service handles USR1 before using this.
func (lm *LogManager) RotateLogs() error {
	cmd := exec.Command("systemctl", "kill", "-s", "USR1", lm.serviceName)
	if err := cmd.Run(); err != nil {
		return fmt.Errorf("向 %s 发送 USR1 信号失败: %w", lm.serviceName, err)
	}
	return nil
}
// main demonstrates the log manager: it writes one message per severity and
// then prints the ten most recent journal lines for the service.
func main() {
	lm, err := NewLogManager("myservice")
	if err != nil {
		fmt.Printf("创建日志管理器失败: %v\n", err)
		return
	}
	defer lm.syslogger.Close()

	fmt.Println("=== 日志管理示例 ===")

	// One entry at each severity.
	lm.LogInfo("服务启动")
	lm.LogWarning("这是一个警告")
	lm.LogError("这是一个错误")

	recent, err := lm.GetLogs(10)
	if err != nil {
		fmt.Printf("获取日志失败: %v\n", err)
		return
	}

	fmt.Println("\n最近的日志:")
	for _, entry := range recent {
		if strings.TrimSpace(entry) == "" {
			continue
		}
		fmt.Println(entry)
	}
}
服务监控脚本 #
#!/bin/bash
# monitor-service.sh
# Monitoring configuration: the unit to watch, where alerts are mailed, and
# the monitor's own log file.
SERVICE_NAME="myservice"
# Placeholder address; replace with the real alert recipient.
ALERT_EMAIL="admin@example.com"
LOG_FILE="/var/log/service-monitor.log"
# Append a timestamped line containing the first argument to $LOG_FILE.
log_message() {
    local stamp
    stamp="$(date '+%Y-%m-%d %H:%M:%S')"
    printf '%s - %s\n' "$stamp" "$1" >> "$LOG_FILE"
}
# Return 0 when systemd reports the service as active, 1 otherwise.
check_service_status() {
    systemctl is-active --quiet "$SERVICE_NAME" && return 0
    return 1
}
# Return 0 when the service's /health endpoint answers with HTTP 200,
# non-zero otherwise.
#
# The request is capped at five seconds so an unresponsive service cannot
# stall the monitoring loop. The variable is declared separately from the
# assignment so `local` does not mask the command's exit status.
check_service_health() {
    local health_url="http://localhost:8080/health"
    local response

    response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 "$health_url" 2>/dev/null)
    [[ "$response" == "200" ]]
}
# Print the body of the service's /metrics endpoint to stdout.
# The request is capped at five seconds so an unresponsive service cannot
# stall the caller; on failure nothing is printed.
get_service_metrics() {
    local metrics_url="http://localhost:8080/metrics"
    curl -s --max-time 5 "$metrics_url" 2>/dev/null
}
# Restart the service, wait five seconds, and report whether systemd then
# considers it active. Returns 0 on success and 1 on failure; both outcomes
# are written to the monitor log.
restart_service() {
    log_message "重启服务: $SERVICE_NAME"
    systemctl restart "$SERVICE_NAME"

    # Give the service time to come up before judging the result.
    sleep 5

    if ! check_service_status; then
        log_message "服务重启失败"
        return 1
    fi

    log_message "服务重启成功"
    return 0
}
# Mail an alert to $ALERT_EMAIL.
#   $1 - subject line
#   $2 - message body
# Returns 0 when the mail command accepted the message and 1 otherwise. The
# monitor log records what actually happened, so a missing or failing mail
# command is no longer logged as a sent alert.
send_alert() {
    local subject="$1"
    local message="$2"

    if ! command -v mail >/dev/null 2>&1; then
        log_message "无法发送告警(未找到 mail 命令): $subject"
        return 1
    fi

    if printf '%s\n' "$message" | mail -s "$subject" "$ALERT_EMAIL"; then
        log_message "发送告警: $subject"
        return 0
    fi

    log_message "发送告警失败: $subject"
    return 1
}
# Main monitoring loop: every 60 seconds check that the service is active
# and healthy, restart it when it is not, and log its metrics when it is.
#
# Both failure modes (not running, failing the health check) are handled the
# same way. Each restart attempt counts towards the limit and the counter is
# cleared only after a fully healthy check, so a service that keeps coming
# back unhealthy eventually triggers the "needs manual intervention" alert
# and ends the loop instead of being restarted forever.
monitor_service() {
    local restart_count=0
    local max_restarts=3
    local problem metrics

    while true; do
        problem=""
        if ! check_service_status; then
            problem="未运行"
        elif ! check_service_health; then
            problem="健康检查失败"
        fi

        if [[ -z "$problem" ]]; then
            # Healthy: clear the failure streak and record the metrics.
            restart_count=0
            metrics=$(get_service_metrics)
            if [[ -n "$metrics" ]]; then
                log_message "服务指标: $metrics"
            fi
        else
            log_message "服务 $SERVICE_NAME $problem"

            if [[ $restart_count -ge $max_restarts ]]; then
                send_alert "服务持续失败" "服务 $SERVICE_NAME 已达到最大重启次数,需要人工干预"
                break
            fi

            restart_count=$((restart_count + 1))
            if ! restart_service; then
                send_alert "服务重启失败" "服务 $SERVICE_NAME 重启失败,尝试次数: $restart_count"
            fi
        fi

        # Wait for the next check.
        sleep 60
    done
}
# Print a full report: systemd status, health-check result, current metrics
# and the last ten journal lines.
show_status() {
    local health="异常"

    echo "=== 服务状态 ==="
    systemctl status "$SERVICE_NAME" --no-pager

    echo -e "\n=== 健康检查 ==="
    check_service_health && health="正常"
    echo "健康状态: $health"

    echo -e "\n=== 服务指标 ==="
    get_service_metrics

    echo -e "\n=== 最近日志 ==="
    journalctl -u "$SERVICE_NAME" -n 10 --no-pager
}
# Entry point: dispatch on the first argument (monitor, status or restart;
# default monitor). Any other value prints usage and exits with status 1.
main() {
    local action="${1:-monitor}"

    case "$action" in
        monitor)
            log_message "开始监控服务: $SERVICE_NAME"
            monitor_service
            ;;
        status)
            show_status
            ;;
        restart)
            restart_service
            ;;
        *)
            echo "用法: $0 {monitor|status|restart}"
            exit 1
            ;;
    esac
}

main "$@"
小结 #
系统服务管理是现代应用部署的重要环节。通过本节学习,我们掌握了:
- systemd 基础:了解了 systemd 的基本概念和服务状态
- 服务配置:学会了创建和配置 systemd 服务单元文件
- 高级特性:掌握了服务依赖、安全设置、资源限制等高级配置
- 监控管理:实现了服务监控、日志管理和自动化运维
这些技术使我们能够将 Go 应用程序专业地部署为系统服务,确保服务的稳定性和可维护性。