
日志体系 EFK / Loki

日志架构

Pod 容器日志(stdout/stderr)
    │ 写入节点日志文件 /var/log/containers/
    ▼

日志采集器(DaemonSet)
├── Fluentd / Fluent Bit(EFK 方案)
└── Promtail(Loki 方案)


日志存储
├── Elasticsearch(EFK)
└── Loki(PLG)


可视化
├── Kibana(EFK)
└── Grafana(PLG)

Loki 方案(推荐,轻量)

bash
# Install the Loki stack (Loki + Promtail).
helm repo add grafana https://grafana.github.io/helm-charts
# grafana.enabled=false: reuse an existing Grafana instance.
# NOTE: a comment after a trailing line-continuation backslash terminates
# the command (the remaining --set flags would be silently dropped), so
# every option lives on its own clean continued line.
helm install loki-stack grafana/loki-stack \
  --namespace monitoring \
  --set grafana.enabled=false \
  --set promtail.enabled=true \
  --set loki.persistence.enabled=true \
  --set loki.persistence.size=50Gi

Promtail 配置

yaml
# Promtail: auto-discover pod logs through the Kubernetes API.
scrape_configs:
- job_name: kubernetes-pods
  kubernetes_sd_configs:
  - role: pod
  pipeline_stages:
  # Parse JSON-formatted log lines.
  - json:
      expressions:
        level: level
        msg: message
        timestamp: time
  # Promote the parsed "level" field to an index label.
  - labels:
      level:
  # Use the log record's own timestamp instead of the scrape time.
  - timestamp:
      source: timestamp
      format: RFC3339Nano
  relabel_configs:
  - source_labels: [__meta_kubernetes_pod_label_app]
    target_label: app
  - source_labels: [__meta_kubernetes_namespace]
    target_label: namespace
  - source_labels: [__meta_kubernetes_pod_name]
    target_label: pod
  - source_labels: [__meta_kubernetes_pod_container_name]
    target_label: container
  # __path__ tells Promtail which files to tail; without this rule the
  # discovered pod targets have no file path and nothing is collected.
  - source_labels: [__meta_kubernetes_pod_uid, __meta_kubernetes_pod_container_name]
    separator: /
    target_label: __path__
    replacement: /var/log/pods/*$1/*.log

LogQL 查询

text
# Errors in the "production" namespace (|= is a line-contains filter).
{namespace="production"} |= "ERROR"

# Parse JSON logs, filter on the extracted level, reformat the output line.
{app="my-app"} | json | level="error" | line_format "{{.msg}}"

# Error rate per app over a 5-minute window.
sum(rate({namespace="production"} |= "ERROR" [5m])) by (app)

# Slow requests (label filter on the JSON-extracted duration field).
{app="api-server"} | json | duration > 1s

# Log volume per app over the last hour.
sum(count_over_time({namespace="production"}[1h])) by (app)

EFK 方案(大规模)

bash
# Install the ECK (Elastic Cloud on Kubernetes) operator.
# NOTE(review): upstream ECK docs use `kubectl create` for the CRDs —
# kept as-is; confirm before switching to `apply`.
kubectl create -f https://download.elastic.co/downloads/eck/2.11.0/crds.yaml
kubectl apply -f https://download.elastic.co/downloads/eck/2.11.0/operator.yaml

# Create a 3-node Elasticsearch cluster in the "logging" namespace.
# The heredoc delimiter is unquoted, but the manifest contains no shell
# variables, so no expansion happens inside it.
cat <<EOF | kubectl apply -f -
apiVersion: elasticsearch.k8s.elastic.co/v1
kind: Elasticsearch
metadata:
  name: elasticsearch
  namespace: logging
spec:
  version: 8.12.0
  nodeSets:
  - name: default
    count: 3
    config:
      node.store.allow_mmap: false
    volumeClaimTemplates:
    - metadata:
        name: elasticsearch-data
      spec:
        accessModes: [ReadWriteOnce]
        resources:
          requests:
            storage: 100Gi
EOF

Fluent Bit 配置(轻量采集器)

yaml
# ConfigMap holding the Fluent Bit pipeline: tail container log files,
# enrich records with Kubernetes metadata, ship them to Elasticsearch.
# NOTE(review): the `docker` parser expects Docker JSON log files; on
# containerd / CRI-O nodes the `cri` parser is required — confirm the
# container runtime before deploying.
# NOTE(review): `tls.verify Off` skips certificate validation — common
# for in-cluster self-signed certs, but verify this is intentional.
# `${ELASTIC_PASSWORD}` is substituted by Fluent Bit from the pod env.
apiVersion: v1
kind: ConfigMap
metadata:
  name: fluent-bit-config
  namespace: logging
data:
  fluent-bit.conf: |
    [SERVICE]
        Flush         5
        Log_Level     info
        Parsers_File  parsers.conf

    [INPUT]
        Name              tail
        Path              /var/log/containers/*.log
        Parser            docker
        Tag               kube.*
        Refresh_Interval  5
        Mem_Buf_Limit     50MB
        Skip_Long_Lines   On

    [FILTER]
        Name                kubernetes
        Match               kube.*
        Kube_URL            https://kubernetes.default.svc:443
        Kube_CA_File        /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        Kube_Token_File     /var/run/secrets/kubernetes.io/serviceaccount/token
        Merge_Log           On
        Keep_Log            Off
        K8S-Logging.Parser  On
        K8S-Logging.Exclude On

    [OUTPUT]
        Name            es
        Match           *
        Host            elasticsearch-es-http.logging
        Port            9200
        HTTP_User       elastic
        HTTP_Passwd     ${ELASTIC_PASSWORD}
        tls             On
        tls.verify      Off
        Index           k8s-logs
        Suppress_Type_Name On

结构化日志最佳实践

go
// Emit structured JSON logs with zap.
// NOTE(review): snippet form — compiling requires an enclosing function,
// a `time` import, and a `traceID` variable in scope.
import "go.uber.org/zap"

// Error is discarded here for brevity; handle it in production code.
logger, _ := zap.NewProduction()
defer logger.Sync() // flush any buffered log entries before exit

logger.Info("处理请求",
    zap.String("method", "GET"),
    zap.String("path", "/api/users"),
    zap.Int("status", 200),
    zap.Duration("duration", 50*time.Millisecond),
    zap.String("user_id", "user-123"),
    zap.String("trace_id", traceID),
)

// Output:
// {"level":"info","ts":1234567890,"msg":"处理请求","method":"GET","path":"/api/users","status":200,"duration":0.05,"user_id":"user-123","trace_id":"abc123"}

本站内容由 褚成志 整理编写,仅供学习参考