日志体系 EFK / Loki
日志架构
Pod 容器日志(stdout/stderr)
│ 节点日志文件 /var/log/containers/
▼
日志采集器(DaemonSet)
├── Fluentd / Fluent Bit(EFK 方案)
└── Promtail(Loki 方案)
│
▼
日志存储
├── Elasticsearch(EFK)
└── Loki(PLG)
│
▼
可视化
├── Kibana(EFK)
└── Grafana(PLG)

Loki 方案(推荐,轻量)
bash
# 安装 Loki Stack
helm repo add grafana https://grafana.github.io/helm-charts
# grafana.enabled=false:使用集群中已有的 Grafana,不再安装内置实例
helm install loki-stack grafana/loki-stack \
--namespace monitoring \
--set grafana.enabled=false \
--set promtail.enabled=true \
--set loki.persistence.enabled=true \
--set loki.persistence.size=50Gi

Promtail 配置
yaml
# Promtail 自动发现 Pod 日志
scrape_configs:
- job_name: kubernetes-pods
kubernetes_sd_configs:
- role: pod
pipeline_stages:
# 解析 JSON 日志
- json:
expressions:
level: level
msg: message
timestamp: time
# 添加标签
- labels:
level:
# 时间戳
- timestamp:
source: timestamp
format: RFC3339Nano
relabel_configs:
- source_labels: [__meta_kubernetes_pod_label_app]
target_label: app
- source_labels: [__meta_kubernetes_namespace]
target_label: namespace
- source_labels: [__meta_kubernetes_pod_name]
target_label: pod
- source_labels: [__meta_kubernetes_pod_container_name]
target_label: container

LogQL 查询
text
# 查询 production 命名空间的错误日志
{namespace="production"} |= "ERROR"
# 解析 JSON 日志并过滤
{app="my-app"} | json | level="error" | line_format "{{.msg}}"
# 统计错误率
sum(rate({namespace="production"} |= "ERROR" [5m])) by (app)
# 查看慢请求
{app="api-server"} | json | duration > 1s
# 统计日志量
sum(count_over_time({namespace="production"}[1h])) by (app)

EFK 方案(大规模)
bash
# 安装 ECK(Elastic Cloud on Kubernetes)
kubectl create -f https://download.elastic.co/downloads/eck/2.11.0/crds.yaml
kubectl apply -f https://download.elastic.co/downloads/eck/2.11.0/operator.yaml
# 创建 Elasticsearch 集群
cat <<EOF | kubectl apply -f -
apiVersion: elasticsearch.k8s.elastic.co/v1
kind: Elasticsearch
metadata:
name: elasticsearch
namespace: logging
spec:
version: 8.12.0
nodeSets:
- name: default
count: 3
config:
node.store.allow_mmap: false
volumeClaimTemplates:
- metadata:
name: elasticsearch-data
spec:
accessModes: [ReadWriteOnce]
resources:
requests:
storage: 100Gi
EOF

Fluent Bit 配置(轻量采集器)
yaml
# ConfigMap for Fluent Bit
apiVersion: v1
kind: ConfigMap
metadata:
name: fluent-bit-config
namespace: logging
data:
fluent-bit.conf: |
[SERVICE]
Flush 5
Log_Level info
Parsers_File parsers.conf
[INPUT]
Name tail
Path /var/log/containers/*.log
Parser docker
Tag kube.*
Refresh_Interval 5
Mem_Buf_Limit 50MB
Skip_Long_Lines On
[FILTER]
Name kubernetes
Match kube.*
Kube_URL https://kubernetes.default.svc:443
Kube_CA_File /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
Kube_Token_File /var/run/secrets/kubernetes.io/serviceaccount/token
Merge_Log On
Keep_Log Off
K8S-Logging.Parser On
K8S-Logging.Exclude On
[OUTPUT]
Name es
Match *
Host elasticsearch-es-http.logging
Port 9200
HTTP_User elastic
HTTP_Passwd ${ELASTIC_PASSWORD}
tls On
tls.verify Off
Index k8s-logs
Suppress_Type_Name On

结构化日志最佳实践
go
// 使用 zap 输出结构化 JSON 日志
import "go.uber.org/zap"
logger, _ := zap.NewProduction()
defer logger.Sync()
logger.Info("处理请求",
zap.String("method", "GET"),
zap.String("path", "/api/users"),
zap.Int("status", 200),
zap.Duration("duration", 50*time.Millisecond),
zap.String("user_id", "user-123"),
zap.String("trace_id", traceID),
)
// 输出:
// {"level":"info","ts":1234567890,"msg":"处理请求","method":"GET","path":"/api/users","status":200,"duration":0.05,"user_id":"user-123","trace_id":"abc123"}