feat(observability): add in-process metrics endpoint for qps latency and error-rate
This commit is contained in:
@@ -2,6 +2,8 @@ import { Module } from '@nestjs/common';
|
|||||||
import { TypeOrmModule } from '@nestjs/typeorm';
|
import { TypeOrmModule } from '@nestjs/typeorm';
|
||||||
import { buildTypeOrmConfig } from './database.config';
|
import { buildTypeOrmConfig } from './database.config';
|
||||||
import { HealthController } from './common/health.controller';
|
import { HealthController } from './common/health.controller';
|
||||||
|
import { MetricsController } from './common/metrics.controller';
|
||||||
|
import { MetricsService } from './common/metrics.service';
|
||||||
import { AuthModule } from './auth/auth.module';
|
import { AuthModule } from './auth/auth.module';
|
||||||
import { NodesModule } from './nodes/nodes.module';
|
import { NodesModule } from './nodes/nodes.module';
|
||||||
import { AuditModule } from './audit/audit.module';
|
import { AuditModule } from './audit/audit.module';
|
||||||
@@ -13,6 +15,7 @@ import { AuditModule } from './audit/audit.module';
|
|||||||
AuthModule,
|
AuthModule,
|
||||||
NodesModule,
|
NodesModule,
|
||||||
],
|
],
|
||||||
controllers: [HealthController],
|
controllers: [HealthController, MetricsController],
|
||||||
|
providers: [MetricsService],
|
||||||
})
|
})
|
||||||
export class AppModule {}
|
export class AppModule {}
|
||||||
|
|||||||
12
Fabric.Backend.Center/src/common/metrics.controller.ts
Normal file
12
Fabric.Backend.Center/src/common/metrics.controller.ts
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
import { Controller, Get } from '@nestjs/common';
|
||||||
|
import { MetricsService } from './metrics.service';
|
||||||
|
|
||||||
|
/**
 * HTTP controller registered under the `metrics` route.
 *
 * Its single GET handler returns whatever `MetricsService.snapshot()`
 * produces at the moment of the call.
 */
@Controller('metrics')
export class MetricsController {
  private readonly metrics: MetricsService;

  constructor(metrics: MetricsService) {
    this.metrics = metrics;
  }

  /** Handles `GET` on the controller route and returns the current snapshot. */
  @Get()
  get() {
    const current = this.metrics.snapshot();
    return current;
  }
}
|
||||||
35
Fabric.Backend.Center/src/common/metrics.service.ts
Normal file
35
Fabric.Backend.Center/src/common/metrics.service.ts
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
import { Injectable } from '@nestjs/common';
|
||||||
|
|
||||||
|
/**
 * Running totals kept by the metrics service:
 * - `requests`: number of recorded requests
 * - `errors`: number of recorded requests counted as errors
 * - `totalDurationMs`: sum of the recorded durations, in milliseconds
 */
type Bucket = Record<'requests' | 'errors' | 'totalDurationMs', number>;
|
||||||
|
|
||||||
|
@Injectable()
|
||||||
|
/**
 * In-memory accumulator for request count, error count and total latency.
 *
 * Totals start at zero when the instance is constructed and are never reset,
 * so every derived figure in `snapshot()` is an average over the lifetime of
 * this instance.
 */
export class MetricsService {
  private readonly bucket: Bucket = { requests: 0, errors: 0, totalDurationMs: 0 };
  private readonly startedAt = Date.now();

  /**
   * Records one finished request.
   *
   * @param statusCode Response status; any value >= 400 is counted as an error.
   * @param durationMs Elapsed time in milliseconds. Non-finite or negative
   *   values are counted as 0 ms so they cannot corrupt the running total.
   */
  record(statusCode: number, durationMs: number): void {
    this.bucket.requests += 1;
    if (statusCode >= 400) this.bucket.errors += 1;

    // One NaN/Infinity would stick in the sum for the life of the process, and
    // a negative sample (e.g. a Date.now() difference taken across a backwards
    // clock adjustment) would drag the average down. The request is still
    // counted; only its duration is treated as zero.
    const safeDurationMs = Number.isFinite(durationMs) && durationMs > 0 ? durationMs : 0;
    this.bucket.totalDurationMs += safeDurationMs;
  }

  /**
   * Returns the current totals together with derived rates.
   *
   * @returns `requests` and `errors` as raw counts; `qps` as requests divided
   *   by `uptimeSec`; `avgLatencyMs` and `errorRate` as per-request averages
   *   (both 0 when nothing has been recorded); `uptimeSec` as whole seconds
   *   since construction, never less than 1.
   */
  snapshot(): {
    requests: number;
    errors: number;
    qps: number;
    avgLatencyMs: number;
    errorRate: number;
    uptimeSec: number;
  } {
    // Clamped to 1 so the qps division is well defined during the first second.
    const uptimeSec = Math.max(1, Math.floor((Date.now() - this.startedAt) / 1000));
    const { requests, errors, totalDurationMs } = this.bucket;
    const qps = requests / uptimeSec;
    const avgLatencyMs = requests > 0 ? totalDurationMs / requests : 0;
    const errorRate = requests > 0 ? errors / requests : 0;

    return {
      requests,
      errors,
      qps,
      avgLatencyMs,
      errorRate,
      uptimeSec,
    };
  }
}
|
||||||
@@ -1,30 +1,36 @@
|
|||||||
import { randomUUID } from 'crypto';
|
import { randomUUID } from 'crypto';
|
||||||
import { NextFunction, Request, Response } from 'express';
|
import { NextFunction, Request, Response } from 'express';
|
||||||
|
import { MetricsService } from './metrics.service';
|
||||||
|
|
||||||
type ReqWithId = Request & { requestId?: string };
|
type ReqWithId = Request & { requestId?: string };
|
||||||
|
|
||||||
export function requestContextMiddleware(req: ReqWithId, res: Response, next: NextFunction): void {
|
export function createRequestContextMiddleware(service: 'center' | 'guild', metrics: MetricsService) {
|
||||||
const headerId = req.headers['x-request-id'];
|
return (req: ReqWithId, res: Response, next: NextFunction): void => {
|
||||||
const requestId =
|
const headerId = req.headers['x-request-id'];
|
||||||
(Array.isArray(headerId) ? headerId[0] : headerId) || randomUUID();
|
const requestId =
|
||||||
|
(Array.isArray(headerId) ? headerId[0] : headerId) || randomUUID();
|
||||||
|
|
||||||
req.requestId = requestId;
|
req.requestId = requestId;
|
||||||
res.setHeader('x-request-id', requestId);
|
res.setHeader('x-request-id', requestId);
|
||||||
|
|
||||||
const startedAt = Date.now();
|
const startedAt = Date.now();
|
||||||
res.on('finish', () => {
|
res.on('finish', () => {
|
||||||
const log = {
|
const durationMs = Date.now() - startedAt;
|
||||||
level: 'info',
|
metrics.record(res.statusCode, durationMs);
|
||||||
service: 'center',
|
|
||||||
requestId,
|
|
||||||
method: req.method,
|
|
||||||
path: req.originalUrl,
|
|
||||||
statusCode: res.statusCode,
|
|
||||||
durationMs: Date.now() - startedAt,
|
|
||||||
timestamp: new Date().toISOString(),
|
|
||||||
};
|
|
||||||
console.log(JSON.stringify(log));
|
|
||||||
});
|
|
||||||
|
|
||||||
next();
|
const log = {
|
||||||
|
level: 'info',
|
||||||
|
service,
|
||||||
|
requestId,
|
||||||
|
method: req.method,
|
||||||
|
path: req.originalUrl,
|
||||||
|
statusCode: res.statusCode,
|
||||||
|
durationMs,
|
||||||
|
timestamp: new Date().toISOString(),
|
||||||
|
};
|
||||||
|
console.log(JSON.stringify(log));
|
||||||
|
});
|
||||||
|
|
||||||
|
next();
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,7 +3,8 @@ import { ValidationPipe } from '@nestjs/common';
|
|||||||
import { NestFactory } from '@nestjs/core';
|
import { NestFactory } from '@nestjs/core';
|
||||||
import { DocumentBuilder, SwaggerModule } from '@nestjs/swagger';
|
import { DocumentBuilder, SwaggerModule } from '@nestjs/swagger';
|
||||||
import { AppModule } from './app.module';
|
import { AppModule } from './app.module';
|
||||||
import { requestContextMiddleware } from './common/request-context.middleware';
|
import { createRequestContextMiddleware } from './common/request-context.middleware';
|
||||||
|
import { MetricsService } from './common/metrics.service';
|
||||||
|
|
||||||
function requireEnv(name: string): string {
|
function requireEnv(name: string): string {
|
||||||
const value = process.env[name];
|
const value = process.env[name];
|
||||||
@@ -29,7 +30,8 @@ async function bootstrap() {
|
|||||||
|
|
||||||
const app = await NestFactory.create(AppModule);
|
const app = await NestFactory.create(AppModule);
|
||||||
app.setGlobalPrefix('api');
|
app.setGlobalPrefix('api');
|
||||||
app.use(requestContextMiddleware);
|
const metrics = app.get(MetricsService);
|
||||||
|
app.use(createRequestContextMiddleware('center', metrics));
|
||||||
app.useGlobalPipes(
|
app.useGlobalPipes(
|
||||||
new ValidationPipe({
|
new ValidationPipe({
|
||||||
whitelist: true,
|
whitelist: true,
|
||||||
|
|||||||
@@ -3,6 +3,8 @@ import { APP_GUARD } from '@nestjs/core';
|
|||||||
import { TypeOrmModule } from '@nestjs/typeorm';
|
import { TypeOrmModule } from '@nestjs/typeorm';
|
||||||
import { buildTypeOrmConfig } from './database.config';
|
import { buildTypeOrmConfig } from './database.config';
|
||||||
import { HealthController } from './common/health.controller';
|
import { HealthController } from './common/health.controller';
|
||||||
|
import { MetricsController } from './common/metrics.controller';
|
||||||
|
import { MetricsService } from './common/metrics.service';
|
||||||
import { ApiKeyGuard } from './common/api-key.guard';
|
import { ApiKeyGuard } from './common/api-key.guard';
|
||||||
import { GuildsModule } from './guilds/guilds.module';
|
import { GuildsModule } from './guilds/guilds.module';
|
||||||
import { ChannelsModule } from './channels/channels.module';
|
import { ChannelsModule } from './channels/channels.module';
|
||||||
@@ -19,8 +21,9 @@ import { RealtimeModule } from './realtime/realtime.module';
|
|||||||
ChannelsModule,
|
ChannelsModule,
|
||||||
MessagingModule,
|
MessagingModule,
|
||||||
],
|
],
|
||||||
controllers: [HealthController],
|
controllers: [HealthController, MetricsController],
|
||||||
providers: [
|
providers: [
|
||||||
|
MetricsService,
|
||||||
{
|
{
|
||||||
provide: APP_GUARD,
|
provide: APP_GUARD,
|
||||||
useClass: ApiKeyGuard,
|
useClass: ApiKeyGuard,
|
||||||
|
|||||||
12
Fabric.Backend.Guild/src/common/metrics.controller.ts
Normal file
12
Fabric.Backend.Guild/src/common/metrics.controller.ts
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
import { Controller, Get } from '@nestjs/common';
|
||||||
|
import { MetricsService } from './metrics.service';
|
||||||
|
|
||||||
|
/**
 * HTTP controller registered under the `metrics` route.
 *
 * Its single GET handler returns whatever `MetricsService.snapshot()`
 * produces at the moment of the call.
 */
@Controller('metrics')
export class MetricsController {
  private readonly metrics: MetricsService;

  constructor(metrics: MetricsService) {
    this.metrics = metrics;
  }

  /** Handles `GET` on the controller route and returns the current snapshot. */
  @Get()
  get() {
    const current = this.metrics.snapshot();
    return current;
  }
}
|
||||||
35
Fabric.Backend.Guild/src/common/metrics.service.ts
Normal file
35
Fabric.Backend.Guild/src/common/metrics.service.ts
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
import { Injectable } from '@nestjs/common';
|
||||||
|
|
||||||
|
/**
 * Running totals kept by the metrics service:
 * - `requests`: number of recorded requests
 * - `errors`: number of recorded requests counted as errors
 * - `totalDurationMs`: sum of the recorded durations, in milliseconds
 */
type Bucket = Record<'requests' | 'errors' | 'totalDurationMs', number>;
|
||||||
|
|
||||||
|
@Injectable()
|
||||||
|
/**
 * In-memory accumulator for request count, error count and total latency.
 *
 * Totals start at zero when the instance is constructed and are never reset,
 * so every derived figure in `snapshot()` is an average over the lifetime of
 * this instance.
 */
export class MetricsService {
  private readonly bucket: Bucket = { requests: 0, errors: 0, totalDurationMs: 0 };
  private readonly startedAt = Date.now();

  /**
   * Records one finished request.
   *
   * @param statusCode Response status; any value >= 400 is counted as an error.
   * @param durationMs Elapsed time in milliseconds. Non-finite or negative
   *   values are counted as 0 ms so they cannot corrupt the running total.
   */
  record(statusCode: number, durationMs: number): void {
    this.bucket.requests += 1;
    if (statusCode >= 400) this.bucket.errors += 1;

    // One NaN/Infinity would stick in the sum for the life of the process, and
    // a negative sample (e.g. a Date.now() difference taken across a backwards
    // clock adjustment) would drag the average down. The request is still
    // counted; only its duration is treated as zero.
    const safeDurationMs = Number.isFinite(durationMs) && durationMs > 0 ? durationMs : 0;
    this.bucket.totalDurationMs += safeDurationMs;
  }

  /**
   * Returns the current totals together with derived rates.
   *
   * @returns `requests` and `errors` as raw counts; `qps` as requests divided
   *   by `uptimeSec`; `avgLatencyMs` and `errorRate` as per-request averages
   *   (both 0 when nothing has been recorded); `uptimeSec` as whole seconds
   *   since construction, never less than 1.
   */
  snapshot(): {
    requests: number;
    errors: number;
    qps: number;
    avgLatencyMs: number;
    errorRate: number;
    uptimeSec: number;
  } {
    // Clamped to 1 so the qps division is well defined during the first second.
    const uptimeSec = Math.max(1, Math.floor((Date.now() - this.startedAt) / 1000));
    const { requests, errors, totalDurationMs } = this.bucket;
    const qps = requests / uptimeSec;
    const avgLatencyMs = requests > 0 ? totalDurationMs / requests : 0;
    const errorRate = requests > 0 ? errors / requests : 0;

    return {
      requests,
      errors,
      qps,
      avgLatencyMs,
      errorRate,
      uptimeSec,
    };
  }
}
|
||||||
@@ -1,30 +1,36 @@
|
|||||||
import { randomUUID } from 'crypto';
|
import { randomUUID } from 'crypto';
|
||||||
import { NextFunction, Request, Response } from 'express';
|
import { NextFunction, Request, Response } from 'express';
|
||||||
|
import { MetricsService } from './metrics.service';
|
||||||
|
|
||||||
type ReqWithId = Request & { requestId?: string };
|
type ReqWithId = Request & { requestId?: string };
|
||||||
|
|
||||||
export function requestContextMiddleware(req: ReqWithId, res: Response, next: NextFunction): void {
|
export function createRequestContextMiddleware(service: 'center' | 'guild', metrics: MetricsService) {
|
||||||
const headerId = req.headers['x-request-id'];
|
return (req: ReqWithId, res: Response, next: NextFunction): void => {
|
||||||
const requestId =
|
const headerId = req.headers['x-request-id'];
|
||||||
(Array.isArray(headerId) ? headerId[0] : headerId) || randomUUID();
|
const requestId =
|
||||||
|
(Array.isArray(headerId) ? headerId[0] : headerId) || randomUUID();
|
||||||
|
|
||||||
req.requestId = requestId;
|
req.requestId = requestId;
|
||||||
res.setHeader('x-request-id', requestId);
|
res.setHeader('x-request-id', requestId);
|
||||||
|
|
||||||
const startedAt = Date.now();
|
const startedAt = Date.now();
|
||||||
res.on('finish', () => {
|
res.on('finish', () => {
|
||||||
const log = {
|
const durationMs = Date.now() - startedAt;
|
||||||
level: 'info',
|
metrics.record(res.statusCode, durationMs);
|
||||||
service: 'guild',
|
|
||||||
requestId,
|
|
||||||
method: req.method,
|
|
||||||
path: req.originalUrl,
|
|
||||||
statusCode: res.statusCode,
|
|
||||||
durationMs: Date.now() - startedAt,
|
|
||||||
timestamp: new Date().toISOString(),
|
|
||||||
};
|
|
||||||
console.log(JSON.stringify(log));
|
|
||||||
});
|
|
||||||
|
|
||||||
next();
|
const log = {
|
||||||
|
level: 'info',
|
||||||
|
service,
|
||||||
|
requestId,
|
||||||
|
method: req.method,
|
||||||
|
path: req.originalUrl,
|
||||||
|
statusCode: res.statusCode,
|
||||||
|
durationMs,
|
||||||
|
timestamp: new Date().toISOString(),
|
||||||
|
};
|
||||||
|
console.log(JSON.stringify(log));
|
||||||
|
});
|
||||||
|
|
||||||
|
next();
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,12 +3,14 @@ import { ValidationPipe } from '@nestjs/common';
|
|||||||
import { NestFactory } from '@nestjs/core';
|
import { NestFactory } from '@nestjs/core';
|
||||||
import { DocumentBuilder, SwaggerModule } from '@nestjs/swagger';
|
import { DocumentBuilder, SwaggerModule } from '@nestjs/swagger';
|
||||||
import { AppModule } from './app.module';
|
import { AppModule } from './app.module';
|
||||||
import { requestContextMiddleware } from './common/request-context.middleware';
|
import { createRequestContextMiddleware } from './common/request-context.middleware';
|
||||||
|
import { MetricsService } from './common/metrics.service';
|
||||||
|
|
||||||
async function bootstrap() {
|
async function bootstrap() {
|
||||||
const app = await NestFactory.create(AppModule);
|
const app = await NestFactory.create(AppModule);
|
||||||
app.setGlobalPrefix('api');
|
app.setGlobalPrefix('api');
|
||||||
app.use(requestContextMiddleware);
|
const metrics = app.get(MetricsService);
|
||||||
|
app.use(createRequestContextMiddleware('guild', metrics));
|
||||||
app.useGlobalPipes(
|
app.useGlobalPipes(
|
||||||
new ValidationPipe({
|
new ValidationPipe({
|
||||||
whitelist: true,
|
whitelist: true,
|
||||||
|
|||||||
@@ -96,7 +96,7 @@
|
|||||||
- [x] `docker-compose.prod.yml`(去掉 `DB_SYNC=true`)
|
- [x] `docker-compose.prod.yml`(去掉 `DB_SYNC=true`)
|
||||||
- [x] DB migration 机制(TypeORM migration)
|
- [x] DB migration 机制(TypeORM migration)
|
||||||
- [x] 结构化日志 + request id
|
- [x] 结构化日志 + request id
|
||||||
- [ ] 基础监控指标(QPS、延迟、错误率)
|
- [x] 基础监控指标(QPS、延迟、错误率)
|
||||||
- [ ] 备份与恢复流程文档
|
- [ ] 备份与恢复流程文档
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|||||||
Reference in New Issue
Block a user