Gracefully shutdown agent (#3895)

This commit is contained in:
6543
2024-07-13 16:06:20 -07:00
committed by GitHub
parent d1c0f781f1
commit 30cd800110
8 changed files with 273 additions and 95 deletions

View File

@@ -23,6 +23,8 @@ import (
"go.woodpecker-ci.org/woodpecker/v2/pipeline/rpc/proto"
)
const authClientTimeout = time.Second * 5
type AuthClient struct {
client proto.WoodpeckerAuthClient
conn *grpc.ClientConn
@@ -39,8 +41,8 @@ func NewAuthGrpcClient(conn *grpc.ClientConn, agentToken string, agentID int64)
return client
}
func (c *AuthClient) Auth() (string, int64, error) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) //nolint:mnd
func (c *AuthClient) Auth(ctx context.Context) (string, int64, error) {
ctx, cancel := context.WithTimeout(ctx, authClientTimeout)
defer cancel()
req := &proto.AuthRequest{

View File

@@ -30,15 +30,12 @@ type AuthInterceptor struct {
}
// NewAuthInterceptor returns a new auth interceptor.
func NewAuthInterceptor(
authClient *AuthClient,
refreshDuration time.Duration,
) (*AuthInterceptor, error) {
func NewAuthInterceptor(ctx context.Context, authClient *AuthClient, refreshDuration time.Duration) (*AuthInterceptor, error) {
interceptor := &AuthInterceptor{
authClient: authClient,
}
err := interceptor.scheduleRefreshToken(refreshDuration)
err := interceptor.scheduleRefreshToken(ctx, refreshDuration)
if err != nil {
return nil, err
}
@@ -78,21 +75,26 @@ func (interceptor *AuthInterceptor) attachToken(ctx context.Context) context.Con
return metadata.AppendToOutgoingContext(ctx, "token", interceptor.accessToken)
}
func (interceptor *AuthInterceptor) scheduleRefreshToken(refreshDuration time.Duration) error {
err := interceptor.refreshToken()
func (interceptor *AuthInterceptor) scheduleRefreshToken(ctx context.Context, refreshInterval time.Duration) error {
err := interceptor.refreshToken(ctx)
if err != nil {
return err
}
go func() {
wait := refreshDuration
wait := refreshInterval
for {
time.Sleep(wait)
err := interceptor.refreshToken()
if err != nil {
wait = time.Second
} else {
wait = refreshDuration
select {
case <-ctx.Done():
return
case <-time.After(wait):
err := interceptor.refreshToken(ctx)
if err != nil {
wait = time.Second
} else {
wait = refreshInterval
}
}
}
}()
@@ -100,8 +102,8 @@ func (interceptor *AuthInterceptor) scheduleRefreshToken(refreshDuration time.Du
return nil
}
func (interceptor *AuthInterceptor) refreshToken() error {
accessToken, _, err := interceptor.authClient.Auth()
func (interceptor *AuthInterceptor) refreshToken(ctx context.Context) error {
accessToken, _, err := interceptor.authClient.Auth(ctx)
if err != nil {
return err
}

View File

@@ -17,7 +17,6 @@ package rpc
import (
"context"
"encoding/json"
"fmt"
"strings"
"time"
@@ -90,8 +89,10 @@ func (c *client) Next(ctx context.Context, f rpc.Filter) (*rpc.Workflow, error)
case codes.Canceled:
if ctx.Err() != nil {
// expected as context was canceled
log.Debug().Err(err).Msgf("grpc error: next(): context canceled")
return nil, nil
}
log.Error().Err(err).Msgf("grpc error: next(): code: %v", status.Code(err))
return nil, err
case
codes.Aborted,
@@ -105,10 +106,11 @@ func (c *client) Next(ctx context.Context, f rpc.Filter) (*rpc.Workflow, error)
// https://github.com/woodpecker-ci/woodpecker/issues/717#issuecomment-1049365104
log.Trace().Err(err).Msg("grpc: to many keepalive pings without sending data")
} else {
log.Error().Err(err).Msgf("grpc error: next(): code: %v", status.Code(err))
log.Warn().Err(err).Msgf("grpc error: next(): code: %v", status.Code(err))
}
default:
return nil, fmt.Errorf("grpc error: next(): code: %v: %w", status.Code(err), err)
log.Error().Err(err).Msgf("grpc error: next(): code: %v", status.Code(err))
return nil, err
}
select {
@@ -143,9 +145,15 @@ func (c *client) Wait(ctx context.Context, id string) (err error) {
break
}
log.Error().Err(err).Msgf("grpc error: wait(): code: %v", status.Code(err))
switch status.Code(err) {
case codes.Canceled:
if ctx.Err() != nil {
// expected as context was canceled
log.Debug().Err(err).Msgf("grpc error: wait(): context canceled")
return nil
}
log.Error().Err(err).Msgf("grpc error: wait(): code: %v", status.Code(err))
return err
case
codes.Aborted,
codes.DataLoss,
@@ -153,7 +161,9 @@ func (c *client) Wait(ctx context.Context, id string) (err error) {
codes.Internal,
codes.Unavailable:
// non-fatal errors
log.Warn().Err(err).Msgf("grpc error: wait(): code: %v", status.Code(err))
default:
log.Error().Err(err).Msgf("grpc error: wait(): code: %v", status.Code(err))
return err
}
@@ -184,6 +194,14 @@ func (c *client) Init(ctx context.Context, workflowID string, state rpc.Workflow
log.Error().Err(err).Msgf("grpc error: init(): code: %v", status.Code(err))
switch status.Code(err) {
case codes.Canceled:
if ctx.Err() != nil {
// expected as context was canceled
log.Debug().Err(err).Msgf("grpc error: init(): context canceled")
return nil
}
log.Error().Err(err).Msgf("grpc error: init(): code: %v", status.Code(err))
return err
case
codes.Aborted,
codes.DataLoss,
@@ -191,7 +209,9 @@ func (c *client) Init(ctx context.Context, workflowID string, state rpc.Workflow
codes.Internal,
codes.Unavailable:
// non-fatal errors
log.Warn().Err(err).Msgf("grpc error: init(): code: %v", status.Code(err))
default:
log.Error().Err(err).Msgf("grpc error: init(): code: %v", status.Code(err))
return err
}
@@ -222,6 +242,14 @@ func (c *client) Done(ctx context.Context, workflowID string, state rpc.Workflow
log.Error().Err(err).Msgf("grpc error: done(): code: %v", status.Code(err))
switch status.Code(err) {
case codes.Canceled:
if ctx.Err() != nil {
// expected as context was canceled
log.Debug().Err(err).Msgf("grpc error: done(): context canceled")
return nil
}
log.Error().Err(err).Msgf("grpc error: done(): code: %v", status.Code(err))
return err
case
codes.Aborted,
codes.DataLoss,
@@ -229,7 +257,9 @@ func (c *client) Done(ctx context.Context, workflowID string, state rpc.Workflow
codes.Internal,
codes.Unavailable:
// non-fatal errors
log.Warn().Err(err).Msgf("grpc error: done(): code: %v", status.Code(err))
default:
log.Error().Err(err).Msgf("grpc error: done(): code: %v", status.Code(err))
return err
}
@@ -256,6 +286,14 @@ func (c *client) Extend(ctx context.Context, id string) (err error) {
log.Error().Err(err).Msgf("grpc error: extend(): code: %v", status.Code(err))
switch status.Code(err) {
case codes.Canceled:
if ctx.Err() != nil {
// expected as context was canceled
log.Debug().Err(err).Msgf("grpc error: extend(): context canceled")
return nil
}
log.Error().Err(err).Msgf("grpc error: extend(): code: %v", status.Code(err))
return err
case
codes.Aborted,
codes.DataLoss,
@@ -263,7 +301,9 @@ func (c *client) Extend(ctx context.Context, id string) (err error) {
codes.Internal,
codes.Unavailable:
// non-fatal errors
log.Warn().Err(err).Msgf("grpc error: extend(): code: %v", status.Code(err))
default:
log.Error().Err(err).Msgf("grpc error: extend(): code: %v", status.Code(err))
return err
}
@@ -297,6 +337,14 @@ func (c *client) Update(ctx context.Context, id string, state rpc.StepState) (er
log.Error().Err(err).Msgf("grpc error: update(): code: %v", status.Code(err))
switch status.Code(err) {
case codes.Canceled:
if ctx.Err() != nil {
// expected as context was canceled
log.Debug().Err(err).Msgf("grpc error: update(): context canceled")
return nil
}
log.Error().Err(err).Msgf("grpc error: update(): code: %v", status.Code(err))
return err
case
codes.Aborted,
codes.DataLoss,
@@ -304,7 +352,9 @@ func (c *client) Update(ctx context.Context, id string, state rpc.StepState) (er
codes.Internal,
codes.Unavailable:
// non-fatal errors
log.Warn().Err(err).Msgf("grpc error: update(): code: %v", status.Code(err))
default:
log.Error().Err(err).Msgf("grpc error: update(): code: %v", status.Code(err))
return err
}
@@ -333,9 +383,15 @@ func (c *client) Log(ctx context.Context, logEntry *rpc.LogEntry) (err error) {
break
}
log.Error().Err(err).Msgf("grpc error: log(): code: %v", status.Code(err))
switch status.Code(err) {
case codes.Canceled:
if ctx.Err() != nil {
// expected as context was canceled
log.Debug().Err(err).Msgf("grpc error: log(): context canceled")
return nil
}
log.Error().Err(err).Msgf("grpc error: log(): code: %v", status.Code(err))
return err
case
codes.Aborted,
codes.DataLoss,
@@ -343,7 +399,9 @@ func (c *client) Log(ctx context.Context, logEntry *rpc.LogEntry) (err error) {
codes.Internal,
codes.Unavailable:
// non-fatal errors
log.Warn().Err(err).Msgf("grpc error: log(): code: %v", status.Code(err))
default:
log.Error().Err(err).Msgf("grpc error: log(): code: %v", status.Code(err))
return err
}
@@ -383,6 +441,14 @@ func (c *client) ReportHealth(ctx context.Context) (err error) {
return nil
}
switch status.Code(err) {
case codes.Canceled:
if ctx.Err() != nil {
// expected as context was canceled
log.Debug().Err(err).Msgf("grpc error: report_health(): context canceled")
return nil
}
log.Error().Err(err).Msgf("grpc error: report_health(): code: %v", status.Code(err))
return err
case
codes.Aborted,
codes.DataLoss,
@@ -390,7 +456,9 @@ func (c *client) ReportHealth(ctx context.Context) (err error) {
codes.Internal,
codes.Unavailable:
// non-fatal errors
log.Warn().Err(err).Msgf("grpc error: report_health(): code: %v", status.Code(err))
default:
log.Error().Err(err).Msgf("grpc error: report_health(): code: %v", status.Code(err))
return err
}

View File

@@ -49,7 +49,7 @@ func NewRunner(workEngine rpc.Peer, f rpc.Filter, h string, state *State, backen
}
}
func (r *Runner) Run(runnerCtx context.Context) error { //nolint:contextcheck
func (r *Runner) Run(runnerCtx, shutdownCtx context.Context) error { //nolint:contextcheck
log.Debug().Msg("request next execution")
meta, _ := metadata.FromOutgoingContext(runnerCtx)
@@ -178,7 +178,11 @@ func (r *Runner) Run(runnerCtx context.Context) error { //nolint:contextcheck
Str("error", state.Error).
Msg("updating workflow status")
if err := r.client.Done(runnerCtx, workflow.ID, state); err != nil {
doneCtx := runnerCtx
if doneCtx.Err() != nil {
doneCtx = shutdownCtx
}
if err := r.client.Done(doneCtx, workflow.ID, state); err != nil {
logger.Error().Err(err).Msg("updating workflow status failed")
} else {
logger.Debug().Msg("updating workflow status complete")