Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improvements to removeCheckoutDir #2946

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 23 additions & 25 deletions internal/job/checkout.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"strings"
Expand Down Expand Up @@ -53,48 +54,45 @@ func (e *Executor) configureHTTPSInsteadOfSSH(ctx context.Context) error {
)
}

func (e *Executor) removeCheckoutDir() error {
func (e *Executor) removeCheckoutDir(ctx context.Context) error {
checkoutPath, _ := e.shell.Env.Get("BUILDKITE_BUILD_CHECKOUT_PATH")

// on windows, sometimes removing large dirs can fail for various reasons
// for instance having files open
// see https://github.com/golang/go/issues/20841
for range 10 {
return roko.NewRetrier(
roko.WithMaxAttempts(10),
roko.WithStrategy(roko.Constant(10*time.Second)),
).DoWithContext(ctx, func(r *roko.Retrier) error {
e.shell.Commentf("Removing %s", checkoutPath)
if err := os.RemoveAll(checkoutPath); err != nil {
e.shell.Errorf("Failed to remove \"%s\" (%s)", checkoutPath, err)
} else {
if _, err := os.Stat(checkoutPath); os.IsNotExist(err) {
return nil
} else {
e.shell.Errorf("Failed to remove %s", checkoutPath)
}
if err := hardRemoveAll(checkoutPath); err != nil {
e.shell.Errorf("Failed to remove %q (%s)", checkoutPath, err)
return err
}
e.shell.Commentf("Waiting 10 seconds")
<-time.After(time.Second * 10)
}

return fmt.Errorf("Failed to remove %s", checkoutPath)
if _, err := os.Stat(checkoutPath); err != nil && !errors.Is(err, fs.ErrNotExist) {
e.shell.Errorf("Failed to remove %q", checkoutPath)
return err
}
return nil
})
}

func (e *Executor) createCheckoutDir() error {
checkoutPath, _ := e.shell.Env.Get("BUILDKITE_BUILD_CHECKOUT_PATH")

if !utils.FileExists(checkoutPath) {
e.shell.Commentf("Creating \"%s\"", checkoutPath)
// Actual file permissions will be reduced by umask, and won't be 0777 unless the user has manually changed the umask to 000
e.shell.Commentf("Creating %q", checkoutPath)
// Actual file permissions will be reduced by umask, and won't be 0777
// unless the user has manually changed the umask to 000
if err := os.MkdirAll(checkoutPath, 0777); err != nil {
return err
}
}

if e.shell.Getwd() != checkoutPath {
if err := e.shell.Chdir(checkoutPath); err != nil {
return err
}
if e.shell.Getwd() == checkoutPath {
return nil
}

return nil
return e.shell.Chdir(checkoutPath)
}

// CheckoutPhase creates the build directory and makes sure we're running the
Expand All @@ -115,7 +113,7 @@ func (e *Executor) CheckoutPhase(ctx context.Context) error {
// Remove the checkout directory if BUILDKITE_CLEAN_CHECKOUT is present
if e.CleanCheckout {
e.shell.Headerf("Cleaning pipeline checkout")
if err = e.removeCheckoutDir(); err != nil {
if err = e.removeCheckoutDir(ctx); err != nil {
return err
}
}
Expand Down Expand Up @@ -199,7 +197,7 @@ func (e *Executor) CheckoutPhase(ctx context.Context) error {
// This removes the checkout dir, which means the next checkout
// will be a lot slower (clone vs fetch), but hopefully will
// allow the agent to self-heal
if err := e.removeCheckoutDir(); err != nil {
if err := e.removeCheckoutDir(ctx); err != nil {
e.shell.Printf("Failed to remove checkout dir while cleaning up after a checkout error.")
}

Expand Down
10 changes: 10 additions & 0 deletions internal/job/remove_all_nonunix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
//go:build !unix

package job

import "os"

// hardRemoveAll only does more than os.RemoveAll on Unix-likes.
func hardRemoveAll(path string) error {
return os.RemoveAll(path)
}
44 changes: 44 additions & 0 deletions internal/job/remove_all_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
//go:build unix

package job

import (
"errors"
"io/fs"
"os"
"path/filepath"
"testing"
)

func TestHardRemoveAll(t *testing.T) {
container, err := os.MkdirTemp("", "TestHardRemoveAll")
if err != nil {
t.Fatalf("os.MkdirTemp(TestHardRemoveAll) error = %v", err)
}
t.Cleanup(func() { os.RemoveAll(container) }) // lol but if hardRemoveAll doesn't work...

dirA := filepath.Join(container, "a")
dirB := filepath.Join(dirA, "b")
fileC := filepath.Join(dirB, "c")
if err := os.MkdirAll(dirB, 0o777); err != nil {
t.Fatalf("os.MkdirAll(c%q, 0o777) = %v", dirB, err)
}
if err := os.WriteFile(fileC, []byte("hello!\n"), 0o664); err != nil {
t.Fatalf("os.WriteFile(%q, hello!, 0o664) = %v", fileC, err)
}

// break directory perms
if err := os.Chmod(dirB, 0o666); err != nil {
t.Fatalf("os.Chmod(%q, 0o666) = %v", dirB, err)
}
if err := os.Chmod(dirA, 0o444); err != nil {
t.Fatalf("os.Chmod(%q, 0o444) = %v", dirA, err)
}

if err := hardRemoveAll(dirA); err != nil {
t.Errorf("hardRemoveAll(%q) = %v", dirA, err)
}
if _, err := os.Stat(dirA); !errors.Is(err, fs.ErrNotExist) {
t.Errorf("os.Stat(%q) = %v, want %v", dirA, err, fs.ErrNotExist)
}
}
53 changes: 53 additions & 0 deletions internal/job/remove_all_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
//go:build unix

package job

import (
"os"
"path/filepath"

"golang.org/x/sys/unix"
)

// hardRemoveAll tries very hard to remove all items from the directory at path.
// In addition to calling os.RemoveAll, it fixes missing +x bits on directories.
func hardRemoveAll(path string) error {
for {
err := os.RemoveAll(path)
if err == nil { // If os.RemoveAll worked, then exit early.
return nil
}
// os.RemoveAll documents its only non-nil error as *os.PathError.
pathErr, ok := err.(*os.PathError)
if !ok {
return err
}

// Did we not have permission to open something within a directory?
if pathErr.Err != unix.EACCES {
return err
}
dir := filepath.Dir(pathErr.Path)

// Check that the EACCES was caused by mode on the directory.
// (Note that this is a TOCTOU race, but we're not changing
// owner uid/gid, and if something else is concurrently writing
// files they can probably chmod +wx their files themselves)
di, statErr := os.Lstat(dir)
if statErr != nil {
return statErr
}
if !di.IsDir() {
return err
}
if unix.Faccessat(0, dir, unix.W_OK|unix.X_OK, unix.AT_EACCESS) != unix.EACCES {
// Some other failure?
return err
}
// Try to fix it with chmod +x dir
if err := os.Chmod(dir, 0o777); err != nil {
return err
}
// Now retry os.RemoveAll.
}
}