From 762fa721d38d8b99ac4c8a6050f206484bedefe4 Mon Sep 17 00:00:00 2001 From: Marin Nozhchev Date: Tue, 30 Sep 2025 11:29:36 +0300 Subject: [PATCH 1/2] feat: cache downloaded packages --- download/download.go | 131 ++++++++++++++++++++------------------ download/download_test.go | 3 +- download/github.go | 42 ++++++++++++ download/preview.go | 7 +- 4 files changed, 115 insertions(+), 68 deletions(-) create mode 100644 download/github.go diff --git a/download/download.go b/download/download.go index db93b36..8d8cc6f 100644 --- a/download/download.go +++ b/download/download.go @@ -3,11 +3,11 @@ package download import ( "archive/zip" - "errors" "fmt" "io" "net/http" "os" + "path/filepath" "runtime" "strings" @@ -18,8 +18,8 @@ import ( ) const ( - LatestVersion = "latest" - duckDbReleasesRoot = "https://github.com/duckdb/duckdb/releases" + LatestVersion = "latest" + PreviewVersion = "preview" ) type BinType int @@ -29,38 +29,67 @@ const ( BinTypeCli ) +// Prefix is found in the beginning of some archive and file names in DuckDB packages +func (typ BinType) Prefix() string { + var prefix string + switch typ { + case BinTypeCli: + prefix = "duckdb_cli" + case BinTypeDynLib: + prefix = "libduckdb" + default: + panic("unhandled spec type") + } + return prefix +} + +// Spec defines the desired DuckDB binary and download options +// Use DefaultSpec() to get a recommended configuration. The zero value is also valid. type Spec struct { // Type of binary to download (enum) Type BinType // DuckDB version, defaults to latest + // Supported values are either plain semantic version with optional 'v' prefix - e.g. 
1.2.2, v1.3.2, + // or "latest" - latest release version + // or "preview" - latest preview version from https://duckdb.org/docs/installation/?version=main Version string - // Target OS defaults to runtime.GOOS + // Target OS, defaults to runtime.GOOS OS string - // Target arch defaults to runtime.GOARCH + // Target arch, defaults to runtime.GOARCH Arch string - // Overwrite forces downloading a file even if there is an existing appropriate in the working directory - // The definition of "appropriate" will evolve over time - for now, all existing files are accepted + // CacheDownload enables caching the bundle downloaded from the Internet in the temp directory, + // if the server supports it by exposing ETag and Content-Length headers. + // CacheDownload is independent of the Overwrite setting. + CacheDownload bool + + // Overwrite forces overwriting the final file even if there is an existing appropriate file in the working directory + // The definition of "appropriate" will evolve over time - for now, all existing files are accepted. Overwrite bool } +// DefaultSpec creates a recommended spec for downloading releases +// The zero-value of Spec is also a valid configuration. +// NB: Changes to the default spec are not considered breaking changes and may happen in a +// minor release. They won't happen in patch releases. func DefaultSpec() Spec { return Spec{ - Type: BinTypeDynLib, - Version: LatestVersion, - OS: runtime.GOOS, - Arch: runtime.GOARCH, + Type: BinTypeDynLib, + Version: LatestVersion, + CacheDownload: true, + OS: runtime.GOOS, + Arch: runtime.GOARCH, } } type Result struct { OutputFile string - // Download may be false if there was an existing appropriate file and Spec.Overwrite was false + // OutputWritten may be false if there was an existing appropriate file and Spec.Overwrite was false // See Spec.Overwrite for details. 
- Downloaded bool + OutputWritten bool } // Do downloads a DuckDB release @@ -75,15 +104,17 @@ func Do(spec Spec) (Result, error) { if !spec.Overwrite && existsAppropriate(entryName) { return res, nil } - res.Downloaded = true + res.OutputWritten = true path := getZipDownloadUrl(spec) - tmpFile, err := fetchZip(path) + tmpFile, err := fetchZip(path, spec.CacheDownload) if err != nil { return res, err } - defer func() { - _ = os.Remove(tmpFile) - }() + if !spec.CacheDownload { + defer func() { + _ = os.Remove(tmpFile) + }() + } return res, processZip(spec, entryName, tmpFile) } @@ -108,24 +139,6 @@ func existsAppropriate(fileName string) bool { return err == nil && fi.Mode().IsRegular() } -func getGithubURL(spec Spec) string { - archivePrefix := getPrefixByType(spec.Type) - return fmt.Sprintf("%s/download/%s/%s-%s-%s.zip", duckDbReleasesRoot, spec.Version, archivePrefix, spec.OS, spec.Arch) -} - -func getPrefixByType(typ BinType) string { - var prefix string - switch typ { - case BinTypeCli: - prefix = "duckdb_cli" - case BinTypeDynLib: - prefix = "libduckdb" - default: - panic("unhandled spec type") - } - return prefix -} - func normalizeSpec(spec Spec) (Spec, error) { spec.Arch = strings.ToLower(spec.Arch) spec.OS = strings.ToLower(spec.OS) @@ -157,30 +170,6 @@ func normalizeSpec(spec Spec) (Spec, error) { return spec, err } -func getLatestVersionPath() (string, error) { - redirectErr := errors.New("redirect") - client := http.Client{ - CheckRedirect: func(req *http.Request, via []*http.Request) error { - return redirectErr - }, - } - const latestUrl = duckDbReleasesRoot + "/latest" - resp, err := client.Head(latestUrl) - if errors.Is(err, redirectErr) { - location := resp.Header.Get("Location") - prefix := duckDbReleasesRoot + "/tag/" - if !strings.HasPrefix(location, prefix) { - return "", fmt.Errorf("unexpected release redirect location: %s", location) - } - return location[len(prefix):], nil - } - if err != nil { - return "", fmt.Errorf("HEAD failed for %s: 
%w", latestUrl, err) - } - _ = resp.Body.Close() - return "", fmt.Errorf("redirect expected for %s but got code %d", latestUrl, resp.StatusCode) -} - func extractOne(zipFile string, name string) error { zipReader, err := zip.OpenReader(zipFile) if err != nil { @@ -254,7 +243,10 @@ func getCliName(targetOS string) string { return name } -func fetchZip(url string) (string, error) { +func fetchZip(url string, useEtag bool) (string, error) { + // It *may* be more efficient (for whom?) to issue a HEAD request first for the ETag and Content-Length. + // We can't use If-None-Match because we don't know in advance which cached file is for which spec. + // We could encode the entire spec in the cached file name but the complexity would not be worth it. resp, err := http.Get(url) if err != nil { return "", genericDownloadErr(url, err) @@ -262,8 +254,23 @@ func fetchZip(url string) (string, error) { if resp.StatusCode != http.StatusOK { return "", fmt.Errorf("HTTP error when trying to download %s: %d", url, resp.StatusCode) } + etagHeader := resp.Header.Get("ETag") + contentLength := resp.ContentLength defer helperr.CloseQuietly(resp.Body) - tmpZip, err := os.CreateTemp("", "getaduck") + var tmpZip *os.File + if !useEtag { + tmpZip, err = os.CreateTemp("", "getaduck") + } else { + fileName := fmt.Sprintf("getaduck.zip.etag_%s", etagHeader) + fileName = filepath.Join(os.TempDir(), fileName) + if info, statErr := os.Stat(fileName); statErr == nil { + if etagHeader != "" && info.Size() == contentLength { + return fileName, nil + } + } + // Cache miss, size mismatch or no ETag to identify the content: (re)download, replacing any stale file. + tmpZip, err = os.Create(fileName) + } if err != nil { return "", fmt.Errorf("failed to create temp file: %w", err) } diff --git a/download/download_test.go b/download/download_test.go index b9084ae..ae16c39 100644 --- a/download/download_test.go +++ b/download/download_test.go @@ -9,7 +9,7 @@ import ( func TestDo(t *testing.T) { if testing.Short() { - t.Skip("skipping test that downloads from Github in short mode.") + t.Skip("skipping test that 
downloads from the Internet in short mode.") } t.Run("default lib", func(t *testing.T) { for _, version := range []string{ @@ -29,6 +29,7 @@ func TestDo(t *testing.T) { spec.Version = version spec.Arch = arch spec.Overwrite = true + spec.CacheDownload = true res, err := download.Do(spec) require.NoError(t, err) require.FileExists(t, res.OutputFile) diff --git a/download/github.go b/download/github.go new file mode 100644 index 0000000..37ed2c9 --- /dev/null +++ b/download/github.go @@ -0,0 +1,42 @@ +package download + +import ( + "errors" + "fmt" + "net/http" + "strings" +) + +// Code to download from Github - applies to semver and latest releases + +const ( + duckDbReleasesRoot = "https://github.com/duckdb/duckdb/releases" +) + +func getLatestVersionPath() (string, error) { + redirectErr := errors.New("redirect") + client := http.Client{ + CheckRedirect: func(req *http.Request, via []*http.Request) error { + return redirectErr + }, + } + const latestUrl = duckDbReleasesRoot + "/latest" + resp, err := client.Head(latestUrl) + if errors.Is(err, redirectErr) { + location := resp.Header.Get("Location") + prefix := duckDbReleasesRoot + "/tag/" + if !strings.HasPrefix(location, prefix) { + return "", fmt.Errorf("unexpected release redirect location: %s", location) + } + return location[len(prefix):], nil + } + if err != nil { + return "", fmt.Errorf("HEAD failed for %s: %w", latestUrl, err) + } + _ = resp.Body.Close() + return "", fmt.Errorf("redirect expected for %s but got code %d", latestUrl, resp.StatusCode) +} + +func getGithubURL(spec Spec) string { + return fmt.Sprintf("%s/download/%s/%s-%s-%s.zip", duckDbReleasesRoot, spec.Version, spec.Type.Prefix(), spec.OS, spec.Arch) +} diff --git a/download/preview.go b/download/preview.go index 3334d39..c61a508 100644 --- a/download/preview.go +++ b/download/preview.go @@ -7,9 +7,7 @@ import ( "github.com/ansel1/merry/v2" ) -const ( - PreviewVersion = "preview" -) +// Downloading preview releases func getPreviewZipUrl(spec 
Spec) string { // https://artifacts.duckdb.org/latest/duckdb-binaries-osx.zip @@ -45,7 +43,6 @@ func getInnerZipName(spec Spec) string { // libduckdb-windows-amd64.zip // duckdb_cli-linux-amd64.zip // libduckdb-linux-amd64.zip - prefix := getPrefixByType(spec.Type) // For osx, spec.Arch has been normalized to universal in normalizeSpec - return fmt.Sprintf("%s-%s-%s.zip", prefix, spec.OS, spec.Arch) + return fmt.Sprintf("%s-%s-%s.zip", spec.Type.Prefix(), spec.OS, spec.Arch) } From 6f12a9faf2fe328486f414d6bfe3a4c89d791c12 Mon Sep 17 00:00:00 2001 From: Marin Nozhchev Date: Tue, 30 Sep 2025 16:37:23 +0300 Subject: [PATCH 2/2] fixup --- download/download.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/download/download.go b/download/download.go index 8d8cc6f..5da3fcd 100644 --- a/download/download.go +++ b/download/download.go @@ -3,6 +3,7 @@ package download import ( "archive/zip" + "encoding/base64" "fmt" "io" "net/http" @@ -261,7 +262,9 @@ func fetchZip(url string, useEtag bool) (string, error) { if !useEtag { tmpZip, err = os.CreateTemp("", "getaduck") } else { - fileName := fmt.Sprintf("getaduck.zip.etag_%s", etagHeader) + // ETag may contain chars not allowed in filenames. + safeEtag := base64.URLEncoding.EncodeToString([]byte(etagHeader)) + fileName := fmt.Sprintf("getaduck.zip.etagbase64_%s", safeEtag) fileName = filepath.Join(os.TempDir(), fileName) if info, statErr := os.Stat(fileName); statErr == nil { if etagHeader != "" && info.Size() == contentLength {