diff --git a/util/git/canon.go b/util/git/canon.go new file mode 100644 index 0000000..df76062 --- /dev/null +++ b/util/git/canon.go @@ -0,0 +1,55 @@ +package git + +import ( + "net/url" + "strings" +) + +// defaultPorts for supported schemes. These are published on +// https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml +// and available in /etc/services. +var defaultPorts = map[string]string{ + "ftp": "21", + "ssh": "22", + "http": "80", + "https": "443", + "ftps": "990", + "git": "9418", +} + +// canon returns a canonicalized URL for the given git repository URL. +// +// A shallow copy of the URL is made so that the original URL is not modified. +func canon(repo *url.URL) *url.URL { + u := *repo // shallow copy + + // Ensure the host is lowercased. + u.Host = strings.ToLower(u.Host) + + // Remove any deprecated git prefix or suffix. + if strings.HasSuffix(u.Scheme, "+git") { + u.Scheme = u.Scheme[:len(u.Scheme)-4] + } else if strings.HasPrefix(u.Scheme, "git+") { + u.Scheme = u.Scheme[4:] + } + + // Strip the default port if it is present. + if port, ok := defaultPorts[u.Scheme]; ok && u.Port() == port { + // Remove the port from the URL. There should always be a colon if the + // port is present. + // We do this rather than u.Host = u.Hostname() because the latter will + // remove the brackets from IPv6 addresses. + lastColon := strings.LastIndex(u.Host, ":") + if lastColon > 0 { + u.Host = u.Host[:lastColon] + } + } + + // Always strip passwords if they are present. They should never really be + // present for open source repositories. 
+ if _, ok := u.User.Password(); ok { + u.User = url.User(u.User.Username()) + } + + return &u +} diff --git a/util/git/go.mod b/util/git/go.mod new file mode 100644 index 0000000..5ce7497 --- /dev/null +++ b/util/git/go.mod @@ -0,0 +1,3 @@ +module deps.dev/util/git + +go 1.23.4 diff --git a/util/git/hosts.go b/util/git/hosts.go new file mode 100644 index 0000000..a3ea17a --- /dev/null +++ b/util/git/hosts.go @@ -0,0 +1,197 @@ +package git + +import ( + "fmt" + "net/url" + "strings" +) + +// HostHandler defines the interface for host-specific validation and +// canonicalization of URLs. +type HostHandler interface { + // Validate checks if the URL is valid for this host. + Validate(u *url.URL) error + + // Canon canonicalizes the URL for this host. + Canon(u *url.URL) *url.URL +} + +var hostHandlers []struct { + exactHost string + handler HostHandler +} + +// RegisterHostHandler allows for the registration of a host handler +// that is triggered when the host of a URL exactly matches the given host, +// ignoring case. +// +// This method is not thread-safe, and is expected to be called from init(). +func RegisterHostHandler(exactHost string, handler HostHandler) { + hostHandlers = append(hostHandlers, struct { + exactHost string + handler HostHandler + }{ + exactHost: exactHost, + handler: handler, + }) +} + +// getHostHandler returns the registered handler for the host. +func getHostHandler(host string) HostHandler { + host = strings.ToLower(host) + for _, h := range hostHandlers { + if h.exactHost == host { + return h.handler + } + } + return noop +} + +// noopHostHandler is a HostHandler that does nothing. +type noopHostHandler struct{} + +func (h *noopHostHandler) Validate(u *url.URL) error { + return nil +} + +func (h *noopHostHandler) Canon(u *url.URL) *url.URL { + return u +} + +var noop = &noopHostHandler{} + +// StandardHostHandler implements HostHandler with common validation and +// canonicalization settings. 
+type StandardHostHandler struct { + // ForceScheme will replace any URL scheme with its value when set. + ForceScheme string + + // StripUser will remove the user from the URL when true. + StripUser bool + + // HasTrailingSlash will add a trailing slash to the URL if it doesn't have + // one. If false any trailing slash will be removed. + HasTrailingSlash bool + + // HasDotGitSuffix will add a .git suffix to the URL if it doesn't have one. + // If false any .git suffix will be removed. + HasDotGitSuffix bool + + // PathPrefix is the prefix that is required to be set for a valid git + // repository for this host. + PathPrefix string + + // PathSegments is the number of path segments that are required for a valid + // git repository for this host. If 0 there is no restriction. If PathPrefix + // is not empty, then only segments after the prefix are considered. + PathSegments int + + // LowerPathSegments is the number of path segments that should be lowercased + // following any PathPrefix, during the canonicalization process. This is to + // ensure that URLs for case-insensitive hosts are canonicalized correctly. + LowerPathSegments int +} + +// Validate implements the HostHandler interface. +func (h *StandardHostHandler) Validate(u *url.URL) error { + path := strings.TrimRight(u.Path, "/") + if h.PathPrefix != "" { + prefix := "/" + strings.Trim(h.PathPrefix, "/") + if !strings.HasPrefix(path, prefix) { + return fmt.Errorf("invalid path prefix: must start with %q", h.PathPrefix) + } + path = strings.TrimPrefix(path, prefix) + } + + // Any slashes at the end of the path are stripped above, we strip any at the + // start so that we can count the remaining path segments correctly. 
+ path = strings.TrimLeft(path, "/") + var segments []string + if path != "" { + segments = strings.Split(path, "/") + } + + if h.PathSegments > 0 && len(segments) != h.PathSegments { + return fmt.Errorf("incorrect number of path segemnts") + } + return nil +} + +// Canon implements the HostHandler interface. +func (h *StandardHostHandler) Canon(u *url.URL) *url.URL { + res := *u // shallow copy + + if h.ForceScheme != "" { + res.Scheme = h.ForceScheme + } + if h.StripUser { + res.User = nil + } + + if h.LowerPathSegments > 0 { + // Strip the prefix and trailing slash so that we can count the remaining path segments correctly. + path := strings.TrimRight(res.Path, "/") + prefix := "" + if h.PathPrefix != "" { + prefix = "/" + strings.Trim(h.PathPrefix, "/") + path = strings.TrimPrefix(path, prefix) + } + + // Split the remaining path into segments and lowercase the first N segments. + segments := strings.Split(strings.TrimLeft(path, "/"), "/") + for i := 0; i < len(segments) && i < h.LowerPathSegments; i++ { + segments[i] = strings.ToLower(segments[i]) + } + + // Reconstruct the path with the prefix and the lowercased segments. + res.Path = prefix + "/" + strings.Join(segments, "/") + } + + if h.HasDotGitSuffix { + if !strings.HasSuffix(res.Path, ".git") { + res.Path += ".git" + } + } else { + res.Path = strings.TrimSuffix(res.Path, ".git") + } + + if h.HasTrailingSlash { + if !strings.HasSuffix(res.Path, "/") { + res.Path += "/" + } + } else { + res.Path = strings.TrimSuffix(res.Path, "/") + } + + return &res +} + +var defaultHostHandler = &StandardHostHandler{ + ForceScheme: "https", + StripUser: true, + HasDotGitSuffix: true, + LowerPathSegments: 2, + PathSegments: 2, +} + +func init() { + // Register default handlers for well-known hosts. 
+ RegisterHostHandler("github.com", defaultHostHandler) + RegisterHostHandler("gitlab.com", defaultHostHandler) + RegisterHostHandler("bitbucket.org", defaultHostHandler) + + RegisterHostHandler("gitee.com", &StandardHostHandler{ + ForceScheme: "https", + StripUser: true, + HasDotGitSuffix: true, + LowerPathSegments: 1, + PathSegments: 2, + }) + RegisterHostHandler("gitee.cn", &StandardHostHandler{ + ForceScheme: "https", + StripUser: true, + HasDotGitSuffix: true, + LowerPathSegments: 1, + PathSegments: 2, + }) +} diff --git a/util/git/hosts_test.go b/util/git/hosts_test.go new file mode 100644 index 0000000..3f560b4 --- /dev/null +++ b/util/git/hosts_test.go @@ -0,0 +1,157 @@ +package git + +import ( + "net/url" + "testing" +) + +func TestGetHostHandler(t *testing.T) { + // Replace all the handlers so we can correctly test the noop behavior. + oldHandlers := hostHandlers + defer func() { hostHandlers = oldHandlers }() + hostHandlers = nil + + h1 := &StandardHostHandler{} + + RegisterHostHandler("example.com", h1) + + tests := []struct { + host string + want HostHandler + }{ + {"example.com", h1}, + {"foo.example.com", noop}, + {"other.com", noop}, + } + + for _, tt := range tests { + got := getHostHandler(tt.host) + if got != tt.want { + t.Errorf("getHostHandler(%q) = %p; want %p", tt.host, got, tt.want) + } + } +} + +func TestStandardHostHandler_Validate(t *testing.T) { + tests := []struct { + name string + handler StandardHostHandler + url string + wantErr bool + }{ + { + name: "no restrictions", + handler: StandardHostHandler{}, + url: "https://example.com/foo/bar", + wantErr: false, + }, + { + name: "segments OK", + handler: StandardHostHandler{PathSegments: 2}, + url: "https://example.com/foo/bar", + wantErr: false, + }, + { + name: "segments Fail", + handler: StandardHostHandler{PathSegments: 3}, + url: "https://example.com/foo/bar", + wantErr: true, + }, + { + name: "prefix OK", + handler: StandardHostHandler{PathPrefix: "/git/"}, + url: 
"https://example.com/git/repo", + wantErr: false, + }, + { + name: "invalid prefix", + handler: StandardHostHandler{PathPrefix: "/git/"}, + url: "https://example.com/other/repo", + wantErr: true, + }, + { + name: "prefix and segments OK", + handler: StandardHostHandler{PathPrefix: "/git/", PathSegments: 1}, + url: "https://example.com/git/repo", + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + u, _ := url.Parse(tt.url) + err := tt.handler.Validate(u) + if (err != nil) != tt.wantErr { + t.Errorf("Validate() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} + +func TestStandardHostHandler_Canon(t *testing.T) { + tests := []struct { + name string + handler StandardHostHandler + url string + want string + }{ + { + name: "force scheme", + handler: StandardHostHandler{ForceScheme: "https"}, + url: "http://example.com/foo", + want: "https://example.com/foo", + }, + { + name: "strip user", + handler: StandardHostHandler{StripUser: true}, + url: "https://user:pass@example.com/foo", + want: "https://example.com/foo", + }, + { + name: "lower path segments", + handler: StandardHostHandler{LowerPathSegments: 2}, + url: "https://example.com/Foo/Bar/Baz", + want: "https://example.com/foo/bar/Baz", + }, + { + name: "has dot git suffix (add)", + handler: StandardHostHandler{HasDotGitSuffix: true}, + url: "https://example.com/foo/bar", + want: "https://example.com/foo/bar.git", + }, + { + name: "has dot git suffix (remove)", + handler: StandardHostHandler{HasDotGitSuffix: false}, + url: "https://example.com/foo/bar.git", + want: "https://example.com/foo/bar", + }, + { + name: "has trailing slash (add)", + handler: StandardHostHandler{HasTrailingSlash: true}, + url: "https://example.com/foo/bar", + want: "https://example.com/foo/bar/", + }, + { + name: "has trailing slash (remove)", + handler: StandardHostHandler{HasTrailingSlash: false}, + url: "https://example.com/foo/bar/", + want: "https://example.com/foo/bar", + }, + { + 
name: "prefix and lower path segments", + handler: StandardHostHandler{PathPrefix: "/git/", LowerPathSegments: 1}, + url: "https://example.com/git/Foo/Bar", + want: "https://example.com/git/foo/Bar", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + u, _ := url.Parse(tt.url) + got := tt.handler.Canon(u) + if got.String() != tt.want { + t.Errorf("Canon() = %v; want %v", got.String(), tt.want) + } + }) + } +} diff --git a/util/git/parse.go b/util/git/parse.go new file mode 100644 index 0000000..d92d72f --- /dev/null +++ b/util/git/parse.go @@ -0,0 +1,192 @@ +// Package git provides utilities for working with git repositories. +package git + +import ( + "errors" + "fmt" + "net/url" + "regexp" + "slices" + "strings" +) + +var validGitRemoteSchemes = []string{ + "http", + "https", + "ftp", + "ftps", + "ssh", + "git", +} + +var schemeRequiresPath = []string{ + "ssh", + "git", +} + +// ErrInvalidRepo indicates that the provided string is not a valid git +// repository name or URL. +var ErrInvalidRepo = errors.New("invalid git repository") + +// schemeRegexp matches schemes the same way git does in is_urlschemechar(). It +// is more permissive than RFC3986 as it can start with digits. +var schemeRegexp = regexp.MustCompile(`^[A-Za-z0-9][A-Za-z0-9+.-]*`) + +// gitRemoteHelper matches any custom transport using gitremote-helpers via the +// <transport>::<address>
syntax. It returns the transport name and true if a +// match is found. +func gitRemoteHelper(name string) (string, bool) { + loc := schemeRegexp.FindStringIndex(name) + if loc != nil && loc[0] == 0 && loc[1]+1 < len(name) && name[loc[1]] == ':' && name[loc[1]+1] == ':' { + return name[:loc[1]], true + } + return "", false +} + +// isGitURL checks if the given string is a URL according to the same logic +// used by the git source code in url.c. +func isGitURL(url string) bool { + if url == "" { + // An empty string is not a git URL. + return false + } + + loc := schemeRegexp.FindStringIndex(url) + if loc == nil || loc[0] != 0 || loc[1] == 0 { + // Either there was no scheme found, or the location of the scheme is not + // anchored to the beginning of the URL. + return false + } + + // Check if there is a '://' immediately following the scheme. + return loc[1]+2 < len(url) && url[loc[1]] == ':' && url[loc[1]+1] == '/' && url[loc[1]+2] == '/' +} + +// isSCP checks if a url is SCP-like by checking if the colon appears before any +// slash. This mimics the logic of the git source code in connect.c, but +// without the local file checking. +func isSCP(url string) bool { + colonPos := strings.Index(url, ":") + slashPos := strings.Index(url, "/") + return colonPos > 0 && (slashPos < 0 || colonPos < slashPos) +} + +// parseSCP parses an SCP-style git remote name into a URL. It is intended to be +// used by ParseRemote after isSCP has returned true. +// The logic here is based off the git source code in connect.c. +func parseSCP(name string) (*url.URL, error) { + // Handle the case where the host is an IPv6 address. These are surrounded in + // square brackets. There is also a possibility that there is a user and an + // "@" before the IPv6 address that needs to be accounted for. + index := 0 + if bracketOpenPos := strings.Index(name, "@["); bracketOpenPos >= 0 { + // Jump over the "@" to get to the start of the IPv6 address. 
+ index = bracketOpenPos + 1 + } + if name[index] == '[' { + // We have found an IPv6 address. Find the closing bracket. + bracketClosedOff := strings.IndexRune(name[index:], ']') + if bracketClosedOff >= 0 { + // Found the closing bracket, so move the index past it. + index += bracketClosedOff + 1 + } + } + // Now we can safely hunt for the colon separating the user+host from the + // path. SCP URLs do not have ports, so we do not have to worry about them. + colonPos := strings.IndexRune(name[index:], ':') + if colonPos < 0 { + return nil, fmt.Errorf("no colon in %q", name) + } + // Adjust the colon position to be relative to the start of the string. + index += colonPos + + userHost := name[:index] + path := name[index+1:] + + if len(path) > 0 && path[0] == '/' { + // Remove the leading slash so that we don't end up with a double slash. + path = path[1:] + } + + // Formulate a text URL we can parse. + urlStr := fmt.Sprintf("ssh://%s/%s", userHost, path) + + return url.Parse(urlStr) +} + +// parse parses a git repository name and returns a URL. +// +// Both URL and SCP-like remote names are supported. +// +// Note that this function is intended to be used for processing open source git +// repository data, not for private git repositories or git repositories that +// are intended to be used. +// +// Custom gitremote-helpers are explicitly rejected. Local file paths and +// bundles are also rejected. +func parse(name string) (*url.URL, error) { + // Reject any custom gitremote-helpers (https://git-scm.com/docs/gitremote-helpers) + // following the same logic as the git source code to ensure the same behavior. 
+ if helper, ok := gitRemoteHelper(name); ok { + return nil, fmt.Errorf("%w: custom transport %q", ErrInvalidRepo, helper) + } + + var u *url.URL + var err error + + if isGitURL(name) { + u, err = url.Parse(name) + if err != nil { + return nil, fmt.Errorf("%w: url parsing: %w", ErrInvalidRepo, err) + } + } else if isSCP(name) { + u, err = parseSCP(name) + if err != nil { + return nil, fmt.Errorf("%w: scp parsing: %w", ErrInvalidRepo, err) + } + } else { + return nil, fmt.Errorf("%w: unable to parse %q", ErrInvalidRepo, name) + } + + // Extract the scheme so we can manipulate it before validation, but we + // preserve the original as we are not canonicalizing the URL here. + scheme := u.Scheme + + // Remove any deprecated or unnecessary suffix or prefix, as these can be added + // from package manifests, or referencing the deprecated "git+ssh" or "ssh+git" + // schemes. + if strings.HasSuffix(scheme, "+git") { + scheme = scheme[:len(scheme)-4] + } else if strings.HasPrefix(scheme, "git+") { + scheme = scheme[4:] + } + + // Explicitly reject the file scheme to ensure the error message is useful. + if scheme == "file" { + return nil, fmt.Errorf("%w: file scheme not supported", ErrInvalidRepo) + } + + // Validate the scheme is a scheme that is supported natively by git. Open + // source repositories should not be using custom gitremote-helpers. + if !slices.Contains(validGitRemoteSchemes, scheme) { + return nil, fmt.Errorf("%w: custom transport %q", ErrInvalidRepo, u.Scheme) + } + + // For schemes that require a path, ensure they have a path component. + if slices.Contains(schemeRequiresPath, scheme) && u.Path == "" { + return nil, fmt.Errorf("%w: %q scheme requires a path", ErrInvalidRepo, u.Scheme) + } + + // For the git scheme ensure there is no user info, as it is an unauthenticated + // transport. + if scheme == "git" && u.User != nil { + return nil, fmt.Errorf("%w: git scheme has authentication", ErrInvalidRepo) + } + + // Ensure the host is set. 
It is the only required component of a git remote name. + if u.Host == "" { + return nil, fmt.Errorf("%w: missing host", ErrInvalidRepo) + } + + return u, nil +} diff --git a/util/git/parse_test.go b/util/git/parse_test.go new file mode 100644 index 0000000..bf9235d --- /dev/null +++ b/util/git/parse_test.go @@ -0,0 +1,141 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package git + +import ( + "errors" + "testing" +) + +func TestParseRemote(t *testing.T) { + tests := []struct { + name string + want string // Expected u.String() + wantErr error + }{ + { + name: "https://github.com/user/repo.git", + want: "https://github.com/user/repo.git", + }, + { + name: "ssh://git@github.com/user/repo.git", + want: "ssh://git@github.com/user/repo.git", + }, + { + name: "git@github.com:user/repo.git", + want: "ssh://git@github.com/user/repo.git", + }, + { + name: "git@github.com:/user/repo.git", + want: "ssh://git@github.com/user/repo.git", + }, + { + name: "git@[2001:db8::1]:user/repo.git", + want: "ssh://git@[2001:db8::1]/user/repo.git", + }, + { + name: "[2001:db8::1]:user/repo.git", + want: "ssh://[2001:db8::1]/user/repo.git", + }, + { + name: "git://github.com/user/repo.git", + want: "git://github.com/user/repo.git", + }, + { + name: "ftp://example.com/repo.git", + want: "ftp://example.com/repo.git", + }, + { + name: "git+https://github.com/user/repo.git", + want: "git+https://github.com/user/repo.git", + }, + { + name: 
"ssh+git://git@github.com/user/repo", + want: "ssh+git://git@github.com/user/repo", + }, + { + name: "host:", + want: "ssh://host/", + }, + { + name: "", + wantErr: ErrInvalidRepo, + }, + { + name: "transport", + wantErr: ErrInvalidRepo, + }, + { + name: "hg::https://example.com/repo", + wantErr: ErrInvalidRepo, + }, + { + name: "file:///path/to/repo", + wantErr: ErrInvalidRepo, + }, + { + name: "something://github.com/user/repo", + wantErr: ErrInvalidRepo, + }, + { + name: "ssh://git@github.com", + wantErr: ErrInvalidRepo, + }, + { + name: "git://github.com", + wantErr: ErrInvalidRepo, + }, + { + name: "git://user@github.com/repo", + wantErr: ErrInvalidRepo, + }, + { + name: "git:///repo", + wantErr: ErrInvalidRepo, + }, + { + name: "git@github.com/user/repo", + wantErr: ErrInvalidRepo, + }, + { + name: "git@[2001:db8::1abc/test", + wantErr: ErrInvalidRepo, + }, + { + name: "https://example.com:1234a/test", + wantErr: ErrInvalidRepo, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := parse(tt.name) + if tt.wantErr != nil { + if err == nil { + t.Fatalf("parse(%q) = %v; wantErr %v", tt.name, got, tt.wantErr) + } else if !errors.Is(err, tt.wantErr) { + t.Fatalf("parse(%q) error = %v; wantErr %v", tt.name, err, tt.wantErr) + } + return + } + if err != nil { + t.Fatalf("parse(%q) unexpected error: %v", tt.name, err) + } + if got.String() != tt.want { + t.Errorf("parse(%q) = %v; want %v", tt.name, got, tt.want) + } + }) + } +} diff --git a/util/git/repo.go b/util/git/repo.go new file mode 100644 index 0000000..15f6709 --- /dev/null +++ b/util/git/repo.go @@ -0,0 +1,76 @@ +package git + +import ( + "fmt" + "net/url" + "strings" +) + +// Repo represents a parsed and canonicalized git repository. +type Repo struct { + raw string + parsed *url.URL + canonical *url.URL +} + +// ParseRepo parses a git repository URL or SCP-like string and returns a Repo. 
+// +// It validates the repository against registered host handlers and computes a +// canonical URL. +// +// If the repository is not valid, an error is returned. +func ParseRepo(name string) (*Repo, error) { + parsed, err := parse(name) + if err != nil { + return nil, err + } + + h := getHostHandler(parsed.Hostname()) + + // Carry out any host-based validation. + if err := h.Validate(parsed); err != nil { + return nil, fmt.Errorf("%w: host validation: %w", ErrInvalidRepo, err) + } + + return &Repo{ + raw: name, + parsed: parsed, + canonical: h.Canon(canon(parsed)), + }, nil +} + +// Canon returns the canonicalized URL of the repository. +func (r *Repo) Canon() *url.URL { + return r.canonical +} + +// Parsed returns the parsed URL of the repository (preserving case and +// structure from parsing). +func (r *Repo) Parsed() *url.URL { + return r.parsed +} + +// Raw returns the original raw string used to parse the repository. +func (r *Repo) Raw() string { + return r.raw +} + +// ID returns a string that can be used to identify the repository. +// +// The ID is based on the canonical URL, but is made up of the host and path +// only. +// The ID is unsuitable for interacting with the repository. +func (r *Repo) ID() string { + u := *(r.canonical) // Shallow copy + + host := u.Hostname() + if strings.ContainsRune(host, ':') { + // Add square brackets around IPv6 addresses. 
+ host = "[" + host + "]" + } + + path := strings.TrimSuffix(u.EscapedPath(), ".git") + path = strings.TrimSuffix(path, "/") + + return host + path +} diff --git a/util/git/repo_test.go b/util/git/repo_test.go new file mode 100644 index 0000000..7335fef --- /dev/null +++ b/util/git/repo_test.go @@ -0,0 +1,130 @@ +package git + +import ( + "errors" + "testing" +) + +func TestParseRepo(t *testing.T) { + tests := []struct { + name string + input string + wantCanon string + wantParsed string + wantErr error + }{ + { + name: "github happy path", + input: "https://github.com/user/repo.git", + wantCanon: "https://github.com/user/repo.git", + wantParsed: "https://github.com/user/repo.git", + }, + { + name: "github ssh to https", + input: "git@github.com:user/repo.git", + wantCanon: "https://github.com/user/repo.git", // defaultHostHandler forces https + wantParsed: "ssh://git@github.com/user/repo.git", + }, + { + name: "github mixed case", + input: "https://Github.com/User/Repo.git", + wantCanon: "https://github.com/user/repo.git", // lowerPathSegments = 2 + wantParsed: "https://Github.com/User/Repo.git", + }, + { + name: "gitee happy path", + input: "https://gitee.com/user/repo.git", + wantCanon: "https://gitee.com/user/repo.git", + wantParsed: "https://gitee.com/user/repo.git", + }, + { + name: "gitee lower path segments", + input: "https://gitee.com/User/Repo.git", + wantCanon: "https://gitee.com/user/Repo.git", // lowerPathSegments = 1 + wantParsed: "https://gitee.com/User/Repo.git", + }, + { + name: "invalid repo", + input: "not-a-repo", + wantErr: ErrInvalidRepo, + }, + { + name: "github invalid path segments", + input: "https://github.com/only-one", + wantErr: ErrInvalidRepo, // defaultHostHandler requires 2 segments + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + r, err := ParseRepo(tt.input) + if tt.wantErr != nil { + if err == nil { + t.Fatalf("ParseRepo(%q) = %v; wantErr %v", tt.input, r, tt.wantErr) + } else if !errors.Is(err, 
tt.wantErr) { + t.Fatalf("ParseRepo(%q) error = %v; wantErr %v", tt.input, err, tt.wantErr) + } + return + } + if err != nil { + t.Fatalf("ParseRepo(%q) unexpected error: %v", tt.input, err) + } + + if got := r.Canon().String(); got != tt.wantCanon { + t.Errorf("Canon() = %v; want %v", got, tt.wantCanon) + } + if got := r.Parsed().String(); got != tt.wantParsed { + t.Errorf("Parsed() = %v; want %v", got, tt.wantParsed) + } + if got := r.Raw(); got != tt.input { + t.Errorf("Raw() = %v; want %v", got, tt.input) + } + }) + } +} + +func TestRepo_ID(t *testing.T) { + tests := []struct { + name string + input string + want string + }{ + { + name: "standard github", + input: "https://github.com/user/repo.git", + want: "github.com/user/repo", + }, + { + name: "github ssh", + input: "git@github.com:user/repo.git", + want: "github.com/user/repo", + }, + { + name: "ipv6 host", + input: "ssh://git@[2001:db8::1]/user/repo.git", + want: "[2001:db8::1]/user/repo", + }, + { + name: "no dot git", + input: "https://github.com/user/repo", + want: "github.com/user/repo", + }, + { + name: "trailing slash", + input: "https://github.com/user/repo/", + want: "github.com/user/repo", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + r, err := ParseRepo(tt.input) + if err != nil { + t.Fatalf("ParseRepo(%q) unexpected error: %v", tt.input, err) + } + if got := r.ID(); got != tt.want { + t.Errorf("ID() = %v; want %v", got, tt.want) + } + }) + } +}