Skip to content
Snippets Groups Projects
Commit 1050328b authored by ale's avatar ale
Browse files

Merge branch 'renovate/github.com-puerkitobio-purell-1.x' into 'master'

Update module github.com/PuerkitoBio/purell to v1

See merge request !6
parents bd810938 55b2a516
No related branches found
No related tags found
1 merge request!6Update module github.com/PuerkitoBio/purell to v1
Pipeline #42517 passed
Showing
with 25319 additions and 77 deletions
......@@ -4,7 +4,7 @@ go 1.15
require (
github.com/PuerkitoBio/goquery v1.8.0
github.com/PuerkitoBio/purell v0.1.0
github.com/PuerkitoBio/purell v1.2.0
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/google/go-cmp v0.5.6
github.com/google/uuid v1.1.1 // indirect
......
......@@ -8,6 +8,8 @@ github.com/PuerkitoBio/purell v0.0.0-20180310210909-975f53781597 h1:1H3FyRw7YsqI
github.com/PuerkitoBio/purell v0.0.0-20180310210909-975f53781597/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/purell v0.1.0 h1:N8Bcc53nei5frgNYgAKo93qMUVdU5LUGHCBv8efdVcM=
github.com/PuerkitoBio/purell v0.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/purell v1.2.0 h1:/Jdm5QfyM8zdlqT6WVZU4cfP23sot6CEHA4CS49Ezig=
github.com/PuerkitoBio/purell v1.2.0/go.mod h1:OhLRTaaIzhvIyofkJfB24gokC7tM42Px5UhoT32THBk=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
......@@ -58,6 +60,8 @@ golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GE
golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220617184016-355a448f1bc9 h1:Yqz/iviulwKwAREEeUd3nbBFn0XuyJqkoft2IlrvOhc=
golang.org/x/net v0.0.0-20220617184016-355a448f1bc9/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f h1:wMNYb4v58l5UBM7MYRLPG6ZhfOqbKu7X5eyFl8ZhKvA=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
......@@ -65,10 +69,15 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
......
*.sublime-*
.DS_Store
*.swp
*.swo
tags
language: go
Copyright (c) 2012, Martin Angers
Copyright (c) 2012-2022, The Go Authors
Copyright (c) 2012-2022, Martin Angers, Yuki Okushi & Contributors
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
......
......@@ -4,12 +4,21 @@ Purell is a tiny Go library to normalize URLs. It returns a pure URL. Pure-ell.
Based on the [wikipedia paper][wiki] and the [RFC 3986 document][rfc].
[![build status](https://secure.travis-ci.org/PuerkitoBio/purell.png)](http://travis-ci.org/PuerkitoBio/purell)
[![CI](https://github.com/PuerkitoBio/purell/actions/workflows/ci.yml/badge.svg)](https://github.com/PuerkitoBio/purell/actions/workflows/ci.yml)
## Install
`go get github.com/PuerkitoBio/purell`
## Changelog
* **v1.1.1** : Fix failing test due to Go1.12 changes (thanks to @ianlancetaylor).
* **2016-11-14 (v1.1.0)** : IDN: Conform to RFC 5895: Fold character width (thanks to @beeker1121).
* **2016-07-27 (v1.0.0)** : Normalize IDN to ASCII (thanks to @zenovich).
* **2015-02-08** : Add fix for relative paths issue ([PR #5][pr5]) and add fix for unnecessary encoding of reserved characters ([see issue #7][iss7]).
* **v0.2.0** : Add benchmarks, Attempt IDN support.
* **v0.1.0** : Initial release.
## Examples
From `example_test.go` (note that in your code, you would import "github.com/PuerkitoBio/purell", and would prefix references to its methods and constants with "purell."):
......@@ -58,53 +67,54 @@ As seen in the examples above, purell offers three methods, `NormalizeURLString(
```go
const (
// Safe normalizations
FlagLowercaseScheme NormalizationFlags = 1 << iota // HTTP://host -> http://host
FlagLowercaseHost // http://HOST -> http://host
FlagUppercaseEscapes // http://host/t%ef -> http://host/t%EF
FlagDecodeUnnecessaryEscapes // http://host/t%41 -> http://host/tA
FlagRemoveDefaultPort // http://host:80 -> http://host
FlagRemoveEmptyQuerySeparator // http://host/path? -> http://host/path
// Usually safe normalizations
FlagRemoveTrailingSlash // http://host/path/ -> http://host/path
FlagAddTrailingSlash // http://host/path -> http://host/path/ (should choose only one of these add/remove trailing slash flags)
FlagRemoveDotSegments // http://host/path/./a/b/../c -> http://host/path/a/c
// Unsafe normalizations
FlagRemoveDirectoryIndex // http://host/path/index.html -> http://host/path/
FlagRemoveFragment // http://host/path#fragment -> http://host/path
FlagForceHTTP // https://host -> http://host
FlagRemoveDuplicateSlashes // http://host/path//a///b -> http://host/path/a/b
FlagRemoveWWW // http://www.host/ -> http://host/
FlagAddWWW // http://host/ -> http://www.host/ (should choose only one of these add/remove WWW flags)
FlagSortQuery // http://host/path?c=3&b=2&a=1&b=1 -> http://host/path?a=1&b=1&b=2&c=3
// Normalizations not in the wikipedia article, required to cover tests cases
// submitted by jehiah
FlagDecodeDWORDHost // http://1113982867 -> http://66.102.7.147
FlagDecodeOctalHost // http://0102.0146.07.0223 -> http://66.102.7.147
FlagDecodeHexHost // http://0x42660793 -> http://66.102.7.147
FlagRemoveUnnecessaryHostDots // http://.host../path -> http://host/path
FlagRemoveEmptyPortSeparator // http://host:/path -> http://host/path
// Convenience set of safe normalizations
FlagsSafe NormalizationFlags = FlagLowercaseHost | FlagLowercaseScheme | FlagUppercaseEscapes | FlagDecodeUnnecessaryEscapes | FlagRemoveDefaultPort | FlagRemoveEmptyQuerySeparator
// For convenience sets, "greedy" uses the "remove trailing slash" and "remove www. prefix" flags,
// while "non-greedy" uses the "add (or keep) the trailing slash" and "add www. prefix".
// Convenience set of usually safe normalizations (includes FlagsSafe)
FlagsUsuallySafeGreedy NormalizationFlags = FlagsSafe | FlagRemoveTrailingSlash | FlagRemoveDotSegments
FlagsUsuallySafeNonGreedy NormalizationFlags = FlagsSafe | FlagAddTrailingSlash | FlagRemoveDotSegments
// Convenience set of unsafe normalizations (includes FlagsUsuallySafe)
FlagsUnsafeGreedy NormalizationFlags = FlagsUsuallySafeGreedy | FlagRemoveDirectoryIndex | FlagRemoveFragment | FlagForceHTTP | FlagRemoveDuplicateSlashes | FlagRemoveWWW | FlagSortQuery
FlagsUnsafeNonGreedy NormalizationFlags = FlagsUsuallySafeNonGreedy | FlagRemoveDirectoryIndex | FlagRemoveFragment | FlagForceHTTP | FlagRemoveDuplicateSlashes | FlagAddWWW | FlagSortQuery
// Convenience set of all available flags
FlagsAllGreedy = FlagsUnsafeGreedy | FlagDecodeDWORDHost | FlagDecodeOctalHost | FlagDecodeHexHost | FlagRemoveUnnecessaryHostDots | FlagRemoveEmptyPortSeparator
FlagsAllNonGreedy = FlagsUnsafeNonGreedy | FlagDecodeDWORDHost | FlagDecodeOctalHost | FlagDecodeHexHost | FlagRemoveUnnecessaryHostDots | FlagRemoveEmptyPortSeparator
// Safe normalizations
FlagLowercaseScheme NormalizationFlags = 1 << iota // HTTP://host -> http://host, applied by default in Go1.1
FlagLowercaseHost // http://HOST -> http://host
FlagUppercaseEscapes // http://host/t%ef -> http://host/t%EF
FlagDecodeUnnecessaryEscapes // http://host/t%41 -> http://host/tA
FlagEncodeNecessaryEscapes // http://host/!"#$ -> http://host/%21%22#$
FlagRemoveDefaultPort // http://host:80 -> http://host
FlagRemoveEmptyQuerySeparator // http://host/path? -> http://host/path
// Usually safe normalizations
FlagRemoveTrailingSlash // http://host/path/ -> http://host/path
FlagAddTrailingSlash // http://host/path -> http://host/path/ (should choose only one of these add/remove trailing slash flags)
FlagRemoveDotSegments // http://host/path/./a/b/../c -> http://host/path/a/c
// Unsafe normalizations
FlagRemoveDirectoryIndex // http://host/path/index.html -> http://host/path/
FlagRemoveFragment // http://host/path#fragment -> http://host/path
FlagForceHTTP // https://host -> http://host
FlagRemoveDuplicateSlashes // http://host/path//a///b -> http://host/path/a/b
FlagRemoveWWW // http://www.host/ -> http://host/
FlagAddWWW // http://host/ -> http://www.host/ (should choose only one of these add/remove WWW flags)
FlagSortQuery // http://host/path?c=3&b=2&a=1&b=1 -> http://host/path?a=1&b=1&b=2&c=3
// Normalizations not in the wikipedia article, required to cover tests cases
// submitted by jehiah
FlagDecodeDWORDHost // http://1113982867 -> http://66.102.7.147
FlagDecodeOctalHost // http://0102.0146.07.0223 -> http://66.102.7.147
FlagDecodeHexHost // http://0x42660793 -> http://66.102.7.147
FlagRemoveUnnecessaryHostDots // http://.host../path -> http://host/path
FlagRemoveEmptyPortSeparator // http://host:/path -> http://host/path
// Convenience set of safe normalizations
FlagsSafe NormalizationFlags = FlagLowercaseHost | FlagLowercaseScheme | FlagUppercaseEscapes | FlagDecodeUnnecessaryEscapes | FlagEncodeNecessaryEscapes | FlagRemoveDefaultPort | FlagRemoveEmptyQuerySeparator
// For convenience sets, "greedy" uses the "remove trailing slash" and "remove www. prefix" flags,
// while "non-greedy" uses the "add (or keep) the trailing slash" and "add www. prefix".
// Convenience set of usually safe normalizations (includes FlagsSafe)
FlagsUsuallySafeGreedy NormalizationFlags = FlagsSafe | FlagRemoveTrailingSlash | FlagRemoveDotSegments
FlagsUsuallySafeNonGreedy NormalizationFlags = FlagsSafe | FlagAddTrailingSlash | FlagRemoveDotSegments
// Convenience set of unsafe normalizations (includes FlagsUsuallySafe)
FlagsUnsafeGreedy NormalizationFlags = FlagsUsuallySafeGreedy | FlagRemoveDirectoryIndex | FlagRemoveFragment | FlagForceHTTP | FlagRemoveDuplicateSlashes | FlagRemoveWWW | FlagSortQuery
FlagsUnsafeNonGreedy NormalizationFlags = FlagsUsuallySafeNonGreedy | FlagRemoveDirectoryIndex | FlagRemoveFragment | FlagForceHTTP | FlagRemoveDuplicateSlashes | FlagAddWWW | FlagSortQuery
// Convenience set of all available flags
FlagsAllGreedy = FlagsUnsafeGreedy | FlagDecodeDWORDHost | FlagDecodeOctalHost | FlagDecodeHexHost | FlagRemoveUnnecessaryHostDots | FlagRemoveEmptyPortSeparator
FlagsAllNonGreedy = FlagsUnsafeNonGreedy | FlagDecodeDWORDHost | FlagDecodeOctalHost | FlagDecodeHexHost | FlagRemoveUnnecessaryHostDots | FlagRemoveEmptyPortSeparator
)
```
......@@ -114,7 +124,7 @@ The [full godoc reference is available on gopkgdoc][godoc].
Some things to note:
* `FlagDecodeUnnecessaryEscapes`, `FlagEncodeNecessaryEscapes`, `FlagUppercaseEscapes` and `FlagRemoveEmptyQuerySeparator` are always implicitly set, because internally, the URL string is parsed as an URL object, which automatically decodes unnecessary escapes, uppercases and encodes necessary ones, and removes empty query separators (an unnecessary `?` at the end of the url). So this operation cannot **not** be done. For this reason, `FlagRemoveEmptyQuerySeparator` (as well as the other three) has been included in the `FlagsSafe` convenience set, instead of `FlagsUnsafe`, where Wikipedia puts it (strangely?).
* `FlagDecodeUnnecessaryEscapes`, `FlagEncodeNecessaryEscapes`, `FlagUppercaseEscapes` and `FlagRemoveEmptyQuerySeparator` are always implicitly set, because internally, the URL string is parsed as an URL object, which automatically decodes unnecessary escapes, uppercases and encodes necessary ones, and removes empty query separators (an unnecessary `?` at the end of the url). So this operation cannot **not** be done. For this reason, `FlagRemoveEmptyQuerySeparator` (as well as the other three) has been included in the `FlagsSafe` convenience set, instead of `FlagsUnsafe`, where Wikipedia puts it.
* The `FlagDecodeUnnecessaryEscapes` decodes the following escapes (*from -> to*):
- %24 -> $
......@@ -125,7 +135,6 @@ Some things to note:
- %5F -> _
- %61-%7A -> abcdefghijklmnopqrstuvwxyz
- %7E -> ~
- When the test runs on Travis-ci, it fails because more escapes are decoded than on my machine, so it is either machine/OS-dependent or it is because they run a different Go version (**which they do, they run go1 while I run go1.0.2** so this is very likely the cause). To avoid a failing build banner on GitHub, I commented-out these specific tests (`TestDecodeUnnecessaryEscapesAll()`, `TestEncodeNecessaryEscapesAll()`, `TestGoVersion()`).
* When the `NormalizeURL` function is used (passing an URL object), this source URL object is modified (that is, after the call, the URL object will be modified to reflect the normalization).
......@@ -156,13 +165,16 @@ And with `FlagsUnsafeGreedy`:
## TODOs
* Try to make jehiah's tests pass (more exactly, support IDNA domains/utf8/unicode encoding/escaping). For now theses tests are commented out.
* Add a class/default instance to allow specifying custom directory index names? At the moment, removing directory index removes `(^|/)((?:default|index)\.\w{1,4})$`.
## Thanks / Contributions
@rogpeppe
@jehiah
@opennota
@pchristopher1275
@zenovich
@beeker1121
## License
......@@ -171,4 +183,6 @@ The [BSD 3-Clause license][bsd].
[bsd]: http://opensource.org/licenses/BSD-3-Clause
[wiki]: http://en.wikipedia.org/wiki/URL_normalization
[rfc]: http://tools.ietf.org/html/rfc3986#section-6
[godoc]: http://go.pkgdoc.org/github.com/puerkitobio/purell
[godoc]: http://go.pkgdoc.org/github.com/PuerkitoBio/purell
[pr5]: https://github.com/PuerkitoBio/purell/pull/5
[iss7]: https://github.com/PuerkitoBio/purell/issues/7
module github.com/PuerkitoBio/purell
go 1.19
require (
golang.org/x/net v0.0.0-20220617184016-355a448f1bc9
golang.org/x/text v0.3.7
)
golang.org/x/net v0.0.0-20220617184016-355a448f1bc9 h1:Yqz/iviulwKwAREEeUd3nbBFn0XuyJqkoft2IlrvOhc=
golang.org/x/net v0.0.0-20220617184016-355a448f1bc9/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
......@@ -12,6 +12,10 @@ import (
"sort"
"strconv"
"strings"
"golang.org/x/net/idna"
"golang.org/x/text/unicode/norm"
"golang.org/x/text/width"
)
// A set of normalization flags determines how a URL will
......@@ -20,7 +24,7 @@ type NormalizationFlags uint
const (
// Safe normalizations
FlagLowercaseScheme NormalizationFlags = 1 << iota // HTTP://host -> http://host
FlagLowercaseScheme NormalizationFlags = 1 << iota // HTTP://host -> http://host, applied by default in Go1.1
FlagLowercaseHost // http://HOST -> http://host
FlagUppercaseEscapes // http://host/t%ef -> http://host/t%EF
FlagDecodeUnnecessaryEscapes // http://host/t%41 -> http://host/tA
......@@ -135,23 +139,36 @@ var flags = map[NormalizationFlags]func(*url.URL){
// MustNormalizeURLString returns the normalized string, and panics if an error occurs.
// It takes an URL string as input, as well as the normalization flags.
func MustNormalizeURLString(u string, f NormalizationFlags) string {
if parsed, e := url.Parse(u); e != nil {
result, e := NormalizeURLString(u, f)
if e != nil {
panic(e)
} else {
return NormalizeURL(parsed, f)
}
panic("Unreachable code.")
return result
}
// NormalizeURLString returns the normalized string, or an error if it can't be parsed into an URL object.
// It takes an URL string as input, as well as the normalization flags.
func NormalizeURLString(u string, f NormalizationFlags) (string, error) {
if parsed, e := url.Parse(u); e != nil {
return "", e
} else {
return NormalizeURL(parsed, f), nil
parsed, err := url.Parse(u)
if err != nil {
return "", err
}
if f&FlagLowercaseHost == FlagLowercaseHost {
parsed.Host = strings.ToLower(parsed.Host)
}
panic("Unreachable code.")
// The idna package doesn't fully conform to RFC 5895
// (https://tools.ietf.org/html/rfc5895), so we do it here.
// Taken from Go 1.8 cycle source, courtesy of bradfitz.
// TODO: Remove when (if?) idna package conforms to RFC 5895.
parsed.Host = width.Fold.String(parsed.Host)
parsed.Host = norm.NFC.String(parsed.Host)
if parsed.Host, err = idna.ToASCII(parsed.Host); err != nil {
return "", err
}
return NormalizeURL(parsed, f), nil
}
// NormalizeURL returns the normalized string.
......@@ -162,7 +179,7 @@ func NormalizeURL(u *url.URL, f NormalizationFlags) string {
flags[k](u)
}
}
return u.String()
return escapeURL(u)
}
func lowercaseScheme(u *url.URL) {
......@@ -190,18 +207,26 @@ func removeDefaultPort(u *url.URL) {
}
func removeTrailingSlash(u *url.URL) {
if l := len(u.Path); l > 0 && strings.HasSuffix(u.Path, "/") {
u.Path = u.Path[:l-1]
} else if l = len(u.Host); l > 0 && strings.HasSuffix(u.Host, "/") {
u.Host = u.Host[:l-1]
if l := len(u.Path); l > 0 {
if strings.HasSuffix(u.Path, "/") {
u.Path = u.Path[:l-1]
}
} else if l = len(u.Host); l > 0 {
if strings.HasSuffix(u.Host, "/") {
u.Host = u.Host[:l-1]
}
}
}
func addTrailingSlash(u *url.URL) {
if l := len(u.Path); l > 0 && !strings.HasSuffix(u.Path, "/") {
u.Path += "/"
} else if l = len(u.Host); l > 0 && !strings.HasSuffix(u.Host, "/") {
u.Host += "/"
if l := len(u.Path); l > 0 {
if !strings.HasSuffix(u.Path, "/") {
u.Path += "/"
}
} else if l = len(u.Host); l > 0 {
if !strings.HasSuffix(u.Host, "/") {
u.Host += "/"
}
}
}
......@@ -223,7 +248,7 @@ func removeDotSegments(u *url.URL) {
}
// Special case if host does not end with / and new path does not begin with /
u.Path = strings.Join(dotFree, "/")
if !strings.HasSuffix(u.Host, "/") && !strings.HasPrefix(u.Path, "/") {
if u.Host != "" && !strings.HasSuffix(u.Host, "/") && !strings.HasPrefix(u.Path, "/") {
u.Path = "/" + u.Path
}
// Special case if the last segment was a dot, make sure the path ends with a slash
......@@ -273,7 +298,7 @@ func sortQuery(u *url.URL) {
if len(q) > 0 {
arKeys := make([]string, len(q))
i := 0
for k, _ := range q {
for k := range q {
arKeys[i] = k
i++
}
......
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file implements query escaping as per RFC 3986.
// It contains some parts of the net/url package, modified so as to allow
// some reserved characters incorrectly escaped by net/url.
// See https://github.com/golang/go/issues/5684
package purell
import (
"bytes"
"net/url"
"strings"
)
type encoding int
const (
encodePath encoding = 1 + iota
encodeUserPassword
encodeQueryComponent
encodeFragment
)
// Return true if the specified character should be escaped when
// appearing in a URL string, according to RFC 3986.
func shouldEscape(c byte, mode encoding) bool {
// §2.3 Unreserved characters (alphanum)
if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
return false
}
switch c {
case '-', '.', '_', '~': // §2.3 Unreserved characters (mark)
return false
// §2.2 Reserved characters (reserved)
case ':', '/', '?', '#', '[', ']', '@', // gen-delims
'!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=': // sub-delims
// Different sections of the URL allow a few of
// the reserved characters to appear unescaped.
switch mode {
case encodePath: // §3.3
// The RFC allows sub-delims and : @.
// '/', '[' and ']' can be used to assign meaning to individual path
// segments. This package only manipulates the path as a whole,
// so we allow those as well. That leaves only ? and # to escape.
return c == '?' || c == '#'
case encodeUserPassword: // §3.2.1
// The RFC allows : and sub-delims in
// userinfo. The parsing of userinfo treats ':' as special so we must escape
// all the gen-delims.
return c == ':' || c == '/' || c == '?' || c == '#' || c == '[' || c == ']' || c == '@'
case encodeQueryComponent: // §3.4
// The RFC allows / and ?.
return c != '/' && c != '?'
case encodeFragment: // §4.1
// The RFC text is silent but the grammar allows
// everything, so escape nothing but #
return c == '#'
}
}
// Everything else must be escaped.
return true
}
func escape(s string, mode encoding) string {
spaceCount, hexCount := 0, 0
for i := 0; i < len(s); i++ {
c := s[i]
if shouldEscape(c, mode) {
if c == ' ' && mode == encodeQueryComponent {
spaceCount++
} else {
hexCount++
}
}
}
if spaceCount == 0 && hexCount == 0 {
return s
}
t := make([]byte, len(s)+2*hexCount)
j := 0
for i := 0; i < len(s); i++ {
switch c := s[i]; {
case c == ' ' && mode == encodeQueryComponent:
t[j] = '+'
j++
case shouldEscape(c, mode):
t[j] = '%'
t[j+1] = "0123456789ABCDEF"[c>>4]
t[j+2] = "0123456789ABCDEF"[c&15]
j += 3
default:
t[j] = s[i]
j++
}
}
return string(t)
}
var uiReplacer = strings.NewReplacer(
"%21", "!",
"%27", "'",
"%28", "(",
"%29", ")",
"%2A", "*",
)
// unescapeUserinfo unescapes some characters that need not to be escaped as per RFC3986.
func unescapeUserinfo(s string) string {
return uiReplacer.Replace(s)
}
// Escape reassembles the URL into a valid URL string.
// The general form of the result is one of:
//
// scheme:opaque
// scheme://userinfo@host/path?query#fragment
//
// If u.Opaque is non-empty, String uses the first form;
// otherwise it uses the second form.
//
// In the second form, the following rules apply:
// - if u.Scheme is empty, scheme: is omitted.
// - if u.User is nil, userinfo@ is omitted.
// - if u.Host is empty, host/ is omitted.
// - if u.Scheme and u.Host are empty and u.User is nil,
// the entire scheme://userinfo@host/ is omitted.
// - if u.Host is non-empty and u.Path begins with a /,
// the form host/path does not add its own /.
// - if u.RawQuery is empty, ?query is omitted.
// - if u.Fragment is empty, #fragment is omitted.
func escapeURL(u *url.URL) string {
var buf bytes.Buffer
if u.Scheme != "" {
buf.WriteString(u.Scheme)
buf.WriteByte(':')
}
if u.Opaque != "" {
buf.WriteString(u.Opaque)
} else {
if u.Scheme != "" || u.Host != "" || u.User != nil {
buf.WriteString("//")
if ui := u.User; ui != nil {
buf.WriteString(unescapeUserinfo(ui.String()))
buf.WriteByte('@')
}
if h := u.Host; h != "" {
buf.WriteString(h)
}
}
if u.Path != "" && u.Path[0] != '/' && u.Host != "" {
buf.WriteByte('/')
}
buf.WriteString(escape(u.Path, encodePath))
}
if u.RawQuery != "" {
buf.WriteByte('?')
buf.WriteString(u.RawQuery)
}
if u.Fragment != "" {
buf.WriteByte('#')
buf.WriteString(escape(u.Fragment, encodeFragment))
}
return buf.String()
}
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build go1.18
// +build go1.18
package idna
// Transitional processing is disabled by default in Go 1.18.
// https://golang.org/issue/47510
const transitionalLookup = false
This diff is collapsed.
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !go1.10
// +build !go1.10
// Package idna implements IDNA2008 using the compatibility processing
// defined by UTS (Unicode Technical Standard) #46, which defines a standard to
// deal with the transition from IDNA2003.
//
// IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC
// 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
// UTS #46 is defined in https://www.unicode.org/reports/tr46.
// See https://unicode.org/cldr/utility/idna.jsp for a visualization of the
// differences between these two standards.
package idna // import "golang.org/x/net/idna"
import (
"fmt"
"strings"
"unicode/utf8"
"golang.org/x/text/secure/bidirule"
"golang.org/x/text/unicode/norm"
)
// NOTE: Unlike common practice in Go APIs, the functions will return a
// sanitized domain name in case of errors. Browsers sometimes use a partially
// evaluated string as lookup.
// TODO: the current error handling is, in my opinion, the least opinionated.
// Other strategies are also viable, though:
// Option 1) Return an empty string in case of error, but allow the user to
// specify explicitly which errors to ignore.
// Option 2) Return the partially evaluated string if it is itself a valid
// string, otherwise return the empty string in case of error.
// Option 3) Option 1 and 2.
// Option 4) Always return an empty string for now and implement Option 1 as
// needed, and document that the return string may not be empty in case of
// error in the future.
// I think Option 1 is best, but it is quite opinionated.
// ToASCII is a wrapper for Punycode.ToASCII.
func ToASCII(s string) (string, error) {
return Punycode.process(s, true)
}
// ToUnicode is a wrapper for Punycode.ToUnicode.
func ToUnicode(s string) (string, error) {
return Punycode.process(s, false)
}
// An Option configures a Profile at creation time.
type Option func(*options)
// Transitional sets a Profile to use the Transitional mapping as defined in UTS
// #46. This will cause, for example, "ß" to be mapped to "ss". Using the
// transitional mapping provides a compromise between IDNA2003 and IDNA2008
// compatibility. It is used by some browsers when resolving domain names. This
// option is only meaningful if combined with MapForLookup.
func Transitional(transitional bool) Option {
return func(o *options) { o.transitional = transitional }
}
// VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
// are longer than allowed by the RFC.
//
// This option corresponds to the VerifyDnsLength flag in UTS #46.
func VerifyDNSLength(verify bool) Option {
return func(o *options) { o.verifyDNSLength = verify }
}
// RemoveLeadingDots removes leading label separators. Leading runes that map to
// dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
func RemoveLeadingDots(remove bool) Option {
return func(o *options) { o.removeLeadingDots = remove }
}
// ValidateLabels sets whether to check the mandatory label validation criteria
// as defined in Section 5.4 of RFC 5891. This includes testing for correct use
// of hyphens ('-'), normalization, validity of runes, and the context rules.
// In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags
// in UTS #46.
func ValidateLabels(enable bool) Option {
return func(o *options) {
// Don't override existing mappings, but set one that at least checks
// normalization if it is not set.
if o.mapping == nil && enable {
o.mapping = normalize
}
o.trie = trie
o.checkJoiners = enable
o.checkHyphens = enable
if enable {
o.fromPuny = validateFromPunycode
} else {
o.fromPuny = nil
}
}
}
// CheckHyphens sets whether to check for correct use of hyphens ('-') in
// labels. Most web browsers do not have this option set, since labels such as
// "r3---sn-apo3qvuoxuxbt-j5pe" are in common use.
//
// This option corresponds to the CheckHyphens flag in UTS #46.
func CheckHyphens(enable bool) Option {
return func(o *options) { o.checkHyphens = enable }
}
// CheckJoiners sets whether to check the ContextJ rules as defined in Appendix
// A of RFC 5892, concerning the use of joiner runes.
//
// This option corresponds to the CheckJoiners flag in UTS #46.
func CheckJoiners(enable bool) Option {
return func(o *options) {
o.trie = trie
o.checkJoiners = enable
}
}
// StrictDomainName limits the set of permissable ASCII characters to those
// allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
// hyphen). This is set by default for MapForLookup and ValidateForRegistration,
// but is only useful if ValidateLabels is set.
//
// This option is useful, for instance, for browsers that allow characters
// outside this range, for example a '_' (U+005F LOW LINE). See
// http://www.rfc-editor.org/std/std3.txt for more details.
//
// This option corresponds to the UseSTD3ASCIIRules flag in UTS #46.
func StrictDomainName(use bool) Option {
return func(o *options) { o.useSTD3Rules = use }
}
// NOTE: the following options pull in tables. The tables should not be linked
// in as long as the options are not used.
// BidiRule enables the Bidi rule as defined in RFC 5893. Any application
// that relies on proper validation of labels should include this rule.
//
// This option corresponds to the CheckBidi flag in UTS #46.
func BidiRule() Option {
return func(o *options) { o.bidirule = bidirule.ValidString }
}
// ValidateForRegistration sets validation options to verify that a given IDN is
// properly formatted for registration as defined by Section 4 of RFC 5891.
func ValidateForRegistration() Option {
return func(o *options) {
o.mapping = validateRegistration
StrictDomainName(true)(o)
ValidateLabels(true)(o)
VerifyDNSLength(true)(o)
BidiRule()(o)
}
}
// MapForLookup sets validation and mapping options such that a given IDN is
// transformed for domain name lookup according to the requirements set out in
// Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894,
// RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option
// to add this check.
//
// The mappings include normalization and mapping case, width and other
// compatibility mappings.
func MapForLookup() Option {
return func(o *options) {
o.mapping = validateAndMap
StrictDomainName(true)(o)
ValidateLabels(true)(o)
RemoveLeadingDots(true)(o)
}
}
type options struct {
transitional bool
useSTD3Rules bool
checkHyphens bool
checkJoiners bool
verifyDNSLength bool
removeLeadingDots bool
trie *idnaTrie
// fromPuny calls validation rules when converting A-labels to U-labels.
fromPuny func(p *Profile, s string) error
// mapping implements a validation and mapping step as defined in RFC 5895
// or UTS 46, tailored to, for example, domain registration or lookup.
mapping func(p *Profile, s string) (string, error)
// bidirule, if specified, checks whether s conforms to the Bidi Rule
// defined in RFC 5893.
bidirule func(s string) bool
}
// A Profile defines the configuration of a IDNA mapper.
type Profile struct {
options
}
func apply(o *options, opts []Option) {
for _, f := range opts {
f(o)
}
}
// New creates a new Profile.
//
// With no options, the returned Profile is the most permissive and equals the
// Punycode Profile. Options can be passed to further restrict the Profile. The
// MapForLookup and ValidateForRegistration options set a collection of options,
// for lookup and registration purposes respectively, which can be tailored by
// adding more fine-grained options, where later options override earlier
// options.
func New(o ...Option) *Profile {
p := &Profile{}
apply(&p.options, o)
return p
}
// ToASCII converts a domain or domain label to its ASCII form. For example,
// ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
// ToASCII("golang") is "golang". If an error is encountered it will return
// an error and a (partially) processed result.
func (p *Profile) ToASCII(s string) (string, error) {
return p.process(s, true)
}
// ToUnicode converts a domain or domain label to its Unicode form. For example,
// ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
// ToUnicode("golang") is "golang". If an error is encountered it will return
// an error and a (partially) processed result.
func (p *Profile) ToUnicode(s string) (string, error) {
pp := *p
pp.transitional = false
return pp.process(s, false)
}
// String reports a string with a description of the profile for debugging
// purposes. The string format may change with different versions.
func (p *Profile) String() string {
s := ""
if p.transitional {
s = "Transitional"
} else {
s = "NonTransitional"
}
if p.useSTD3Rules {
s += ":UseSTD3Rules"
}
if p.checkHyphens {
s += ":CheckHyphens"
}
if p.checkJoiners {
s += ":CheckJoiners"
}
if p.verifyDNSLength {
s += ":VerifyDNSLength"
}
return s
}
var (
// Punycode is a Profile that does raw punycode processing with a minimum
// of validation.
Punycode *Profile = punycode
// Lookup is the recommended profile for looking up domain names, according
// to Section 5 of RFC 5891. The exact configuration of this profile may
// change over time.
Lookup *Profile = lookup
// Display is the recommended profile for displaying domain names.
// The configuration of this profile may change over time.
Display *Profile = display
// Registration is the recommended profile for checking whether a given
// IDN is valid for registration, according to Section 4 of RFC 5891.
Registration *Profile = registration
punycode = &Profile{}
lookup = &Profile{options{
transitional: true,
removeLeadingDots: true,
useSTD3Rules: true,
checkHyphens: true,
checkJoiners: true,
trie: trie,
fromPuny: validateFromPunycode,
mapping: validateAndMap,
bidirule: bidirule.ValidString,
}}
display = &Profile{options{
useSTD3Rules: true,
removeLeadingDots: true,
checkHyphens: true,
checkJoiners: true,
trie: trie,
fromPuny: validateFromPunycode,
mapping: validateAndMap,
bidirule: bidirule.ValidString,
}}
registration = &Profile{options{
useSTD3Rules: true,
verifyDNSLength: true,
checkHyphens: true,
checkJoiners: true,
trie: trie,
fromPuny: validateFromPunycode,
mapping: validateRegistration,
bidirule: bidirule.ValidString,
}}
// TODO: profiles
// Register: recommended for approving domain names: don't do any mappings
// but rather reject on invalid input. Bundle or block deviation characters.
)
type labelError struct{ label, code_ string }
func (e labelError) code() string { return e.code_ }
func (e labelError) Error() string {
return fmt.Sprintf("idna: invalid label %q", e.label)
}
type runeError rune
func (e runeError) code() string { return "P1" }
func (e runeError) Error() string {
return fmt.Sprintf("idna: disallowed rune %U", e)
}
// process implements the algorithm described in section 4 of UTS #46,
// see https://www.unicode.org/reports/tr46.
func (p *Profile) process(s string, toASCII bool) (string, error) {
var err error
if p.mapping != nil {
s, err = p.mapping(p, s)
}
// Remove leading empty labels.
if p.removeLeadingDots {
for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
}
}
// It seems like we should only create this error on ToASCII, but the
// UTS 46 conformance tests suggests we should always check this.
if err == nil && p.verifyDNSLength && s == "" {
err = &labelError{s, "A4"}
}
labels := labelIter{orig: s}
for ; !labels.done(); labels.next() {
label := labels.label()
if label == "" {
// Empty labels are not okay. The label iterator skips the last
// label if it is empty.
if err == nil && p.verifyDNSLength {
err = &labelError{s, "A4"}
}
continue
}
if strings.HasPrefix(label, acePrefix) {
u, err2 := decode(label[len(acePrefix):])
if err2 != nil {
if err == nil {
err = err2
}
// Spec says keep the old label.
continue
}
labels.set(u)
if err == nil && p.fromPuny != nil {
err = p.fromPuny(p, u)
}
if err == nil {
// This should be called on NonTransitional, according to the
// spec, but that currently does not have any effect. Use the
// original profile to preserve options.
err = p.validateLabel(u)
}
} else if err == nil {
err = p.validateLabel(label)
}
}
if toASCII {
for labels.reset(); !labels.done(); labels.next() {
label := labels.label()
if !ascii(label) {
a, err2 := encode(acePrefix, label)
if err == nil {
err = err2
}
label = a
labels.set(a)
}
n := len(label)
if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
err = &labelError{label, "A4"}
}
}
}
s = labels.result()
if toASCII && p.verifyDNSLength && err == nil {
// Compute the length of the domain name minus the root label and its dot.
n := len(s)
if n > 0 && s[n-1] == '.' {
n--
}
if len(s) < 1 || n > 253 {
err = &labelError{s, "A4"}
}
}
return s, err
}
func normalize(p *Profile, s string) (string, error) {
return norm.NFC.String(s), nil
}
func validateRegistration(p *Profile, s string) (string, error) {
if !norm.NFC.IsNormalString(s) {
return s, &labelError{s, "V1"}
}
for i := 0; i < len(s); {
v, sz := trie.lookupString(s[i:])
// Copy bytes not copied so far.
switch p.simplify(info(v).category()) {
// TODO: handle the NV8 defined in the Unicode idna data set to allow
// for strict conformance to IDNA2008.
case valid, deviation:
case disallowed, mapped, unknown, ignored:
r, _ := utf8.DecodeRuneInString(s[i:])
return s, runeError(r)
}
i += sz
}
return s, nil
}
func validateAndMap(p *Profile, s string) (string, error) {
var (
err error
b []byte
k int
)
for i := 0; i < len(s); {
v, sz := trie.lookupString(s[i:])
start := i
i += sz
// Copy bytes not copied so far.
switch p.simplify(info(v).category()) {
case valid:
continue
case disallowed:
if err == nil {
r, _ := utf8.DecodeRuneInString(s[start:])
err = runeError(r)
}
continue
case mapped, deviation:
b = append(b, s[k:start]...)
b = info(v).appendMapping(b, s[start:i])
case ignored:
b = append(b, s[k:start]...)
// drop the rune
case unknown:
b = append(b, s[k:start]...)
b = append(b, "\ufffd"...)
}
k = i
}
if k == 0 {
// No changes so far.
s = norm.NFC.String(s)
} else {
b = append(b, s[k:]...)
if norm.NFC.QuickSpan(b) != len(b) {
b = norm.NFC.Bytes(b)
}
// TODO: the punycode converters require strings as input.
s = string(b)
}
return s, err
}
// A labelIter allows iterating over domain name labels.
type labelIter struct {
orig string
slice []string
curStart int
curEnd int
i int
}
func (l *labelIter) reset() {
l.curStart = 0
l.curEnd = 0
l.i = 0
}
func (l *labelIter) done() bool {
return l.curStart >= len(l.orig)
}
func (l *labelIter) result() string {
if l.slice != nil {
return strings.Join(l.slice, ".")
}
return l.orig
}
func (l *labelIter) label() string {
if l.slice != nil {
return l.slice[l.i]
}
p := strings.IndexByte(l.orig[l.curStart:], '.')
l.curEnd = l.curStart + p
if p == -1 {
l.curEnd = len(l.orig)
}
return l.orig[l.curStart:l.curEnd]
}
// next sets the value to the next label. It skips the last label if it is empty.
func (l *labelIter) next() {
l.i++
if l.slice != nil {
if l.i >= len(l.slice) || l.i == len(l.slice)-1 && l.slice[l.i] == "" {
l.curStart = len(l.orig)
}
} else {
l.curStart = l.curEnd + 1
if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' {
l.curStart = len(l.orig)
}
}
}
func (l *labelIter) set(s string) {
if l.slice == nil {
l.slice = strings.Split(l.orig, ".")
}
l.slice[l.i] = s
}
// acePrefix is the ASCII Compatible Encoding prefix.
const acePrefix = "xn--"
func (p *Profile) simplify(cat category) category {
switch cat {
case disallowedSTD3Mapped:
if p.useSTD3Rules {
cat = disallowed
} else {
cat = mapped
}
case disallowedSTD3Valid:
if p.useSTD3Rules {
cat = disallowed
} else {
cat = valid
}
case deviation:
if !p.transitional {
cat = valid
}
case validNV8, validXV8:
// TODO: handle V2008
cat = valid
}
return cat
}
func validateFromPunycode(p *Profile, s string) error {
if !norm.NFC.IsNormalString(s) {
return &labelError{s, "V1"}
}
for i := 0; i < len(s); {
v, sz := trie.lookupString(s[i:])
if c := p.simplify(info(v).category()); c != valid && c != deviation {
return &labelError{s, "V6"}
}
i += sz
}
return nil
}
const (
zwnj = "\u200c"
zwj = "\u200d"
)
type joinState int8
const (
stateStart joinState = iota
stateVirama
stateBefore
stateBeforeVirama
stateAfter
stateFAIL
)
var joinStates = [][numJoinTypes]joinState{
stateStart: {
joiningL: stateBefore,
joiningD: stateBefore,
joinZWNJ: stateFAIL,
joinZWJ: stateFAIL,
joinVirama: stateVirama,
},
stateVirama: {
joiningL: stateBefore,
joiningD: stateBefore,
},
stateBefore: {
joiningL: stateBefore,
joiningD: stateBefore,
joiningT: stateBefore,
joinZWNJ: stateAfter,
joinZWJ: stateFAIL,
joinVirama: stateBeforeVirama,
},
stateBeforeVirama: {
joiningL: stateBefore,
joiningD: stateBefore,
joiningT: stateBefore,
},
stateAfter: {
joiningL: stateFAIL,
joiningD: stateBefore,
joiningT: stateAfter,
joiningR: stateStart,
joinZWNJ: stateFAIL,
joinZWJ: stateFAIL,
joinVirama: stateAfter, // no-op as we can't accept joiners here
},
stateFAIL: {
0: stateFAIL,
joiningL: stateFAIL,
joiningD: stateFAIL,
joiningT: stateFAIL,
joiningR: stateFAIL,
joinZWNJ: stateFAIL,
joinZWJ: stateFAIL,
joinVirama: stateFAIL,
},
}
// validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
// already implicitly satisfied by the overall implementation.
func (p *Profile) validateLabel(s string) error {
if s == "" {
if p.verifyDNSLength {
return &labelError{s, "A4"}
}
return nil
}
if p.bidirule != nil && !p.bidirule(s) {
return &labelError{s, "B"}
}
if p.checkHyphens {
if len(s) > 4 && s[2] == '-' && s[3] == '-' {
return &labelError{s, "V2"}
}
if s[0] == '-' || s[len(s)-1] == '-' {
return &labelError{s, "V3"}
}
}
if !p.checkJoiners {
return nil
}
trie := p.trie // p.checkJoiners is only set if trie is set.
// TODO: merge the use of this in the trie.
v, sz := trie.lookupString(s)
x := info(v)
if x.isModifier() {
return &labelError{s, "V5"}
}
// Quickly return in the absence of zero-width (non) joiners.
if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 {
return nil
}
st := stateStart
for i := 0; ; {
jt := x.joinType()
if s[i:i+sz] == zwj {
jt = joinZWJ
} else if s[i:i+sz] == zwnj {
jt = joinZWNJ
}
st = joinStates[st][jt]
if x.isViramaModifier() {
st = joinStates[st][joinVirama]
}
if i += sz; i == len(s) {
break
}
v, sz = trie.lookupString(s[i:])
x = info(v)
}
if st == stateFAIL || st == stateAfter {
return &labelError{s, "C"}
}
return nil
}
func ascii(s string) bool {
for i := 0; i < len(s); i++ {
if s[i] >= utf8.RuneSelf {
return false
}
}
return true
}
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !go1.18
// +build !go1.18
package idna
const transitionalLookup = true
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package idna
// This file implements the Punycode algorithm from RFC 3492.
import (
"math"
"strings"
"unicode/utf8"
)
// These parameter values are specified in section 5.
//
// All computation is done with int32s, so that overflow behavior is identical
// regardless of whether int is 32-bit or 64-bit.
const (
base int32 = 36
damp int32 = 700
initialBias int32 = 72
initialN int32 = 128
skew int32 = 38
tmax int32 = 26
tmin int32 = 1
)
func punyError(s string) error { return &labelError{s, "A3"} }
// decode decodes a string as specified in section 6.2.
func decode(encoded string) (string, error) {
if encoded == "" {
return "", nil
}
pos := 1 + strings.LastIndex(encoded, "-")
if pos == 1 {
return "", punyError(encoded)
}
if pos == len(encoded) {
return encoded[:len(encoded)-1], nil
}
output := make([]rune, 0, len(encoded))
if pos != 0 {
for _, r := range encoded[:pos-1] {
output = append(output, r)
}
}
i, n, bias := int32(0), initialN, initialBias
overflow := false
for pos < len(encoded) {
oldI, w := i, int32(1)
for k := base; ; k += base {
if pos == len(encoded) {
return "", punyError(encoded)
}
digit, ok := decodeDigit(encoded[pos])
if !ok {
return "", punyError(encoded)
}
pos++
i, overflow = madd(i, digit, w)
if overflow {
return "", punyError(encoded)
}
t := k - bias
if k <= bias {
t = tmin
} else if k >= bias+tmax {
t = tmax
}
if digit < t {
break
}
w, overflow = madd(0, w, base-t)
if overflow {
return "", punyError(encoded)
}
}
if len(output) >= 1024 {
return "", punyError(encoded)
}
x := int32(len(output) + 1)
bias = adapt(i-oldI, x, oldI == 0)
n += i / x
i %= x
if n < 0 || n > utf8.MaxRune {
return "", punyError(encoded)
}
output = append(output, 0)
copy(output[i+1:], output[i:])
output[i] = n
i++
}
return string(output), nil
}
// encode encodes a string as specified in section 6.3 and prepends prefix to
// the result.
//
// The "while h < length(input)" line in the specification becomes "for
// remaining != 0" in the Go code, because len(s) in Go is in bytes, not runes.
func encode(prefix, s string) (string, error) {
output := make([]byte, len(prefix), len(prefix)+1+2*len(s))
copy(output, prefix)
delta, n, bias := int32(0), initialN, initialBias
b, remaining := int32(0), int32(0)
for _, r := range s {
if r < 0x80 {
b++
output = append(output, byte(r))
} else {
remaining++
}
}
h := b
if b > 0 {
output = append(output, '-')
}
overflow := false
for remaining != 0 {
m := int32(0x7fffffff)
for _, r := range s {
if m > r && r >= n {
m = r
}
}
delta, overflow = madd(delta, m-n, h+1)
if overflow {
return "", punyError(s)
}
n = m
for _, r := range s {
if r < n {
delta++
if delta < 0 {
return "", punyError(s)
}
continue
}
if r > n {
continue
}
q := delta
for k := base; ; k += base {
t := k - bias
if k <= bias {
t = tmin
} else if k >= bias+tmax {
t = tmax
}
if q < t {
break
}
output = append(output, encodeDigit(t+(q-t)%(base-t)))
q = (q - t) / (base - t)
}
output = append(output, encodeDigit(q))
bias = adapt(delta, h+1, h == b)
delta = 0
h++
remaining--
}
delta++
n++
}
return string(output), nil
}
// madd computes a + (b * c), detecting overflow.
func madd(a, b, c int32) (next int32, overflow bool) {
p := int64(b) * int64(c)
if p > math.MaxInt32-int64(a) {
return 0, true
}
return a + int32(p), false
}
func decodeDigit(x byte) (digit int32, ok bool) {
switch {
case '0' <= x && x <= '9':
return int32(x - ('0' - 26)), true
case 'A' <= x && x <= 'Z':
return int32(x - 'A'), true
case 'a' <= x && x <= 'z':
return int32(x - 'a'), true
}
return 0, false
}
func encodeDigit(digit int32) byte {
switch {
case 0 <= digit && digit < 26:
return byte(digit + 'a')
case 26 <= digit && digit < 36:
return byte(digit + ('0' - 26))
}
panic("idna: internal error in punycode encoding")
}
// adapt is the bias adaptation function specified in section 6.1.
func adapt(delta, numPoints int32, firstTime bool) int32 {
if firstTime {
delta /= damp
} else {
delta /= 2
}
delta += delta / numPoints
k := int32(0)
for delta > ((base-tmin)*tmax)/2 {
delta /= base - tmin
k += base
}
return k + (base-tmin+1)*delta/(delta+skew)
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment