// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
// See LICENSE for licensing information
package xurls
import (
"fmt"
"regexp"
"testing"
)
// testCase pairs an input string with the match expected from a regexp.
type testCase struct {
// in is the text handed to the regexp's FindString.
in string
// want is the expectation, as interpreted by wantStr: a string is the exact
// expected match, true means the whole of in must be matched, and anything
// else (false or nil) means FindString should return the empty string.
want interface{}
}
// wantStr turns a testCase expectation into the literal string that
// FindString should return for the input in.
//
//   - a string expectation is returned unchanged;
//   - a true expectation means the whole input should match, so in is returned;
//   - anything else (false, nil, other types) yields the empty string.
func wantStr(in string, want interface{}) string {
	if exact, ok := want.(string); ok {
		return exact
	}
	if whole, ok := want.(bool); ok && whole {
		return in
	}
	return ""
}
// doTest runs every case in cases against re, each as its own subtest.
//
// Subtests are named "<name>/<index>", with the index zero-padded to three
// digits. For each case the first match that re finds in c.in is compared
// with the expectation derived by wantStr, and a mismatch is reported with
// t.Errorf so the remaining cases still run.
func doTest(t *testing.T, name string, re *regexp.Regexp, cases []testCase) {
	for i, c := range cases {
		// Per-iteration copy: keeps the closure bound to this case on Go
		// versions before 1.22, even if subtests are later made parallel.
		c := c
		t.Run(fmt.Sprintf("%s/%03d", name, i), func(t *testing.T) {
			got := re.FindString(c.in)
			want := wantStr(c.in, c.want)
			if got != want {
				// %q escapes embedded quotes and non-printable runes, so
				// inputs that differ only by invisible characters remain
				// distinguishable in the failure output.
				t.Errorf(`%s.FindString(%q) got %q, want %q`, name, c.in, got, want)
			}
		})
	}
}
// constantTestCases is the table of cases that TestRegexes runs against both
// the Relaxed and the Strict regexp, so every expectation here must hold for
// each of them.
//
// The second field follows wantStr: nil means no match, true means the whole
// input is the match, and a string is the exact expected match.
var constantTestCases = []testCase{
	{``, nil},
	{` `, nil},
	{`:`, nil},
	{`::`, nil},
	{`:::`, nil},
	{`::::`, nil},
	{`.`, nil},
	{`..`, nil},
	{`...`, nil},
	{`1.1`, nil},
	{`.1.`, nil},
	{`1.1.1`, nil},
	{`1:1`, nil},
	{`:1:`, nil},
	{`1:1:1`, nil},
	{`://`, nil},
	{`foo`, nil},
	{`foo:`, nil},
	{`mailto:`, nil},
	{`foo://`, nil},
	{`http://`, nil},
	{`http:// foo`, nil},
	// Same as the previous case but with a no-break space (U+00A0). The
	// literal had been corrupted into the letter "Â" followed by a space,
	// which no longer exercises a non-ASCII space; the escape keeps the
	// otherwise invisible character explicit.
	{"http://\u00a0foo", nil},
	{`:foo`, nil},
	{`://foo`, nil},
	{`foorandom:bar`, nil},
	{`foo.randombar`, nil},
	{`zzz.`, nil},
	{`.zzz`, nil},
	{`zzz.zzz`, nil},
	{`/some/path`, nil},
	{`rel/path`, nil},
	{`localhost`, nil},
	{`com`, nil},
	{`.com`, nil},
	{`com.`, nil},
	{`http`, nil},
	{`http://foo`, true},
	{`http://FOO`, true},
	{`http://FAÀ`, true},
	{`https://localhost`, true},
	{`git+https://localhost`, true},
	{`foo.bar://localhost`, true},
	{`foo-bar://localhost`, true},
	{`mailto:foo`, true},
	{`MAILTO:foo`, true},
	{`sms:123`, true},
	{`xmpp:foo@bar`, true},
	{`bitcoin:Addr23?amount=1&message=foo`, true},
	{`http://foo.com`, true},
	{`http://foo.co.uk`, true},
	{`http://foo.random`, true},
	{` http://foo.com/bar `, `http://foo.com/bar`},
	{` http://foo.com/bar more`, `http://foo.com/bar`},
	{`<http://foo.com/bar>`, `http://foo.com/bar`},
	{`<http://foo.com/bar>more`, `http://foo.com/bar`},
	{`.http://foo.com/bar.`, `http://foo.com/bar`},
	{`.http://foo.com/bar.more`, `http://foo.com/bar.more`},
	{`,http://foo.com/bar,`, `http://foo.com/bar`},
	{`,http://foo.com/bar,more`, `http://foo.com/bar,more`},
	{`(http://foo.com/bar)`, `http://foo.com/bar`},
	{`(http://foo.com/bar)more`, `http://foo.com/bar`},
	{`[http://foo.com/bar]`, `http://foo.com/bar`},
	{`[http://foo.com/bar]more`, `http://foo.com/bar`},
	{`'http://foo.com/bar'`, `http://foo.com/bar`},
	{`'http://foo.com/bar'more`, `http://foo.com/bar'more`},
	{`"http://foo.com/bar"`, `http://foo.com/bar`},
	{`http://a.b/a0/-+_&~*%=#@.,:;'?!|[]()a`, true},
	{`http://a.b/a0/$€¥`, true},
	{`http://✪foo.bar/pa✪th©more`, true},
	{`http://foo.bar/path/`, true},
	{`http://foo.bar/path-`, true},
	{`http://foo.bar/path+`, true},
	{`http://foo.bar/path_`, true},
	{`http://foo.bar/path&`, true},
	{`http://foo.bar/path~`, true},
	{`http://foo.bar/path*`, true},
	{`http://foo.bar/path%`, true},
	{`http://foo.bar/path=`, true},
	{`http://foo.bar/path#`, true},
	{`http://foo.bar/path.`, `http://foo.bar/path`},
	{`http://foo.bar/path,`, `http://foo.bar/path`},
	{`http://foo.bar/path:`, `http://foo.bar/path`},
	{`http://foo.bar/path;`, `http://foo.bar/path`},
	{`http://foo.bar/path'`, `http://foo.bar/path`},
	{`http://foo.bar/path?`, `http://foo.bar/path`},
	{`http://foo.bar/path!`, `http://foo.bar/path`},
	{`http://foo.bar/path@`, `http://foo.bar/path`},
	{`http://foo.bar/path|`, `http://foo.bar/path`},
	{`http://foo.bar/path<`, `http://foo.bar/path`},
	{`http://foo.bar/path<more`, `http://foo.bar/path`},
	{`http://foo.com/path_(more)`, true},
	{`(http://foo.com/path_(more))`, `http://foo.com/path_(more)`},
	{`http://foo.com/path_(even)-(more)`, true},
	{`http://foo.com/path_(even)(more)`, true},
	{`http://foo.com/path_(even_(nested))`, true},
	{`(http://foo.com/path_(even_(nested)))`, `http://foo.com/path_(even_(nested))`},
	{`http://foo.com/path_[more]`, true},
	{`[http://foo.com/path_[more]]`, `http://foo.com/path_[more]`},
	{`http://foo.com/path_[even]-[more]`, true},
	{`http://foo.com/path_[even][more]`, true},
	{`http://foo.com/path_[even_[nested]]`, true},
	{`[http://foo.com/path_[even_[nested]]]`, `http://foo.com/path_[even_[nested]]`},
	{`http://foo.com/path_{more}`, true},
	{`{http://foo.com/path_{more}}`, `http://foo.com/path_{more}`},
	{`http://foo.com/path_{even}-{more}`, true},
	{`http://foo.com/path_{even}{more}`, true},
	{`http://foo.com/path_{even_{nested}}`, true},
	{`{http://foo.com/path_{even_{nested}}}`, `http://foo.com/path_{even_{nested}}`},
	{`http://foo.com/path#fragment`, true},
	{`http://foo.com/emptyfrag#`, true},
	{`http://foo.com/spaced%20path`, true},
	{`http://foo.com/?p=spaced%20param`, true},
	{`http://test.foo.com/`, true},
	{`http://foo.com/path`, true},
	{`http://foo.com:8080/path`, true},
	{`http://1.1.1.1/path`, true},
	{`http://1080::8:800:200c:417a/path`, true},
	// Internationalized hosts and paths. The Chinese literals were restored
	// from a cp1252 misreading of their UTF-8 bytes.
	{`http://中国.中国/中国`, true},
	{`http://中国.中国/foo中国`, true},
	{`http://उदाहरण.परीकषा`, true},
	{`http://xn-foo.xn--p1acf/path`, true},
	{`what is http://foo.com?`, `http://foo.com`},
	{`go visit http://foo.com/path.`, `http://foo.com/path`},
	{`go visit http://foo.com/path...`, `http://foo.com/path`},
	{`what is http://foo.com/path?`, `http://foo.com/path`},
	{`the http://foo.com!`, `http://foo.com`},
	{`https://test.foo.bar/path?a=b`, `https://test.foo.bar/path?a=b`},
	{`ftp://user@foo.bar`, true},
	{`http://foo.com/@"style="color:red"onmouseover=func()`, `http://foo.com/`},
	{`http://foo.com/base64-bCBwbGVhcw==`, true},
	// NOTE(review): the final path rune is U+FFFD (replacement character),
	// which looks like an encoding casualty; the intended rune cannot be
	// recovered from this file - confirm against upstream history.
	{`http://foo.com/�`, true},
}
// TestRegexes exercises the package-level Relaxed and Strict regexps.
//
// Both are first run over constantTestCases, where they must agree. Each is
// then run over a table of its own: "Relaxed2" lists inputs without a scheme
// that Relaxed is expected to match, and "Strict2" lists inputs that Strict
// is expected to reject.
func TestRegexes(t *testing.T) {
	doTest(t, "Relaxed", Relaxed, constantTestCases)
	doTest(t, "Strict", Strict, constantTestCases)

	doTest(t, "Relaxed2", Relaxed, []testCase{
		{`foo.a`, nil},
		{`foo.com`, true},
		{`foo.com bar.com`, `foo.com`},
		{`foo.com-foo`, `foo.com`},
		{`foo.company`, true},
		{`foo.comrandom`, nil},
		{`foo.example`, true},
		{`foo.i2p`, true},
		{`foo.local`, true},
		{`foo.onion`, true},
		// Internationalized TLD. These literals were restored from a cp1252
		// misreading of their UTF-8 bytes; the corrupted text is not a TLD,
		// so the cases could not have matched as written.
		{`中国.中国`, true},
		{`中国.中国/foo中国`, true},
		{`foo.com/`, true},
		{`1.1.1.1`, true},
		{`10.50.23.250`, true},
		{`121.1.1.1`, true},
		{`255.1.1.1`, true},
		{`300.1.1.1`, nil},
		{`1.1.1.300`, nil},
		{`foo@1.2.3.4`, `1.2.3.4`},
		{`1080:0:0:0:8:800:200C:4171`, true},
		{`3ffe:2a00:100:7031::1`, true},
		{`1080::8:800:200c:417a`, true},
		{`foo.com:8080`, true},
		{`foo.com:8080/path`, true},
		{`test.foo.com`, true},
		{`test.foo.com/path`, true},
		{`test.foo.com/path/more/`, true},
		{`TEST.FOO.COM/PATH`, true},
		// NOTE(review): the path contains U+FFFD (replacement character),
		// which looks like a corrupted accented letter; the intended rune
		// cannot be recovered from this file - confirm against upstream.
		{`TEST.FÓO.COM/P�TH`, true},
		{`foo.com/path_(more)`, true},
		{`foo.com/path_(even)_(more)`, true},
		{`foo.com/path_(more)/more`, true},
		{`foo.com/path_(more)/end)`, `foo.com/path_(more)/end`},
		{`www.foo.com`, true},
		{` foo.com/bar `, `foo.com/bar`},
		{` foo.com/bar more`, `foo.com/bar`},
		{`<foo.com/bar>`, `foo.com/bar`},
		{`<foo.com/bar>more`, `foo.com/bar`},
		{`,foo.com/bar.`, `foo.com/bar`},
		{`,foo.com/bar.more`, `foo.com/bar.more`},
		{`,foo.com/bar,`, `foo.com/bar`},
		{`,foo.com/bar,more`, `foo.com/bar,more`},
		{`(foo.com/bar)`, `foo.com/bar`},
		{`"foo.com/bar'`, `foo.com/bar`},
		{`"foo.com/bar'more`, `foo.com/bar'more`},
		{`"foo.com/bar"`, `foo.com/bar`},
		{`what is foo.com?`, `foo.com`},
		{`the foo.com!`, `foo.com`},
		// For e-mail-like input only the host part is expected to match.
		{`foo@bar`, nil},
		{`foo@bar.a`, nil},
		{`foo@bar.com`, "bar.com"},
		{`foo@sub.bar.com`, "sub.bar.com"},
		{`foo@中国.中国`, "中国.中国"},
	})

	doTest(t, "Strict2", Strict, []testCase{
		{`http:// foo.com`, nil},
		{`foo.a`, nil},
		{`foo.com`, nil},
		{`foo.com/`, nil},
		{`1.1.1.1`, nil},
		{`3ffe:2a00:100:7031::1`, nil},
		{`test.foo.com:8080/path`, nil},
		{`foo@bar.com`, nil},
	})
}
// TestStrictMatchingSchemeError checks only whether StrictMatchingScheme
// reports an error for a given scheme expression; the returned regexp is
// ignored. The last table entry is the one expected to fail (presumably
// because its unbalanced parenthesis is not a valid expression).
func TestStrictMatchingSchemeError(t *testing.T) {
	tests := []struct {
		exp     string
		wantErr bool
	}{
		{exp: `http://`, wantErr: false},
		{exp: `https?://`, wantErr: false},
		{exp: `http://|mailto:`, wantErr: false},
		{exp: `http://(`, wantErr: true},
	}
	for _, tc := range tests {
		_, err := StrictMatchingScheme(tc.exp)
		failed := err != nil
		switch {
		case tc.wantErr && !failed:
			t.Errorf(`StrictMatchingScheme("%s") did not error as expected`, tc.exp)
		case !tc.wantErr && failed:
			t.Errorf(`StrictMatchingScheme("%s") unexpectedly errored`, tc.exp)
		}
	}
}
// TestStrictMatchingScheme builds a strict regexp limited to the http, ftp,
// ftps and mailto schemes and checks which inputs it accepts: listed schemes
// match (regardless of letter case in the cases below), while other schemes
// and scheme-less inputs do not.
func TestStrictMatchingScheme(t *testing.T) {
	const exp = "http://|ftps?://|mailto:"
	strictMatching, err := StrictMatchingScheme(exp)
	if err != nil {
		// Stop here: running the cases with a regexp from a failed call
		// would bury the real cause under unrelated failures.
		t.Fatalf("StrictMatchingScheme(%q) unexpectedly errored: %v", exp, err)
	}
	doTest(t, "StrictMatchingScheme", strictMatching, []testCase{
		{`foo.com`, nil},
		{`foo@bar.com`, nil},
		{`http://foo`, true},
		{`Http://foo`, true},
		{`https://foo`, nil},
		{`ftp://foo`, true},
		{`ftps://foo`, true},
		{`mailto:foo`, true},
		{`MAILTO:foo`, true},
		{`sms:123`, nil},
	})
}
// bench is the shared benchmark body: it asks re for all matches in str
// (no limit on the number of matches) once per benchmark iteration and
// discards the result.
func bench(b *testing.B, re *regexp.Regexp, str string) {
	for remaining := b.N; remaining > 0; remaining-- {
		re.FindAllString(str, -1)
	}
}
// BenchmarkStrictEmpty times Strict on a short input for which the test
// tables expect no match.
func BenchmarkStrictEmpty(b *testing.B) {
	const input = "foo"
	bench(b, Strict, input)
}
// BenchmarkStrictSingle times Strict on a short input holding one URL with a
// scheme followed by a bare domain.
func BenchmarkStrictSingle(b *testing.B) {
	const input = "http://foo.foo foo.com"
	bench(b, Strict, input)
}
// BenchmarkStrictMany times Strict on a multi-line input that mixes plain
// words with several URL-like tokens.
func BenchmarkStrictMany(b *testing.B) {
	// The raw string's continuation lines are part of the input and must
	// stay unindented.
	const input = ` foo bar http://foo.foo
foo.com bitcoin:address ftp://
xmpp:foo@bar.com`
	bench(b, Strict, input)
}
// BenchmarkRelaxedEmpty times Relaxed on a short input for which the test
// tables expect no match.
func BenchmarkRelaxedEmpty(b *testing.B) {
	const input = "foo"
	bench(b, Relaxed, input)
}
// BenchmarkRelaxedSingle times Relaxed on the same short input used by
// BenchmarkStrictSingle: one URL with a scheme followed by a bare domain.
func BenchmarkRelaxedSingle(b *testing.B) {
	const input = "http://foo.foo foo.com"
	bench(b, Relaxed, input)
}
// BenchmarkRelaxedMany times Relaxed on a multi-line input that mixes plain
// words with several URL-like tokens.
func BenchmarkRelaxedMany(b *testing.B) {
	// The raw string's continuation lines are part of the input and must
	// stay unindented.
	const input = ` foo bar http://foo.foo
foo.com bitcoin:address ftp://
xmpp:foo@bar.com`
	bench(b, Relaxed, input)
}