From 2ad44158af373f68c1aef528738a5baade77d316 Mon Sep 17 00:00:00 2001
From: Cherry Zhang
Date: Thu, 22 Oct 2020 10:05:44 -0400
Subject: [PATCH 001/253] cmd/internal/obj: use correct symbol size for
Hashed64 classification
Use sym.Size, instead of len(sym.P), to decide whether a
content-addressable symbol is "short" and hashed as Hashed64.
So we don't dedup a small symbol with a gigantic almost-zero
symbol.
Fixes #42140.
Change-Id: Ic65869e1eaf51947517b3ece49c8b0be1b94bb75
Reviewed-on: https://go-review.googlesource.com/c/go/+/264337
Trust: Cherry Zhang
Trust: Cuong Manh Le
Run-TryBot: Cherry Zhang
TryBot-Result: Go Bot
Reviewed-by: Cuong Manh Le
Reviewed-by: Than McIntosh
---
src/cmd/internal/obj/sym.go | 2 +-
src/cmd/link/link_test.go | 53 +++++++++++++++++++++++++++++++++++++
2 files changed, 54 insertions(+), 1 deletion(-)
diff --git a/src/cmd/internal/obj/sym.go b/src/cmd/internal/obj/sym.go
index 0182773f8e..4515bdd0d3 100644
--- a/src/cmd/internal/obj/sym.go
+++ b/src/cmd/internal/obj/sym.go
@@ -205,7 +205,7 @@ func (ctxt *Link) NumberSyms() {
// if Pkgpath is unknown, cannot hash symbols with relocations, as it
// may reference named symbols whose names are not fully expanded.
if s.ContentAddressable() && (ctxt.Pkgpath != "" || len(s.R) == 0) {
- if len(s.P) <= 8 && len(s.R) == 0 && !strings.HasPrefix(s.Name, "type.") {
+ if s.Size <= 8 && len(s.R) == 0 && !strings.HasPrefix(s.Name, "type.") {
// We can use short hash only for symbols without relocations.
// Don't use short hash for type symbols, as they need special handling.
s.PkgIdx = goobj.PkgIdxHashed64
diff --git a/src/cmd/link/link_test.go b/src/cmd/link/link_test.go
index 968da4837d..204410e976 100644
--- a/src/cmd/link/link_test.go
+++ b/src/cmd/link/link_test.go
@@ -820,3 +820,56 @@ func TestReadOnly(t *testing.T) {
t.Errorf("running test program did not fail. output:\n%s", out)
}
}
+
+const testIssue38554Src = `
+package main
+
+type T [10<<20]byte
+
+//go:noinline
+func f() T {
+ return T{} // compiler will make a large stmp symbol, but not used.
+}
+
+func main() {
+ x := f()
+ println(x[1])
+}
+`
+
+func TestIssue38554(t *testing.T) {
+ testenv.MustHaveGoBuild(t)
+
+ t.Parallel()
+
+ tmpdir, err := ioutil.TempDir("", "TestIssue38554")
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer os.RemoveAll(tmpdir)
+
+ src := filepath.Join(tmpdir, "x.go")
+ err = ioutil.WriteFile(src, []byte(testIssue38554Src), 0666)
+ if err != nil {
+ t.Fatalf("failed to write source file: %v", err)
+ }
+ exe := filepath.Join(tmpdir, "x.exe")
+ cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", exe, src)
+ out, err := cmd.CombinedOutput()
+ if err != nil {
+ t.Fatalf("build failed: %v\n%s", err, out)
+ }
+
+ fi, err := os.Stat(exe)
+ if err != nil {
+ t.Fatalf("failed to stat output file: %v", err)
+ }
+
+ // The test program is not much different from a helloworld, which is
+ // typically a little over 1 MB. We allow 5 MB. If the bad stmp is live,
+ // it will be over 10 MB.
+ const want = 5 << 20
+ if got := fi.Size(); got > want {
+ t.Errorf("binary too big: got %d, want < %d", got, want)
+ }
+}
--
GitLab
From c92bfac01e9e8319822f472fd11a51bf28762125 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Mart=C3=AD?=
Date: Thu, 22 Oct 2020 19:32:47 +0100
Subject: [PATCH 002/253] cmd/go: support non-amd64 on script/toolexec.txt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
In https://golang.org/cl/263357, I wasn't thinking that the assembly
file without a GOARCH suffix would be built for all architectures. Only
build assembly for amd64, and update the stderr matching line.
I manually verified that this works on 386; since the only Go file in
that package is a stub, and no assembly files match GOARCH=386, no
assembly is built at all.
Change-Id: Ief3c6c9bdc223f342821b0ec27f00098fc25246a
Reviewed-on: https://go-review.googlesource.com/c/go/+/264457
Trust: Daniel Martí
Run-TryBot: Daniel Martí
Reviewed-by: Bryan C. Mills
TryBot-Result: Go Bot
---
src/cmd/go/testdata/script/toolexec.txt | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/cmd/go/testdata/script/toolexec.txt b/src/cmd/go/testdata/script/toolexec.txt
index 021b7f1684..526234196b 100644
--- a/src/cmd/go/testdata/script/toolexec.txt
+++ b/src/cmd/go/testdata/script/toolexec.txt
@@ -11,7 +11,7 @@ go build ./cmd/mytool
# Finally, note that asm and cgo are run twice.
go build -toolexec=$PWD/mytool
-stderr -count=2 '^asm'${GOEXE}' TOOLEXEC_IMPORTPATH=test/main/withasm$'
+[amd64] stderr -count=2 '^asm'${GOEXE}' TOOLEXEC_IMPORTPATH=test/main/withasm$'
stderr -count=1 '^compile'${GOEXE}' TOOLEXEC_IMPORTPATH=test/main/withasm$'
[cgo] stderr -count=2 '^cgo'${GOEXE}' TOOLEXEC_IMPORTPATH=test/main/withcgo$'
[cgo] stderr -count=1 '^compile'${GOEXE}' TOOLEXEC_IMPORTPATH=test/main/withcgo$'
@@ -46,7 +46,9 @@ package withcgo
// Stub file to ensure we build without cgo too.
-- withasm/withasm.go --
package withasm
--- withasm/withasm.s --
+
+// Note that we don't need to declare the Add func at all.
+-- withasm/withasm_amd64.s --
TEXT ·Add(SB),$0-24
MOVQ a+0(FP), AX
ADDQ b+8(FP), AX
--
GitLab
From 4ce9ea52c9ac48f85fba1233b6e7d563f89dff8b Mon Sep 17 00:00:00 2001
From: Nigel Tao
Date: Thu, 22 Oct 2020 23:59:00 +1100
Subject: [PATCH 003/253] strconv: fix Eisel-Lemire for negative zero
This is somewhat academic (and no tests failed before this commit),
since func atof64 only calls func eiselLemire when func atof64exact
fails, and func atof64exact doesn't fail when parsing positive or
negative zeroes. But it's still worth fixing.
Change-Id: Ibe6ef4c8fd96827673b711d5456003fbc447e39c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264140
Trust: Nigel Tao
Trust: Robert Griesemer
Reviewed-by: Robert Griesemer
---
src/strconv/eisel_lemire.go | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/src/strconv/eisel_lemire.go b/src/strconv/eisel_lemire.go
index 00fadbdb96..5cd22a7e37 100644
--- a/src/strconv/eisel_lemire.go
+++ b/src/strconv/eisel_lemire.go
@@ -22,14 +22,17 @@ import (
"math/bits"
)
-func eiselLemire(man uint64, exp10 int, neg bool) (ret float64, ok bool) {
+func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) {
// The terse comments in this function body refer to sections of the
// https://nigeltao.github.io/blog/2020/eisel-lemire.html blog post.
// Exp10 Range.
const exp10Min, exp10Max = -307, +288
if man == 0 {
- return 0, true
+ if neg {
+ f = math.Float64frombits(0x80000000_00000000) // Negative zero.
+ }
+ return f, true
}
if exp10 < exp10Min || exp10Max < exp10 {
return 0, false
--
GitLab
From 6f7b553c82b69b47becbe36d9115971d30fdab48 Mon Sep 17 00:00:00 2001
From: Quim Muntal
Date: Thu, 15 Oct 2020 23:12:49 +0200
Subject: [PATCH 004/253] cmd/cgo: avoid exporting all symbols on windows
buildmode=c-shared
Disable default symbol auto-export behaviour by marking exported
function with the __declspec(dllexport) attribute. Old behaviour can
still be used by setting -extldflags=-Wl,--export-all-symbols.
See https://sourceware.org/binutils/docs/ld/WIN32.html for more info.
This change cuts 50kb of a "hello world" dll.
Updates #6853
Fixes #30674
Change-Id: I9c7fb09c677cc760f24d0f7d199740ae73981413
Reviewed-on: https://go-review.googlesource.com/c/go/+/262797
Run-TryBot: Ian Lance Taylor
TryBot-Result: Go Bot
Reviewed-by: Ian Lance Taylor
Reviewed-by: Alex Brainman
Trust: Alex Brainman
---
misc/cgo/testcshared/cshared_test.go | 96 ++++++++++++++++++++++++++++
src/cmd/cgo/out.go | 6 +-
2 files changed, 101 insertions(+), 1 deletion(-)
diff --git a/misc/cgo/testcshared/cshared_test.go b/misc/cgo/testcshared/cshared_test.go
index d557f34b0f..e1835afa51 100644
--- a/misc/cgo/testcshared/cshared_test.go
+++ b/misc/cgo/testcshared/cshared_test.go
@@ -7,6 +7,8 @@ package cshared_test
import (
"bytes"
"debug/elf"
+ "debug/pe"
+ "encoding/binary"
"flag"
"fmt"
"io/ioutil"
@@ -355,6 +357,100 @@ func TestExportedSymbols(t *testing.T) {
}
}
+func checkNumberOfExportedFunctionsWindows(t *testing.T, exportAllSymbols bool) {
+ const prog = `
+package main
+
+import "C"
+
+//export GoFunc
+func GoFunc() {
+ println(42)
+}
+
+//export GoFunc2
+func GoFunc2() {
+ println(24)
+}
+
+func main() {
+}
+`
+
+ tmpdir := t.TempDir()
+
+ srcfile := filepath.Join(tmpdir, "test.go")
+ objfile := filepath.Join(tmpdir, "test.dll")
+ if err := ioutil.WriteFile(srcfile, []byte(prog), 0666); err != nil {
+ t.Fatal(err)
+ }
+ argv := []string{"build", "-buildmode=c-shared"}
+ if exportAllSymbols {
+ argv = append(argv, "-ldflags", "-extldflags=-Wl,--export-all-symbols")
+ }
+ argv = append(argv, "-o", objfile, srcfile)
+ out, err := exec.Command("go", argv...).CombinedOutput()
+ if err != nil {
+ t.Fatalf("build failure: %s\n%s\n", err, string(out))
+ }
+
+ f, err := pe.Open(objfile)
+ if err != nil {
+ t.Fatalf("pe.Open failed: %v", err)
+ }
+ defer f.Close()
+ section := f.Section(".edata")
+ if section == nil {
+ t.Error(".edata section is not present")
+ }
+
+ // TODO: deduplicate this struct from cmd/link/internal/ld/pe.go
+ type IMAGE_EXPORT_DIRECTORY struct {
+ _ [2]uint32
+ _ [2]uint16
+ _ [2]uint32
+ NumberOfFunctions uint32
+ NumberOfNames uint32
+ _ [3]uint32
+ }
+ var e IMAGE_EXPORT_DIRECTORY
+ if err := binary.Read(section.Open(), binary.LittleEndian, &e); err != nil {
+ t.Fatalf("binary.Read failed: %v", err)
+ }
+
+ expectedNumber := uint32(2)
+
+ if exportAllSymbols {
+ if e.NumberOfFunctions <= expectedNumber {
+ t.Fatalf("missing exported functions: %v", e.NumberOfFunctions)
+ }
+ if e.NumberOfNames <= expectedNumber {
+ t.Fatalf("missing exported names: %v", e.NumberOfNames)
+ }
+ } else {
+ if e.NumberOfFunctions != expectedNumber {
+ t.Fatalf("too many exported functions: %v", e.NumberOfFunctions)
+ }
+ if e.NumberOfNames != expectedNumber {
+ t.Fatalf("too many exported names: %v", e.NumberOfNames)
+ }
+ }
+}
+
+func TestNumberOfExportedFunctions(t *testing.T) {
+ if GOOS != "windows" {
+ t.Skip("skipping windows only test")
+ }
+ t.Parallel()
+
+ t.Run("OnlyExported", func(t *testing.T) {
+ checkNumberOfExportedFunctionsWindows(t, false)
+ })
+ t.Run("All", func(t *testing.T) {
+ checkNumberOfExportedFunctionsWindows(t, true)
+ })
+}
+
// test1: shared library can be dynamically loaded and exported symbols are accessible.
func TestExportedSymbolsWithDynamicLoad(t *testing.T) {
t.Parallel()
diff --git a/src/cmd/cgo/out.go b/src/cmd/cgo/out.go
index b447b07645..82316a300b 100644
--- a/src/cmd/cgo/out.go
+++ b/src/cmd/cgo/out.go
@@ -939,7 +939,11 @@ func (p *Package) writeExports(fgo2, fm, fgcc, fgcch io.Writer) {
}
// Build the wrapper function compiled by gcc.
- s := fmt.Sprintf("%s %s(", gccResult, exp.ExpName)
+ gccExport := ""
+ if goos == "windows" {
+ gccExport = "__declspec(dllexport)"
+ }
+ s := fmt.Sprintf("%s %s %s(", gccExport, gccResult, exp.ExpName)
if fn.Recv != nil {
s += p.cgoType(fn.Recv.List[0].Type).C.String()
s += " recv"
--
GitLab
From f1aa0b081e9a75b7757a8e08378aba0326911916 Mon Sep 17 00:00:00 2001
From: qmuntal
Date: Wed, 21 Oct 2020 16:04:07 +0200
Subject: [PATCH 005/253] doc/go1.16: document ASLR support for
-buildmode=c-shared on windows
Change-Id: I89c61e444b3ab36f0081a5252d210cb265344122
Reviewed-on: https://go-review.googlesource.com/c/go/+/264157
Reviewed-by: Ian Lance Taylor
Reviewed-by: Alex Brainman
Trust: Alex Brainman
---
doc/go1.16.html | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/doc/go1.16.html b/doc/go1.16.html
index 1e73355b69..9e7ae1da27 100644
--- a/doc/go1.16.html
+++ b/doc/go1.16.html
@@ -182,7 +182,10 @@ Do not send CLs removing the interior tags from such phrases.
TODO: update with final numbers later in the release.
-
+
+ On Windows, go build -buildmode=c-shared now generates Windows
+ ASLR DLLs by default. ASLR can be disabled with --ldflags=-aslr=false.
+
Core library
--
GitLab
From ad36f871512ea54a9044e256b2743b8346b725dd Mon Sep 17 00:00:00 2001
From: Nigel Tao
Date: Thu, 22 Oct 2020 23:43:11 +1100
Subject: [PATCH 006/253] strconv: increase the Eisel-Lemire exp10 range
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This grows the exp10 range for which the Eisel-Lemire algorithm applies
from [-307, +288] to [-348, +347], roughly equivalent to the existing
powersOfTen table in extfloat.go (which uses a different algorithm).
name old time/op new time/op delta
Atof64Decimal-4 48.4ns ± 1% 48.7ns ± 3% ~ (p=0.698 n=5+5)
Atof64Float-4 57.9ns ± 1% 58.1ns ± 2% ~ (p=0.873 n=5+5)
Atof64FloatExp-4 71.8ns ± 2% 72.2ns ± 2% ~ (p=0.730 n=5+5)
Atof64Big-4 165ns ± 1% 164ns ± 1% ~ (p=0.635 n=5+5)
Atof64RandomBits-4 165ns ± 1% 165ns ± 6% ~ (p=0.143 n=5+5)
Atof64RandomFloats-4 147ns ± 2% 147ns ± 1% ~ (p=0.857 n=5+5)
Change-Id: Idf7dc5297db6db2bd9e0bd4cb0e55e021916fa43
Reviewed-on: https://go-review.googlesource.com/c/go/+/264139
Reviewed-by: Robert Griesemer
Trust: Robert Griesemer
Trust: Nigel Tao
---
src/strconv/eisel_lemire.go | 124 ++++++++++++++++++++++++++++++++++--
1 file changed, 119 insertions(+), 5 deletions(-)
diff --git a/src/strconv/eisel_lemire.go b/src/strconv/eisel_lemire.go
index 5cd22a7e37..e548270688 100644
--- a/src/strconv/eisel_lemire.go
+++ b/src/strconv/eisel_lemire.go
@@ -27,14 +27,13 @@ func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) {
// https://nigeltao.github.io/blog/2020/eisel-lemire.html blog post.
// Exp10 Range.
- const exp10Min, exp10Max = -307, +288
if man == 0 {
if neg {
f = math.Float64frombits(0x80000000_00000000) // Negative zero.
}
return f, true
}
- if exp10 < exp10Min || exp10Max < exp10 {
+ if exp10 < detailedPowersOfTenMinExp10 || detailedPowersOfTenMaxExp10 < exp10 {
return 0, false
}
@@ -44,11 +43,11 @@ func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) {
retExp2 := uint64(217706*exp10>>16+1087) - uint64(clz)
// Multiplication.
- xHi, xLo := bits.Mul64(man, detailedPowersOfTen[exp10-exp10Min][1])
+ xHi, xLo := bits.Mul64(man, detailedPowersOfTen[exp10-detailedPowersOfTenMinExp10][1])
// Wider Approximation.
if xHi&0x1FF == 0x1FF && xLo+man < man {
- yHi, yLo := bits.Mul64(man, detailedPowersOfTen[exp10-exp10Min][0])
+ yHi, yLo := bits.Mul64(man, detailedPowersOfTen[exp10-detailedPowersOfTenMinExp10][0])
mergedHi, mergedLo := xHi, xLo+yHi
if mergedLo < xLo {
mergedHi++
@@ -76,6 +75,14 @@ func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) {
retMantissa >>= 1
retExp2 += 1
}
+ // retExp2 is a uint64. Zero or underflow means that we're in subnormal
+ // float64 space. 0x7FF or above means that we're in Inf/NaN float64 space.
+ //
+ // The if condition is equivalent to (but has fewer branches than):
+ // if retExp2 <= 0 || retExp2 >= 0x7FF {
+ if retExp2-1 >= 0x7FF-1 {
+ return 0, false
+ }
retBits := retExp2<<52 | retMantissa&0x000FFFFF_FFFFFFFF
if neg {
retBits |= 0x80000000_00000000
@@ -83,6 +90,13 @@ func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) {
return math.Float64frombits(retBits), true
}
+// detailedPowersOfTen{Min,Max}Exp10 is the power of 10 represented by the
+// first and last rows of detailedPowersOfTen. Both bounds are inclusive.
+const (
+ detailedPowersOfTenMinExp10 = -348
+ detailedPowersOfTenMaxExp10 = +347
+)
+
// detailedPowersOfTen contains 128-bit mantissa approximations (rounded down)
// to the powers of 10. For example:
//
@@ -94,7 +108,48 @@ func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) {
//
// The table was generated by
// https://github.com/google/wuffs/blob/ba3818cb6b473a2ed0b38ecfc07dbbd3a97e8ae7/script/print-mpb-powers-of-10.go
-var detailedPowersOfTen = [596][2]uint64{
+var detailedPowersOfTen = [...][2]uint64{
+ {0x1732C869CD60E453, 0xFA8FD5A0081C0288}, // 1e-348
+ {0x0E7FBD42205C8EB4, 0x9C99E58405118195}, // 1e-347
+ {0x521FAC92A873B261, 0xC3C05EE50655E1FA}, // 1e-346
+ {0xE6A797B752909EF9, 0xF4B0769E47EB5A78}, // 1e-345
+ {0x9028BED2939A635C, 0x98EE4A22ECF3188B}, // 1e-344
+ {0x7432EE873880FC33, 0xBF29DCABA82FDEAE}, // 1e-343
+ {0x113FAA2906A13B3F, 0xEEF453D6923BD65A}, // 1e-342
+ {0x4AC7CA59A424C507, 0x9558B4661B6565F8}, // 1e-341
+ {0x5D79BCF00D2DF649, 0xBAAEE17FA23EBF76}, // 1e-340
+ {0xF4D82C2C107973DC, 0xE95A99DF8ACE6F53}, // 1e-339
+ {0x79071B9B8A4BE869, 0x91D8A02BB6C10594}, // 1e-338
+ {0x9748E2826CDEE284, 0xB64EC836A47146F9}, // 1e-337
+ {0xFD1B1B2308169B25, 0xE3E27A444D8D98B7}, // 1e-336
+ {0xFE30F0F5E50E20F7, 0x8E6D8C6AB0787F72}, // 1e-335
+ {0xBDBD2D335E51A935, 0xB208EF855C969F4F}, // 1e-334
+ {0xAD2C788035E61382, 0xDE8B2B66B3BC4723}, // 1e-333
+ {0x4C3BCB5021AFCC31, 0x8B16FB203055AC76}, // 1e-332
+ {0xDF4ABE242A1BBF3D, 0xADDCB9E83C6B1793}, // 1e-331
+ {0xD71D6DAD34A2AF0D, 0xD953E8624B85DD78}, // 1e-330
+ {0x8672648C40E5AD68, 0x87D4713D6F33AA6B}, // 1e-329
+ {0x680EFDAF511F18C2, 0xA9C98D8CCB009506}, // 1e-328
+ {0x0212BD1B2566DEF2, 0xD43BF0EFFDC0BA48}, // 1e-327
+ {0x014BB630F7604B57, 0x84A57695FE98746D}, // 1e-326
+ {0x419EA3BD35385E2D, 0xA5CED43B7E3E9188}, // 1e-325
+ {0x52064CAC828675B9, 0xCF42894A5DCE35EA}, // 1e-324
+ {0x7343EFEBD1940993, 0x818995CE7AA0E1B2}, // 1e-323
+ {0x1014EBE6C5F90BF8, 0xA1EBFB4219491A1F}, // 1e-322
+ {0xD41A26E077774EF6, 0xCA66FA129F9B60A6}, // 1e-321
+ {0x8920B098955522B4, 0xFD00B897478238D0}, // 1e-320
+ {0x55B46E5F5D5535B0, 0x9E20735E8CB16382}, // 1e-319
+ {0xEB2189F734AA831D, 0xC5A890362FDDBC62}, // 1e-318
+ {0xA5E9EC7501D523E4, 0xF712B443BBD52B7B}, // 1e-317
+ {0x47B233C92125366E, 0x9A6BB0AA55653B2D}, // 1e-316
+ {0x999EC0BB696E840A, 0xC1069CD4EABE89F8}, // 1e-315
+ {0xC00670EA43CA250D, 0xF148440A256E2C76}, // 1e-314
+ {0x380406926A5E5728, 0x96CD2A865764DBCA}, // 1e-313
+ {0xC605083704F5ECF2, 0xBC807527ED3E12BC}, // 1e-312
+ {0xF7864A44C633682E, 0xEBA09271E88D976B}, // 1e-311
+ {0x7AB3EE6AFBE0211D, 0x93445B8731587EA3}, // 1e-310
+ {0x5960EA05BAD82964, 0xB8157268FDAE9E4C}, // 1e-309
+ {0x6FB92487298E33BD, 0xE61ACF033D1A45DF}, // 1e-308
{0xA5D3B6D479F8E056, 0x8FD0C16206306BAB}, // 1e-307
{0x8F48A4899877186C, 0xB3C4F1BA87BC8696}, // 1e-306
{0x331ACDABFE94DE87, 0xE0B62E2929ABA83C}, // 1e-305
@@ -691,4 +746,63 @@ var detailedPowersOfTen = [596][2]uint64{
{0x49ED8EABCCCC485D, 0x867F59A9D4BED6C0}, // 1e286
{0x5C68F256BFFF5A74, 0xA81F301449EE8C70}, // 1e287
{0x73832EEC6FFF3111, 0xD226FC195C6A2F8C}, // 1e288
+ {0xC831FD53C5FF7EAB, 0x83585D8FD9C25DB7}, // 1e289
+ {0xBA3E7CA8B77F5E55, 0xA42E74F3D032F525}, // 1e290
+ {0x28CE1BD2E55F35EB, 0xCD3A1230C43FB26F}, // 1e291
+ {0x7980D163CF5B81B3, 0x80444B5E7AA7CF85}, // 1e292
+ {0xD7E105BCC332621F, 0xA0555E361951C366}, // 1e293
+ {0x8DD9472BF3FEFAA7, 0xC86AB5C39FA63440}, // 1e294
+ {0xB14F98F6F0FEB951, 0xFA856334878FC150}, // 1e295
+ {0x6ED1BF9A569F33D3, 0x9C935E00D4B9D8D2}, // 1e296
+ {0x0A862F80EC4700C8, 0xC3B8358109E84F07}, // 1e297
+ {0xCD27BB612758C0FA, 0xF4A642E14C6262C8}, // 1e298
+ {0x8038D51CB897789C, 0x98E7E9CCCFBD7DBD}, // 1e299
+ {0xE0470A63E6BD56C3, 0xBF21E44003ACDD2C}, // 1e300
+ {0x1858CCFCE06CAC74, 0xEEEA5D5004981478}, // 1e301
+ {0x0F37801E0C43EBC8, 0x95527A5202DF0CCB}, // 1e302
+ {0xD30560258F54E6BA, 0xBAA718E68396CFFD}, // 1e303
+ {0x47C6B82EF32A2069, 0xE950DF20247C83FD}, // 1e304
+ {0x4CDC331D57FA5441, 0x91D28B7416CDD27E}, // 1e305
+ {0xE0133FE4ADF8E952, 0xB6472E511C81471D}, // 1e306
+ {0x58180FDDD97723A6, 0xE3D8F9E563A198E5}, // 1e307
+ {0x570F09EAA7EA7648, 0x8E679C2F5E44FF8F}, // 1e308
+ {0x2CD2CC6551E513DA, 0xB201833B35D63F73}, // 1e309
+ {0xF8077F7EA65E58D1, 0xDE81E40A034BCF4F}, // 1e310
+ {0xFB04AFAF27FAF782, 0x8B112E86420F6191}, // 1e311
+ {0x79C5DB9AF1F9B563, 0xADD57A27D29339F6}, // 1e312
+ {0x18375281AE7822BC, 0xD94AD8B1C7380874}, // 1e313
+ {0x8F2293910D0B15B5, 0x87CEC76F1C830548}, // 1e314
+ {0xB2EB3875504DDB22, 0xA9C2794AE3A3C69A}, // 1e315
+ {0x5FA60692A46151EB, 0xD433179D9C8CB841}, // 1e316
+ {0xDBC7C41BA6BCD333, 0x849FEEC281D7F328}, // 1e317
+ {0x12B9B522906C0800, 0xA5C7EA73224DEFF3}, // 1e318
+ {0xD768226B34870A00, 0xCF39E50FEAE16BEF}, // 1e319
+ {0xE6A1158300D46640, 0x81842F29F2CCE375}, // 1e320
+ {0x60495AE3C1097FD0, 0xA1E53AF46F801C53}, // 1e321
+ {0x385BB19CB14BDFC4, 0xCA5E89B18B602368}, // 1e322
+ {0x46729E03DD9ED7B5, 0xFCF62C1DEE382C42}, // 1e323
+ {0x6C07A2C26A8346D1, 0x9E19DB92B4E31BA9}, // 1e324
+ {0xC7098B7305241885, 0xC5A05277621BE293}, // 1e325
+ {0xB8CBEE4FC66D1EA7, 0xF70867153AA2DB38}, // 1e326
+ {0x737F74F1DC043328, 0x9A65406D44A5C903}, // 1e327
+ {0x505F522E53053FF2, 0xC0FE908895CF3B44}, // 1e328
+ {0x647726B9E7C68FEF, 0xF13E34AABB430A15}, // 1e329
+ {0x5ECA783430DC19F5, 0x96C6E0EAB509E64D}, // 1e330
+ {0xB67D16413D132072, 0xBC789925624C5FE0}, // 1e331
+ {0xE41C5BD18C57E88F, 0xEB96BF6EBADF77D8}, // 1e332
+ {0x8E91B962F7B6F159, 0x933E37A534CBAAE7}, // 1e333
+ {0x723627BBB5A4ADB0, 0xB80DC58E81FE95A1}, // 1e334
+ {0xCEC3B1AAA30DD91C, 0xE61136F2227E3B09}, // 1e335
+ {0x213A4F0AA5E8A7B1, 0x8FCAC257558EE4E6}, // 1e336
+ {0xA988E2CD4F62D19D, 0xB3BD72ED2AF29E1F}, // 1e337
+ {0x93EB1B80A33B8605, 0xE0ACCFA875AF45A7}, // 1e338
+ {0xBC72F130660533C3, 0x8C6C01C9498D8B88}, // 1e339
+ {0xEB8FAD7C7F8680B4, 0xAF87023B9BF0EE6A}, // 1e340
+ {0xA67398DB9F6820E1, 0xDB68C2CA82ED2A05}, // 1e341
+ {0x88083F8943A1148C, 0x892179BE91D43A43}, // 1e342
+ {0x6A0A4F6B948959B0, 0xAB69D82E364948D4}, // 1e343
+ {0x848CE34679ABB01C, 0xD6444E39C3DB9B09}, // 1e344
+ {0xF2D80E0C0C0B4E11, 0x85EAB0E41A6940E5}, // 1e345
+ {0x6F8E118F0F0E2195, 0xA7655D1D2103911F}, // 1e346
+ {0x4B7195F2D2D1A9FB, 0xD13EB46469447567}, // 1e347
}
--
GitLab
From 67bce7c1cf6540a853c6b8a9721e381c8258d7dc Mon Sep 17 00:00:00 2001
From: Hana
Date: Wed, 7 Oct 2020 17:27:42 -0400
Subject: [PATCH 007/253] cmd/vendor: sync
pprof@v0.0.0-20201007051231-1066cbb265c7
This is a belated early sync for 1.16 dev cycle
For #36905
Change-Id: I387528ae897794841c0c78b0f0910fc5ce8599ab
Reviewed-on: https://go-review.googlesource.com/c/go/+/260538
Run-TryBot: Hyang-Ah Hana Kim
TryBot-Result: Go Bot
Trust: Hyang-Ah Hana Kim
Reviewed-by: Dmitri Shuralyov
---
src/cmd/go.mod | 2 +-
src/cmd/go.sum | 4 ++--
.../pprof/internal/driver/driver_focus.go | 2 +-
.../google/pprof/internal/driver/tempfile.go | 18 ++++++++++++------
.../google/pprof/internal/graph/dotgraph.go | 13 ++++++++++++-
.../google/pprof/internal/report/report.go | 2 +-
src/cmd/vendor/modules.txt | 2 +-
7 files changed, 30 insertions(+), 13 deletions(-)
diff --git a/src/cmd/go.mod b/src/cmd/go.mod
index 4d4320aa9b..f79b238a1d 100644
--- a/src/cmd/go.mod
+++ b/src/cmd/go.mod
@@ -3,7 +3,7 @@ module cmd
go 1.16
require (
- github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99
+ github.com/google/pprof v0.0.0-20201007051231-1066cbb265c7
github.com/ianlancetaylor/demangle v0.0.0-20200414190113-039b1ae3a340 // indirect
golang.org/x/arch v0.0.0-20201008161808-52c3e6f60cff
golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a
diff --git a/src/cmd/go.sum b/src/cmd/go.sum
index 3f2562a040..6eff8a2c57 100644
--- a/src/cmd/go.sum
+++ b/src/cmd/go.sum
@@ -1,8 +1,8 @@
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
-github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99 h1:Ak8CrdlwwXwAZxzS66vgPt4U8yUZX7JwLvVR58FN5jM=
-github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
+github.com/google/pprof v0.0.0-20201007051231-1066cbb265c7 h1:qYWTuM6SUNWgtvkhV8oH6GFHCpU+rKQOxPcepM3xKi0=
+github.com/google/pprof v0.0.0-20201007051231-1066cbb265c7/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/ianlancetaylor/demangle v0.0.0-20200414190113-039b1ae3a340 h1:S1+yTUaFPXuDZnPDbO+TrDFIjPzQraYH8/CwSlu9Fac=
github.com/ianlancetaylor/demangle v0.0.0-20200414190113-039b1ae3a340/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
diff --git a/src/cmd/vendor/github.com/google/pprof/internal/driver/driver_focus.go b/src/cmd/vendor/github.com/google/pprof/internal/driver/driver_focus.go
index 048ba17cb0..fd05adb146 100644
--- a/src/cmd/vendor/github.com/google/pprof/internal/driver/driver_focus.go
+++ b/src/cmd/vendor/github.com/google/pprof/internal/driver/driver_focus.go
@@ -58,7 +58,7 @@ func applyFocus(prof *profile.Profile, numLabelUnits map[string]string, cfg conf
taghide, err := compileRegexOption("taghide", cfg.TagHide, err)
tns, tnh := prof.FilterTagsByName(tagshow, taghide)
warnNoMatches(tagshow == nil || tns, "TagShow", ui)
- warnNoMatches(tagignore == nil || tnh, "TagHide", ui)
+ warnNoMatches(taghide == nil || tnh, "TagHide", ui)
if prunefrom != nil {
prof.PruneFrom(prunefrom)
diff --git a/src/cmd/vendor/github.com/google/pprof/internal/driver/tempfile.go b/src/cmd/vendor/github.com/google/pprof/internal/driver/tempfile.go
index 28679f1c15..b6c8776ff8 100644
--- a/src/cmd/vendor/github.com/google/pprof/internal/driver/tempfile.go
+++ b/src/cmd/vendor/github.com/google/pprof/internal/driver/tempfile.go
@@ -24,9 +24,11 @@ import (
// newTempFile returns a new output file in dir with the provided prefix and suffix.
func newTempFile(dir, prefix, suffix string) (*os.File, error) {
for index := 1; index < 10000; index++ {
- path := filepath.Join(dir, fmt.Sprintf("%s%03d%s", prefix, index, suffix))
- if _, err := os.Stat(path); err != nil {
- return os.Create(path)
+ switch f, err := os.OpenFile(filepath.Join(dir, fmt.Sprintf("%s%03d%s", prefix, index, suffix)), os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666); {
+ case err == nil:
+ return f, nil
+ case !os.IsExist(err):
+ return nil, err
}
}
// Give up
@@ -44,11 +46,15 @@ func deferDeleteTempFile(path string) {
}
// cleanupTempFiles removes any temporary files selected for deferred cleaning.
-func cleanupTempFiles() {
+func cleanupTempFiles() error {
tempFilesMu.Lock()
+ defer tempFilesMu.Unlock()
+ var lastErr error
for _, f := range tempFiles {
- os.Remove(f)
+ if err := os.Remove(f); err != nil {
+ lastErr = err
+ }
}
tempFiles = nil
- tempFilesMu.Unlock()
+ return lastErr
}
diff --git a/src/cmd/vendor/github.com/google/pprof/internal/graph/dotgraph.go b/src/cmd/vendor/github.com/google/pprof/internal/graph/dotgraph.go
index 09debfb007..cde648f20b 100644
--- a/src/cmd/vendor/github.com/google/pprof/internal/graph/dotgraph.go
+++ b/src/cmd/vendor/github.com/google/pprof/internal/graph/dotgraph.go
@@ -127,7 +127,7 @@ func (b *builder) addLegend() {
}
title := labels[0]
fmt.Fprintf(b, `subgraph cluster_L { "%s" [shape=box fontsize=16`, title)
- fmt.Fprintf(b, ` label="%s\l"`, strings.Join(labels, `\l`))
+ fmt.Fprintf(b, ` label="%s\l"`, strings.Join(escapeForDot(labels), `\l`))
if b.config.LegendURL != "" {
fmt.Fprintf(b, ` URL="%s" target="_blank"`, b.config.LegendURL)
}
@@ -472,3 +472,14 @@ func min64(a, b int64) int64 {
}
return b
}
+
+// escapeForDot escapes double quotes and backslashes, and replaces Graphviz's
+// "center" character (\n) with a left-justified character.
+// See https://graphviz.org/doc/info/attrs.html#k:escString for more info.
+func escapeForDot(in []string) []string {
+ var out = make([]string, len(in))
+ for i := range in {
+ out[i] = strings.ReplaceAll(strings.ReplaceAll(strings.ReplaceAll(in[i], `\`, `\\`), `"`, `\"`), "\n", `\l`)
+ }
+ return out
+}
diff --git a/src/cmd/vendor/github.com/google/pprof/internal/report/report.go b/src/cmd/vendor/github.com/google/pprof/internal/report/report.go
index a345208910..bc5685d61e 100644
--- a/src/cmd/vendor/github.com/google/pprof/internal/report/report.go
+++ b/src/cmd/vendor/github.com/google/pprof/internal/report/report.go
@@ -1207,7 +1207,7 @@ func reportLabels(rpt *Report, g *graph.Graph, origCount, droppedNodes, droppedE
// Help new users understand the graph.
// A new line is intentionally added here to better show this message.
if fullHeaders {
- label = append(label, "\\lSee https://git.io/JfYMW for how to read the graph")
+ label = append(label, "\nSee https://git.io/JfYMW for how to read the graph")
}
return label
diff --git a/src/cmd/vendor/modules.txt b/src/cmd/vendor/modules.txt
index 75a118f039..4be3a2b680 100644
--- a/src/cmd/vendor/modules.txt
+++ b/src/cmd/vendor/modules.txt
@@ -1,4 +1,4 @@
-# github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99
+# github.com/google/pprof v0.0.0-20201007051231-1066cbb265c7
## explicit
github.com/google/pprof/driver
github.com/google/pprof/internal/binutils
--
GitLab
From 400581b8b008ece8d0df34f54f281d365a175dba Mon Sep 17 00:00:00 2001
From: Russ Cox
Date: Sat, 18 Jul 2020 23:50:48 -0400
Subject: [PATCH 008/253] embed: implement FS
embed.FS is the implementation of embedded file trees, providing
an fs.FS for each embed.FS variable.
Tests are in a follow-up CL, in the package embed/internal/embedtest.
(They can only be written once the toolchain can initialize one of these,
which requires changes to cmd/compile and cmd/go.)
For #41191.
Change-Id: Ieb0ead1d305cdac3d5d4e11772dca75740a72730
Reviewed-on: https://go-review.googlesource.com/c/go/+/243942
Trust: Russ Cox
Trust: Jay Conrod
Run-TryBot: Russ Cox
TryBot-Result: Go Bot
Reviewed-by: Rob Pike
Reviewed-by: Jay Conrod
---
src/embed/embed.go | 403 ++++++++++++++++++++++++++++++++++++++
src/go/build/read_test.go | 12 +-
2 files changed, 409 insertions(+), 6 deletions(-)
create mode 100644 src/embed/embed.go
diff --git a/src/embed/embed.go b/src/embed/embed.go
new file mode 100644
index 0000000000..b22975cc3a
--- /dev/null
+++ b/src/embed/embed.go
@@ -0,0 +1,403 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package embed provides access to files embedded in the running Go program.
+//
+// Go source files that import "embed" can use the //go:embed directive
+// to initialize a variable of type string, []byte, or FS with the contents of
+// files read from the package directory or subdirectories at compile time.
+//
+// For example, here are three ways to embed a file named hello.txt
+// and then print its contents at run time:
+//
+// import "embed"
+//
+// //go:embed hello.txt
+// var s string
+// print(s)
+//
+// //go:embed hello.txt
+// var b []byte
+// print(string(b))
+//
+// //go:embed hello.txt
+// var f embed.FS
+// data, _ := f.ReadFile("hello.txt")
+// print(string(data))
+//
+// Directives
+//
+// A //go:embed directive above a variable declaration specifies which files to embed,
+// using one or more path.Match patterns.
+//
+// The directive must immediately precede a line containing the declaration of a single variable.
+// Only blank lines and ‘//’ line comments are permitted between the directive and the declaration.
+//
+// The variable must be of type string, []byte, or FS exactly. Named types or type aliases
+// derived from those types are not allowed.
+//
+// For example:
+//
+// package server
+//
+// import "embed"
+//
+// // content holds our static web server content.
+// //go:embed image/* template/*
+// //go:embed html/index.html
+// var content embed.FS
+//
+// The Go build system will recognize the directives and arrange for the declared variable
+// (in the example above, content) to be populated with the matching files from the file system.
+//
+// The //go:embed directive accepts multiple space-separated patterns for brevity,
+// but it can also be repeated, to avoid very long lines when there are many patterns.
+// The patterns are interpreted relative to the package directory containing the source file.
+// The path separator is a forward slash, even on Windows systems.
+// To allow for naming files with spaces in their names, patterns can be written
+// as Go double-quoted or back-quoted string literals.
+//
+// If a pattern names a directory, all files in the subtree rooted at that directory are
+// embedded (recursively), so the variable in the above example is equivalent to:
+//
+// // content is our static web server content.
+// //go:embed image template html/index.html
+// var content embed.FS
+//
+// The //go:embed directive can be used with both exported and unexported variables,
+// depending on whether the package wants to make the data available to other packages.
+// Similarly, it can be used with both global and function-local variables,
+// depending on what is more convenient in context.
+//
+// Patterns must not match files outside the package's module, such as ‘.git/*’ or symbolic links.
+// Matches for empty directories are ignored. After that, each pattern in a //go:embed line
+// must match at least one file or non-empty directory.
+//
+// Patterns must not contain ‘.’ or ‘..’ path elements nor begin with a leading slash.
+// To match everything in the current directory, use ‘*’ instead of ‘.’.
+//
+// If any patterns are invalid or have invalid matches, the build will fail.
+//
+// Strings and Bytes
+//
+// The //go:embed line for a variable of type string or []byte can have only a single pattern,
+// and that pattern can match only a single file. The string or []byte is initialized with
+// the contents of that file.
+//
+// The //go:embed directive requires importing "embed", even when using a string or []byte.
+// In source files that don't refer to embed.FS, use a blank import (import _ "embed").
+//
+// File Systems
+//
+// For embedding a single file, a variable of type string or []byte is often best.
+// The FS type enables embedding a tree of files, such as a directory of static
+// web server content, as in the example above.
+//
+// FS implements the io/fs package's FS interface, so it can be used with any package that
+// understands file systems, including net/http, text/template, and html/template.
+//
+// For example, given the content variable in the example above, we can write:
+//
+// http.Handle("/static/", http.StripPrefix("/static/", http.FileServer(http.FS(content))))
+//
+// template.ParseFS(content, "*.tmpl")
+//
+// Tools
+//
+// To support tools that analyze Go packages, the patterns found in //go:embed lines
+// are available in “go list” output. See the EmbedPatterns, TestEmbedPatterns,
+// and XTestEmbedPatterns fields in the “go help list” output.
+//
+package embed
+
+import (
+ "errors"
+ "io"
+ "io/fs"
+ "time"
+)
+
+// An FS is a read-only collection of files, usually initialized with a //go:embed directive.
+// When declared without a //go:embed directive, an FS is an empty file system.
+//
+// An FS is a read-only value, so it is safe to use from multiple goroutines
+// simultaneously and also safe to assign values of type FS to each other.
+//
+// FS implements fs.FS, so it can be used with any package that understands
+// file system interfaces, including net/http, text/template, and html/template.
+//
+// See the package documentation for more details about initializing an FS.
+type FS struct {
+ // The compiler knows the layout of this struct.
+ // See cmd/compile/internal/gc's initEmbed.
+ //
+ // The files list is sorted by name but not by simple string comparison.
+ // Instead, each file's name takes the form "dir/elem" or "dir/elem/".
+ // The optional trailing slash indicates that the file is itself a directory.
+ // The files list is sorted first by dir (if dir is missing, it is taken to be ".")
+ // and then by base, so this list of files:
+ //
+ // p
+ // q/
+ // q/r
+ // q/s/
+ // q/s/t
+ // q/s/u
+ // q/v
+ // w
+ //
+ // is actually sorted as:
+ //
+ // p # dir=. elem=p
+ // q/ # dir=. elem=q
+ // w/ # dir=. elem=w
+ // q/r # dir=q elem=r
+ // q/s/ # dir=q elem=s
+ // q/v # dir=q elem=v
+ // q/s/t # dir=q/s elem=t
+ // q/s/u # dir=q/s elem=u
+ //
+ // This order brings directory contents together in contiguous sections
+ // of the list, allowing a directory read to use binary search to find
+ // the relevant sequence of entries.
+ files *[]file
+}
+
+// split splits the name into dir and elem as described in the
+// comment in the FS struct above. isDir reports whether the
+// final trailing slash was present, indicating that name is a directory.
+func split(name string) (dir, elem string, isDir bool) {
+ if name[len(name)-1] == '/' {
+ isDir = true
+ name = name[:len(name)-1]
+ }
+ i := len(name) - 1
+ for i >= 0 && name[i] != '/' {
+ i--
+ }
+ if i < 0 {
+ return ".", name, isDir
+ }
+ return name[:i], name[i+1:], isDir
+}
+
+// trimSlash trims a trailing slash from name, if present,
+// returning the possibly shortened name.
+func trimSlash(name string) string {
+ if len(name) > 0 && name[len(name)-1] == '/' {
+ return name[:len(name)-1]
+ }
+ return name
+}
+
+var (
+ _ fs.ReadDirFS = FS{}
+ _ fs.ReadFileFS = FS{}
+)
+
+// A file is a single file in the FS.
+// It implements fs.FileInfo and fs.DirEntry.
+type file struct {
+ // The compiler knows the layout of this struct.
+ // See cmd/compile/internal/gc's initEmbed.
+ name string
+ data string
+ hash [16]byte // truncated SHA256 hash
+}
+
+var (
+ _ fs.FileInfo = (*file)(nil)
+ _ fs.DirEntry = (*file)(nil)
+)
+
+func (f *file) Name() string { _, elem, _ := split(f.name); return elem }
+func (f *file) Size() int64 { return int64(len(f.data)) }
+func (f *file) ModTime() time.Time { return time.Time{} }
+func (f *file) IsDir() bool { _, _, isDir := split(f.name); return isDir }
+func (f *file) Sys() interface{} { return nil }
+func (f *file) Type() fs.FileMode { return f.Mode().Type() }
+func (f *file) Info() (fs.FileInfo, error) { return f, nil }
+
+func (f *file) Mode() fs.FileMode {
+ if f.IsDir() {
+ return fs.ModeDir | 0555
+ }
+ return 0444
+}
+
+// dotFile is a file for the root directory,
+// which is omitted from the files list in a FS.
+var dotFile = &file{name: "./"}
+
+// lookup returns the named file, or nil if it is not present.
+func (f FS) lookup(name string) *file {
+ if !fs.ValidPath(name) {
+ // The compiler should never emit a file with an invalid name,
+ // so this check is not strictly necessary (if name is invalid,
+ // we shouldn't find a match below), but it's a good backstop anyway.
+ return nil
+ }
+ if name == "." {
+ return dotFile
+ }
+
+ // Binary search to find where name would be in the list,
+ // and then check if name is at that position.
+ dir, elem, _ := split(name)
+ files := *f.files
+ i := sortSearch(len(files), func(i int) bool {
+ idir, ielem, _ := split(files[i].name)
+ return idir > dir || idir == dir && ielem >= elem
+ })
+ if i < len(files) && trimSlash(files[i].name) == name {
+ return &files[i]
+ }
+ return nil
+}
+
+// readDir returns the list of files corresponding to the directory dir.
+func (f FS) readDir(dir string) []file {
+ // Binary search to find where dir starts and ends in the list
+ // and then return that slice of the list.
+ files := *f.files
+ i := sortSearch(len(files), func(i int) bool {
+ idir, _, _ := split(files[i].name)
+ return idir >= dir
+ })
+ j := sortSearch(len(files), func(j int) bool {
+ jdir, _, _ := split(files[j].name)
+ return jdir > dir
+ })
+ return files[i:j]
+}
+
+// Open opens the named file for reading and returns it as an fs.File.
+func (f FS) Open(name string) (fs.File, error) {
+ file := f.lookup(name)
+ if file == nil {
+ return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist}
+ }
+ if file.IsDir() {
+ return &openDir{file, f.readDir(name), 0}, nil
+ }
+ return &openFile{file, 0}, nil
+}
+
+// ReadDir reads and returns the entire named directory.
+func (f FS) ReadDir(name string) ([]fs.DirEntry, error) {
+ file, err := f.Open(name)
+ if err != nil {
+ return nil, err
+ }
+ dir, ok := file.(*openDir)
+ if !ok {
+ return nil, &fs.PathError{Op: "read", Path: name, Err: errors.New("not a directory")}
+ }
+ list := make([]fs.DirEntry, len(dir.files))
+ for i := range list {
+ list[i] = &dir.files[i]
+ }
+ return list, nil
+}
+
+// ReadFile reads and returns the content of the named file.
+func (f FS) ReadFile(name string) ([]byte, error) {
+ file, err := f.Open(name)
+ if err != nil {
+ return nil, err
+ }
+ ofile, ok := file.(*openFile)
+ if !ok {
+ return nil, &fs.PathError{Op: "read", Path: name, Err: errors.New("is a directory")}
+ }
+ return []byte(ofile.f.data), nil
+}
+
+// An openFile is a regular file open for reading.
+type openFile struct {
+ f *file // the file itself
+ offset int64 // current read offset
+}
+
+func (f *openFile) Close() error { return nil }
+func (f *openFile) Stat() (fs.FileInfo, error) { return f.f, nil }
+
+func (f *openFile) Read(b []byte) (int, error) {
+ if f.offset >= int64(len(f.f.data)) {
+ return 0, io.EOF
+ }
+ if f.offset < 0 {
+ return 0, &fs.PathError{Op: "read", Path: f.f.name, Err: fs.ErrInvalid}
+ }
+ n := copy(b, f.f.data[f.offset:])
+ f.offset += int64(n)
+ return n, nil
+}
+
+func (f *openFile) Seek(offset int64, whence int) (int64, error) {
+ switch whence {
+ case 0:
+ // offset += 0
+ case 1:
+ offset += f.offset
+ case 2:
+ offset += int64(len(f.f.data))
+ }
+ if offset < 0 || offset > int64(len(f.f.data)) {
+ return 0, &fs.PathError{Op: "seek", Path: f.f.name, Err: fs.ErrInvalid}
+ }
+ f.offset = offset
+ return offset, nil
+}
+
+// An openDir is a directory open for reading.
+type openDir struct {
+ f *file // the directory file itself
+ files []file // the directory contents
+ offset int // the read offset, an index into the files slice
+}
+
+func (d *openDir) Close() error { return nil }
+func (d *openDir) Stat() (fs.FileInfo, error) { return d.f, nil }
+
+func (d *openDir) Read([]byte) (int, error) {
+ return 0, &fs.PathError{Op: "read", Path: d.f.name, Err: errors.New("is a directory")}
+}
+
+func (d *openDir) ReadDir(count int) ([]fs.DirEntry, error) {
+ n := len(d.files) - d.offset
+ if count > 0 && n > count {
+ n = count
+ }
+ if n == 0 {
+ if count <= 0 {
+ return nil, nil
+ }
+ return nil, io.EOF
+ }
+ list := make([]fs.DirEntry, n)
+ for i := range list {
+ list[i] = &d.files[d.offset+i]
+ }
+ d.offset += n
+ return list, nil
+}
+
+// sortSearch is like sort.Search, avoiding an import.
+func sortSearch(n int, f func(int) bool) int {
+ // Define f(-1) == false and f(n) == true.
+ // Invariant: f(i-1) == false, f(j) == true.
+ i, j := 0, n
+ for i < j {
+ h := int(uint(i+j) >> 1) // avoid overflow when computing h
+ // i ≤ h < j
+ if !f(h) {
+ i = h + 1 // preserves f(i-1) == false
+ } else {
+ j = h // preserves f(j) == true
+ }
+ }
+ // i == j, f(i-1) == false, and f(j) (= f(i)) == true => answer is i.
+ return i
+}
diff --git a/src/go/build/read_test.go b/src/go/build/read_test.go
index dc75c9f202..9264d2606f 100644
--- a/src/go/build/read_test.go
+++ b/src/go/build/read_test.go
@@ -236,23 +236,23 @@ var readEmbedTests = []struct {
nil,
},
{
- "package p\nimport \"embed\"\nvar i int\n//go:embed x y z\nvar files embed.Files",
+ "package p\nimport \"embed\"\nvar i int\n//go:embed x y z\nvar files embed.FS",
[]string{"x", "y", "z"},
},
{
- "package p\nimport \"embed\"\nvar i int\n//go:embed x \"\\x79\" `z`\nvar files embed.Files",
+ "package p\nimport \"embed\"\nvar i int\n//go:embed x \"\\x79\" `z`\nvar files embed.FS",
[]string{"x", "y", "z"},
},
{
- "package p\nimport \"embed\"\nvar i int\n//go:embed x y\n//go:embed z\nvar files embed.Files",
+ "package p\nimport \"embed\"\nvar i int\n//go:embed x y\n//go:embed z\nvar files embed.FS",
[]string{"x", "y", "z"},
},
{
- "package p\nimport \"embed\"\nvar i int\n\t //go:embed x y\n\t //go:embed z\n\t var files embed.Files",
+ "package p\nimport \"embed\"\nvar i int\n\t //go:embed x y\n\t //go:embed z\n\t var files embed.FS",
[]string{"x", "y", "z"},
},
{
- "package p\nimport \"embed\"\n//go:embed x y z\nvar files embed.Files",
+ "package p\nimport \"embed\"\n//go:embed x y z\nvar files embed.FS",
[]string{"x", "y", "z"},
},
{
@@ -260,7 +260,7 @@ var readEmbedTests = []struct {
nil,
},
{
- "package p\n//go:embed x y z\nvar files embed.Files", // no import, no scan
+ "package p\n//go:embed x y z\nvar files embed.FS", // no import, no scan
nil,
},
}
--
GitLab
From 8bde9b320e25b2d6edf96fa5e694046fea0c04c8 Mon Sep 17 00:00:00 2001
From: Russ Cox
Date: Sun, 19 Jul 2020 00:32:02 -0400
Subject: [PATCH 009/253] cmd/compile: add //go:embed support
This commit contains the compiler support for //go:embed lines.
The go command passes to the compiler an "embed config"
that maps literal patterns like *.txt to the set of files to embed.
The compiler then lays out the content of those files as static data
in the form of an embed.Files or string or []byte in the final object file.
The test for this code is the end-to-end test hooking up the
embed, cmd/compile, and cmd/go changes, in the next CL.
For #41191.
Change-Id: I916e57f8cc65871dc0044c13d3f90c252a3fe1bf
Reviewed-on: https://go-review.googlesource.com/c/go/+/243944
Trust: Russ Cox
Reviewed-by: Cherry Zhang
---
src/cmd/compile/internal/gc/embed.go | 273 +++++++++++++++++++++++
src/cmd/compile/internal/gc/main.go | 8 +-
src/cmd/compile/internal/gc/noder.go | 145 ++++++++++--
src/cmd/compile/internal/gc/obj.go | 132 +++++++++--
src/cmd/compile/internal/gc/syntax.go | 86 ++++++-
src/cmd/compile/internal/gc/typecheck.go | 8 +-
src/cmd/internal/obj/link.go | 29 ++-
src/cmd/internal/obj/objfile.go | 49 +++-
8 files changed, 682 insertions(+), 48 deletions(-)
create mode 100644 src/cmd/compile/internal/gc/embed.go
diff --git a/src/cmd/compile/internal/gc/embed.go b/src/cmd/compile/internal/gc/embed.go
new file mode 100644
index 0000000000..103949c1f9
--- /dev/null
+++ b/src/cmd/compile/internal/gc/embed.go
@@ -0,0 +1,273 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gc
+
+import (
+ "cmd/compile/internal/syntax"
+ "cmd/compile/internal/types"
+ "cmd/internal/obj"
+ "encoding/json"
+ "io/ioutil"
+ "log"
+ "path"
+ "sort"
+ "strconv"
+ "strings"
+)
+
+var embedlist []*Node
+
+var embedCfg struct {
+ Patterns map[string][]string
+ Files map[string]string
+}
+
+func readEmbedCfg(file string) {
+ data, err := ioutil.ReadFile(file)
+ if err != nil {
+ log.Fatalf("-embedcfg: %v", err)
+ }
+ if err := json.Unmarshal(data, &embedCfg); err != nil {
+ log.Fatalf("%s: %v", file, err)
+ }
+ if embedCfg.Patterns == nil {
+ log.Fatalf("%s: invalid embedcfg: missing Patterns", file)
+ }
+ if embedCfg.Files == nil {
+ log.Fatalf("%s: invalid embedcfg: missing Files", file)
+ }
+}
+
+const (
+ embedUnknown = iota
+ embedBytes
+ embedString
+ embedFiles
+)
+
+var numLocalEmbed int
+
+func varEmbed(p *noder, names []*Node, typ *Node, exprs []*Node, embeds []PragmaEmbed) (newExprs []*Node) {
+ haveEmbed := false
+ for _, decl := range p.file.DeclList {
+ imp, ok := decl.(*syntax.ImportDecl)
+ if !ok {
+ // imports always come first
+ break
+ }
+ path, _ := strconv.Unquote(imp.Path.Value)
+ if path == "embed" {
+ haveEmbed = true
+ break
+ }
+ }
+
+ pos := embeds[0].Pos
+ if !haveEmbed {
+ p.yyerrorpos(pos, "invalid go:embed: missing import \"embed\"")
+ return exprs
+ }
+ if embedCfg.Patterns == nil {
+ p.yyerrorpos(pos, "invalid go:embed: build system did not supply embed configuration")
+ return exprs
+ }
+ if len(names) > 1 {
+ p.yyerrorpos(pos, "go:embed cannot apply to multiple vars")
+ return exprs
+ }
+ if len(exprs) > 0 {
+ p.yyerrorpos(pos, "go:embed cannot apply to var with initializer")
+ return exprs
+ }
+ if typ == nil {
+ // Should not happen, since len(exprs) == 0 now.
+ p.yyerrorpos(pos, "go:embed cannot apply to var without type")
+ return exprs
+ }
+
+ kind := embedKindApprox(typ)
+ if kind == embedUnknown {
+ p.yyerrorpos(pos, "go:embed cannot apply to var of type %v", typ)
+ return exprs
+ }
+
+ // Build list of files to store.
+ have := make(map[string]bool)
+ var list []string
+ for _, e := range embeds {
+ for _, pattern := range e.Patterns {
+ files, ok := embedCfg.Patterns[pattern]
+ if !ok {
+ p.yyerrorpos(e.Pos, "invalid go:embed: build system did not map pattern: %s", pattern)
+ }
+ for _, file := range files {
+ if embedCfg.Files[file] == "" {
+ p.yyerrorpos(e.Pos, "invalid go:embed: build system did not map file: %s", file)
+ continue
+ }
+ if !have[file] {
+ have[file] = true
+ list = append(list, file)
+ }
+ if kind == embedFiles {
+ for dir := path.Dir(file); dir != "." && !have[dir]; dir = path.Dir(dir) {
+ have[dir] = true
+ list = append(list, dir+"/")
+ }
+ }
+ }
+ }
+ }
+ sort.Slice(list, func(i, j int) bool {
+ return embedFileLess(list[i], list[j])
+ })
+
+ if kind == embedString || kind == embedBytes {
+ if len(list) > 1 {
+ p.yyerrorpos(pos, "invalid go:embed: multiple files for type %v", typ)
+ return exprs
+ }
+ }
+
+ v := names[0]
+ if dclcontext != PEXTERN {
+ numLocalEmbed++
+ v = newnamel(v.Pos, lookupN("embed.", numLocalEmbed))
+ v.Sym.Def = asTypesNode(v)
+ v.Name.Param.Ntype = typ
+ v.SetClass(PEXTERN)
+ externdcl = append(externdcl, v)
+ exprs = []*Node{v}
+ }
+
+ v.Name.Param.SetEmbedFiles(list)
+ embedlist = append(embedlist, v)
+ return exprs
+}
+
+// embedKindApprox determines the kind of embedding variable, approximately.
+// The match is approximate because we haven't done scope resolution yet and
+// can't tell whether "string" and "byte" really mean "string" and "byte".
+// The result must be confirmed later, after type checking, using embedKind.
+func embedKindApprox(typ *Node) int {
+ if typ.Sym != nil && typ.Sym.Name == "FS" && (typ.Sym.Pkg.Path == "embed" || (typ.Sym.Pkg == localpkg && myimportpath == "embed")) {
+ return embedFiles
+ }
+ // These are not guaranteed to match only string and []byte -
+ // maybe the local package has redefined one of those words.
+ // But it's the best we can do now during the noder.
+ // The stricter check happens later, in initEmbed calling embedKind.
+ if typ.Sym != nil && typ.Sym.Name == "string" && typ.Sym.Pkg == localpkg {
+ return embedString
+ }
+ if typ.Op == OTARRAY && typ.Left == nil && typ.Right.Sym != nil && typ.Right.Sym.Name == "byte" && typ.Right.Sym.Pkg == localpkg {
+ return embedBytes
+ }
+ return embedUnknown
+}
+
+// embedKind determines the kind of embedding variable.
+func embedKind(typ *types.Type) int {
+ if typ.Sym != nil && typ.Sym.Name == "FS" && (typ.Sym.Pkg.Path == "embed" || (typ.Sym.Pkg == localpkg && myimportpath == "embed")) {
+ return embedFiles
+ }
+ if typ == types.Types[TSTRING] {
+ return embedString
+ }
+ if typ.Sym == nil && typ.IsSlice() && typ.Elem() == types.Bytetype {
+ return embedBytes
+ }
+ return embedUnknown
+}
+
+func embedFileNameSplit(name string) (dir, elem string, isDir bool) {
+ if name[len(name)-1] == '/' {
+ isDir = true
+ name = name[:len(name)-1]
+ }
+ i := len(name) - 1
+ for i >= 0 && name[i] != '/' {
+ i--
+ }
+ if i < 0 {
+ return ".", name, isDir
+ }
+ return name[:i], name[i+1:], isDir
+}
+
+// embedFileLess implements the sort order for a list of embedded files.
+// See the comment inside ../../../../embed/embed.go's Files struct for rationale.
+func embedFileLess(x, y string) bool {
+ xdir, xelem, _ := embedFileNameSplit(x)
+ ydir, yelem, _ := embedFileNameSplit(y)
+ return xdir < ydir || xdir == ydir && xelem < yelem
+}
+
+func dumpembeds() {
+ for _, v := range embedlist {
+ initEmbed(v)
+ }
+}
+
+// initEmbed emits the init data for a //go:embed variable,
+// which is either a string, a []byte, or an embed.FS.
+func initEmbed(v *Node) {
+ files := v.Name.Param.EmbedFiles()
+ switch kind := embedKind(v.Type); kind {
+ case embedUnknown:
+ yyerrorl(v.Pos, "go:embed cannot apply to var of type %v", v.Type)
+
+ case embedString, embedBytes:
+ file := files[0]
+ fsym, size, err := fileStringSym(v.Pos, embedCfg.Files[file], kind == embedString, nil)
+ if err != nil {
+ yyerrorl(v.Pos, "embed %s: %v", file, err)
+ }
+ sym := v.Sym.Linksym()
+ off := 0
+ off = dsymptr(sym, off, fsym, 0) // data string
+ off = duintptr(sym, off, uint64(size)) // len
+ if kind == embedBytes {
+ duintptr(sym, off, uint64(size)) // cap for slice
+ }
+
+ case embedFiles:
+ slicedata := Ctxt.Lookup(`"".` + v.Sym.Name + `.files`)
+ off := 0
+ // []files pointed at by Files
+ off = dsymptr(slicedata, off, slicedata, 3*Widthptr) // []file, pointing just past slice
+ off = duintptr(slicedata, off, uint64(len(files)))
+ off = duintptr(slicedata, off, uint64(len(files)))
+
+ // embed/embed.go type file is:
+ // name string
+ // data string
+ // hash [16]byte
+ // Emit one of these per file in the set.
+ const hashSize = 16
+ hash := make([]byte, hashSize)
+ for _, file := range files {
+ off = dsymptr(slicedata, off, stringsym(v.Pos, file), 0) // file string
+ off = duintptr(slicedata, off, uint64(len(file)))
+ if strings.HasSuffix(file, "/") {
+ // entry for directory - no data
+ off = duintptr(slicedata, off, 0)
+ off = duintptr(slicedata, off, 0)
+ off += hashSize
+ } else {
+ fsym, size, err := fileStringSym(v.Pos, embedCfg.Files[file], true, hash)
+ if err != nil {
+ yyerrorl(v.Pos, "embed %s: %v", file, err)
+ }
+ off = dsymptr(slicedata, off, fsym, 0) // data string
+ off = duintptr(slicedata, off, uint64(size))
+ off = int(slicedata.WriteBytes(Ctxt, int64(off), hash))
+ }
+ }
+ ggloblsym(slicedata, int32(off), obj.RODATA|obj.LOCAL)
+ sym := v.Sym.Linksym()
+ dsymptr(sym, 0, slicedata, 0)
+ }
+}
diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go
index 949755a0e2..4b401f2aa4 100644
--- a/src/cmd/compile/internal/gc/main.go
+++ b/src/cmd/compile/internal/gc/main.go
@@ -34,8 +34,6 @@ import (
"strings"
)
-var imported_unsafe bool
-
var (
buildid string
spectre string
@@ -240,6 +238,7 @@ func Main(archInit func(*Arch)) {
flag.BoolVar(&flagDWARF, "dwarf", !Wasm, "generate DWARF symbols")
flag.BoolVar(&Ctxt.Flag_locationlists, "dwarflocationlists", true, "add location lists to DWARF in optimized mode")
flag.IntVar(&genDwarfInline, "gendwarfinl", 2, "generate DWARF inline info records")
+ objabi.Flagfn1("embedcfg", "read go:embed configuration from `file`", readEmbedCfg)
objabi.Flagfn1("importmap", "add `definition` of the form source=actual to import map", addImportMap)
objabi.Flagfn1("importcfg", "read import configuration from `file`", readImportCfg)
flag.StringVar(&flag_installsuffix, "installsuffix", "", "set pkg directory `suffix`")
@@ -597,7 +596,7 @@ func Main(archInit func(*Arch)) {
timings.Start("fe", "typecheck", "top1")
for i := 0; i < len(xtop); i++ {
n := xtop[i]
- if op := n.Op; op != ODCL && op != OAS && op != OAS2 && (op != ODCLTYPE || !n.Left.Name.Param.Alias) {
+ if op := n.Op; op != ODCL && op != OAS && op != OAS2 && (op != ODCLTYPE || !n.Left.Name.Param.Alias()) {
xtop[i] = typecheck(n, ctxStmt)
}
}
@@ -609,7 +608,7 @@ func Main(archInit func(*Arch)) {
timings.Start("fe", "typecheck", "top2")
for i := 0; i < len(xtop); i++ {
n := xtop[i]
- if op := n.Op; op == ODCL || op == OAS || op == OAS2 || op == ODCLTYPE && n.Left.Name.Param.Alias {
+ if op := n.Op; op == ODCL || op == OAS || op == OAS2 || op == ODCLTYPE && n.Left.Name.Param.Alias() {
xtop[i] = typecheck(n, ctxStmt)
}
}
@@ -1177,7 +1176,6 @@ func importfile(f *Val) *types.Pkg {
}
if path_ == "unsafe" {
- imported_unsafe = true
return unsafepkg
}
diff --git a/src/cmd/compile/internal/gc/noder.go b/src/cmd/compile/internal/gc/noder.go
index 85e710086a..67d24ef0bc 100644
--- a/src/cmd/compile/internal/gc/noder.go
+++ b/src/cmd/compile/internal/gc/noder.go
@@ -11,6 +11,7 @@ import (
"runtime"
"strconv"
"strings"
+ "unicode"
"unicode/utf8"
"cmd/compile/internal/syntax"
@@ -90,7 +91,11 @@ func (p *noder) makeSrcPosBase(b0 *syntax.PosBase) *src.PosBase {
} else {
// line directive base
p0 := b0.Pos()
- p1 := src.MakePos(p.makeSrcPosBase(p0.Base()), p0.Line(), p0.Col())
+ p0b := p0.Base()
+ if p0b == b0 {
+ panic("infinite recursion in makeSrcPosBase")
+ }
+ p1 := src.MakePos(p.makeSrcPosBase(p0b), p0.Line(), p0.Col())
b1 = src.NewLinePragmaBase(p1, fn, fileh(fn), b0.Line(), b0.Col())
}
p.basemap[b0] = b1
@@ -130,11 +135,13 @@ type noder struct {
base *src.PosBase
}
- file *syntax.File
- linknames []linkname
- pragcgobuf [][]string
- err chan syntax.Error
- scope ScopeID
+ file *syntax.File
+ linknames []linkname
+ pragcgobuf [][]string
+ err chan syntax.Error
+ scope ScopeID
+ importedUnsafe bool
+ importedEmbed bool
// scopeVars is a stack tracking the number of variables declared in the
// current function at the moment each open scope was opened.
@@ -236,7 +243,8 @@ type linkname struct {
func (p *noder) node() {
types.Block = 1
- imported_unsafe = false
+ p.importedUnsafe = false
+ p.importedEmbed = false
p.setlineno(p.file.PkgName)
mkpackage(p.file.PkgName.Value)
@@ -249,7 +257,7 @@ func (p *noder) node() {
xtop = append(xtop, p.decls(p.file.DeclList)...)
for _, n := range p.linknames {
- if !imported_unsafe {
+ if !p.importedUnsafe {
p.yyerrorpos(n.pos, "//go:linkname only allowed in Go files that import \"unsafe\"")
continue
}
@@ -324,7 +332,6 @@ func (p *noder) importDecl(imp *syntax.ImportDecl) {
val := p.basicLit(imp.Path)
ipkg := importfile(&val)
-
if ipkg == nil {
if nerrors == 0 {
Fatalf("phase error in import")
@@ -332,6 +339,13 @@ func (p *noder) importDecl(imp *syntax.ImportDecl) {
return
}
+ if ipkg == unsafepkg {
+ p.importedUnsafe = true
+ }
+ if ipkg.Path == "embed" {
+ p.importedEmbed = true
+ }
+
ipkg.Direct = true
var my *types.Sym
@@ -373,6 +387,20 @@ func (p *noder) varDecl(decl *syntax.VarDecl) []*Node {
}
if pragma, ok := decl.Pragma.(*Pragma); ok {
+ if len(pragma.Embeds) > 0 {
+ if !p.importedEmbed {
+ // This check can't be done when building the list pragma.Embeds
+ // because that list is created before the noder starts walking over the file,
+ // so at that point it hasn't seen the imports.
+ // We're left to check now, just before applying the //go:embed lines.
+ for _, e := range pragma.Embeds {
+ p.yyerrorpos(e.Pos, "//go:embed only allowed in Go files that import \"embed\"")
+ }
+ } else {
+ exprs = varEmbed(p, names, typ, exprs, pragma.Embeds)
+ }
+ pragma.Embeds = nil
+ }
p.checkUnused(pragma)
}
@@ -455,17 +483,17 @@ func (p *noder) typeDecl(decl *syntax.TypeDecl) *Node {
param := n.Name.Param
param.Ntype = typ
- param.Alias = decl.Alias
+ param.SetAlias(decl.Alias)
if pragma, ok := decl.Pragma.(*Pragma); ok {
if !decl.Alias {
- param.Pragma = pragma.Flag & TypePragmas
+ param.SetPragma(pragma.Flag & TypePragmas)
pragma.Flag &^= TypePragmas
}
p.checkUnused(pragma)
}
nod := p.nod(decl, ODCLTYPE, n, nil)
- if param.Alias && !langSupported(1, 9, localpkg) {
+ if param.Alias() && !langSupported(1, 9, localpkg) {
yyerrorl(nod.Pos, "type aliases only supported as of -lang=go1.9")
}
return nod
@@ -1493,13 +1521,15 @@ var allowedStdPragmas = map[string]bool{
"go:cgo_import_dynamic": true,
"go:cgo_ldflag": true,
"go:cgo_dynamic_linker": true,
+ "go:embed": true,
"go:generate": true,
}
// *Pragma is the value stored in a syntax.Pragma during parsing.
type Pragma struct {
- Flag PragmaFlag // collected bits
- Pos []PragmaPos // position of each individual flag
+ Flag PragmaFlag // collected bits
+ Pos []PragmaPos // position of each individual flag
+ Embeds []PragmaEmbed
}
type PragmaPos struct {
@@ -1507,12 +1537,22 @@ type PragmaPos struct {
Pos syntax.Pos
}
+type PragmaEmbed struct {
+ Pos syntax.Pos
+ Patterns []string
+}
+
func (p *noder) checkUnused(pragma *Pragma) {
for _, pos := range pragma.Pos {
if pos.Flag&pragma.Flag != 0 {
p.yyerrorpos(pos.Pos, "misplaced compiler directive")
}
}
+ if len(pragma.Embeds) > 0 {
+ for _, e := range pragma.Embeds {
+ p.yyerrorpos(e.Pos, "misplaced go:embed directive")
+ }
+ }
}
func (p *noder) checkUnusedDuringParse(pragma *Pragma) {
@@ -1521,6 +1561,11 @@ func (p *noder) checkUnusedDuringParse(pragma *Pragma) {
p.error(syntax.Error{Pos: pos.Pos, Msg: "misplaced compiler directive"})
}
}
+ if len(pragma.Embeds) > 0 {
+ for _, e := range pragma.Embeds {
+ p.error(syntax.Error{Pos: e.Pos, Msg: "misplaced go:embed directive"})
+ }
+ }
}
// pragma is called concurrently if files are parsed concurrently.
@@ -1565,6 +1610,17 @@ func (p *noder) pragma(pos syntax.Pos, blankLine bool, text string, old syntax.P
}
p.linknames = append(p.linknames, linkname{pos, f[1], target})
+ case text == "go:embed", strings.HasPrefix(text, "go:embed "):
+ args, err := parseGoEmbed(text[len("go:embed"):])
+ if err != nil {
+ p.error(syntax.Error{Pos: pos, Msg: err.Error()})
+ }
+ if len(args) == 0 {
+ p.error(syntax.Error{Pos: pos, Msg: "usage: //go:embed pattern..."})
+ break
+ }
+ pragma.Embeds = append(pragma.Embeds, PragmaEmbed{pos, args})
+
case strings.HasPrefix(text, "go:cgo_import_dynamic "):
// This is permitted for general use because Solaris
// code relies on it in golang.org/x/sys/unix and others.
@@ -1637,3 +1693,64 @@ func mkname(sym *types.Sym) *Node {
}
return n
}
+
+// parseGoEmbed parses the text following "//go:embed" to extract the glob patterns.
+// It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings.
+// go/build/read.go also processes these strings and contains similar logic.
+func parseGoEmbed(args string) ([]string, error) {
+ var list []string
+ for args = strings.TrimSpace(args); args != ""; args = strings.TrimSpace(args) {
+ var path string
+ Switch:
+ switch args[0] {
+ default:
+ i := len(args)
+ for j, c := range args {
+ if unicode.IsSpace(c) {
+ i = j
+ break
+ }
+ }
+ path = args[:i]
+ args = args[i:]
+
+ case '`':
+ i := strings.Index(args[1:], "`")
+ if i < 0 {
+ return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
+ }
+ path = args[1 : 1+i]
+ args = args[1+i+1:]
+
+ case '"':
+ i := 1
+ for ; i < len(args); i++ {
+ if args[i] == '\\' {
+ i++
+ continue
+ }
+ if args[i] == '"' {
+ q, err := strconv.Unquote(args[:i+1])
+ if err != nil {
+ return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1])
+ }
+ path = q
+ args = args[i+1:]
+ break Switch
+ }
+ }
+ if i >= len(args) {
+ return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
+ }
+ }
+
+ if args != "" {
+ r, _ := utf8.DecodeRuneInString(args)
+ if !unicode.IsSpace(r) {
+ return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
+ }
+ }
+ list = append(list, path)
+ }
+ return list, nil
+}
diff --git a/src/cmd/compile/internal/gc/obj.go b/src/cmd/compile/internal/gc/obj.go
index f6557e2d15..226eb45252 100644
--- a/src/cmd/compile/internal/gc/obj.go
+++ b/src/cmd/compile/internal/gc/obj.go
@@ -14,6 +14,8 @@ import (
"encoding/json"
"fmt"
"io"
+ "io/ioutil"
+ "os"
"sort"
"strconv"
)
@@ -125,6 +127,7 @@ func dumpdata() {
itabsLen := len(itabs)
dumpimportstrings()
dumpbasictypes()
+ dumpembeds()
// Calls to dumpsignats can generate functions,
// like method wrappers and hash and equality routines.
@@ -358,28 +361,31 @@ func dbvec(s *obj.LSym, off int, bv bvec) int {
return off
}
+const (
+ stringSymPrefix = "go.string."
+ stringSymPattern = ".gostring.%d.%x"
+)
+
+// stringsym returns a symbol containing the string s.
+// The symbol contains the string data, not a string header.
func stringsym(pos src.XPos, s string) (data *obj.LSym) {
var symname string
if len(s) > 100 {
// Huge strings are hashed to avoid long names in object files.
// Indulge in some paranoia by writing the length of s, too,
// as protection against length extension attacks.
+ // Same pattern is known to fileStringSym below.
h := sha256.New()
io.WriteString(h, s)
- symname = fmt.Sprintf(".gostring.%d.%x", len(s), h.Sum(nil))
+ symname = fmt.Sprintf(stringSymPattern, len(s), h.Sum(nil))
} else {
// Small strings get named directly by their contents.
symname = strconv.Quote(s)
}
- const prefix = "go.string."
- symdataname := prefix + symname
-
- symdata := Ctxt.Lookup(symdataname)
-
+ symdata := Ctxt.Lookup(stringSymPrefix + symname)
if !symdata.OnList() {
- // string data
- off := dsname(symdata, 0, s, pos, "string")
+ off := dstringdata(symdata, 0, s, pos, "string")
ggloblsym(symdata, int32(off), obj.DUPOK|obj.RODATA|obj.LOCAL)
symdata.Set(obj.AttrContentAddressable, true)
}
@@ -387,26 +393,122 @@ func stringsym(pos src.XPos, s string) (data *obj.LSym) {
return symdata
}
-var slicebytes_gen int
+// fileStringSym returns a symbol for the contents and the size of file.
+// If readonly is true, the symbol shares storage with any literal string
+// or other file with the same content and is placed in a read-only section.
+// If readonly is false, the symbol is a read-write copy separate from any other,
+// for use as the backing store of a []byte.
+// The content hash of file is copied into hash. (If hash is nil, nothing is copied.)
+// The returned symbol contains the data itself, not a string header.
+func fileStringSym(pos src.XPos, file string, readonly bool, hash []byte) (*obj.LSym, int64, error) {
+ f, err := os.Open(file)
+ if err != nil {
+ return nil, 0, err
+ }
+ defer f.Close()
+ info, err := f.Stat()
+ if err != nil {
+ return nil, 0, err
+ }
+ if !info.Mode().IsRegular() {
+ return nil, 0, fmt.Errorf("not a regular file")
+ }
+ size := info.Size()
+ if size <= 1*1024 {
+ data, err := ioutil.ReadAll(f)
+ if err != nil {
+ return nil, 0, err
+ }
+ if int64(len(data)) != size {
+ return nil, 0, fmt.Errorf("file changed between reads")
+ }
+ var sym *obj.LSym
+ if readonly {
+ sym = stringsym(pos, string(data))
+ } else {
+ sym = slicedata(pos, string(data)).Sym.Linksym()
+ }
+ if len(hash) > 0 {
+ sum := sha256.Sum256(data)
+ copy(hash, sum[:])
+ }
+ return sym, size, nil
+ }
+ if size > 2e9 {
+ // ggloblsym takes an int32,
+ // and probably the rest of the toolchain
+ // can't handle such big symbols either.
+ // See golang.org/issue/9862.
+ return nil, 0, fmt.Errorf("file too large")
+ }
-func slicebytes(nam *Node, s string) {
- slicebytes_gen++
- symname := fmt.Sprintf(".gobytes.%d", slicebytes_gen)
+ // File is too big to read and keep in memory.
+ // Compute hash if needed for read-only content hashing or if the caller wants it.
+ var sum []byte
+ if readonly || len(hash) > 0 {
+ h := sha256.New()
+ n, err := io.Copy(h, f)
+ if err != nil {
+ return nil, 0, err
+ }
+ if n != size {
+ return nil, 0, fmt.Errorf("file changed between reads")
+ }
+ sum = h.Sum(nil)
+ copy(hash, sum)
+ }
+
+ var symdata *obj.LSym
+ if readonly {
+ symname := fmt.Sprintf(stringSymPattern, size, sum)
+ symdata = Ctxt.Lookup(stringSymPrefix + symname)
+ if !symdata.OnList() {
+ info := symdata.NewFileInfo()
+ info.Name = file
+ info.Size = size
+ ggloblsym(symdata, int32(size), obj.DUPOK|obj.RODATA|obj.LOCAL)
+ // Note: AttrContentAddressable cannot be set here,
+ // because the content-addressable-handling code
+ // does not know about file symbols.
+ }
+ } else {
+ // Emit a zero-length data symbol
+ // and then fix up length and content to use file.
+ symdata = slicedata(pos, "").Sym.Linksym()
+ symdata.Size = size
+ symdata.Type = objabi.SNOPTRDATA
+ info := symdata.NewFileInfo()
+ info.Name = file
+ info.Size = size
+ }
+
+ return symdata, size, nil
+}
+
+var slicedataGen int
+
+func slicedata(pos src.XPos, s string) *Node {
+ slicedataGen++
+ symname := fmt.Sprintf(".gobytes.%d", slicedataGen)
sym := localpkg.Lookup(symname)
symnode := newname(sym)
sym.Def = asTypesNode(symnode)
lsym := sym.Linksym()
- off := dsname(lsym, 0, s, nam.Pos, "slice")
+ off := dstringdata(lsym, 0, s, pos, "slice")
ggloblsym(lsym, int32(off), obj.NOPTR|obj.LOCAL)
+ return symnode
+}
+
+func slicebytes(nam *Node, s string) {
if nam.Op != ONAME {
Fatalf("slicebytes %v", nam)
}
- slicesym(nam, symnode, int64(len(s)))
+ slicesym(nam, slicedata(nam.Pos, s), int64(len(s)))
}
-func dsname(s *obj.LSym, off int, t string, pos src.XPos, what string) int {
+func dstringdata(s *obj.LSym, off int, t string, pos src.XPos, what string) int {
// Objects that are too large will cause the data section to overflow right away,
// causing a cryptic error message by the linker. Check for oversize objects here
// and provide a useful error message instead.
diff --git a/src/cmd/compile/internal/gc/syntax.go b/src/cmd/compile/internal/gc/syntax.go
index e3b4963977..83b5db834f 100644
--- a/src/cmd/compile/internal/gc/syntax.go
+++ b/src/cmd/compile/internal/gc/syntax.go
@@ -480,11 +480,87 @@ type Param struct {
Innermost *Node
Outer *Node
- // OTYPE
- //
- // TODO: Should Func pragmas also be stored on the Name?
- Pragma PragmaFlag
- Alias bool // node is alias for Ntype (only used when type-checking ODCLTYPE)
+ // OTYPE & ONAME //go:embed info,
+ // sharing storage to reduce gc.Param size.
+ // Extra is nil, or else *Extra is a *paramType or an *embedFileList.
+ Extra *interface{}
+}
+
+type paramType struct {
+ flag PragmaFlag
+ alias bool
+}
+
+type embedFileList []string
+
+// Pragma returns the PragmaFlag for p, which must be for an OTYPE.
+func (p *Param) Pragma() PragmaFlag {
+ if p.Extra == nil {
+ return 0
+ }
+ return (*p.Extra).(*paramType).flag
+}
+
+// SetPragma sets the PragmaFlag for p, which must be for an OTYPE.
+func (p *Param) SetPragma(flag PragmaFlag) {
+ if p.Extra == nil {
+ if flag == 0 {
+ return
+ }
+ p.Extra = new(interface{})
+ *p.Extra = ¶mType{flag: flag}
+ return
+ }
+ (*p.Extra).(*paramType).flag = flag
+}
+
+// Alias reports whether p, which must be for an OTYPE, is a type alias.
+func (p *Param) Alias() bool {
+ if p.Extra == nil {
+ return false
+ }
+ t, ok := (*p.Extra).(*paramType)
+ if !ok {
+ return false
+ }
+ return t.alias
+}
+
+// SetAlias sets whether p, which must be for an OTYPE, is a type alias.
+func (p *Param) SetAlias(alias bool) {
+ if p.Extra == nil {
+ if !alias {
+ return
+ }
+ p.Extra = new(interface{})
+ *p.Extra = ¶mType{alias: alias}
+ return
+ }
+ (*p.Extra).(*paramType).alias = alias
+}
+
+// EmbedFiles returns the list of embedded files for p,
+// which must be for an ONAME var.
+func (p *Param) EmbedFiles() []string {
+ if p.Extra == nil {
+ return nil
+ }
+ return *(*p.Extra).(*embedFileList)
+}
+
+// SetEmbedFiles sets the list of embedded files for p,
+// which must be for an ONAME var.
+func (p *Param) SetEmbedFiles(list []string) {
+ if p.Extra == nil {
+ if len(list) == 0 {
+ return
+ }
+ f := embedFileList(list)
+ p.Extra = new(interface{})
+ *p.Extra = &f
+ return
+ }
+ *(*p.Extra).(*embedFileList) = list
}
// Functions
diff --git a/src/cmd/compile/internal/gc/typecheck.go b/src/cmd/compile/internal/gc/typecheck.go
index a4b462da1d..ce817db446 100644
--- a/src/cmd/compile/internal/gc/typecheck.go
+++ b/src/cmd/compile/internal/gc/typecheck.go
@@ -257,12 +257,12 @@ func typecheck(n *Node, top int) (res *Node) {
// are substituted.
cycle := cycleFor(n)
for _, n1 := range cycle {
- if n1.Name != nil && !n1.Name.Param.Alias {
+ if n1.Name != nil && !n1.Name.Param.Alias() {
// Cycle is ok. But if n is an alias type and doesn't
// have a type yet, we have a recursive type declaration
// with aliases that we can't handle properly yet.
// Report an error rather than crashing later.
- if n.Name != nil && n.Name.Param.Alias && n.Type == nil {
+ if n.Name != nil && n.Name.Param.Alias() && n.Type == nil {
lineno = n.Pos
Fatalf("cannot handle alias type declaration (issue #25838): %v", n)
}
@@ -3504,7 +3504,7 @@ func setUnderlying(t, underlying *types.Type) {
}
// Propagate go:notinheap pragma from the Name to the Type.
- if n.Name != nil && n.Name.Param != nil && n.Name.Param.Pragma&NotInHeap != 0 {
+ if n.Name != nil && n.Name.Param != nil && n.Name.Param.Pragma()&NotInHeap != 0 {
t.SetNotInHeap(true)
}
@@ -3676,7 +3676,7 @@ func typecheckdef(n *Node) {
n.Name.Defn = typecheck(n.Name.Defn, ctxStmt) // fills in n.Type
case OTYPE:
- if p := n.Name.Param; p.Alias {
+ if p := n.Name.Param; p.Alias() {
// Type alias declaration: Simply use the rhs type - no need
// to create a new type.
// If we have a syntax error, p.Ntype may be nil.
diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go
index ad4708138f..2037beca72 100644
--- a/src/cmd/internal/obj/link.go
+++ b/src/cmd/internal/obj/link.go
@@ -401,7 +401,7 @@ type LSym struct {
P []byte
R []Reloc
- Extra *interface{} // *FuncInfo if present
+ Extra *interface{} // *FuncInfo or *FileInfo, if present
Pkg string
PkgIdx int32
@@ -454,6 +454,33 @@ func (s *LSym) Func() *FuncInfo {
return f
}
+// A FileInfo contains extra fields for SDATA symbols backed by files.
+// (If LSym.Extra is a *FileInfo, LSym.P == nil.)
+type FileInfo struct {
+ Name string // name of file to read into object file
+ Size int64 // length of file
+}
+
+// NewFileInfo allocates and returns a FileInfo for LSym.
+func (s *LSym) NewFileInfo() *FileInfo {
+ if s.Extra != nil {
+ log.Fatalf("invalid use of LSym - NewFileInfo with Extra of type %T", *s.Extra)
+ }
+ f := new(FileInfo)
+ s.Extra = new(interface{})
+ *s.Extra = f
+ return f
+}
+
+// File returns the *FileInfo associated with s, or else nil.
+func (s *LSym) File() *FileInfo {
+ if s.Extra == nil {
+ return nil
+ }
+ f, _ := (*s.Extra).(*FileInfo)
+ return f
+}
+
type InlMark struct {
// When unwinding from an instruction in an inlined body, mark
// where we should unwind to.
diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go
index a24a7b878f..bb58b4f0c2 100644
--- a/src/cmd/internal/obj/objfile.go
+++ b/src/cmd/internal/obj/objfile.go
@@ -16,6 +16,8 @@ import (
"encoding/binary"
"fmt"
"io"
+ "log"
+ "os"
"path/filepath"
"sort"
"strings"
@@ -147,14 +149,20 @@ func WriteObjFile(ctxt *Link, b *bio.Writer) {
// Data indexes
h.Offsets[goobj.BlkDataIdx] = w.Offset()
- dataOff := uint32(0)
+ dataOff := int64(0)
for _, list := range lists {
for _, s := range list {
- w.Uint32(dataOff)
- dataOff += uint32(len(s.P))
+ w.Uint32(uint32(dataOff))
+ dataOff += int64(len(s.P))
+ if file := s.File(); file != nil {
+ dataOff += int64(file.Size)
+ }
}
}
- w.Uint32(dataOff)
+ if int64(uint32(dataOff)) != dataOff {
+ log.Fatalf("data too large")
+ }
+ w.Uint32(uint32(dataOff))
// Relocs
h.Offsets[goobj.BlkReloc] = w.Offset()
@@ -179,6 +187,9 @@ func WriteObjFile(ctxt *Link, b *bio.Writer) {
for _, list := range lists {
for _, s := range list {
w.Bytes(s.P)
+ if file := s.File(); file != nil {
+ w.writeFile(ctxt, file)
+ }
}
}
@@ -218,6 +229,7 @@ func WriteObjFile(ctxt *Link, b *bio.Writer) {
type writer struct {
*goobj.Writer
+ filebuf []byte
ctxt *Link
pkgpath string // the package import path (escaped), "" if unknown
pkglist []string // list of packages referenced, indexed by ctxt.pkgIdx
@@ -232,6 +244,35 @@ func (w *writer) init() {
}
}
+func (w *writer) writeFile(ctxt *Link, file *FileInfo) {
+ f, err := os.Open(file.Name)
+ if err != nil {
+ ctxt.Diag("%v", err)
+ return
+ }
+ defer f.Close()
+ if w.filebuf == nil {
+ w.filebuf = make([]byte, 1024)
+ }
+ buf := w.filebuf
+ written := int64(0)
+ for {
+ n, err := f.Read(buf)
+ w.Bytes(buf[:n])
+ written += int64(n)
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ ctxt.Diag("%v", err)
+ return
+ }
+ }
+ if written != file.Size {
+ ctxt.Diag("copy %s: unexpected length %d != %d", file.Name, written, file.Size)
+ }
+}
+
func (w *writer) StringTable() {
w.AddString("")
for _, p := range w.ctxt.Imports {
--
GitLab
From 7e01b3b3879593828b89f4ff4a04667a547b22d9 Mon Sep 17 00:00:00 2001
From: Nigel Tao
Date: Fri, 23 Oct 2020 10:41:50 +1100
Subject: [PATCH 010/253] strconv: add eiselLemire32
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This does for ParseFloat(etc, 32) what commit a2eb53c571 did for
ParseFloat(etc, 64).
name old time/op new time/op delta
Atof32Decimal-4 48.3ns ± 4% 48.8ns ± 2% ~ (p=0.548 n=5+5)
Atof32Float-4 56.2ns ± 5% 54.7ns ± 3% ~ (p=0.246 n=5+5)
Atof32FloatExp-4 104ns ± 0% 76ns ± 2% -27.19% (p=0.008 n=5+5)
Atof32Random-4 142ns ± 2% 109ns ± 1% -23.07% (p=0.008 n=5+5)
Change-Id: I6ee5a2f2d791d4fe3028f1d40aca96400120fda0
Reviewed-on: https://go-review.googlesource.com/c/go/+/264517
Trust: Nigel Tao
Trust: Robert Griesemer
Reviewed-by: Robert Griesemer
---
src/strconv/atof.go | 4 +-
src/strconv/eisel_lemire.go | 86 ++++++++++++++++++++++++++++++++++---
2 files changed, 84 insertions(+), 6 deletions(-)
diff --git a/src/strconv/atof.go b/src/strconv/atof.go
index c0b9c1f1e0..e61eeab1c3 100644
--- a/src/strconv/atof.go
+++ b/src/strconv/atof.go
@@ -581,6 +581,8 @@ func atof32(s string) (f float32, n int, err error) {
if !trunc {
if f, ok := atof32exact(mantissa, exp, neg); ok {
return f, n, nil
+ } else if f, ok = eiselLemire32(mantissa, exp, neg); ok {
+ return f, n, nil
}
}
// Try another fast path.
@@ -629,7 +631,7 @@ func atof64(s string) (f float64, n int, err error) {
if !trunc {
if f, ok := atof64exact(mantissa, exp, neg); ok {
return f, n, nil
- } else if f, ok = eiselLemire(mantissa, exp, neg); ok {
+ } else if f, ok = eiselLemire64(mantissa, exp, neg); ok {
return f, n, nil
}
}
diff --git a/src/strconv/eisel_lemire.go b/src/strconv/eisel_lemire.go
index e548270688..6c7f852eba 100644
--- a/src/strconv/eisel_lemire.go
+++ b/src/strconv/eisel_lemire.go
@@ -15,14 +15,14 @@ package strconv
// https://github.com/google/wuffs/blob/ba3818cb6b473a2ed0b38ecfc07dbbd3a97e8ae7/internal/cgen/base/floatconv-submodule-code.c#L990
//
// Additional testing (on over several million test strings) is done by
-// https://github.com/nigeltao/parse-number-f64-test-data/blob/d085ef805be7f0e8f61066619364b2f529ea75f2/script/test-go-strconv.go
+// https://github.com/nigeltao/parse-number-fxx-test-data/blob/5280dcfccf6d0b02a65ae282dad0b6d9de50e039/script/test-go-strconv.go
import (
"math"
"math/bits"
)
-func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) {
+func eiselLemire64(man uint64, exp10 int, neg bool) (f float64, ok bool) {
// The terse comments in this function body refer to sections of the
// https://nigeltao.github.io/blog/2020/eisel-lemire.html blog post.
@@ -40,7 +40,8 @@ func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) {
// Normalization.
clz := bits.LeadingZeros64(man)
man <<= clz
- retExp2 := uint64(217706*exp10>>16+1087) - uint64(clz)
+ const float64ExponentBias = 1023
+ retExp2 := uint64(217706*exp10>>16+64+float64ExponentBias) - uint64(clz)
// Multiplication.
xHi, xLo := bits.Mul64(man, detailedPowersOfTen[exp10-detailedPowersOfTenMinExp10][1])
@@ -78,8 +79,8 @@ func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) {
// retExp2 is a uint64. Zero or underflow means that we're in subnormal
// float64 space. 0x7FF or above means that we're in Inf/NaN float64 space.
//
- // The if condition is equivalent to (but has fewer branches than):
- // if retExp2 <= 0 || retExp2 >= 0x7FF {
+ // The if block is equivalent to (but has fewer branches than):
+ // if retExp2 <= 0 || retExp2 >= 0x7FF { etc }
if retExp2-1 >= 0x7FF-1 {
return 0, false
}
@@ -90,6 +91,81 @@ func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) {
return math.Float64frombits(retBits), true
}
+func eiselLemire32(man uint64, exp10 int, neg bool) (f float32, ok bool) {
+ // The terse comments in this function body refer to sections of the
+ // https://nigeltao.github.io/blog/2020/eisel-lemire.html blog post.
+ //
+ // That blog post discusses the float64 flavor (11 exponent bits with a
+ // -1023 bias, 52 mantissa bits) of the algorithm, but the same approach
+ // applies to the float32 flavor (8 exponent bits with a -127 bias, 23
+ // mantissa bits). The computation here happens with 64-bit values (e.g.
+ // man, xHi, retMantissa) before finally converting to a 32-bit float.
+
+ // Exp10 Range.
+ if man == 0 {
+ if neg {
+ f = math.Float32frombits(0x80000000) // Negative zero.
+ }
+ return f, true
+ }
+ if exp10 < detailedPowersOfTenMinExp10 || detailedPowersOfTenMaxExp10 < exp10 {
+ return 0, false
+ }
+
+ // Normalization.
+ clz := bits.LeadingZeros64(man)
+ man <<= clz
+ const float32ExponentBias = 127
+ retExp2 := uint64(217706*exp10>>16+64+float32ExponentBias) - uint64(clz)
+
+ // Multiplication.
+ xHi, xLo := bits.Mul64(man, detailedPowersOfTen[exp10-detailedPowersOfTenMinExp10][1])
+
+ // Wider Approximation.
+ if xHi&0x3F_FFFFFFFF == 0x3F_FFFFFFFF && xLo+man < man {
+ yHi, yLo := bits.Mul64(man, detailedPowersOfTen[exp10-detailedPowersOfTenMinExp10][0])
+ mergedHi, mergedLo := xHi, xLo+yHi
+ if mergedLo < xLo {
+ mergedHi++
+ }
+ if mergedHi&0x3F_FFFFFFFF == 0x3F_FFFFFFFF && mergedLo+1 == 0 && yLo+man < man {
+ return 0, false
+ }
+ xHi, xLo = mergedHi, mergedLo
+ }
+
+ // Shifting to 54 Bits (and for float32, it's shifting to 25 bits).
+ msb := xHi >> 63
+ retMantissa := xHi >> (msb + 38)
+ retExp2 -= 1 ^ msb
+
+ // Half-way Ambiguity.
+ if xLo == 0 && xHi&0x3F_FFFFFFFF == 0 && retMantissa&3 == 1 {
+ return 0, false
+ }
+
+ // From 54 to 53 Bits (and for float32, it's from 25 to 24 bits).
+ retMantissa += retMantissa & 1
+ retMantissa >>= 1
+ if retMantissa>>24 > 0 {
+ retMantissa >>= 1
+ retExp2 += 1
+ }
+ // retExp2 is a uint64. Zero or underflow means that we're in subnormal
+ // float32 space. 0xFF or above means that we're in Inf/NaN float32 space.
+ //
+ // The if block is equivalent to (but has fewer branches than):
+ // if retExp2 <= 0 || retExp2 >= 0xFF { etc }
+ if retExp2-1 >= 0xFF-1 {
+ return 0, false
+ }
+ retBits := retExp2<<23 | retMantissa&0x007FFFFF
+ if neg {
+ retBits |= 0x80000000
+ }
+ return math.Float32frombits(uint32(retBits)), true
+}
+
// detailedPowersOfTen{Min,Max}Exp10 is the power of 10 represented by the
// first and last rows of detailedPowersOfTen. Both bounds are inclusive.
const (
--
GitLab
From ad61343f886cc5ce677e7bd62385144b2ba7b8f5 Mon Sep 17 00:00:00 2001
From: Michael Pratt
Date: Thu, 8 Oct 2020 14:38:39 -0400
Subject: [PATCH 011/253] runtime/internal/atomic: add 32-bit And/Or
These will be used in a following CL to perform larger bit clear and bit
set than And8/Or8.
Change-Id: I60f7b1099e29b69eb64add77564faee862880a8d
Reviewed-on: https://go-review.googlesource.com/c/go/+/260977
Run-TryBot: Michael Pratt
TryBot-Result: Go Bot
Reviewed-by: Cherry Zhang
Trust: Michael Pratt
---
src/runtime/internal/atomic/asm_386.s | 16 +++
src/runtime/internal/atomic/asm_amd64.s | 16 +++
src/runtime/internal/atomic/asm_mips64x.s | 26 ++++
src/runtime/internal/atomic/asm_mipsx.s | 26 ++++
src/runtime/internal/atomic/asm_ppc64x.s | 30 ++++-
src/runtime/internal/atomic/asm_s390x.s | 22 +++-
src/runtime/internal/atomic/atomic_386.go | 6 +
src/runtime/internal/atomic/atomic_amd64.go | 6 +
src/runtime/internal/atomic/atomic_arm.go | 20 +++
src/runtime/internal/atomic/atomic_arm64.go | 6 +
src/runtime/internal/atomic/atomic_arm64.s | 19 +++
src/runtime/internal/atomic/atomic_mips64x.go | 6 +
src/runtime/internal/atomic/atomic_mipsx.go | 6 +
src/runtime/internal/atomic/atomic_ppc64x.go | 6 +
src/runtime/internal/atomic/atomic_riscv64.go | 6 +
src/runtime/internal/atomic/atomic_riscv64.s | 14 +++
src/runtime/internal/atomic/atomic_s390x.go | 6 +
src/runtime/internal/atomic/atomic_test.go | 119 +++++++++++++++++-
src/runtime/internal/atomic/atomic_wasm.go | 12 ++
src/runtime/internal/atomic/bench_test.go | 40 ++++++
20 files changed, 400 insertions(+), 8 deletions(-)
diff --git a/src/runtime/internal/atomic/asm_386.s b/src/runtime/internal/atomic/asm_386.s
index 7ebf675ac5..d82faef1f0 100644
--- a/src/runtime/internal/atomic/asm_386.s
+++ b/src/runtime/internal/atomic/asm_386.s
@@ -243,3 +243,19 @@ TEXT ·Store8(SB), NOSPLIT, $0-5
MOVB val+4(FP), AX
XCHGB AX, 0(BX)
RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-8
+ MOVL ptr+0(FP), AX
+ MOVL val+4(FP), BX
+ LOCK
+ ORL BX, (AX)
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-8
+ MOVL ptr+0(FP), AX
+ MOVL val+4(FP), BX
+ LOCK
+ ANDL BX, (AX)
+ RET
diff --git a/src/runtime/internal/atomic/asm_amd64.s b/src/runtime/internal/atomic/asm_amd64.s
index 80fb31285d..2cf7c55870 100644
--- a/src/runtime/internal/atomic/asm_amd64.s
+++ b/src/runtime/internal/atomic/asm_amd64.s
@@ -169,3 +169,19 @@ TEXT runtime∕internal∕atomic·And8(SB), NOSPLIT, $0-9
LOCK
ANDB BX, (AX)
RET
+
+// func Or(addr *uint32, v uint32)
+TEXT runtime∕internal∕atomic·Or(SB), NOSPLIT, $0-12
+ MOVQ ptr+0(FP), AX
+ MOVL val+8(FP), BX
+ LOCK
+ ORL BX, (AX)
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT runtime∕internal∕atomic·And(SB), NOSPLIT, $0-12
+ MOVQ ptr+0(FP), AX
+ MOVL val+8(FP), BX
+ LOCK
+ ANDL BX, (AX)
+ RET
diff --git a/src/runtime/internal/atomic/asm_mips64x.s b/src/runtime/internal/atomic/asm_mips64x.s
index 03fb822929..a515683ebb 100644
--- a/src/runtime/internal/atomic/asm_mips64x.s
+++ b/src/runtime/internal/atomic/asm_mips64x.s
@@ -243,3 +243,29 @@ TEXT ·And8(SB), NOSPLIT, $0-9
BEQ R4, -4(PC)
SYNC
RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+ MOVV ptr+0(FP), R1
+ MOVW val+8(FP), R2
+
+ SYNC
+ LL (R1), R3
+ OR R2, R3
+ SC R3, (R1)
+ BEQ R3, -4(PC)
+ SYNC
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+ MOVV ptr+0(FP), R1
+ MOVW val+8(FP), R2
+
+ SYNC
+ LL (R1), R3
+ AND R2, R3
+ SC R3, (R1)
+ BEQ R3, -4(PC)
+ SYNC
+ RET
diff --git a/src/runtime/internal/atomic/asm_mipsx.s b/src/runtime/internal/atomic/asm_mipsx.s
index 63bb548825..2b2cfabe08 100644
--- a/src/runtime/internal/atomic/asm_mipsx.s
+++ b/src/runtime/internal/atomic/asm_mipsx.s
@@ -172,3 +172,29 @@ try_and8:
BEQ R4, try_and8
SYNC
RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-8
+ MOVW ptr+0(FP), R1
+ MOVW val+4(FP), R2
+
+ SYNC
+ LL (R1), R3
+ OR R2, R3
+ SC R3, (R1)
+ BEQ R3, -4(PC)
+ SYNC
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-8
+ MOVW ptr+0(FP), R1
+ MOVW val+4(FP), R2
+
+ SYNC
+ LL (R1), R3
+ AND R2, R3
+ SC R3, (R1)
+ BEQ R3, -4(PC)
+ SYNC
+ RET
diff --git a/src/runtime/internal/atomic/asm_ppc64x.s b/src/runtime/internal/atomic/asm_ppc64x.s
index c0237de4d0..bb009ab34d 100644
--- a/src/runtime/internal/atomic/asm_ppc64x.s
+++ b/src/runtime/internal/atomic/asm_ppc64x.s
@@ -222,8 +222,32 @@ TEXT runtime∕internal∕atomic·And8(SB), NOSPLIT, $0-9
MOVBZ val+8(FP), R4
LWSYNC
again:
- LBAR (R3),R6
- AND R4,R6
- STBCCC R6,(R3)
+ LBAR (R3), R6
+ AND R4, R6
+ STBCCC R6, (R3)
+ BNE again
+ RET
+
+// func Or(addr *uint32, v uint32)
+TEXT runtime∕internal∕atomic·Or(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R3
+ MOVW val+8(FP), R4
+ LWSYNC
+again:
+ LWAR (R3), R6
+ OR R4, R6
+ STWCCC R6, (R3)
+ BNE again
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT runtime∕internal∕atomic·And(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R3
+ MOVW val+8(FP), R4
+ LWSYNC
+again:
+ LWAR (R3),R6
+ AND R4, R6
+ STWCCC R6, (R3)
BNE again
RET
diff --git a/src/runtime/internal/atomic/asm_s390x.s b/src/runtime/internal/atomic/asm_s390x.s
index 9a19bc0ece..daf1f3cc9f 100644
--- a/src/runtime/internal/atomic/asm_s390x.s
+++ b/src/runtime/internal/atomic/asm_s390x.s
@@ -174,8 +174,8 @@ TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
// func Or8(addr *uint8, v uint8)
TEXT ·Or8(SB), NOSPLIT, $0-9
- MOVD ptr+0(FP), R3
- MOVBZ val+8(FP), R4
+ MOVD ptr+0(FP), R3
+ MOVBZ val+8(FP), R4
// We don't have atomic operations that work on individual bytes so we
// need to align addr down to a word boundary and create a mask
// containing v to OR with the entire word atomically.
@@ -188,8 +188,8 @@ TEXT ·Or8(SB), NOSPLIT, $0-9
// func And8(addr *uint8, v uint8)
TEXT ·And8(SB), NOSPLIT, $0-9
- MOVD ptr+0(FP), R3
- MOVBZ val+8(FP), R4
+ MOVD ptr+0(FP), R3
+ MOVBZ val+8(FP), R4
// We don't have atomic operations that work on individual bytes so we
// need to align addr down to a word boundary and create a mask
// containing v to AND with the entire word atomically.
@@ -200,3 +200,17 @@ TEXT ·And8(SB), NOSPLIT, $0-9
RLL R5, R4, R4 // R4 = rotl(R4, R5)
LAN R4, R6, 0(R3) // R6 = *R3; *R3 &= R4; (atomic)
RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R3
+ MOVW val+8(FP), R4
+ LAO R4, R6, 0(R3) // R6 = *R3; *R3 |= R4; (atomic)
+ RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R3
+ MOVW val+8(FP), R4
+ LAN R4, R6, 0(R3) // R6 = *R3; *R3 &= R4; (atomic)
+ RET
diff --git a/src/runtime/internal/atomic/atomic_386.go b/src/runtime/internal/atomic/atomic_386.go
index 06ce6a5356..1bfcb1143d 100644
--- a/src/runtime/internal/atomic/atomic_386.go
+++ b/src/runtime/internal/atomic/atomic_386.go
@@ -69,6 +69,12 @@ func And8(ptr *uint8, val uint8)
//go:noescape
func Or8(ptr *uint8, val uint8)
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
//go:noescape
diff --git a/src/runtime/internal/atomic/atomic_amd64.go b/src/runtime/internal/atomic/atomic_amd64.go
index 1b71a16d94..e36eb83a11 100644
--- a/src/runtime/internal/atomic/atomic_amd64.go
+++ b/src/runtime/internal/atomic/atomic_amd64.go
@@ -77,6 +77,12 @@ func And8(ptr *uint8, val uint8)
//go:noescape
func Or8(ptr *uint8, val uint8)
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
//go:noescape
diff --git a/src/runtime/internal/atomic/atomic_arm.go b/src/runtime/internal/atomic/atomic_arm.go
index 67d529c1cb..546b3d6120 100644
--- a/src/runtime/internal/atomic/atomic_arm.go
+++ b/src/runtime/internal/atomic/atomic_arm.go
@@ -182,6 +182,26 @@ func And8(addr *uint8, v uint8) {
}
}
+//go:nosplit
+func Or(addr *uint32, v uint32) {
+ for {
+ old := *addr
+ if Cas(addr, old, old|v) {
+ return
+ }
+ }
+}
+
+//go:nosplit
+func And(addr *uint32, v uint32) {
+ for {
+ old := *addr
+ if Cas(addr, old, old&v) {
+ return
+ }
+ }
+}
+
//go:nosplit
func armcas(ptr *uint32, old, new uint32) bool
diff --git a/src/runtime/internal/atomic/atomic_arm64.go b/src/runtime/internal/atomic/atomic_arm64.go
index c9b4322fe9..d49bee8936 100644
--- a/src/runtime/internal/atomic/atomic_arm64.go
+++ b/src/runtime/internal/atomic/atomic_arm64.go
@@ -53,6 +53,12 @@ func Or8(ptr *uint8, val uint8)
//go:noescape
func And8(ptr *uint8, val uint8)
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
//go:noescape
func Cas64(ptr *uint64, old, new uint64) bool
diff --git a/src/runtime/internal/atomic/atomic_arm64.s b/src/runtime/internal/atomic/atomic_arm64.s
index 36c7698b18..0cf3c40223 100644
--- a/src/runtime/internal/atomic/atomic_arm64.s
+++ b/src/runtime/internal/atomic/atomic_arm64.s
@@ -164,3 +164,22 @@ TEXT ·Or8(SB), NOSPLIT, $0-9
CBNZ R3, -3(PC)
RET
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R0
+ MOVW val+8(FP), R1
+ LDAXRW (R0), R2
+ AND R1, R2
+ STLXRW R2, (R0), R3
+ CBNZ R3, -3(PC)
+ RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+ MOVD ptr+0(FP), R0
+ MOVW val+8(FP), R1
+ LDAXRW (R0), R2
+ ORR R1, R2
+ STLXRW R2, (R0), R3
+ CBNZ R3, -3(PC)
+ RET
diff --git a/src/runtime/internal/atomic/atomic_mips64x.go b/src/runtime/internal/atomic/atomic_mips64x.go
index fca2242514..b0109d72b0 100644
--- a/src/runtime/internal/atomic/atomic_mips64x.go
+++ b/src/runtime/internal/atomic/atomic_mips64x.go
@@ -55,6 +55,12 @@ func Or8(ptr *uint8, val uint8)
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
//go:noescape
func Cas64(ptr *uint64, old, new uint64) bool
diff --git a/src/runtime/internal/atomic/atomic_mipsx.go b/src/runtime/internal/atomic/atomic_mipsx.go
index be1e6a038b..1336b50121 100644
--- a/src/runtime/internal/atomic/atomic_mipsx.go
+++ b/src/runtime/internal/atomic/atomic_mipsx.go
@@ -141,6 +141,12 @@ func And8(ptr *uint8, val uint8)
//go:noescape
func Or8(ptr *uint8, val uint8)
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
//go:noescape
func Store(ptr *uint32, val uint32)
diff --git a/src/runtime/internal/atomic/atomic_ppc64x.go b/src/runtime/internal/atomic/atomic_ppc64x.go
index e759bb27a2..e4b109f0ec 100644
--- a/src/runtime/internal/atomic/atomic_ppc64x.go
+++ b/src/runtime/internal/atomic/atomic_ppc64x.go
@@ -55,6 +55,12 @@ func Or8(ptr *uint8, val uint8)
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
//go:noescape
func Cas64(ptr *uint64, old, new uint64) bool
diff --git a/src/runtime/internal/atomic/atomic_riscv64.go b/src/runtime/internal/atomic/atomic_riscv64.go
index 617bc1a3eb..8f24d61625 100644
--- a/src/runtime/internal/atomic/atomic_riscv64.go
+++ b/src/runtime/internal/atomic/atomic_riscv64.go
@@ -51,6 +51,12 @@ func Or8(ptr *uint8, val uint8)
//go:noescape
func And8(ptr *uint8, val uint8)
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
//go:noescape
func Cas64(ptr *uint64, old, new uint64) bool
diff --git a/src/runtime/internal/atomic/atomic_riscv64.s b/src/runtime/internal/atomic/atomic_riscv64.s
index db139d690a..74c896cea6 100644
--- a/src/runtime/internal/atomic/atomic_riscv64.s
+++ b/src/runtime/internal/atomic/atomic_riscv64.s
@@ -242,3 +242,17 @@ TEXT ·Or8(SB), NOSPLIT, $0-9
SLL A2, A1
AMOORW A1, (A0), ZERO
RET
+
+// func And(ptr *uint32, val uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+ MOV ptr+0(FP), A0
+ MOVW val+8(FP), A1
+ AMOANDW A1, (A0), ZERO
+ RET
+
+// func Or(ptr *uint32, val uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+ MOV ptr+0(FP), A0
+ MOVW val+8(FP), A1
+ AMOORW A1, (A0), ZERO
+ RET
diff --git a/src/runtime/internal/atomic/atomic_s390x.go b/src/runtime/internal/atomic/atomic_s390x.go
index b649caa39f..a058d60102 100644
--- a/src/runtime/internal/atomic/atomic_s390x.go
+++ b/src/runtime/internal/atomic/atomic_s390x.go
@@ -91,6 +91,12 @@ func Or8(ptr *uint8, val uint8)
// NOTE: Do not add atomicxor8 (XOR is not idempotent).
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
//go:noescape
func Xadd(ptr *uint32, delta int32) uint32
diff --git a/src/runtime/internal/atomic/atomic_test.go b/src/runtime/internal/atomic/atomic_test.go
index a9f95077c0..c9c2eba248 100644
--- a/src/runtime/internal/atomic/atomic_test.go
+++ b/src/runtime/internal/atomic/atomic_test.go
@@ -150,6 +150,45 @@ func TestAnd8(t *testing.T) {
}
}
+func TestAnd(t *testing.T) {
+ // Basic sanity check.
+ x := uint32(0xffffffff)
+ for i := uint32(0); i < 32; i++ {
+ atomic.And(&x, ^(1 << i))
+ if r := uint32(0xffffffff) << (i + 1); x != r {
+ t.Fatalf("clearing bit %#x: want %#x, got %#x", uint32(1<
Date: Fri, 9 Oct 2020 12:41:50 -0400
Subject: [PATCH 012/253] cmd/compile: intrinsify
runtime/internal/atomic.{And,Or} on AMD64
These are identical to And8 and Or8, just using ANDL/ORL instead of
ANDB/ORB.
Change-Id: I99cf90a8b0b5f211fb23325dddd55821875f0c8f
Reviewed-on: https://go-review.googlesource.com/c/go/+/263140
Run-TryBot: Michael Pratt
TryBot-Result: Go Bot
Trust: Michael Pratt
Reviewed-by: Cherry Zhang
---
src/cmd/compile/internal/amd64/ssa.go | 2 +-
src/cmd/compile/internal/gc/ssa.go | 12 +++++
src/cmd/compile/internal/ssa/gen/AMD64.rules | 6 ++-
src/cmd/compile/internal/ssa/gen/AMD64Ops.go | 2 +
.../compile/internal/ssa/gen/genericOps.go | 2 +
src/cmd/compile/internal/ssa/opGen.go | 48 +++++++++++++++++++
src/cmd/compile/internal/ssa/rewriteAMD64.go | 34 +++++++++++++
7 files changed, 103 insertions(+), 3 deletions(-)
diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 4ac877986c..f30a47b903 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -1210,7 +1210,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p = s.Prog(x86.ASETEQ)
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg0()
- case ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock:
+ case ssa.OpAMD64ANDBlock, ssa.OpAMD64ANDLlock, ssa.OpAMD64ORBlock, ssa.OpAMD64ORLlock:
s.Prog(x86.ALOCK)
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 65beb84911..10d0d5fb56 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -3541,12 +3541,24 @@ func init() {
return nil
},
sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
+ addF("runtime/internal/atomic", "And",
+ func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+ s.vars[&memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem())
+ return nil
+ },
+ sys.AMD64) // TODO: same arches as And8.
addF("runtime/internal/atomic", "Or8",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem())
return nil
},
sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
+ addF("runtime/internal/atomic", "Or",
+ func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+ s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem())
+ return nil
+ },
+ sys.AMD64) // TODO: same arches as Or8.
alias("runtime/internal/atomic", "Loadint64", "runtime/internal/atomic", "Load64", all...)
alias("runtime/internal/atomic", "Xaddint64", "runtime/internal/atomic", "Xadd64", all...)
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 8a253035e0..b9b29a489d 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -530,8 +530,10 @@
(AtomicCompareAndSwap64 ptr old new_ mem) => (CMPXCHGQlock ptr old new_ mem)
// Atomic memory updates.
-(AtomicAnd8 ptr val mem) => (ANDBlock ptr val mem)
-(AtomicOr8 ptr val mem) => (ORBlock ptr val mem)
+(AtomicAnd8 ptr val mem) => (ANDBlock ptr val mem)
+(AtomicAnd32 ptr val mem) => (ANDLlock ptr val mem)
+(AtomicOr8 ptr val mem) => (ORBlock ptr val mem)
+(AtomicOr32 ptr val mem) => (ORLlock ptr val mem)
// Write barrier.
(WB ...) => (LoweredWB ...)
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index 2df5016d59..de5372670b 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -902,7 +902,9 @@ func init() {
// Atomic memory updates.
{name: "ANDBlock", argLength: 3, reg: gpstore, asm: "ANDB", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) &= arg1
+ {name: "ANDLlock", argLength: 3, reg: gpstore, asm: "ANDL", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) &= arg1
{name: "ORBlock", argLength: 3, reg: gpstore, asm: "ORB", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) |= arg1
+ {name: "ORLlock", argLength: 3, reg: gpstore, asm: "ORL", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) |= arg1
}
var AMD64blocks = []blockData{
diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go
index 12ba9f1fc9..23bd4af2cd 100644
--- a/src/cmd/compile/internal/ssa/gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/gen/genericOps.go
@@ -565,7 +565,9 @@ var genericOps = []opData{
{name: "AtomicCompareAndSwap64", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Returns true if store happens and new memory.
{name: "AtomicCompareAndSwapRel32", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Lock release, reports whether store happens and new memory.
{name: "AtomicAnd8", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns memory.
+ {name: "AtomicAnd32", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns memory.
{name: "AtomicOr8", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns memory.
+ {name: "AtomicOr32", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns memory.
// Atomic operation variants
// These variants have the same semantics as above atomic operations.
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index e9c63cdddf..00efc8f38d 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1034,7 +1034,9 @@ const (
OpAMD64CMPXCHGLlock
OpAMD64CMPXCHGQlock
OpAMD64ANDBlock
+ OpAMD64ANDLlock
OpAMD64ORBlock
+ OpAMD64ORLlock
OpARMADD
OpARMADDconst
@@ -2854,7 +2856,9 @@ const (
OpAtomicCompareAndSwap64
OpAtomicCompareAndSwapRel32
OpAtomicAnd8
+ OpAtomicAnd32
OpAtomicOr8
+ OpAtomicOr32
OpAtomicAdd32Variant
OpAtomicAdd64Variant
OpClobber
@@ -13575,6 +13579,22 @@ var opcodeTable = [...]opInfo{
},
},
},
+ {
+ name: "ANDLlock",
+ auxType: auxSymOff,
+ argLen: 3,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ hasSideEffects: true,
+ symEffect: SymRdWr,
+ asm: x86.AANDL,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
{
name: "ORBlock",
auxType: auxSymOff,
@@ -13591,6 +13611,22 @@ var opcodeTable = [...]opInfo{
},
},
},
+ {
+ name: "ORLlock",
+ auxType: auxSymOff,
+ argLen: 3,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ hasSideEffects: true,
+ symEffect: SymRdWr,
+ asm: x86.AORL,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
{
name: "ADD",
@@ -35520,12 +35556,24 @@ var opcodeTable = [...]opInfo{
hasSideEffects: true,
generic: true,
},
+ {
+ name: "AtomicAnd32",
+ argLen: 3,
+ hasSideEffects: true,
+ generic: true,
+ },
{
name: "AtomicOr8",
argLen: 3,
hasSideEffects: true,
generic: true,
},
+ {
+ name: "AtomicOr32",
+ argLen: 3,
+ hasSideEffects: true,
+ generic: true,
+ },
{
name: "AtomicAdd32Variant",
argLen: 3,
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 32ef26f98d..15bb627450 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -572,6 +572,8 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAtomicAdd32(v)
case OpAtomicAdd64:
return rewriteValueAMD64_OpAtomicAdd64(v)
+ case OpAtomicAnd32:
+ return rewriteValueAMD64_OpAtomicAnd32(v)
case OpAtomicAnd8:
return rewriteValueAMD64_OpAtomicAnd8(v)
case OpAtomicCompareAndSwap32:
@@ -590,6 +592,8 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAtomicLoad8(v)
case OpAtomicLoadPtr:
return rewriteValueAMD64_OpAtomicLoadPtr(v)
+ case OpAtomicOr32:
+ return rewriteValueAMD64_OpAtomicOr32(v)
case OpAtomicOr8:
return rewriteValueAMD64_OpAtomicOr8(v)
case OpAtomicStore32:
@@ -28476,6 +28480,21 @@ func rewriteValueAMD64_OpAtomicAdd64(v *Value) bool {
return true
}
}
+func rewriteValueAMD64_OpAtomicAnd32(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (AtomicAnd32 ptr val mem)
+ // result: (ANDLlock ptr val mem)
+ for {
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ v.reset(OpAMD64ANDLlock)
+ v.AddArg3(ptr, val, mem)
+ return true
+ }
+}
func rewriteValueAMD64_OpAtomicAnd8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
@@ -28607,6 +28626,21 @@ func rewriteValueAMD64_OpAtomicLoadPtr(v *Value) bool {
return true
}
}
+func rewriteValueAMD64_OpAtomicOr32(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (AtomicOr32 ptr val mem)
+ // result: (ORLlock ptr val mem)
+ for {
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ v.reset(OpAMD64ORLlock)
+ v.AddArg3(ptr, val, mem)
+ return true
+ }
+}
func rewriteValueAMD64_OpAtomicOr8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
--
GitLab
From 4a2cc73f8789e3df43c1c96944c90f55757a23b0 Mon Sep 17 00:00:00 2001
From: Michael Pratt
Date: Thu, 1 Oct 2020 15:21:37 -0400
Subject: [PATCH 013/253] runtime: don't attempt to steal from idle Ps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Work stealing is a scalability bottleneck in the scheduler. Since each P
has a work queue, work stealing must look at every P to determine if
there is any work. The number of Ps scales linearly with GOMAXPROCS
(i.e., the number of Ps _is_ GOMAXPROCS), thus this work scales linearly
with GOMAXPROCS.
Work stealing is a later attempt by a P to find work before it goes
idle. Since the P has no work of its own, extra costs here tend not to
directly affect application-level benchmarks. Where they show up is
extra CPU usage by the process as a whole. These costs get particularly
expensive for applications that transition between blocked and running
frequently.
Long term, we need a more scalable approach in general, but for now we
can make a simple observation: idle Ps ([1]) cannot possibly have
anything in their runq, so we need not bother checking at all.
We track idle Ps via a new global bitmap, updated in pidleput/pidleget.
This is already a slow path (requires sched.lock), so we don't expect
high contention there.
Using a single bitmap avoids the need to touch every P to read p.status.
Currently, the bitmap approach is not significantly better than reading
p.status. However, in a future CL I'd like to apply a similiar
optimization to timers. Once done, findrunnable would not touch most Ps
at all (in mostly idle programs), which will avoid memory latency to
pull those Ps into cache.
When reading this bitmap, we are racing with Ps going in and out of
idle, so there are a few cases to consider:
1. _Prunning -> _Pidle: Running P goes idle after we check the bitmap.
In this case, we will try to steal (and find nothing) so there is no
harm.
2. _Pidle -> _Prunning while spinning: A P that starts running may queue
new work that we miss. This is OK: (a) that P cannot go back to sleep
without completing its work, and (b) more fundamentally, we will recheck
after we drop our P.
3. _Pidle -> _Prunning after spinning: After spinning, we really can
miss work from a newly woken P. (a) above still applies here as well,
but this is also the same delicate dance case described in findrunnable:
if nothing is spinning anymore, the other P will unpark a thread to run
the work it submits.
Benchmark results from WakeupParallel/syscall/pair/race/1ms (see
golang.org/cl/228577):
name old msec new msec delta
Perf-task-clock-8 250 ± 1% 247 ± 4% ~ (p=0.690 n=5+5)
Perf-task-clock-16 258 ± 2% 259 ± 2% ~ (p=0.841 n=5+5)
Perf-task-clock-32 284 ± 2% 270 ± 4% -4.94% (p=0.032 n=5+5)
Perf-task-clock-64 326 ± 3% 303 ± 2% -6.92% (p=0.008 n=5+5)
Perf-task-clock-128 407 ± 2% 363 ± 5% -10.69% (p=0.008 n=5+5)
Perf-task-clock-256 561 ± 1% 481 ± 1% -14.20% (p=0.016 n=4+5)
Perf-task-clock-512 840 ± 5% 683 ± 2% -18.70% (p=0.008 n=5+5)
Perf-task-clock-1024 1.38k ±14% 1.07k ± 2% -21.85% (p=0.008 n=5+5)
[1] "Idle Ps" here refers to _Pidle Ps in the sched.pidle list. In other
contexts, Ps may temporarily transition through _Pidle (e.g., in
handoffp); those Ps may have work.
Updates #28808
Updates #18237
Change-Id: Ieeb958bd72e7d8fb375b0b1f414e8d7378b14e29
Reviewed-on: https://go-review.googlesource.com/c/go/+/259578
Run-TryBot: Michael Pratt
TryBot-Result: Go Bot
Reviewed-by: Michael Knyszek
Reviewed-by: Austin Clements
Trust: Michael Pratt
---
src/runtime/proc.go | 78 +++++++++++++++++++++++++++++++++++++----
src/runtime/runtime2.go | 12 +++++--
2 files changed, 82 insertions(+), 8 deletions(-)
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index e1de70a997..d088b969c8 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -2295,8 +2295,12 @@ top:
if _p_ == p2 {
continue
}
- if gp := runqsteal(_p_, p2, stealRunNextG); gp != nil {
- return gp, false
+
+ // Don't bother to attempt to steal if p2 is idle.
+ if !idlepMask.read(enum.position()) {
+ if gp := runqsteal(_p_, p2, stealRunNextG); gp != nil {
+ return gp, false
+ }
}
// Consider stealing timers from p2.
@@ -2307,8 +2311,13 @@ top:
// and is not marked for preemption. If p2 is running
// and not being preempted we assume it will handle its
// own timers.
+ //
// If we're still looking for work after checking all
// the P's, then go ahead and steal from an active P.
+ //
+ // TODO(prattmic): Maintain a global look-aside similar
+ // to idlepMask to avoid looking at p2 if it can't
+ // possibly have timers.
if i > 2 || (i > 1 && shouldStealTimers(p2)) {
tnow, w, ran := checkTimers(p2, now)
now = tnow
@@ -2379,6 +2388,9 @@ stop:
// safe-points. We don't need to snapshot the contents because
// everything up to cap(allp) is immutable.
allpSnapshot := allp
+ // Also snapshot idlepMask. Value changes are OK, but we can't allow
+ // len to change out from under us.
+ idlepMaskSnapshot := idlepMask
// return P and block
lock(&sched.lock)
@@ -2419,8 +2431,8 @@ stop:
}
// check all runqueues once again
- for _, _p_ := range allpSnapshot {
- if !runqempty(_p_) {
+ for id, _p_ := range allpSnapshot {
+ if !idlepMaskSnapshot.read(uint32(id)) && !runqempty(_p_) {
lock(&sched.lock)
_p_ = pidleget()
unlock(&sched.lock)
@@ -4398,6 +4410,8 @@ func procresize(nprocs int32) *p {
}
sched.procresizetime = now
+ maskWords := (nprocs+31) / 32
+
// Grow allp if necessary.
if nprocs > int32(len(allp)) {
// Synchronize with retake, which could be running
@@ -4412,6 +4426,15 @@ func procresize(nprocs int32) *p {
copy(nallp, allp[:cap(allp)])
allp = nallp
}
+
+ if maskWords <= int32(cap(idlepMask)) {
+ idlepMask = idlepMask[:maskWords]
+ } else {
+ nidlepMask := make([]uint32, maskWords)
+ // No need to copy beyond len, old Ps are irrelevant.
+ copy(nidlepMask, idlepMask)
+ idlepMask = nidlepMask
+ }
unlock(&allpLock)
}
@@ -4470,6 +4493,7 @@ func procresize(nprocs int32) *p {
if int32(len(allp)) != nprocs {
lock(&allpLock)
allp = allp[:nprocs]
+ idlepMask = idlepMask[:maskWords]
unlock(&allpLock)
}
@@ -5153,8 +5177,46 @@ func globrunqget(_p_ *p, max int32) *g {
return gp
}
-// Put p to on _Pidle list.
+// pIdleMask is a bitmap of of Ps in the _Pidle list, one bit per P.
+type pIdleMask []uint32
+
+// read returns true if P id is in the _Pidle list, and thus cannot have work.
+func (p pIdleMask) read(id uint32) bool {
+ word := id / 32
+ mask := uint32(1) << (id % 32)
+ return (atomic.Load(&p[word]) & mask) != 0
+}
+
+// set sets P id as idle in mask.
+//
+// Must be called only for a P owned by the caller. In order to maintain
+// consistency, a P going idle must the idle mask simultaneously with updates
+// to the idle P list under the sched.lock, otherwise a racing pidleget may
+// clear the mask before pidleput sets the mask, corrupting the bitmap.
+//
+// N.B., procresize takes ownership of all Ps in stopTheWorldWithSema.
+func (p pIdleMask) set(id int32) {
+ word := id / 32
+ mask := uint32(1) << (id % 32)
+ atomic.Or(&p[word], mask)
+}
+
+// clear sets P id as non-idle in mask.
+//
+// See comment on set.
+func (p pIdleMask) clear(id int32) {
+ word := id / 32
+ mask := uint32(1) << (id % 32)
+ atomic.And(&p[word], ^mask)
+}
+
+// pidleput puts p to on the _Pidle list.
+//
+// This releases ownership of p. Once sched.lock is released it is no longer
+// safe to use p.
+//
// sched.lock must be held.
+//
// May run during STW, so write barriers are not allowed.
//go:nowritebarrierrec
func pidleput(_p_ *p) {
@@ -5163,13 +5225,16 @@ func pidleput(_p_ *p) {
if !runqempty(_p_) {
throw("pidleput: P has non-empty run queue")
}
+ idlepMask.set(_p_.id)
_p_.link = sched.pidle
sched.pidle.set(_p_)
atomic.Xadd(&sched.npidle, 1) // TODO: fast atomic
}
-// Try get a p from _Pidle list.
+// pidleget tries to get a p from the _Pidle list, acquiring ownership.
+//
// sched.lock must be held.
+//
// May run during STW, so write barriers are not allowed.
//go:nowritebarrierrec
func pidleget() *p {
@@ -5177,6 +5242,7 @@ func pidleget() *p {
_p_ := sched.pidle.ptr()
if _p_ != nil {
+ idlepMask.clear(_p_.id)
sched.pidle = _p_.link
atomic.Xadd(&sched.npidle, -1) // TODO: fast atomic
}
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 519872b8e2..0758a35e01 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -1035,14 +1035,22 @@ func (w waitReason) String() string {
var (
allglen uintptr
allm *m
- allp []*p // len(allp) == gomaxprocs; may change at safe points, otherwise immutable
- allpLock mutex // Protects P-less reads of allp and all writes
gomaxprocs int32
ncpu int32
forcegc forcegcstate
sched schedt
newprocs int32
+ // allpLock protects P-less reads and size changes of allp and
+ // idlepMask, and all writes to allp.
+ allpLock mutex
+ // len(allp) == gomaxprocs; may change at safe points, otherwise
+ // immutable.
+ allp []*p
+ // Bitmask of Ps in _Pidle list, one bit per P. Reads and writes must
+ // be atomic. Length may change at safe points.
+ idlepMask pIdleMask
+
// Information about what cpu features are available.
// Packages outside the runtime should not use these
// as they are not an external api.
--
GitLab
From b5ddc42b465dd5b9532ee336d98343d81a6d35b2 Mon Sep 17 00:00:00 2001
From: Russ Cox
Date: Thu, 22 Oct 2020 12:11:29 -0400
Subject: [PATCH 014/253] io/fs, path, path/filepath, testing/fstest: validate
patterns in Match, Glob
According to #28614, proposal review agreed in December 2018 that
Match should return an error for failed matches where the unmatched
part of the pattern has a syntax error. (The failed match has to date
caused the scan of the pattern to stop early.)
This change implements that behavior: the match loop continues
scanning to the end of the pattern, even after a confirmed mismatch,
to check whether the pattern is even well-formed.
The change applies to both path.Match and filepath.Match.
Then filepath.Glob and fs.Glob make a single validity-checking
call to Match before beginning their usual processing.
Also update fstest.TestFS to check for correct validation in custom
Glob implementations.
Fixes #28614.
Change-Id: Ic1d35a4bb9c3565184ae83dbefc425c5c96318e7
Reviewed-on: https://go-review.googlesource.com/c/go/+/264397
Trust: Russ Cox
Reviewed-by: Rob Pike
---
src/io/fs/glob.go | 4 +++
src/io/fs/glob_test.go | 10 ++++--
src/path/filepath/match.go | 61 ++++++++++++++++++++-------------
src/path/filepath/match_test.go | 12 ++++---
src/path/match.go | 60 ++++++++++++++++++++++----------
src/path/match_test.go | 4 ++-
src/testing/fstest/testfs.go | 6 ++++
7 files changed, 106 insertions(+), 51 deletions(-)
diff --git a/src/io/fs/glob.go b/src/io/fs/glob.go
index 77f6ebbaaf..cde6c49f3d 100644
--- a/src/io/fs/glob.go
+++ b/src/io/fs/glob.go
@@ -36,6 +36,10 @@ func Glob(fsys FS, pattern string) (matches []string, err error) {
return fsys.Glob(pattern)
}
+ // Check pattern is well-formed.
+ if _, err := path.Match(pattern, ""); err != nil {
+ return nil, err
+ }
if !hasMeta(pattern) {
if _, err = Stat(fsys, pattern); err != nil {
return nil, nil
diff --git a/src/io/fs/glob_test.go b/src/io/fs/glob_test.go
index 0183a49b6c..5c8ac3fbf3 100644
--- a/src/io/fs/glob_test.go
+++ b/src/io/fs/glob_test.go
@@ -7,6 +7,7 @@ package fs_test
import (
. "io/fs"
"os"
+ "path"
"testing"
)
@@ -44,9 +45,12 @@ func TestGlob(t *testing.T) {
}
func TestGlobError(t *testing.T) {
- _, err := Glob(os.DirFS("."), "[]")
- if err == nil {
- t.Error("expected error for bad pattern; got none")
+ bad := []string{`[]`, `nonexist/[]`}
+ for _, pattern := range bad {
+ _, err := Glob(os.DirFS("."), pattern)
+ if err != path.ErrBadPattern {
+ t.Errorf("Glob(fs, %#q) returned err=%v, want path.ErrBadPattern", pattern, err)
+ }
}
}
diff --git a/src/path/filepath/match.go b/src/path/filepath/match.go
index 20a334805b..c77a26952a 100644
--- a/src/path/filepath/match.go
+++ b/src/path/filepath/match.go
@@ -122,25 +122,28 @@ Scan:
// If so, it returns the remainder of s (after the match).
// Chunk is all single-character operators: literals, char classes, and ?.
func matchChunk(chunk, s string) (rest string, ok bool, err error) {
+ // failed records whether the match has failed.
+ // After the match fails, the loop continues on processing chunk,
+ // checking that the pattern is well-formed but no longer reading s.
+ failed := false
for len(chunk) > 0 {
- if len(s) == 0 {
- return
+ if !failed && len(s) == 0 {
+ failed = true
}
switch chunk[0] {
case '[':
// character class
- r, n := utf8.DecodeRuneInString(s)
- s = s[n:]
- chunk = chunk[1:]
- // We can't end right after '[', we're expecting at least
- // a closing bracket and possibly a caret.
- if len(chunk) == 0 {
- err = ErrBadPattern
- return
+ var r rune
+ if !failed {
+ var n int
+ r, n = utf8.DecodeRuneInString(s)
+ s = s[n:]
}
+ chunk = chunk[1:]
// possibly negated
- negated := chunk[0] == '^'
- if negated {
+ negated := false
+ if len(chunk) > 0 && chunk[0] == '^' {
+ negated = true
chunk = chunk[1:]
}
// parse all ranges
@@ -153,12 +156,12 @@ func matchChunk(chunk, s string) (rest string, ok bool, err error) {
}
var lo, hi rune
if lo, chunk, err = getEsc(chunk); err != nil {
- return
+ return "", false, err
}
hi = lo
if chunk[0] == '-' {
if hi, chunk, err = getEsc(chunk[1:]); err != nil {
- return
+ return "", false, err
}
}
if lo <= r && r <= hi {
@@ -167,35 +170,41 @@ func matchChunk(chunk, s string) (rest string, ok bool, err error) {
nrange++
}
if match == negated {
- return
+ failed = true
}
case '?':
- if s[0] == Separator {
- return
+ if !failed {
+ if s[0] == Separator {
+ failed = true
+ }
+ _, n := utf8.DecodeRuneInString(s)
+ s = s[n:]
}
- _, n := utf8.DecodeRuneInString(s)
- s = s[n:]
chunk = chunk[1:]
case '\\':
if runtime.GOOS != "windows" {
chunk = chunk[1:]
if len(chunk) == 0 {
- err = ErrBadPattern
- return
+ return "", false, ErrBadPattern
}
}
fallthrough
default:
- if chunk[0] != s[0] {
- return
+ if !failed {
+ if chunk[0] != s[0] {
+ failed = true
+ }
+ s = s[1:]
}
- s = s[1:]
chunk = chunk[1:]
}
}
+ if failed {
+ return "", false, nil
+ }
return s, true, nil
}
@@ -232,6 +241,10 @@ func getEsc(chunk string) (r rune, nchunk string, err error) {
// The only possible returned error is ErrBadPattern, when pattern
// is malformed.
func Glob(pattern string) (matches []string, err error) {
+ // Check pattern is well-formed.
+ if _, err := Match(pattern, ""); err != nil {
+ return nil, err
+ }
if !hasMeta(pattern) {
if _, err = os.Lstat(pattern); err != nil {
return nil, nil
diff --git a/src/path/filepath/match_test.go b/src/path/filepath/match_test.go
index b8657626bc..1c3b567fa3 100644
--- a/src/path/filepath/match_test.go
+++ b/src/path/filepath/match_test.go
@@ -75,8 +75,10 @@ var matchTests = []MatchTest{
{"[", "a", false, ErrBadPattern},
{"[^", "a", false, ErrBadPattern},
{"[^bc", "a", false, ErrBadPattern},
- {"a[", "a", false, nil},
+ {"a[", "a", false, ErrBadPattern},
{"a[", "ab", false, ErrBadPattern},
+ {"a[", "x", false, ErrBadPattern},
+ {"a/b[", "x", false, ErrBadPattern},
{"*x", "xxx", true, nil},
}
@@ -155,9 +157,11 @@ func TestGlob(t *testing.T) {
}
func TestGlobError(t *testing.T) {
- _, err := Glob("[]")
- if err == nil {
- t.Error("expected error for bad pattern; got none")
+ bad := []string{`[]`, `nonexist/[]`}
+ for _, pattern := range bad {
+ if _, err := Glob(pattern); err != ErrBadPattern {
+ t.Errorf("Glob(%#q) returned err=%v, want ErrBadPattern", pattern, err)
+ }
}
}
diff --git a/src/path/match.go b/src/path/match.go
index 837eb8bb8b..918624c60e 100644
--- a/src/path/match.go
+++ b/src/path/match.go
@@ -75,6 +75,14 @@ Pattern:
}
}
}
+ // Before returning false with no error,
+ // check that the remainder of the pattern is syntactically valid.
+ for len(pattern) > 0 {
+ _, chunk, pattern = scanChunk(pattern)
+ if _, _, err := matchChunk(chunk, ""); err != nil {
+ return false, err
+ }
+ }
return false, nil
}
return len(name) == 0, nil
@@ -114,20 +122,28 @@ Scan:
// If so, it returns the remainder of s (after the match).
// Chunk is all single-character operators: literals, char classes, and ?.
func matchChunk(chunk, s string) (rest string, ok bool, err error) {
+ // failed records whether the match has failed.
+ // After the match fails, the loop continues on processing chunk,
+ // checking that the pattern is well-formed but no longer reading s.
+ failed := false
for len(chunk) > 0 {
- if len(s) == 0 {
- return
+ if !failed && len(s) == 0 {
+ failed = true
}
switch chunk[0] {
case '[':
// character class
- r, n := utf8.DecodeRuneInString(s)
- s = s[n:]
+ var r rune
+ if !failed {
+ var n int
+ r, n = utf8.DecodeRuneInString(s)
+ s = s[n:]
+ }
chunk = chunk[1:]
// possibly negated
- notNegated := true
+ negated := false
if len(chunk) > 0 && chunk[0] == '^' {
- notNegated = false
+ negated = true
chunk = chunk[1:]
}
// parse all ranges
@@ -140,12 +156,12 @@ func matchChunk(chunk, s string) (rest string, ok bool, err error) {
}
var lo, hi rune
if lo, chunk, err = getEsc(chunk); err != nil {
- return
+ return "", false, err
}
hi = lo
if chunk[0] == '-' {
if hi, chunk, err = getEsc(chunk[1:]); err != nil {
- return
+ return "", false, err
}
}
if lo <= r && r <= hi {
@@ -153,34 +169,40 @@ func matchChunk(chunk, s string) (rest string, ok bool, err error) {
}
nrange++
}
- if match != notNegated {
- return
+ if match == negated {
+ failed = true
}
case '?':
- if s[0] == '/' {
- return
+ if !failed {
+ if s[0] == '/' {
+ failed = true
+ }
+ _, n := utf8.DecodeRuneInString(s)
+ s = s[n:]
}
- _, n := utf8.DecodeRuneInString(s)
- s = s[n:]
chunk = chunk[1:]
case '\\':
chunk = chunk[1:]
if len(chunk) == 0 {
- err = ErrBadPattern
- return
+ return "", false, ErrBadPattern
}
fallthrough
default:
- if chunk[0] != s[0] {
- return
+ if !failed {
+ if chunk[0] != s[0] {
+ failed = true
+ }
+ s = s[1:]
}
- s = s[1:]
chunk = chunk[1:]
}
}
+ if failed {
+ return "", false, nil
+ }
return s, true, nil
}
diff --git a/src/path/match_test.go b/src/path/match_test.go
index 3e027e1f68..996bd691eb 100644
--- a/src/path/match_test.go
+++ b/src/path/match_test.go
@@ -67,8 +67,10 @@ var matchTests = []MatchTest{
{"[", "a", false, ErrBadPattern},
{"[^", "a", false, ErrBadPattern},
{"[^bc", "a", false, ErrBadPattern},
- {"a[", "a", false, nil},
+ {"a[", "a", false, ErrBadPattern},
{"a[", "ab", false, ErrBadPattern},
+ {"a[", "x", false, ErrBadPattern},
+ {"a/b[", "x", false, ErrBadPattern},
{"*x", "xxx", true, nil},
}
diff --git a/src/testing/fstest/testfs.go b/src/testing/fstest/testfs.go
index 21cd00e5b6..4912a271b2 100644
--- a/src/testing/fstest/testfs.go
+++ b/src/testing/fstest/testfs.go
@@ -282,6 +282,12 @@ func (t *fsTester) checkGlob(dir string, list []fs.DirEntry) {
glob = strings.Join(elem, "/") + "/"
}
+ // Test that malformed patterns are detected.
+ // The error is likely path.ErrBadPattern but need not be.
+ if _, err := t.fsys.(fs.GlobFS).Glob(glob + "nonexist/[]"); err == nil {
+ t.errorf("%s: Glob(%#q): bad pattern not detected", dir, glob+"nonexist/[]")
+ }
+
// Try to find a letter that appears in only some of the final names.
c := rune('a')
for ; c <= 'z'; c++ {
--
GitLab
From cdb19b4dba58c0e3cabde8b728156dfe273707b3 Mon Sep 17 00:00:00 2001
From: Michael Pratt
Date: Fri, 16 Oct 2020 16:34:52 -0400
Subject: [PATCH 015/253] cmd/compile: intrinsify
runtime/internal/atomic.{And,Or} on ARM64
These are identical to And8 and Or8, just using LDAXRW/STLXRW instead of
LDAXRB/STLXRB.
Change-Id: I5308832ae165064550bee4bb245809ab952f4cc8
Reviewed-on: https://go-review.googlesource.com/c/go/+/263148
Run-TryBot: Michael Pratt
TryBot-Result: Go Bot
Trust: Michael Pratt
Reviewed-by: Cherry Zhang
---
src/cmd/compile/internal/arm64/ssa.go | 18 ++++++---
src/cmd/compile/internal/gc/ssa.go | 4 +-
src/cmd/compile/internal/ssa/gen/ARM64.rules | 6 ++-
src/cmd/compile/internal/ssa/gen/ARM64Ops.go | 6 ++-
src/cmd/compile/internal/ssa/opGen.go | 38 ++++++++++++++++++
src/cmd/compile/internal/ssa/rewriteARM64.go | 42 ++++++++++++++++++++
6 files changed, 103 insertions(+), 11 deletions(-)
diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go
index 1d6ea6b9d8..5c695ef84c 100644
--- a/src/cmd/compile/internal/arm64/ssa.go
+++ b/src/cmd/compile/internal/arm64/ssa.go
@@ -688,15 +688,23 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p5.To.Reg = out
gc.Patch(p2, p5)
case ssa.OpARM64LoweredAtomicAnd8,
- ssa.OpARM64LoweredAtomicOr8:
- // LDAXRB (Rarg0), Rout
+ ssa.OpARM64LoweredAtomicAnd32,
+ ssa.OpARM64LoweredAtomicOr8,
+ ssa.OpARM64LoweredAtomicOr32:
+ // LDAXRB/LDAXRW (Rarg0), Rout
// AND/OR Rarg1, Rout
- // STLXRB Rout, (Rarg0), Rtmp
+ // STLXRB/STLXRB Rout, (Rarg0), Rtmp
// CBNZ Rtmp, -3(PC)
+ ld := arm64.ALDAXRB
+ st := arm64.ASTLXRB
+ if v.Op == ssa.OpARM64LoweredAtomicAnd32 || v.Op == ssa.OpARM64LoweredAtomicOr32 {
+ ld = arm64.ALDAXRW
+ st = arm64.ASTLXRW
+ }
r0 := v.Args[0].Reg()
r1 := v.Args[1].Reg()
out := v.Reg0()
- p := s.Prog(arm64.ALDAXRB)
+ p := s.Prog(ld)
p.From.Type = obj.TYPE_MEM
p.From.Reg = r0
p.To.Type = obj.TYPE_REG
@@ -706,7 +714,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p1.From.Reg = r1
p1.To.Type = obj.TYPE_REG
p1.To.Reg = out
- p2 := s.Prog(arm64.ASTLXRB)
+ p2 := s.Prog(st)
p2.From.Type = obj.TYPE_REG
p2.From.Reg = out
p2.To.Type = obj.TYPE_MEM
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 10d0d5fb56..1561fe2106 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -3546,7 +3546,7 @@ func init() {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem())
return nil
},
- sys.AMD64) // TODO: same arches as And8.
+ sys.AMD64, sys.ARM64) // TODO: same arches as And8.
addF("runtime/internal/atomic", "Or8",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem())
@@ -3558,7 +3558,7 @@ func init() {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem())
return nil
},
- sys.AMD64) // TODO: same arches as Or8.
+ sys.AMD64, sys.ARM64) // TODO: same arches as Or8.
alias("runtime/internal/atomic", "Loadint64", "runtime/internal/atomic", "Load64", all...)
alias("runtime/internal/atomic", "Xaddint64", "runtime/internal/atomic", "Xadd64", all...)
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index c4a3532632..f2d2fb6cf6 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -548,8 +548,10 @@
(AtomicCompareAndSwap(32|64) ...) => (LoweredAtomicCas(32|64) ...)
// Currently the updated value is not used, but we need a register to temporarily hold it.
-(AtomicAnd8 ptr val mem) => (Select1 (LoweredAtomicAnd8 ptr val mem))
-(AtomicOr8 ptr val mem) => (Select1 (LoweredAtomicOr8 ptr val mem))
+(AtomicAnd8 ptr val mem) => (Select1 (LoweredAtomicAnd8 ptr val mem))
+(AtomicAnd32 ptr val mem) => (Select1 (LoweredAtomicAnd32 ptr val mem))
+(AtomicOr8 ptr val mem) => (Select1 (LoweredAtomicOr8 ptr val mem))
+(AtomicOr32 ptr val mem) => (Select1 (LoweredAtomicOr32 ptr val mem))
(AtomicAdd(32|64)Variant ...) => (LoweredAtomicAdd(32|64)Variant ...)
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
index 9ff53f7e4e..fe9edbf933 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
@@ -656,12 +656,14 @@ func init() {
// atomic and/or.
// *arg0 &= (|=) arg1. arg2=mem. returns . auxint must be zero.
- // LDAXRB (Rarg0), Rout
+ // LDAXR (Rarg0), Rout
// AND/OR Rarg1, Rout
- // STLXRB Rout, (Rarg0), Rtmp
+ // STLXR Rout, (Rarg0), Rtmp
// CBNZ Rtmp, -3(PC)
{name: "LoweredAtomicAnd8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+ {name: "LoweredAtomicAnd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicOr8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+ {name: "LoweredAtomicOr32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
// It saves all GP registers if necessary,
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 00efc8f38d..f86210e631 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1588,7 +1588,9 @@ const (
OpARM64LoweredAtomicCas64
OpARM64LoweredAtomicCas32
OpARM64LoweredAtomicAnd8
+ OpARM64LoweredAtomicAnd32
OpARM64LoweredAtomicOr8
+ OpARM64LoweredAtomicOr32
OpARM64LoweredWB
OpARM64LoweredPanicBoundsA
OpARM64LoweredPanicBoundsB
@@ -21096,6 +21098,24 @@ var opcodeTable = [...]opInfo{
},
},
},
+ {
+ name: "LoweredAtomicAnd32",
+ argLen: 3,
+ resultNotInArgs: true,
+ faultOnNilArg0: true,
+ hasSideEffects: true,
+ unsafePoint: true,
+ asm: arm64.AAND,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+ },
+ outputs: []outputInfo{
+ {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
{
name: "LoweredAtomicOr8",
argLen: 3,
@@ -21114,6 +21134,24 @@ var opcodeTable = [...]opInfo{
},
},
},
+ {
+ name: "LoweredAtomicOr32",
+ argLen: 3,
+ resultNotInArgs: true,
+ faultOnNilArg0: true,
+ hasSideEffects: true,
+ unsafePoint: true,
+ asm: arm64.AORR,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+ },
+ outputs: []outputInfo{
+ {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
{
name: "LoweredWB",
auxType: auxSym,
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index 6c48812121..327f1674b5 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -424,6 +424,8 @@ func rewriteValueARM64(v *Value) bool {
case OpAtomicAdd64Variant:
v.Op = OpARM64LoweredAtomicAdd64Variant
return true
+ case OpAtomicAnd32:
+ return rewriteValueARM64_OpAtomicAnd32(v)
case OpAtomicAnd8:
return rewriteValueARM64_OpAtomicAnd8(v)
case OpAtomicCompareAndSwap32:
@@ -450,6 +452,8 @@ func rewriteValueARM64(v *Value) bool {
case OpAtomicLoadPtr:
v.Op = OpARM64LDAR
return true
+ case OpAtomicOr32:
+ return rewriteValueARM64_OpAtomicOr32(v)
case OpAtomicOr8:
return rewriteValueARM64_OpAtomicOr8(v)
case OpAtomicStore32:
@@ -21340,6 +21344,25 @@ func rewriteValueARM64_OpAddr(v *Value) bool {
return true
}
}
+func rewriteValueARM64_OpAtomicAnd32(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ typ := &b.Func.Config.Types
+ // match: (AtomicAnd32 ptr val mem)
+ // result: (Select1 (LoweredAtomicAnd32 ptr val mem))
+ for {
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ v.reset(OpSelect1)
+ v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicAnd32, types.NewTuple(typ.UInt32, types.TypeMem))
+ v0.AddArg3(ptr, val, mem)
+ v.AddArg(v0)
+ return true
+ }
+}
func rewriteValueARM64_OpAtomicAnd8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
@@ -21359,6 +21382,25 @@ func rewriteValueARM64_OpAtomicAnd8(v *Value) bool {
return true
}
}
+func rewriteValueARM64_OpAtomicOr32(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ typ := &b.Func.Config.Types
+ // match: (AtomicOr32 ptr val mem)
+ // result: (Select1 (LoweredAtomicOr32 ptr val mem))
+ for {
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ v.reset(OpSelect1)
+ v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicOr32, types.NewTuple(typ.UInt32, types.TypeMem))
+ v0.AddArg3(ptr, val, mem)
+ v.AddArg(v0)
+ return true
+ }
+}
func rewriteValueARM64_OpAtomicOr8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
--
GitLab
From e223c6cf073fe1228c54050a43042b9c5721d4f8 Mon Sep 17 00:00:00 2001
From: Michael Pratt
Date: Fri, 16 Oct 2020 17:07:14 -0400
Subject: [PATCH 016/253] cmd/compile: intrinsify
runtime/internal/atomic.{And,Or} on PPC64
This is a simple case of changing the operand size of the existing 8-bit
And/Or.
I've also updated a few operand descriptions that were out-of-sync with
the implementation.
Change-Id: I95ac4445d08f7958768aec9a233698a2d652a39a
Reviewed-on: https://go-review.googlesource.com/c/go/+/263150
Run-TryBot: Michael Pratt
TryBot-Result: Go Bot
Trust: Michael Pratt
Reviewed-by: Cherry Zhang
---
src/cmd/compile/internal/gc/ssa.go | 4 +--
src/cmd/compile/internal/ppc64/ssa.go | 22 +++++++++++----
src/cmd/compile/internal/ssa/gen/PPC64.rules | 6 +++--
src/cmd/compile/internal/ssa/gen/PPC64Ops.go | 16 +++++------
src/cmd/compile/internal/ssa/opGen.go | 28 ++++++++++++++++++++
src/cmd/compile/internal/ssa/rewritePPC64.go | 6 +++++
6 files changed, 64 insertions(+), 18 deletions(-)
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 1561fe2106..2b64b358ed 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -3546,7 +3546,7 @@ func init() {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem())
return nil
},
- sys.AMD64, sys.ARM64) // TODO: same arches as And8.
+ sys.AMD64, sys.ARM64, sys.PPC64) // TODO: same arches as And8.
addF("runtime/internal/atomic", "Or8",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem())
@@ -3558,7 +3558,7 @@ func init() {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem())
return nil
},
- sys.AMD64, sys.ARM64) // TODO: same arches as Or8.
+ sys.AMD64, sys.ARM64, sys.PPC64) // TODO: same arches as And8.
alias("runtime/internal/atomic", "Loadint64", "runtime/internal/atomic", "Load64", all...)
alias("runtime/internal/atomic", "Xaddint64", "runtime/internal/atomic", "Xadd64", all...)
diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go
index 1ece4d999f..79f18bfebb 100644
--- a/src/cmd/compile/internal/ppc64/ssa.go
+++ b/src/cmd/compile/internal/ppc64/ssa.go
@@ -166,34 +166,46 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p2.To.Reg = v.Reg1()
case ssa.OpPPC64LoweredAtomicAnd8,
- ssa.OpPPC64LoweredAtomicOr8:
+ ssa.OpPPC64LoweredAtomicAnd32,
+ ssa.OpPPC64LoweredAtomicOr8,
+ ssa.OpPPC64LoweredAtomicOr32:
// LWSYNC
- // LBAR (Rarg0), Rtmp
+ // LBAR/LWAR (Rarg0), Rtmp
// AND/OR Rarg1, Rtmp
- // STBCCC Rtmp, (Rarg0)
+ // STBCCC/STWCCC Rtmp, (Rarg0)
// BNE -3(PC)
+ ld := ppc64.ALBAR
+ st := ppc64.ASTBCCC
+ if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
+ ld = ppc64.ALWAR
+ st = ppc64.ASTWCCC
+ }
r0 := v.Args[0].Reg()
r1 := v.Args[1].Reg()
// LWSYNC - Assuming shared data not write-through-required nor
// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
plwsync := s.Prog(ppc64.ALWSYNC)
plwsync.To.Type = obj.TYPE_NONE
- p := s.Prog(ppc64.ALBAR)
+ // LBAR or LWAR
+ p := s.Prog(ld)
p.From.Type = obj.TYPE_MEM
p.From.Reg = r0
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REGTMP
+ // AND/OR reg1,out
p1 := s.Prog(v.Op.Asm())
p1.From.Type = obj.TYPE_REG
p1.From.Reg = r1
p1.To.Type = obj.TYPE_REG
p1.To.Reg = ppc64.REGTMP
- p2 := s.Prog(ppc64.ASTBCCC)
+ // STBCCC or STWCCC
+ p2 := s.Prog(st)
p2.From.Type = obj.TYPE_REG
p2.From.Reg = ppc64.REGTMP
p2.To.Type = obj.TYPE_MEM
p2.To.Reg = r0
p2.RegTo2 = ppc64.REGTMP
+ // BNE retry
p3 := s.Prog(ppc64.ABNE)
p3.To.Type = obj.TYPE_BRANCH
gc.Patch(p3, p)
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules
index 11b1a318fe..6175b42b89 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64.rules
+++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules
@@ -980,8 +980,10 @@
(AtomicCompareAndSwap(32|64) ptr old new_ mem) => (LoweredAtomicCas(32|64) [1] ptr old new_ mem)
(AtomicCompareAndSwapRel32 ptr old new_ mem) => (LoweredAtomicCas32 [0] ptr old new_ mem)
-(AtomicAnd8 ...) => (LoweredAtomicAnd8 ...)
-(AtomicOr8 ...) => (LoweredAtomicOr8 ...)
+(AtomicAnd8 ...) => (LoweredAtomicAnd8 ...)
+(AtomicAnd32 ...) => (LoweredAtomicAnd32 ...)
+(AtomicOr8 ...) => (LoweredAtomicOr8 ...)
+(AtomicOr32 ...) => (LoweredAtomicOr32 ...)
(Slicemask x) => (SRADconst (NEG x) [63])
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
index 5885660597..f4a53262f0 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
@@ -602,25 +602,22 @@ func init() {
{name: "LoweredAtomicLoadPtr", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
// atomic add32, 64
- // SYNC
+ // LWSYNC
// LDAR (Rarg0), Rout
// ADD Rarg1, Rout
// STDCCC Rout, (Rarg0)
// BNE -3(PC)
- // ISYNC
// return new sum
-
{name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
{name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
// atomic exchange32, 64
- // SYNC
+ // LWSYNC
// LDAR (Rarg0), Rout
// STDCCC Rarg1, (Rarg0)
// BNE -2(PC)
// ISYNC
// return old val
-
{name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
{name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
@@ -643,15 +640,16 @@ func init() {
{name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
{name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
- // atomic 8 and/or.
+ // atomic 8/32 and/or.
// *arg0 &= (|=) arg1. arg2=mem. returns memory. auxint must be zero.
- // LBAR (Rarg0), Rtmp
+ // LBAR/LWAT (Rarg0), Rtmp
// AND/OR Rarg1, Rtmp
- // STBCCC Rtmp, (Rarg0), Rtmp
+ // STBCCC/STWCCC Rtmp, (Rarg0), Rtmp
// BNE Rtmp, -3(PC)
-
{name: "LoweredAtomicAnd8", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
+ {name: "LoweredAtomicAnd32", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
{name: "LoweredAtomicOr8", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
+ {name: "LoweredAtomicOr32", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
// It preserves R0 through R17 (except special registers R1, R2, R11, R12, R13), g, and its arguments R20 and R21,
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index f86210e631..5afb4abf5c 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -2026,7 +2026,9 @@ const (
OpPPC64LoweredAtomicCas64
OpPPC64LoweredAtomicCas32
OpPPC64LoweredAtomicAnd8
+ OpPPC64LoweredAtomicAnd32
OpPPC64LoweredAtomicOr8
+ OpPPC64LoweredAtomicOr32
OpPPC64LoweredWB
OpPPC64LoweredPanicBoundsA
OpPPC64LoweredPanicBoundsB
@@ -27022,6 +27024,19 @@ var opcodeTable = [...]opInfo{
},
},
},
+ {
+ name: "LoweredAtomicAnd32",
+ argLen: 3,
+ faultOnNilArg0: true,
+ hasSideEffects: true,
+ asm: ppc64.AAND,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ {1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ },
+ },
{
name: "LoweredAtomicOr8",
argLen: 3,
@@ -27035,6 +27050,19 @@ var opcodeTable = [...]opInfo{
},
},
},
+ {
+ name: "LoweredAtomicOr32",
+ argLen: 3,
+ faultOnNilArg0: true,
+ hasSideEffects: true,
+ asm: ppc64.AOR,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ {1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ },
+ },
{
name: "LoweredWB",
auxType: auxSym,
diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go
index a820bc0c4e..84938fe27a 100644
--- a/src/cmd/compile/internal/ssa/rewritePPC64.go
+++ b/src/cmd/compile/internal/ssa/rewritePPC64.go
@@ -59,6 +59,9 @@ func rewriteValuePPC64(v *Value) bool {
case OpAtomicAdd64:
v.Op = OpPPC64LoweredAtomicAdd64
return true
+ case OpAtomicAnd32:
+ v.Op = OpPPC64LoweredAtomicAnd32
+ return true
case OpAtomicAnd8:
v.Op = OpPPC64LoweredAtomicAnd8
return true
@@ -86,6 +89,9 @@ func rewriteValuePPC64(v *Value) bool {
return rewriteValuePPC64_OpAtomicLoadAcq64(v)
case OpAtomicLoadPtr:
return rewriteValuePPC64_OpAtomicLoadPtr(v)
+ case OpAtomicOr32:
+ v.Op = OpPPC64LoweredAtomicOr32
+ return true
case OpAtomicOr8:
v.Op = OpPPC64LoweredAtomicOr8
return true
--
GitLab
From 646531c52aab82a11216f8c79c0ad0e2382a943f Mon Sep 17 00:00:00 2001
From: Michael Pratt
Date: Fri, 16 Oct 2020 17:29:00 -0400
Subject: [PATCH 017/253] cmd/compile: intrinsify
runtime/internal/atomic.{And,Or} on S390X
This is a simplification of LANfloor/LAOfloor since we have a whole
word.
Change-Id: I791641fb4068cad3f73660ce51699ed4653ae0e6
Reviewed-on: https://go-review.googlesource.com/c/go/+/263151
Run-TryBot: Michael Pratt
TryBot-Result: Go Bot
Trust: Michael Pratt
Reviewed-by: Cherry Zhang
---
src/cmd/compile/internal/gc/ssa.go | 4 +--
src/cmd/compile/internal/s390x/ssa.go | 8 +++++
src/cmd/compile/internal/ssa/gen/S390X.rules | 3 ++
src/cmd/compile/internal/ssa/gen/S390XOps.go | 4 ++-
src/cmd/compile/internal/ssa/opGen.go | 38 +++++++++++++++++---
src/cmd/compile/internal/ssa/rewriteS390X.go | 6 ++++
6 files changed, 55 insertions(+), 8 deletions(-)
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 2b64b358ed..e70e5a969b 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -3546,7 +3546,7 @@ func init() {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem())
return nil
},
- sys.AMD64, sys.ARM64, sys.PPC64) // TODO: same arches as And8.
+ sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X) // TODO: same arches as And8.
addF("runtime/internal/atomic", "Or8",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem())
@@ -3558,7 +3558,7 @@ func init() {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem())
return nil
},
- sys.AMD64, sys.ARM64, sys.PPC64) // TODO: same arches as And8.
+ sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X) // TODO: same arches as And8.
alias("runtime/internal/atomic", "Loadint64", "runtime/internal/atomic", "Load64", all...)
alias("runtime/internal/atomic", "Xaddint64", "runtime/internal/atomic", "Xadd64", all...)
diff --git a/src/cmd/compile/internal/s390x/ssa.go b/src/cmd/compile/internal/s390x/ssa.go
index 00d253c95a..e23b31f385 100644
--- a/src/cmd/compile/internal/s390x/ssa.go
+++ b/src/cmd/compile/internal/s390x/ssa.go
@@ -761,6 +761,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
gc.AddAux(&p.To, v)
+ case ssa.OpS390XLAN, ssa.OpS390XLAO:
+ // LA(N|O) Ry, TMP, 0(Rx)
+ op := s.Prog(v.Op.Asm())
+ op.From.Type = obj.TYPE_REG
+ op.From.Reg = v.Args[1].Reg()
+ op.Reg = s390x.REGTMP
+ op.To.Type = obj.TYPE_MEM
+ op.To.Reg = v.Args[0].Reg()
case ssa.OpS390XLANfloor, ssa.OpS390XLAOfloor:
r := v.Args[0].Reg() // clobbered, assumed R1 in comments
diff --git a/src/cmd/compile/internal/ssa/gen/S390X.rules b/src/cmd/compile/internal/ssa/gen/S390X.rules
index e564f638d3..2d6f091a4e 100644
--- a/src/cmd/compile/internal/ssa/gen/S390X.rules
+++ b/src/cmd/compile/internal/ssa/gen/S390X.rules
@@ -198,6 +198,9 @@
(RXSBG {s390x.NewRotateParams(59, 60, 3)} (MOVDconst [3<<3]) ptr))
mem)
+(AtomicAnd32 ...) => (LAN ...)
+(AtomicOr32 ...) => (LAO ...)
+
// Lowering extension
// Note: we always extend to 64 bits even though some ops don't need that many result bits.
(SignExt8to(16|32|64) ...) => (MOVBreg ...)
diff --git a/src/cmd/compile/internal/ssa/gen/S390XOps.go b/src/cmd/compile/internal/ssa/gen/S390XOps.go
index 417b33cf91..728cfb5508 100644
--- a/src/cmd/compile/internal/ssa/gen/S390XOps.go
+++ b/src/cmd/compile/internal/ssa/gen/S390XOps.go
@@ -547,8 +547,10 @@ func init() {
// Atomic bitwise operations.
// Note: 'floor' operations round the pointer down to the nearest word boundary
// which reflects how they are used in the runtime.
- {name: "LAOfloor", argLength: 3, reg: gpstorelab, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *(floor(arg0, 4)) |= arg1. arg2 = mem.
+ {name: "LAN", argLength: 3, reg: gpstore, asm: "LAN", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *arg0 &= arg1. arg2 = mem.
{name: "LANfloor", argLength: 3, reg: gpstorelab, asm: "LAN", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *(floor(arg0, 4)) &= arg1. arg2 = mem.
+ {name: "LAO", argLength: 3, reg: gpstore, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *arg0 |= arg1. arg2 = mem.
+ {name: "LAOfloor", argLength: 3, reg: gpstorelab, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *(floor(arg0, 4)) |= arg1. arg2 = mem.
// Compare and swap.
// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory.
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 5afb4abf5c..a4938a4992 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -2374,8 +2374,10 @@ const (
OpS390XLAAG
OpS390XAddTupleFirst32
OpS390XAddTupleFirst64
- OpS390XLAOfloor
+ OpS390XLAN
OpS390XLANfloor
+ OpS390XLAO
+ OpS390XLAOfloor
OpS390XLoweredAtomicCas32
OpS390XLoweredAtomicCas64
OpS390XLoweredAtomicExchange32
@@ -31905,11 +31907,24 @@ var opcodeTable = [...]opInfo{
reg: regInfo{},
},
{
- name: "LAOfloor",
+ name: "LAN",
argLen: 3,
clobberFlags: true,
hasSideEffects: true,
- asm: s390x.ALAO,
+ asm: s390x.ALAN,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4295023614}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14 SP SB
+ {1, 56319}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14 SP
+ },
+ },
+ },
+ {
+ name: "LANfloor",
+ argLen: 3,
+ clobberFlags: true,
+ hasSideEffects: true,
+ asm: s390x.ALAN,
reg: regInfo{
inputs: []inputInfo{
{0, 2}, // R1
@@ -31919,11 +31934,24 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "LANfloor",
+ name: "LAO",
argLen: 3,
clobberFlags: true,
hasSideEffects: true,
- asm: s390x.ALAN,
+ asm: s390x.ALAO,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4295023614}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14 SP SB
+ {1, 56319}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14 SP
+ },
+ },
+ },
+ {
+ name: "LAOfloor",
+ argLen: 3,
+ clobberFlags: true,
+ hasSideEffects: true,
+ asm: s390x.ALAO,
reg: regInfo{
inputs: []inputInfo{
{0, 2}, // R1
diff --git a/src/cmd/compile/internal/ssa/rewriteS390X.go b/src/cmd/compile/internal/ssa/rewriteS390X.go
index 78a57c2388..35b691c12d 100644
--- a/src/cmd/compile/internal/ssa/rewriteS390X.go
+++ b/src/cmd/compile/internal/ssa/rewriteS390X.go
@@ -49,6 +49,9 @@ func rewriteValueS390X(v *Value) bool {
return rewriteValueS390X_OpAtomicAdd32(v)
case OpAtomicAdd64:
return rewriteValueS390X_OpAtomicAdd64(v)
+ case OpAtomicAnd32:
+ v.Op = OpS390XLAN
+ return true
case OpAtomicAnd8:
return rewriteValueS390X_OpAtomicAnd8(v)
case OpAtomicCompareAndSwap32:
@@ -69,6 +72,9 @@ func rewriteValueS390X(v *Value) bool {
return rewriteValueS390X_OpAtomicLoadAcq32(v)
case OpAtomicLoadPtr:
return rewriteValueS390X_OpAtomicLoadPtr(v)
+ case OpAtomicOr32:
+ v.Op = OpS390XLAO
+ return true
case OpAtomicOr8:
return rewriteValueS390X_OpAtomicOr8(v)
case OpAtomicStore32:
--
GitLab
From 4f597abe77338588011a5b91ffefb0f7e11aa868 Mon Sep 17 00:00:00 2001
From: Meng Zhuo
Date: Thu, 22 Oct 2020 14:03:11 +0800
Subject: [PATCH 018/253] internal/bytealg: improve mips64x equal on large size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
name old time/op new time/op delta
Equal/0 9.94ns ± 4% 9.12ns ± 5% -8.26% (p=0.000 n=10+10)
Equal/1 24.5ns ± 0% 27.2ns ± 1% +11.22% (p=0.000 n=9+10)
Equal/6 28.1ns ± 0% 32.1ns ± 1% +14.20% (p=0.000 n=8+10)
Equal/9 37.1ns ± 0% 37.8ns ± 1% +1.95% (p=0.000 n=8+9)
Equal/15 47.3ns ± 0% 44.3ns ± 0% -6.34% (p=0.000 n=9+10)
Equal/16 42.9ns ± 0% 24.6ns ± 0% -42.66% (p=0.000 n=10+7)
Equal/20 44.3ns ± 0% 57.4ns ± 0% +29.57% (p=0.000 n=9+10)
Equal/32 63.2ns ± 0% 35.8ns ± 0% -43.35% (p=0.000 n=10+10)
Equal/4K 6.49µs ± 0% 0.50µs ± 0% -92.27% (p=0.000 n=10+8)
Equal/4M 6.70ms ± 0% 0.48ms ± 0% -92.78% (p=0.000 n=8+10)
Equal/64M 110ms ± 0% 8ms ± 0% -92.65% (p=0.000 n=9+9)
CompareBytesEqual 36.6ns ± 0% 35.9ns ± 0% -1.83% (p=0.000 n=10+9)
name old speed new speed delta
Equal/1 40.8MB/s ± 0% 36.7MB/s ± 0% -10.16% (p=0.000 n=10+10)
Equal/6 213MB/s ± 0% 187MB/s ± 1% -12.32% (p=0.000 n=10+10)
Equal/9 243MB/s ± 0% 238MB/s ± 1% -1.94% (p=0.000 n=9+10)
Equal/15 317MB/s ± 0% 339MB/s ± 0% +6.86% (p=0.000 n=9+9)
Equal/16 373MB/s ± 0% 651MB/s ± 0% +74.70% (p=0.000 n=8+10)
Equal/20 452MB/s ± 0% 348MB/s ± 0% -22.90% (p=0.000 n=8+10)
Equal/32 506MB/s ± 0% 893MB/s ± 0% +76.53% (p=0.000 n=10+9)
Equal/4K 631MB/s ± 0% 8166MB/s ± 0% +1194.73% (p=0.000 n=10+10)
Equal/4M 626MB/s ± 0% 8673MB/s ± 0% +1284.94% (p=0.000 n=8+10)
Equal/64M 608MB/s ± 0% 8277MB/s ± 0% +1260.83% (p=0.000 n=9+9)
Change-Id: I1cd14ade16390a5097a8d4e9721d5e822fa6218f
Reviewed-on: https://go-review.googlesource.com/c/go/+/199597
Run-TryBot: Meng Zhuo
TryBot-Result: Go Bot
Reviewed-by: Cherry Zhang
Trust: Meng Zhuo
---
src/internal/bytealg/equal_mips64x.s | 72 ++++++++++++++++++++++++++--
1 file changed, 68 insertions(+), 4 deletions(-)
diff --git a/src/internal/bytealg/equal_mips64x.s b/src/internal/bytealg/equal_mips64x.s
index 58dc4303b4..641e3ff06c 100644
--- a/src/internal/bytealg/equal_mips64x.s
+++ b/src/internal/bytealg/equal_mips64x.s
@@ -16,18 +16,82 @@ TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
BEQ R1, R2, eq
MOVV size+16(FP), R3
ADDV R1, R3, R4
-loop:
- BNE R1, R4, test
+
+ // chunk size is 16
+ SGTU $16, R3, R8
+ BEQ R0, R8, chunk_entry
+
+byte_loop:
+ BNE R1, R4, byte_test
MOVV $1, R1
MOVB R1, ret+24(FP)
RET
-test:
+byte_test:
MOVBU (R1), R6
ADDV $1, R1
MOVBU (R2), R7
ADDV $1, R2
- BEQ R6, R7, loop
+ BEQ R6, R7, byte_loop
+ JMP not_eq
+
+chunk_entry:
+ // make sure both a and b are aligned
+ OR R1, R2, R9
+ AND $0x7, R9
+ BNE R0, R9, byte_loop
+ JMP chunk_loop_1
+
+chunk_loop:
+ // chunk size is 16
+ SGTU $16, R3, R8
+ BNE R0, R8, chunk_tail_8
+chunk_loop_1:
+ MOVV (R1), R6
+ MOVV (R2), R7
+ BNE R6, R7, not_eq
+ MOVV 8(R1), R12
+ MOVV 8(R2), R13
+ ADDV $16, R1
+ ADDV $16, R2
+ SUBV $16, R3
+ BEQ R12, R13, chunk_loop
+ JMP not_eq
+
+chunk_tail_8:
+ AND $8, R3, R14
+ BEQ R0, R14, chunk_tail_4
+ MOVV (R1), R6
+ MOVV (R2), R7
+ BNE R6, R7, not_eq
+ ADDV $8, R1
+ ADDV $8, R2
+
+chunk_tail_4:
+ AND $4, R3, R14
+ BEQ R0, R14, chunk_tail_2
+ MOVWU (R1), R6
+ MOVWU (R2), R7
+ BNE R6, R7, not_eq
+ ADDV $4, R1
+ ADDV $4, R2
+
+chunk_tail_2:
+ AND $2, R3, R14
+ BEQ R0, R14, chunk_tail_1
+ MOVHU (R1), R6
+ MOVHU (R2), R7
+ BNE R6, R7, not_eq
+ ADDV $2, R1
+ ADDV $2, R2
+
+chunk_tail_1:
+ AND $1, R3, R14
+ BEQ R0, R14, eq
+ MOVBU (R1), R6
+ MOVBU (R2), R7
+ BEQ R6, R7, eq
+not_eq:
MOVB R0, ret+24(FP)
RET
eq:
--
GitLab
From 7e25bdba5e79d223566745fca2410f725a6dedda Mon Sep 17 00:00:00 2001
From: Meng Zhuo
Date: Fri, 16 Oct 2020 09:19:00 +0800
Subject: [PATCH 019/253] cmd/link: use xcode strip for macho combine dwarf
The GNU strip will shrink text section while xcodetool strip don't.
We have to use xcodetool strip from system explicitly.
Fixes #41967
Change-Id: Ida372869e0ebc9e93f883640b1614836cea3672f
Reviewed-on: https://go-review.googlesource.com/c/go/+/262398
Run-TryBot: Cherry Zhang
TryBot-Result: Go Bot
Reviewed-by: Cherry Zhang
Trust: Meng Zhuo
---
src/cmd/link/internal/ld/lib.go | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/cmd/link/internal/ld/lib.go b/src/cmd/link/internal/ld/lib.go
index a68725bef9..aaf443903c 100644
--- a/src/cmd/link/internal/ld/lib.go
+++ b/src/cmd/link/internal/ld/lib.go
@@ -1614,12 +1614,12 @@ func (ctxt *Link) hostlink() {
if combineDwarf {
dsym := filepath.Join(*flagTmpdir, "go.dwarf")
- if out, err := exec.Command("dsymutil", "-f", *flagOutfile, "-o", dsym).CombinedOutput(); err != nil {
+ if out, err := exec.Command("xcrun", "dsymutil", "-f", *flagOutfile, "-o", dsym).CombinedOutput(); err != nil {
Exitf("%s: running dsymutil failed: %v\n%s", os.Args[0], err, out)
}
// Remove STAB (symbolic debugging) symbols after we are done with them (by dsymutil).
// They contain temporary file paths and make the build not reproducible.
- if out, err := exec.Command("strip", "-S", *flagOutfile).CombinedOutput(); err != nil {
+ if out, err := exec.Command("xcrun", "strip", "-S", *flagOutfile).CombinedOutput(); err != nil {
Exitf("%s: running strip failed: %v\n%s", os.Args[0], err, out)
}
// Skip combining if `dsymutil` didn't generate a file. See #11994.
--
GitLab
From e313fd7448ed0dabf98dc725bee2361e905f208b Mon Sep 17 00:00:00 2001
From: Michael Pratt
Date: Thu, 9 Jul 2020 16:20:48 -0400
Subject: [PATCH 020/253] runtime: drop unused work.ndone field
This field is unused since golang.org/cl/134785 and thus can be
trivially removed.
Change-Id: I1a87f8e78ffdf662440409404f0251c40bc56a4f
Reviewed-on: https://go-review.googlesource.com/c/go/+/241741
Trust: Michael Pratt
Run-TryBot: Michael Pratt
TryBot-Result: Go Bot
Reviewed-by: Michael Knyszek
---
src/runtime/mgc.go | 1 -
1 file changed, 1 deletion(-)
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 0a4d5616a5..65ac654b14 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -983,7 +983,6 @@ var work struct {
nproc uint32
tstart int64
nwait uint32
- ndone uint32
// Number of roots of various root types. Set by gcMarkRootPrepare.
nFlushCacheRoots int
--
GitLab
From e5ad73508e5ab5cadfba25e25d6cc3b025865e29 Mon Sep 17 00:00:00 2001
From: Michael Pratt
Date: Fri, 16 Oct 2020 16:49:56 -0400
Subject: [PATCH 021/253] cmd/compile: intrinsify
runtime/internal/atomic.{And,Or} on MIPS
This one is trivial, as there are already 32-bit AND and OR ops used to
implement the more complex 8-bit versions.
Change-Id: Ic48a53ea291d0067ebeab8e96c82e054daf20ae7
Reviewed-on: https://go-review.googlesource.com/c/go/+/263149
Run-TryBot: Michael Pratt
TryBot-Result: Go Bot
Trust: Michael Pratt
Reviewed-by: Cherry Zhang
---
src/cmd/compile/internal/gc/ssa.go | 4 ++--
src/cmd/compile/internal/ssa/gen/MIPS.rules | 3 +++
src/cmd/compile/internal/ssa/rewriteMIPS.go | 6 ++++++
3 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index e70e5a969b..979a092ba1 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -3546,7 +3546,7 @@ func init() {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem())
return nil
},
- sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X) // TODO: same arches as And8.
+ sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
addF("runtime/internal/atomic", "Or8",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem())
@@ -3558,7 +3558,7 @@ func init() {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem())
return nil
},
- sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X) // TODO: same arches as And8.
+ sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
alias("runtime/internal/atomic", "Loadint64", "runtime/internal/atomic", "Load64", all...)
alias("runtime/internal/atomic", "Xaddint64", "runtime/internal/atomic", "Xadd64", all...)
diff --git a/src/cmd/compile/internal/ssa/gen/MIPS.rules b/src/cmd/compile/internal/ssa/gen/MIPS.rules
index 96feaf9234..246b4ebdbc 100644
--- a/src/cmd/compile/internal/ssa/gen/MIPS.rules
+++ b/src/cmd/compile/internal/ssa/gen/MIPS.rules
@@ -383,6 +383,9 @@
(ANDconst [3]
(XORconst [3] ptr)))))) mem)
+(AtomicAnd32 ...) => (LoweredAtomicAnd ...)
+(AtomicOr32 ...) => (LoweredAtomicOr ...)
+
// checks
(NilCheck ...) => (LoweredNilCheck ...)
diff --git a/src/cmd/compile/internal/ssa/rewriteMIPS.go b/src/cmd/compile/internal/ssa/rewriteMIPS.go
index bdafa9a957..87d1aa378f 100644
--- a/src/cmd/compile/internal/ssa/rewriteMIPS.go
+++ b/src/cmd/compile/internal/ssa/rewriteMIPS.go
@@ -44,6 +44,9 @@ func rewriteValueMIPS(v *Value) bool {
case OpAtomicAdd32:
v.Op = OpMIPSLoweredAtomicAdd
return true
+ case OpAtomicAnd32:
+ v.Op = OpMIPSLoweredAtomicAnd
+ return true
case OpAtomicAnd8:
return rewriteValueMIPS_OpAtomicAnd8(v)
case OpAtomicCompareAndSwap32:
@@ -61,6 +64,9 @@ func rewriteValueMIPS(v *Value) bool {
case OpAtomicLoadPtr:
v.Op = OpMIPSLoweredAtomicLoad32
return true
+ case OpAtomicOr32:
+ v.Op = OpMIPSLoweredAtomicOr
+ return true
case OpAtomicOr8:
return rewriteValueMIPS_OpAtomicOr8(v)
case OpAtomicStore32:
--
GitLab
From ad642727247383079c8546ca365172859641a800 Mon Sep 17 00:00:00 2001
From: Michael Pratt
Date: Tue, 25 Aug 2020 12:34:02 -0400
Subject: [PATCH 022/253] runtime: rename pageAlloc receiver
The history of pageAlloc using 's' as a receiver are lost to the depths
of time (perhaps it used to be called summary?), but it doesn't make
much sense anymore. Rename it to 'p'.
Generated with:
$ cd src/runtime
$ grep -R -b "func (s \*pageAlloc" . | awk -F : '{ print $1 ":#" $2+6 }' | xargs -n 1 -I {} env GOROOT=$(pwd)/../../ gorename -offset {} -to p -v
$ grep -R -b "func (s \*pageAlloc" . | awk -F : '{ print $1 ":#" $2+6 }' | xargs -n 1 -I {} env GOROOT=$(pwd)/../../ GOARCH=386 gorename -offset {} -to p -v
$ GOROOT=$(pwd)/../../ gorename -offset mpagecache.go:#2397 -to p -v
($2+6 to advance past "func (".)
Plus manual comment fixups.
Change-Id: I2d521a1cbf6ebe2ef6aae92e654bfc33c63d1aa9
Reviewed-on: https://go-review.googlesource.com/c/go/+/250517
Trust: Michael Pratt
Run-TryBot: Michael Pratt
TryBot-Result: Go Bot
Reviewed-by: Michael Knyszek
---
src/runtime/mgcscavenge.go | 102 ++++++++--------
src/runtime/mpagealloc.go | 200 ++++++++++++++++----------------
src/runtime/mpagealloc_32bit.go | 14 +--
src/runtime/mpagealloc_64bit.go | 30 ++---
src/runtime/mpagecache.go | 42 +++----
5 files changed, 194 insertions(+), 194 deletions(-)
diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go
index 9d6f551768..34646828e5 100644
--- a/src/runtime/mgcscavenge.go
+++ b/src/runtime/mgcscavenge.go
@@ -390,13 +390,13 @@ func bgscavenge(c chan int) {
//
// Returns the amount of memory scavenged in bytes.
//
-// s.mheapLock must be held, but may be temporarily released if
+// p.mheapLock must be held, but may be temporarily released if
// mayUnlock == true.
//
-// Must run on the system stack because s.mheapLock must be held.
+// Must run on the system stack because p.mheapLock must be held.
//
//go:systemstack
-func (s *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr {
+func (p *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr {
var (
addrs addrRange
gen uint32
@@ -404,17 +404,17 @@ func (s *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr {
released := uintptr(0)
for released < nbytes {
if addrs.size() == 0 {
- if addrs, gen = s.scavengeReserve(); addrs.size() == 0 {
+ if addrs, gen = p.scavengeReserve(); addrs.size() == 0 {
break
}
}
- r, a := s.scavengeOne(addrs, nbytes-released, mayUnlock)
+ r, a := p.scavengeOne(addrs, nbytes-released, mayUnlock)
released += r
addrs = a
}
// Only unreserve the space which hasn't been scavenged or searched
// to ensure we always make progress.
- s.scavengeUnreserve(addrs, gen)
+ p.scavengeUnreserve(addrs, gen)
return released
}
@@ -440,46 +440,46 @@ func printScavTrace(gen uint32, released uintptr, forced bool) {
// scavengeStartGen starts a new scavenge generation, resetting
// the scavenger's search space to the full in-use address space.
//
-// s.mheapLock must be held.
+// p.mheapLock must be held.
//
-// Must run on the system stack because s.mheapLock must be held.
+// Must run on the system stack because p.mheapLock must be held.
//
//go:systemstack
-func (s *pageAlloc) scavengeStartGen() {
+func (p *pageAlloc) scavengeStartGen() {
if debug.scavtrace > 0 {
- printScavTrace(s.scav.gen, s.scav.released, false)
+ printScavTrace(p.scav.gen, p.scav.released, false)
}
- s.inUse.cloneInto(&s.scav.inUse)
+ p.inUse.cloneInto(&p.scav.inUse)
// Pick the new starting address for the scavenger cycle.
var startAddr offAddr
- if s.scav.scavLWM.lessThan(s.scav.freeHWM) {
+ if p.scav.scavLWM.lessThan(p.scav.freeHWM) {
// The "free" high watermark exceeds the "scavenged" low watermark,
// so there are free scavengable pages in parts of the address space
// that the scavenger already searched, the high watermark being the
// highest one. Pick that as our new starting point to ensure we
// see those pages.
- startAddr = s.scav.freeHWM
+ startAddr = p.scav.freeHWM
} else {
// The "free" high watermark does not exceed the "scavenged" low
// watermark. This means the allocator didn't free any memory in
// the range we scavenged last cycle, so we might as well continue
// scavenging from where we were.
- startAddr = s.scav.scavLWM
+ startAddr = p.scav.scavLWM
}
- s.scav.inUse.removeGreaterEqual(startAddr.addr())
+ p.scav.inUse.removeGreaterEqual(startAddr.addr())
- // reservationBytes may be zero if s.inUse.totalBytes is small, or if
+ // reservationBytes may be zero if p.inUse.totalBytes is small, or if
// scavengeReservationShards is large. This case is fine as the scavenger
// will simply be turned off, but it does mean that scavengeReservationShards,
// in concert with pallocChunkBytes, dictates the minimum heap size at which
// the scavenger triggers. In practice this minimum is generally less than an
// arena in size, so virtually every heap has the scavenger on.
- s.scav.reservationBytes = alignUp(s.inUse.totalBytes, pallocChunkBytes) / scavengeReservationShards
- s.scav.gen++
- s.scav.released = 0
- s.scav.freeHWM = minOffAddr
- s.scav.scavLWM = maxOffAddr
+ p.scav.reservationBytes = alignUp(p.inUse.totalBytes, pallocChunkBytes) / scavengeReservationShards
+ p.scav.gen++
+ p.scav.released = 0
+ p.scav.freeHWM = minOffAddr
+ p.scav.scavLWM = maxOffAddr
}
// scavengeReserve reserves a contiguous range of the address space
@@ -489,19 +489,19 @@ func (s *pageAlloc) scavengeStartGen() {
//
// Returns the reserved range and the scavenge generation number for it.
//
-// s.mheapLock must be held.
+// p.mheapLock must be held.
//
-// Must run on the system stack because s.mheapLock must be held.
+// Must run on the system stack because p.mheapLock must be held.
//
//go:systemstack
-func (s *pageAlloc) scavengeReserve() (addrRange, uint32) {
+func (p *pageAlloc) scavengeReserve() (addrRange, uint32) {
// Start by reserving the minimum.
- r := s.scav.inUse.removeLast(s.scav.reservationBytes)
+ r := p.scav.inUse.removeLast(p.scav.reservationBytes)
// Return early if the size is zero; we don't want to use
// the bogus address below.
if r.size() == 0 {
- return r, s.scav.gen
+ return r, p.scav.gen
}
// The scavenger requires that base be aligned to a
@@ -511,27 +511,27 @@ func (s *pageAlloc) scavengeReserve() (addrRange, uint32) {
newBase := alignDown(r.base.addr(), pallocChunkBytes)
// Remove from inUse however much extra we just pulled out.
- s.scav.inUse.removeGreaterEqual(newBase)
+ p.scav.inUse.removeGreaterEqual(newBase)
r.base = offAddr{newBase}
- return r, s.scav.gen
+ return r, p.scav.gen
}
// scavengeUnreserve returns an unscavenged portion of a range that was
// previously reserved with scavengeReserve.
//
-// s.mheapLock must be held.
+// p.mheapLock must be held.
//
-// Must run on the system stack because s.mheapLock must be held.
+// Must run on the system stack because p.mheapLock must be held.
//
//go:systemstack
-func (s *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) {
- if r.size() == 0 || gen != s.scav.gen {
+func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) {
+ if r.size() == 0 || gen != p.scav.gen {
return
}
if r.base.addr()%pallocChunkBytes != 0 {
throw("unreserving unaligned region")
}
- s.scav.inUse.add(r)
+ p.scav.inUse.add(r)
}
// scavengeOne walks over address range work until it finds
@@ -545,13 +545,13 @@ func (s *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) {
//
// work's base address must be aligned to pallocChunkBytes.
//
-// s.mheapLock must be held, but may be temporarily released if
+// p.mheapLock must be held, but may be temporarily released if
// mayUnlock == true.
//
-// Must run on the system stack because s.mheapLock must be held.
+// Must run on the system stack because p.mheapLock must be held.
//
//go:systemstack
-func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (uintptr, addrRange) {
+func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (uintptr, addrRange) {
// Defensively check if we've recieved an empty address range.
// If so, just return.
if work.size() == 0 {
@@ -586,12 +586,12 @@ func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
// Helpers for locking and unlocking only if mayUnlock == true.
lockHeap := func() {
if mayUnlock {
- lock(s.mheapLock)
+ lock(p.mheapLock)
}
}
unlockHeap := func() {
if mayUnlock {
- unlock(s.mheapLock)
+ unlock(p.mheapLock)
}
}
@@ -602,14 +602,14 @@ func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
// by subtracting 1.
maxAddr := work.limit.addr() - 1
maxChunk := chunkIndex(maxAddr)
- if s.summary[len(s.summary)-1][maxChunk].max() >= uint(minPages) {
+ if p.summary[len(p.summary)-1][maxChunk].max() >= uint(minPages) {
// We only bother looking for a candidate if there at least
// minPages free pages at all.
- base, npages := s.chunkOf(maxChunk).findScavengeCandidate(chunkPageIndex(maxAddr), minPages, maxPages)
+ base, npages := p.chunkOf(maxChunk).findScavengeCandidate(chunkPageIndex(maxAddr), minPages, maxPages)
// If we found something, scavenge it and return!
if npages != 0 {
- work.limit = offAddr{s.scavengeRangeLocked(maxChunk, base, npages)}
+ work.limit = offAddr{p.scavengeRangeLocked(maxChunk, base, npages)}
return uintptr(npages) * pageSize, work
}
}
@@ -631,7 +631,7 @@ func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
// that's fine. We're being optimistic anyway.
// Check quickly if there are enough free pages at all.
- if s.summary[len(s.summary)-1][i].max() < uint(minPages) {
+ if p.summary[len(p.summary)-1][i].max() < uint(minPages) {
continue
}
@@ -641,7 +641,7 @@ func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
// avoid races with heap growth. It may or may not be possible to also
// see a nil pointer in this case if we do race with heap growth, but
// just defensively ignore the nils. This operation is optimistic anyway.
- l2 := (*[1 << pallocChunksL2Bits]pallocData)(atomic.Loadp(unsafe.Pointer(&s.chunks[i.l1()])))
+ l2 := (*[1 << pallocChunksL2Bits]pallocData)(atomic.Loadp(unsafe.Pointer(&p.chunks[i.l1()])))
if l2 != nil && l2[i.l2()].hasScavengeCandidate(minPages) {
return i, true
}
@@ -670,10 +670,10 @@ func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
}
// Find, verify, and scavenge if we can.
- chunk := s.chunkOf(candidateChunkIdx)
+ chunk := p.chunkOf(candidateChunkIdx)
base, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, maxPages)
if npages > 0 {
- work.limit = offAddr{s.scavengeRangeLocked(candidateChunkIdx, base, npages)}
+ work.limit = offAddr{p.scavengeRangeLocked(candidateChunkIdx, base, npages)}
return uintptr(npages) * pageSize, work
}
@@ -690,21 +690,21 @@ func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
//
// Returns the base address of the scavenged region.
//
-// s.mheapLock must be held.
-func (s *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr {
- s.chunkOf(ci).scavenged.setRange(base, npages)
+// p.mheapLock must be held.
+func (p *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr {
+ p.chunkOf(ci).scavenged.setRange(base, npages)
// Compute the full address for the start of the range.
addr := chunkBase(ci) + uintptr(base)*pageSize
// Update the scavenge low watermark.
- if oAddr := (offAddr{addr}); oAddr.lessThan(s.scav.scavLWM) {
- s.scav.scavLWM = oAddr
+ if oAddr := (offAddr{addr}); oAddr.lessThan(p.scav.scavLWM) {
+ p.scav.scavLWM = oAddr
}
// Only perform the actual scavenging if we're not in a test.
// It's dangerous to do so otherwise.
- if s.test {
+ if p.test {
return addr
}
sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize)
diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go
index c90a6378bd..560babed03 100644
--- a/src/runtime/mpagealloc.go
+++ b/src/runtime/mpagealloc.go
@@ -299,7 +299,7 @@ type pageAlloc struct {
test bool
}
-func (s *pageAlloc) init(mheapLock *mutex, sysStat *uint64) {
+func (p *pageAlloc) init(mheapLock *mutex, sysStat *uint64) {
if levelLogPages[0] > logMaxPackedValue {
// We can't represent 1< s.end {
- s.end = end
+ if end > p.end {
+ p.end = end
}
// Note that [base, limit) will never overlap with any existing
// range inUse because grow only ever adds never-used memory
// regions to the page allocator.
- s.inUse.add(makeAddrRange(base, limit))
+ p.inUse.add(makeAddrRange(base, limit))
// A grow operation is a lot like a free operation, so if our
- // chunk ends up below s.searchAddr, update s.searchAddr to the
+ // chunk ends up below p.searchAddr, update p.searchAddr to the
// new address, just like in free.
- if b := (offAddr{base}); b.lessThan(s.searchAddr) {
- s.searchAddr = b
+ if b := (offAddr{base}); b.lessThan(p.searchAddr) {
+ p.searchAddr = b
}
// Add entries into chunks, which is sparse, if needed. Then,
@@ -387,21 +387,21 @@ func (s *pageAlloc) grow(base, size uintptr) {
// Newly-grown memory is always considered scavenged.
// Set all the bits in the scavenged bitmaps high.
for c := chunkIndex(base); c < chunkIndex(limit); c++ {
- if s.chunks[c.l1()] == nil {
+ if p.chunks[c.l1()] == nil {
// Create the necessary l2 entry.
//
// Store it atomically to avoid races with readers which
// don't acquire the heap lock.
- r := sysAlloc(unsafe.Sizeof(*s.chunks[0]), s.sysStat)
- atomic.StorepNoWB(unsafe.Pointer(&s.chunks[c.l1()]), r)
+ r := sysAlloc(unsafe.Sizeof(*p.chunks[0]), p.sysStat)
+ atomic.StorepNoWB(unsafe.Pointer(&p.chunks[c.l1()]), r)
}
- s.chunkOf(c).scavenged.setRange(0, pallocChunkPages)
+ p.chunkOf(c).scavenged.setRange(0, pallocChunkPages)
}
// Update summaries accordingly. The grow acts like a free, so
// we need to ensure this newly-free memory is visible in the
// summaries.
- s.update(base, size/pageSize, true, false)
+ p.update(base, size/pageSize, true, false)
}
// update updates heap metadata. It must be called each time the bitmap
@@ -411,8 +411,8 @@ func (s *pageAlloc) grow(base, size uintptr) {
// a contiguous allocation or free between addr and addr+npages. alloc indicates
// whether the operation performed was an allocation or a free.
//
-// s.mheapLock must be held.
-func (s *pageAlloc) update(base, npages uintptr, contig, alloc bool) {
+// p.mheapLock must be held.
+func (p *pageAlloc) update(base, npages uintptr, contig, alloc bool) {
// base, limit, start, and end are inclusive.
limit := base + npages*pageSize - 1
sc, ec := chunkIndex(base), chunkIndex(limit)
@@ -421,23 +421,23 @@ func (s *pageAlloc) update(base, npages uintptr, contig, alloc bool) {
if sc == ec {
// Fast path: the allocation doesn't span more than one chunk,
// so update this one and if the summary didn't change, return.
- x := s.summary[len(s.summary)-1][sc]
- y := s.chunkOf(sc).summarize()
+ x := p.summary[len(p.summary)-1][sc]
+ y := p.chunkOf(sc).summarize()
if x == y {
return
}
- s.summary[len(s.summary)-1][sc] = y
+ p.summary[len(p.summary)-1][sc] = y
} else if contig {
// Slow contiguous path: the allocation spans more than one chunk
// and at least one summary is guaranteed to change.
- summary := s.summary[len(s.summary)-1]
+ summary := p.summary[len(p.summary)-1]
// Update the summary for chunk sc.
- summary[sc] = s.chunkOf(sc).summarize()
+ summary[sc] = p.chunkOf(sc).summarize()
// Update the summaries for chunks in between, which are
// either totally allocated or freed.
- whole := s.summary[len(s.summary)-1][sc+1 : ec]
+ whole := p.summary[len(p.summary)-1][sc+1 : ec]
if alloc {
// Should optimize into a memclr.
for i := range whole {
@@ -450,22 +450,22 @@ func (s *pageAlloc) update(base, npages uintptr, contig, alloc bool) {
}
// Update the summary for chunk ec.
- summary[ec] = s.chunkOf(ec).summarize()
+ summary[ec] = p.chunkOf(ec).summarize()
} else {
// Slow general path: the allocation spans more than one chunk
// and at least one summary is guaranteed to change.
//
// We can't assume a contiguous allocation happened, so walk over
// every chunk in the range and manually recompute the summary.
- summary := s.summary[len(s.summary)-1]
+ summary := p.summary[len(p.summary)-1]
for c := sc; c <= ec; c++ {
- summary[c] = s.chunkOf(c).summarize()
+ summary[c] = p.chunkOf(c).summarize()
}
}
// Walk up the radix tree and update the summaries appropriately.
changed := true
- for l := len(s.summary) - 2; l >= 0 && changed; l-- {
+ for l := len(p.summary) - 2; l >= 0 && changed; l-- {
// Update summaries at level l from summaries at level l+1.
changed = false
@@ -479,12 +479,12 @@ func (s *pageAlloc) update(base, npages uintptr, contig, alloc bool) {
// Iterate over each block, updating the corresponding summary in the less-granular level.
for i := lo; i < hi; i++ {
- children := s.summary[l+1][i<= s.end {
+ if chunkIndex(p.searchAddr.addr()) >= p.end {
return 0, 0
}
// If npages has a chance of fitting in the chunk where the searchAddr is,
// search it directly.
searchAddr := minOffAddr
- if pallocChunkPages-chunkPageIndex(s.searchAddr.addr()) >= uint(npages) {
+ if pallocChunkPages-chunkPageIndex(p.searchAddr.addr()) >= uint(npages) {
// npages is guaranteed to be no greater than pallocChunkPages here.
- i := chunkIndex(s.searchAddr.addr())
- if max := s.summary[len(s.summary)-1][i].max(); max >= uint(npages) {
- j, searchIdx := s.chunkOf(i).find(npages, chunkPageIndex(s.searchAddr.addr()))
+ i := chunkIndex(p.searchAddr.addr())
+ if max := p.summary[len(p.summary)-1][i].max(); max >= uint(npages) {
+ j, searchIdx := p.chunkOf(i).find(npages, chunkPageIndex(p.searchAddr.addr()))
if j == ^uint(0) {
print("runtime: max = ", max, ", npages = ", npages, "\n")
- print("runtime: searchIdx = ", chunkPageIndex(s.searchAddr.addr()), ", s.searchAddr = ", hex(s.searchAddr.addr()), "\n")
+ print("runtime: searchIdx = ", chunkPageIndex(p.searchAddr.addr()), ", p.searchAddr = ", hex(p.searchAddr.addr()), "\n")
throw("bad summary data")
}
addr = chunkBase(i) + uintptr(j)*pageSize
@@ -813,7 +813,7 @@ func (s *pageAlloc) alloc(npages uintptr) (addr uintptr, scav uintptr) {
}
// We failed to use a searchAddr for one reason or another, so try
// the slow path.
- addr, searchAddr = s.find(npages)
+ addr, searchAddr = p.find(npages)
if addr == 0 {
if npages == 1 {
// We failed to find a single free page, the smallest unit
@@ -821,41 +821,41 @@ func (s *pageAlloc) alloc(npages uintptr) (addr uintptr, scav uintptr) {
// exhausted. Otherwise, the heap still might have free
// space in it, just not enough contiguous space to
// accommodate npages.
- s.searchAddr = maxSearchAddr
+ p.searchAddr = maxSearchAddr
}
return 0, 0
}
Found:
// Go ahead and actually mark the bits now that we have an address.
- scav = s.allocRange(addr, npages)
+ scav = p.allocRange(addr, npages)
// If we found a higher searchAddr, we know that all the
// heap memory before that searchAddr in an offset address space is
- // allocated, so bump s.searchAddr up to the new one.
- if s.searchAddr.lessThan(searchAddr) {
- s.searchAddr = searchAddr
+ // allocated, so bump p.searchAddr up to the new one.
+ if p.searchAddr.lessThan(searchAddr) {
+ p.searchAddr = searchAddr
}
return addr, scav
}
// free returns npages worth of memory starting at base back to the page heap.
//
-// s.mheapLock must be held.
-func (s *pageAlloc) free(base, npages uintptr) {
- // If we're freeing pages below the s.searchAddr, update searchAddr.
- if b := (offAddr{base}); b.lessThan(s.searchAddr) {
- s.searchAddr = b
+// p.mheapLock must be held.
+func (p *pageAlloc) free(base, npages uintptr) {
+ // If we're freeing pages below the p.searchAddr, update searchAddr.
+ if b := (offAddr{base}); b.lessThan(p.searchAddr) {
+ p.searchAddr = b
}
// Update the free high watermark for the scavenger.
limit := base + npages*pageSize - 1
- if offLimit := (offAddr{limit}); s.scav.freeHWM.lessThan(offLimit) {
- s.scav.freeHWM = offLimit
+ if offLimit := (offAddr{limit}); p.scav.freeHWM.lessThan(offLimit) {
+ p.scav.freeHWM = offLimit
}
if npages == 1 {
// Fast path: we're clearing a single bit, and we know exactly
// where it is, so mark it directly.
i := chunkIndex(base)
- s.chunkOf(i).free1(chunkPageIndex(base))
+ p.chunkOf(i).free1(chunkPageIndex(base))
} else {
// Slow path: we're clearing more bits so we may need to iterate.
sc, ec := chunkIndex(base), chunkIndex(limit)
@@ -863,17 +863,17 @@ func (s *pageAlloc) free(base, npages uintptr) {
if sc == ec {
// The range doesn't cross any chunk boundaries.
- s.chunkOf(sc).free(si, ei+1-si)
+ p.chunkOf(sc).free(si, ei+1-si)
} else {
// The range crosses at least one chunk boundary.
- s.chunkOf(sc).free(si, pallocChunkPages-si)
+ p.chunkOf(sc).free(si, pallocChunkPages-si)
for c := sc + 1; c < ec; c++ {
- s.chunkOf(c).freeAll()
+ p.chunkOf(c).freeAll()
}
- s.chunkOf(ec).free(0, ei+1)
+ p.chunkOf(ec).free(0, ei+1)
}
}
- s.update(base, npages, true, false)
+ p.update(base, npages, true, false)
}
const (
diff --git a/src/runtime/mpagealloc_32bit.go b/src/runtime/mpagealloc_32bit.go
index 90f1e54d6c..331dadade9 100644
--- a/src/runtime/mpagealloc_32bit.go
+++ b/src/runtime/mpagealloc_32bit.go
@@ -60,7 +60,7 @@ var levelLogPages = [summaryLevels]uint{
}
// See mpagealloc_64bit.go for details.
-func (s *pageAlloc) sysInit() {
+func (p *pageAlloc) sysInit() {
// Calculate how much memory all our entries will take up.
//
// This should be around 12 KiB or less.
@@ -76,7 +76,7 @@ func (s *pageAlloc) sysInit() {
throw("failed to reserve page summary memory")
}
// There isn't much. Just map it and mark it as used immediately.
- sysMap(reservation, totalSize, s.sysStat)
+ sysMap(reservation, totalSize, p.sysStat)
sysUsed(reservation, totalSize)
// Iterate over the reservation and cut it up into slices.
@@ -88,29 +88,29 @@ func (s *pageAlloc) sysInit() {
// Put this reservation into a slice.
sl := notInHeapSlice{(*notInHeap)(reservation), 0, entries}
- s.summary[l] = *(*[]pallocSum)(unsafe.Pointer(&sl))
+ p.summary[l] = *(*[]pallocSum)(unsafe.Pointer(&sl))
reservation = add(reservation, uintptr(entries)*pallocSumBytes)
}
}
// See mpagealloc_64bit.go for details.
-func (s *pageAlloc) sysGrow(base, limit uintptr) {
+func (p *pageAlloc) sysGrow(base, limit uintptr) {
if base%pallocChunkBytes != 0 || limit%pallocChunkBytes != 0 {
print("runtime: base = ", hex(base), ", limit = ", hex(limit), "\n")
throw("sysGrow bounds not aligned to pallocChunkBytes")
}
// Walk up the tree and update the summary slices.
- for l := len(s.summary) - 1; l >= 0; l-- {
+ for l := len(p.summary) - 1; l >= 0; l-- {
// Figure out what part of the summary array this new address space needs.
// Note that we need to align the ranges to the block width (1< len(s.summary[l]) {
- s.summary[l] = s.summary[l][:hi]
+ if hi > len(p.summary[l]) {
+ p.summary[l] = p.summary[l][:hi]
}
}
}
diff --git a/src/runtime/mpagealloc_64bit.go b/src/runtime/mpagealloc_64bit.go
index a1691ba802..ffacb46c18 100644
--- a/src/runtime/mpagealloc_64bit.go
+++ b/src/runtime/mpagealloc_64bit.go
@@ -67,7 +67,7 @@ var levelLogPages = [summaryLevels]uint{
// sysInit performs architecture-dependent initialization of fields
// in pageAlloc. pageAlloc should be uninitialized except for sysStat
// if any runtime statistic should be updated.
-func (s *pageAlloc) sysInit() {
+func (p *pageAlloc) sysInit() {
// Reserve memory for each level. This will get mapped in
// as R/W by setArenas.
for l, shift := range levelShift {
@@ -82,21 +82,21 @@ func (s *pageAlloc) sysInit() {
// Put this reservation into a slice.
sl := notInHeapSlice{(*notInHeap)(r), 0, entries}
- s.summary[l] = *(*[]pallocSum)(unsafe.Pointer(&sl))
+ p.summary[l] = *(*[]pallocSum)(unsafe.Pointer(&sl))
}
}
// sysGrow performs architecture-dependent operations on heap
// growth for the page allocator, such as mapping in new memory
// for summaries. It also updates the length of the slices in
-// s.summary.
+// [.summary.
//
// base is the base of the newly-added heap memory and limit is
// the first address past the end of the newly-added heap memory.
// Both must be aligned to pallocChunkBytes.
//
-// The caller must update s.start and s.end after calling sysGrow.
-func (s *pageAlloc) sysGrow(base, limit uintptr) {
+// The caller must update p.start and p.end after calling sysGrow.
+func (p *pageAlloc) sysGrow(base, limit uintptr) {
if base%pallocChunkBytes != 0 || limit%pallocChunkBytes != 0 {
print("runtime: base = ", hex(base), ", limit = ", hex(limit), "\n")
throw("sysGrow bounds not aligned to pallocChunkBytes")
@@ -111,12 +111,12 @@ func (s *pageAlloc) sysGrow(base, limit uintptr) {
}
// summaryRangeToSumAddrRange converts a range of indices in any
- // level of s.summary into page-aligned addresses which cover that
+ // level of p.summary into page-aligned addresses which cover that
// range of indices.
summaryRangeToSumAddrRange := func(level, sumIdxBase, sumIdxLimit int) addrRange {
baseOffset := alignDown(uintptr(sumIdxBase)*pallocSumBytes, physPageSize)
limitOffset := alignUp(uintptr(sumIdxLimit)*pallocSumBytes, physPageSize)
- base := unsafe.Pointer(&s.summary[level][0])
+ base := unsafe.Pointer(&p.summary[level][0])
return addrRange{
offAddr{uintptr(add(base, baseOffset))},
offAddr{uintptr(add(base, limitOffset))},
@@ -140,10 +140,10 @@ func (s *pageAlloc) sysGrow(base, limit uintptr) {
//
// This will be used to look at what memory in the summary array is already
// mapped before and after this new range.
- inUseIndex := s.inUse.findSucc(base)
+ inUseIndex := p.inUse.findSucc(base)
// Walk up the radix tree and map summaries in as needed.
- for l := range s.summary {
+ for l := range p.summary {
// Figure out what part of the summary array this new address space needs.
needIdxBase, needIdxLimit := addrRangeToSummaryRange(l, makeAddrRange(base, limit))
@@ -151,8 +151,8 @@ func (s *pageAlloc) sysGrow(base, limit uintptr) {
// we get tight bounds checks on at least the top bound.
//
// We must do this regardless of whether we map new memory.
- if needIdxLimit > len(s.summary[l]) {
- s.summary[l] = s.summary[l][:needIdxLimit]
+ if needIdxLimit > len(p.summary[l]) {
+ p.summary[l] = p.summary[l][:needIdxLimit]
}
// Compute the needed address range in the summary array for level l.
@@ -163,10 +163,10 @@ func (s *pageAlloc) sysGrow(base, limit uintptr) {
// for mapping. prune's invariants are guaranteed by the fact that this
// function will never be asked to remap the same memory twice.
if inUseIndex > 0 {
- need = need.subtract(addrRangeToSumAddrRange(l, s.inUse.ranges[inUseIndex-1]))
+ need = need.subtract(addrRangeToSumAddrRange(l, p.inUse.ranges[inUseIndex-1]))
}
- if inUseIndex < len(s.inUse.ranges) {
- need = need.subtract(addrRangeToSumAddrRange(l, s.inUse.ranges[inUseIndex]))
+ if inUseIndex < len(p.inUse.ranges) {
+ need = need.subtract(addrRangeToSumAddrRange(l, p.inUse.ranges[inUseIndex]))
}
// It's possible that after our pruning above, there's nothing new to map.
if need.size() == 0 {
@@ -174,7 +174,7 @@ func (s *pageAlloc) sysGrow(base, limit uintptr) {
}
// Map and commit need.
- sysMap(unsafe.Pointer(need.base.addr()), need.size(), s.sysStat)
+ sysMap(unsafe.Pointer(need.base.addr()), need.size(), p.sysStat)
sysUsed(unsafe.Pointer(need.base.addr()), need.size())
}
}
diff --git a/src/runtime/mpagecache.go b/src/runtime/mpagecache.go
index 683a997136..5f76501a1c 100644
--- a/src/runtime/mpagecache.go
+++ b/src/runtime/mpagecache.go
@@ -71,8 +71,8 @@ func (c *pageCache) allocN(npages uintptr) (uintptr, uintptr) {
// into s. Then, it clears the cache, such that empty returns
// true.
//
-// s.mheapLock must be held or the world must be stopped.
-func (c *pageCache) flush(s *pageAlloc) {
+// p.mheapLock must be held or the world must be stopped.
+func (c *pageCache) flush(p *pageAlloc) {
if c.empty() {
return
}
@@ -83,18 +83,18 @@ func (c *pageCache) flush(s *pageAlloc) {
// slower, safer thing by iterating over each bit individually.
for i := uint(0); i < 64; i++ {
if c.cache&(1<= s.end {
+ if chunkIndex(p.searchAddr.addr()) >= p.end {
return pageCache{}
}
c := pageCache{}
- ci := chunkIndex(s.searchAddr.addr()) // chunk index
- if s.summary[len(s.summary)-1][ci] != 0 {
+ ci := chunkIndex(p.searchAddr.addr()) // chunk index
+ if p.summary[len(p.summary)-1][ci] != 0 {
// Fast path: there's free pages at or near the searchAddr address.
- chunk := s.chunkOf(ci)
- j, _ := chunk.find(1, chunkPageIndex(s.searchAddr.addr()))
+ chunk := p.chunkOf(ci)
+ j, _ := chunk.find(1, chunkPageIndex(p.searchAddr.addr()))
if j == ^uint(0) {
throw("bad summary data")
}
@@ -126,15 +126,15 @@ func (s *pageAlloc) allocToCache() pageCache {
} else {
// Slow path: the searchAddr address had nothing there, so go find
// the first free page the slow way.
- addr, _ := s.find(1)
+ addr, _ := p.find(1)
if addr == 0 {
// We failed to find adequate free space, so mark the searchAddr as OoM
// and return an empty pageCache.
- s.searchAddr = maxSearchAddr
+ p.searchAddr = maxSearchAddr
return pageCache{}
}
ci := chunkIndex(addr)
- chunk := s.chunkOf(ci)
+ chunk := p.chunkOf(ci)
c = pageCache{
base: alignDown(addr, 64*pageSize),
cache: ^chunk.pages64(chunkPageIndex(addr)),
@@ -143,19 +143,19 @@ func (s *pageAlloc) allocToCache() pageCache {
}
// Set the bits as allocated and clear the scavenged bits.
- s.allocRange(c.base, pageCachePages)
+ p.allocRange(c.base, pageCachePages)
// Update as an allocation, but note that it's not contiguous.
- s.update(c.base, pageCachePages, false, true)
+ p.update(c.base, pageCachePages, false, true)
// Set the search address to the last page represented by the cache.
// Since all of the pages in this block are going to the cache, and we
// searched for the first free page, we can confidently start at the
// next page.
//
- // However, s.searchAddr is not allowed to point into unmapped heap memory
+ // However, p.searchAddr is not allowed to point into unmapped heap memory
// unless it is maxSearchAddr, so make it the last page as opposed to
// the page after.
- s.searchAddr = offAddr{c.base + pageSize*(pageCachePages-1)}
+ p.searchAddr = offAddr{c.base + pageSize*(pageCachePages-1)}
return c
}
--
GitLab
From c3fe874f25ff55f73e4422bea7aa0b0e0e268f3e Mon Sep 17 00:00:00 2001
From: David Chase
Date: Mon, 17 Aug 2020 16:57:22 -0400
Subject: [PATCH 023/253] cmd/compile: avoid generating CSEs; do all
aggregates; maintain debug names
This adds a pass to detect common selection operations,
to avoid generating duplicates. Duplicate offsets are
also detected.
All aggregate types are now handled; there is some freedom in where
expand_calls is run, though it must run before softfloat.
Debug-name-maintenance is now incremental both in decompose builtin
and in expand_calls; it might be good to push this into all the
decompose passes.
(this is a smash of 5 CLs that rewrote some of the same code several
times to deal with phase-ordering problems, and included an abandoned
attempt.)
For #40724.
Change-Id: I2a0c32f20660bf8b99e2bcecd33545d97d2bd3c6
Reviewed-on: https://go-review.googlesource.com/c/go/+/249458
Trust: David Chase
Run-TryBot: David Chase
TryBot-Result: Go Bot
Reviewed-by: Cherry Zhang
---
src/cmd/compile/fmtmap_test.go | 1 +
src/cmd/compile/internal/gc/ssa.go | 104 +--
src/cmd/compile/internal/ssa/compile.go | 2 +-
src/cmd/compile/internal/ssa/config.go | 1 +
src/cmd/compile/internal/ssa/decompose.go | 87 +-
src/cmd/compile/internal/ssa/expand_calls.go | 555 ++++++++++---
src/cmd/compile/internal/ssa/export_test.go | 4 +
src/cmd/compile/internal/ssa/gen/dec64.rules | 137 ++--
src/cmd/compile/internal/ssa/rewritedec64.go | 796 +++++++++++++------
9 files changed, 1163 insertions(+), 524 deletions(-)
diff --git a/src/cmd/compile/fmtmap_test.go b/src/cmd/compile/fmtmap_test.go
index 179c60187f..f8c33ec1f9 100644
--- a/src/cmd/compile/fmtmap_test.go
+++ b/src/cmd/compile/fmtmap_test.go
@@ -136,6 +136,7 @@ var knownFormats = map[string]string{
"cmd/compile/internal/types.EType %s": "",
"cmd/compile/internal/types.EType %v": "",
"cmd/internal/obj.ABI %v": "",
+ "cmd/internal/src.XPos %v": "",
"error %v": "",
"float64 %.2f": "",
"float64 %.3f": "",
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 979a092ba1..f840ef4066 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -4740,7 +4740,7 @@ func (s *state) getClosureAndRcvr(fn *Node) (*ssa.Value, *ssa.Value) {
s.nilCheck(itab)
itabidx := fn.Xoffset + 2*int64(Widthptr) + 8 // offset of fun field in runtime.itab
closure := s.newValue1I(ssa.OpOffPtr, s.f.Config.Types.UintptrPtr, itabidx, itab)
- rcvr := s.newValue1(ssa.OpIData, types.Types[TUINTPTR], i)
+ rcvr := s.newValue1(ssa.OpIData, s.f.Config.Types.BytePtr, i)
return closure, rcvr
}
@@ -6904,56 +6904,38 @@ func (e *ssafn) Auto(pos src.XPos, t *types.Type) ssa.GCNode {
}
func (e *ssafn) SplitString(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot) {
- n := name.N.(*Node)
ptrType := types.NewPtr(types.Types[TUINT8])
lenType := types.Types[TINT]
- if n.Class() == PAUTO && !n.Name.Addrtaken() {
- // Split this string up into two separate variables.
- p := e.splitSlot(&name, ".ptr", 0, ptrType)
- l := e.splitSlot(&name, ".len", ptrType.Size(), lenType)
- return p, l
- }
- // Return the two parts of the larger variable.
- return ssa.LocalSlot{N: n, Type: ptrType, Off: name.Off}, ssa.LocalSlot{N: n, Type: lenType, Off: name.Off + int64(Widthptr)}
+ // Split this string up into two separate variables.
+ p := e.SplitSlot(&name, ".ptr", 0, ptrType)
+ l := e.SplitSlot(&name, ".len", ptrType.Size(), lenType)
+ return p, l
}
func (e *ssafn) SplitInterface(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot) {
n := name.N.(*Node)
u := types.Types[TUINTPTR]
t := types.NewPtr(types.Types[TUINT8])
- if n.Class() == PAUTO && !n.Name.Addrtaken() {
- // Split this interface up into two separate variables.
- f := ".itab"
- if n.Type.IsEmptyInterface() {
- f = ".type"
- }
- c := e.splitSlot(&name, f, 0, u) // see comment in plive.go:onebitwalktype1.
- d := e.splitSlot(&name, ".data", u.Size(), t)
- return c, d
+ // Split this interface up into two separate variables.
+ f := ".itab"
+ if n.Type.IsEmptyInterface() {
+ f = ".type"
}
- // Return the two parts of the larger variable.
- return ssa.LocalSlot{N: n, Type: u, Off: name.Off}, ssa.LocalSlot{N: n, Type: t, Off: name.Off + int64(Widthptr)}
+ c := e.SplitSlot(&name, f, 0, u) // see comment in plive.go:onebitwalktype1.
+ d := e.SplitSlot(&name, ".data", u.Size(), t)
+ return c, d
}
func (e *ssafn) SplitSlice(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot, ssa.LocalSlot) {
- n := name.N.(*Node)
ptrType := types.NewPtr(name.Type.Elem())
lenType := types.Types[TINT]
- if n.Class() == PAUTO && !n.Name.Addrtaken() {
- // Split this slice up into three separate variables.
- p := e.splitSlot(&name, ".ptr", 0, ptrType)
- l := e.splitSlot(&name, ".len", ptrType.Size(), lenType)
- c := e.splitSlot(&name, ".cap", ptrType.Size()+lenType.Size(), lenType)
- return p, l, c
- }
- // Return the three parts of the larger variable.
- return ssa.LocalSlot{N: n, Type: ptrType, Off: name.Off},
- ssa.LocalSlot{N: n, Type: lenType, Off: name.Off + int64(Widthptr)},
- ssa.LocalSlot{N: n, Type: lenType, Off: name.Off + int64(2*Widthptr)}
+ p := e.SplitSlot(&name, ".ptr", 0, ptrType)
+ l := e.SplitSlot(&name, ".len", ptrType.Size(), lenType)
+ c := e.SplitSlot(&name, ".cap", ptrType.Size()+lenType.Size(), lenType)
+ return p, l, c
}
func (e *ssafn) SplitComplex(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot) {
- n := name.N.(*Node)
s := name.Type.Size() / 2
var t *types.Type
if s == 8 {
@@ -6961,53 +6943,35 @@ func (e *ssafn) SplitComplex(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot)
} else {
t = types.Types[TFLOAT32]
}
- if n.Class() == PAUTO && !n.Name.Addrtaken() {
- // Split this complex up into two separate variables.
- r := e.splitSlot(&name, ".real", 0, t)
- i := e.splitSlot(&name, ".imag", t.Size(), t)
- return r, i
- }
- // Return the two parts of the larger variable.
- return ssa.LocalSlot{N: n, Type: t, Off: name.Off}, ssa.LocalSlot{N: n, Type: t, Off: name.Off + s}
+ r := e.SplitSlot(&name, ".real", 0, t)
+ i := e.SplitSlot(&name, ".imag", t.Size(), t)
+ return r, i
}
func (e *ssafn) SplitInt64(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot) {
- n := name.N.(*Node)
var t *types.Type
if name.Type.IsSigned() {
t = types.Types[TINT32]
} else {
t = types.Types[TUINT32]
}
- if n.Class() == PAUTO && !n.Name.Addrtaken() {
- // Split this int64 up into two separate variables.
- if thearch.LinkArch.ByteOrder == binary.BigEndian {
- return e.splitSlot(&name, ".hi", 0, t), e.splitSlot(&name, ".lo", t.Size(), types.Types[TUINT32])
- }
- return e.splitSlot(&name, ".hi", t.Size(), t), e.splitSlot(&name, ".lo", 0, types.Types[TUINT32])
- }
- // Return the two parts of the larger variable.
if thearch.LinkArch.ByteOrder == binary.BigEndian {
- return ssa.LocalSlot{N: n, Type: t, Off: name.Off}, ssa.LocalSlot{N: n, Type: types.Types[TUINT32], Off: name.Off + 4}
+ return e.SplitSlot(&name, ".hi", 0, t), e.SplitSlot(&name, ".lo", t.Size(), types.Types[TUINT32])
}
- return ssa.LocalSlot{N: n, Type: t, Off: name.Off + 4}, ssa.LocalSlot{N: n, Type: types.Types[TUINT32], Off: name.Off}
+ return e.SplitSlot(&name, ".hi", t.Size(), t), e.SplitSlot(&name, ".lo", 0, types.Types[TUINT32])
}
func (e *ssafn) SplitStruct(name ssa.LocalSlot, i int) ssa.LocalSlot {
- n := name.N.(*Node)
st := name.Type
ft := st.FieldType(i)
var offset int64
for f := 0; f < i; f++ {
offset += st.FieldType(f).Size()
}
- if n.Class() == PAUTO && !n.Name.Addrtaken() {
- // Note: the _ field may appear several times. But
- // have no fear, identically-named but distinct Autos are
- // ok, albeit maybe confusing for a debugger.
- return e.splitSlot(&name, "."+st.FieldName(i), offset, ft)
- }
- return ssa.LocalSlot{N: n, Type: ft, Off: name.Off + st.FieldOff(i)}
+ // Note: the _ field may appear several times. But
+ // have no fear, identically-named but distinct Autos are
+ // ok, albeit maybe confusing for a debugger.
+ return e.SplitSlot(&name, "."+st.FieldName(i), offset, ft)
}
func (e *ssafn) SplitArray(name ssa.LocalSlot) ssa.LocalSlot {
@@ -7017,19 +6981,23 @@ func (e *ssafn) SplitArray(name ssa.LocalSlot) ssa.LocalSlot {
e.Fatalf(n.Pos, "bad array size")
}
et := at.Elem()
- if n.Class() == PAUTO && !n.Name.Addrtaken() {
- return e.splitSlot(&name, "[0]", 0, et)
- }
- return ssa.LocalSlot{N: n, Type: et, Off: name.Off}
+ return e.SplitSlot(&name, "[0]", 0, et)
}
func (e *ssafn) DerefItab(it *obj.LSym, offset int64) *obj.LSym {
return itabsym(it, offset)
}
-// splitSlot returns a slot representing the data of parent starting at offset.
-func (e *ssafn) splitSlot(parent *ssa.LocalSlot, suffix string, offset int64, t *types.Type) ssa.LocalSlot {
- s := &types.Sym{Name: parent.N.(*Node).Sym.Name + suffix, Pkg: localpkg}
+// SplitSlot returns a slot representing the data of parent starting at offset.
+func (e *ssafn) SplitSlot(parent *ssa.LocalSlot, suffix string, offset int64, t *types.Type) ssa.LocalSlot {
+ node := parent.N.(*Node)
+
+ if node.Class() != PAUTO || node.Name.Addrtaken() {
+ // addressed things and non-autos retain their parents (i.e., cannot truly be split)
+ return ssa.LocalSlot{N: node, Type: t, Off: parent.Off + offset}
+ }
+
+ s := &types.Sym{Name: node.Sym.Name + suffix, Pkg: localpkg}
n := &Node{
Name: new(Name),
diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go
index 0664c0ba46..bddd271273 100644
--- a/src/cmd/compile/internal/ssa/compile.go
+++ b/src/cmd/compile/internal/ssa/compile.go
@@ -441,8 +441,8 @@ var passes = [...]pass{
{name: "nilcheckelim", fn: nilcheckelim},
{name: "prove", fn: prove},
{name: "early fuse", fn: fuseEarly},
- {name: "expand calls", fn: expandCalls, required: true},
{name: "decompose builtin", fn: decomposeBuiltIn, required: true},
+ {name: "expand calls", fn: expandCalls, required: true},
{name: "softfloat", fn: softfloat, required: true},
{name: "late opt", fn: opt, required: true}, // TODO: split required rules and optimizing rules
{name: "dead auto elim", fn: elimDeadAutosGeneric},
diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go
index f1a748309c..cb6f6fe7a1 100644
--- a/src/cmd/compile/internal/ssa/config.go
+++ b/src/cmd/compile/internal/ssa/config.go
@@ -149,6 +149,7 @@ type Frontend interface {
SplitStruct(LocalSlot, int) LocalSlot
SplitArray(LocalSlot) LocalSlot // array must be length 1
SplitInt64(LocalSlot) (LocalSlot, LocalSlot) // returns (hi, lo)
+ SplitSlot(parent *LocalSlot, suffix string, offset int64, t *types.Type) LocalSlot
// DerefItab dereferences an itab function
// entry, given the symbol of the itab and
diff --git a/src/cmd/compile/internal/ssa/decompose.go b/src/cmd/compile/internal/ssa/decompose.go
index ab27ba85ae..bf7f1e826b 100644
--- a/src/cmd/compile/internal/ssa/decompose.go
+++ b/src/cmd/compile/internal/ssa/decompose.go
@@ -6,6 +6,7 @@ package ssa
import (
"cmd/compile/internal/types"
+ "sort"
)
// decompose converts phi ops on compound builtin types into phi
@@ -31,77 +32,79 @@ func decomposeBuiltIn(f *Func) {
}
// Split up named values into their components.
+ // accumulate old names for aggregates (that are decomposed) in toDelete for efficient bulk deletion,
+ // accumulate new LocalSlots in newNames for addition after the iteration. This decomposition is for
+ // builtin types with leaf components, and thus there is no need to reprocess the newly create LocalSlots.
+ var toDelete []namedVal
var newNames []LocalSlot
- for _, name := range f.Names {
+ for i, name := range f.Names {
t := name.Type
switch {
case t.IsInteger() && t.Size() > f.Config.RegSize:
hiName, loName := f.fe.SplitInt64(name)
newNames = append(newNames, hiName, loName)
- for _, v := range f.NamedValues[name] {
+ for j, v := range f.NamedValues[name] {
if v.Op != OpInt64Make {
continue
}
f.NamedValues[hiName] = append(f.NamedValues[hiName], v.Args[0])
f.NamedValues[loName] = append(f.NamedValues[loName], v.Args[1])
+ toDelete = append(toDelete, namedVal{i, j})
}
- delete(f.NamedValues, name)
case t.IsComplex():
rName, iName := f.fe.SplitComplex(name)
newNames = append(newNames, rName, iName)
- for _, v := range f.NamedValues[name] {
+ for j, v := range f.NamedValues[name] {
if v.Op != OpComplexMake {
continue
}
f.NamedValues[rName] = append(f.NamedValues[rName], v.Args[0])
f.NamedValues[iName] = append(f.NamedValues[iName], v.Args[1])
-
+ toDelete = append(toDelete, namedVal{i, j})
}
- delete(f.NamedValues, name)
case t.IsString():
ptrName, lenName := f.fe.SplitString(name)
newNames = append(newNames, ptrName, lenName)
- for _, v := range f.NamedValues[name] {
+ for j, v := range f.NamedValues[name] {
if v.Op != OpStringMake {
continue
}
f.NamedValues[ptrName] = append(f.NamedValues[ptrName], v.Args[0])
f.NamedValues[lenName] = append(f.NamedValues[lenName], v.Args[1])
+ toDelete = append(toDelete, namedVal{i, j})
}
- delete(f.NamedValues, name)
case t.IsSlice():
ptrName, lenName, capName := f.fe.SplitSlice(name)
newNames = append(newNames, ptrName, lenName, capName)
- for _, v := range f.NamedValues[name] {
+ for j, v := range f.NamedValues[name] {
if v.Op != OpSliceMake {
continue
}
f.NamedValues[ptrName] = append(f.NamedValues[ptrName], v.Args[0])
f.NamedValues[lenName] = append(f.NamedValues[lenName], v.Args[1])
f.NamedValues[capName] = append(f.NamedValues[capName], v.Args[2])
+ toDelete = append(toDelete, namedVal{i, j})
}
- delete(f.NamedValues, name)
case t.IsInterface():
typeName, dataName := f.fe.SplitInterface(name)
newNames = append(newNames, typeName, dataName)
- for _, v := range f.NamedValues[name] {
+ for j, v := range f.NamedValues[name] {
if v.Op != OpIMake {
continue
}
f.NamedValues[typeName] = append(f.NamedValues[typeName], v.Args[0])
f.NamedValues[dataName] = append(f.NamedValues[dataName], v.Args[1])
+ toDelete = append(toDelete, namedVal{i, j})
}
- delete(f.NamedValues, name)
case t.IsFloat():
// floats are never decomposed, even ones bigger than RegSize
- newNames = append(newNames, name)
case t.Size() > f.Config.RegSize:
f.Fatalf("undecomposed named type %s %v", name, t)
- default:
- newNames = append(newNames, name)
}
}
- f.Names = newNames
+
+ deleteNamedVals(f, toDelete)
+ f.Names = append(f.Names, newNames...)
}
func decomposeBuiltInPhi(v *Value) {
@@ -263,14 +266,20 @@ func decomposeUserArrayInto(f *Func, name LocalSlot, slots []LocalSlot) []LocalS
f.Fatalf("array not of size 1")
}
elemName := f.fe.SplitArray(name)
+ var keep []*Value
for _, v := range f.NamedValues[name] {
if v.Op != OpArrayMake1 {
+ keep = append(keep, v)
continue
}
f.NamedValues[elemName] = append(f.NamedValues[elemName], v.Args[0])
}
- // delete the name for the array as a whole
- delete(f.NamedValues, name)
+ if len(keep) == 0 {
+ // delete the name for the array as a whole
+ delete(f.NamedValues, name)
+ } else {
+ f.NamedValues[name] = keep
+ }
if t.Elem().IsArray() {
return decomposeUserArrayInto(f, elemName, slots)
@@ -300,17 +309,23 @@ func decomposeUserStructInto(f *Func, name LocalSlot, slots []LocalSlot) []Local
}
makeOp := StructMakeOp(n)
+ var keep []*Value
// create named values for each struct field
for _, v := range f.NamedValues[name] {
if v.Op != makeOp {
+ keep = append(keep, v)
continue
}
for i := 0; i < len(fnames); i++ {
f.NamedValues[fnames[i]] = append(f.NamedValues[fnames[i]], v.Args[i])
}
}
- // remove the name of the struct as a whole
- delete(f.NamedValues, name)
+ if len(keep) == 0 {
+ // delete the name for the struct as a whole
+ delete(f.NamedValues, name)
+ } else {
+ f.NamedValues[name] = keep
+ }
// now that this f.NamedValues contains values for the struct
// fields, recurse into nested structs
@@ -400,3 +415,35 @@ func StructMakeOp(nf int) Op {
}
panic("too many fields in an SSAable struct")
}
+
+type namedVal struct {
+ locIndex, valIndex int // f.NamedValues[f.Names[locIndex]][valIndex] = key
+}
+
+// deleteNamedVals removes particular values with debugger names from f's naming data structures
+func deleteNamedVals(f *Func, toDelete []namedVal) {
+ // Arrange to delete from larger indices to smaller, to ensure swap-with-end deletion does not invalid pending indices.
+ sort.Slice(toDelete, func(i, j int) bool {
+ if toDelete[i].locIndex != toDelete[j].locIndex {
+ return toDelete[i].locIndex > toDelete[j].locIndex
+ }
+ return toDelete[i].valIndex > toDelete[j].valIndex
+
+ })
+
+ // Get rid of obsolete names
+ for _, d := range toDelete {
+ loc := f.Names[d.locIndex]
+ vals := f.NamedValues[loc]
+ l := len(vals) - 1
+ if l > 0 {
+ vals[d.valIndex] = vals[l]
+ f.NamedValues[loc] = vals[:l]
+ } else {
+ delete(f.NamedValues, loc)
+ l = len(f.Names) - 1
+ f.Names[d.locIndex] = f.Names[l]
+ f.Names = f.Names[:l]
+ }
+ }
+}
diff --git a/src/cmd/compile/internal/ssa/expand_calls.go b/src/cmd/compile/internal/ssa/expand_calls.go
index bbd9aeee51..3e3573ff39 100644
--- a/src/cmd/compile/internal/ssa/expand_calls.go
+++ b/src/cmd/compile/internal/ssa/expand_calls.go
@@ -11,27 +11,44 @@ import (
"sort"
)
+type selKey struct {
+ from *Value
+ offset int64
+ size int64
+ typ types.EType
+}
+
+type offsetKey struct {
+ from *Value
+ offset int64
+ pt *types.Type
+}
+
// expandCalls converts LE (Late Expansion) calls that act like they receive value args into a lower-level form
// that is more oriented to a platform's ABI. The SelectN operations that extract results are rewritten into
// more appropriate forms, and any StructMake or ArrayMake inputs are decomposed until non-struct values are
-// reached (for now, Strings, Slices, Complex, and Interface are not decomposed because they are rewritten in
-// a subsequent phase, but that may need to change for a register ABI in case one of those composite values is
-// split between registers and memory).
-//
-// TODO: when it comes time to use registers, might want to include builtin selectors as well, but currently that happens in lower.
+// reached.
func expandCalls(f *Func) {
+ // Calls that need lowering have some number of inputs, including a memory input,
+ // and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able.
+
+ // With the current ABI those inputs need to be converted into stores to memory,
+ // rethreading the call's memory input to the first, and the new call now receiving the last.
+
+ // With the current ABI, the outputs need to be converted to loads, which will all use the call's
+ // memory output as their input.
if !LateCallExpansionEnabledWithin(f) {
return
}
+ debug := f.pass.debug > 0
+
canSSAType := f.fe.CanSSA
regSize := f.Config.RegSize
sp, _ := f.spSb()
+ typ := &f.Config.Types
+ ptrSize := f.Config.PtrSize
- debug := f.pass.debug > 0
-
- // For 32-bit, need to deal with decomposition of 64-bit integers
- tUint32 := types.Types[types.TUINT32]
- tInt32 := types.Types[types.TINT32]
+ // For 32-bit, need to deal with decomposition of 64-bit integers, which depends on endianness.
var hiOffset, lowOffset int64
if f.Config.BigEndian {
lowOffset = 4
@@ -39,25 +56,63 @@ func expandCalls(f *Func) {
hiOffset = 4
}
+ namedSelects := make(map[*Value][]namedVal)
+
// intPairTypes returns the pair of 32-bit int types needed to encode a 64-bit integer type on a target
// that has no 64-bit integer registers.
intPairTypes := func(et types.EType) (tHi, tLo *types.Type) {
- tHi = tUint32
+ tHi = typ.UInt32
if et == types.TINT64 {
- tHi = tInt32
+ tHi = typ.Int32
}
- tLo = tUint32
+ tLo = typ.UInt32
return
}
// isAlreadyExpandedAggregateType returns whether a type is an SSA-able "aggregate" (multiple register) type
- // that was expanded in an earlier phase (small user-defined arrays and structs, lowered in decomposeUser).
- // Other aggregate types are expanded in decomposeBuiltin, which comes later.
+ // that was expanded in an earlier phase (currently, expand_calls is intended to run after decomposeBuiltin,
+ // so this is all aggregate types -- small struct and array, complex, interface, string, slice, and 64-bit
+ // integer on 32-bit).
isAlreadyExpandedAggregateType := func(t *types.Type) bool {
if !canSSAType(t) {
return false
}
- return t.IsStruct() || t.IsArray() || regSize == 4 && t.Size() > 4 && t.IsInteger()
+ return t.IsStruct() || t.IsArray() || t.IsComplex() || t.IsInterface() || t.IsString() || t.IsSlice() ||
+ t.Size() > regSize && t.IsInteger()
+ }
+
+ offsets := make(map[offsetKey]*Value)
+
+ // offsetFrom creates an offset from a pointer, simplifying chained offsets and offsets from SP
+ // TODO should also optimize offsets from SB?
+ offsetFrom := func(from *Value, offset int64, pt *types.Type) *Value {
+ if offset == 0 && from.Type == pt { // this is not actually likely
+ return from
+ }
+ // Simplify, canonicalize
+ for from.Op == OpOffPtr {
+ offset += from.AuxInt
+ from = from.Args[0]
+ }
+ if from == sp {
+ return f.ConstOffPtrSP(pt, offset, sp)
+ }
+ key := offsetKey{from, offset, pt}
+ v := offsets[key]
+ if v != nil {
+ return v
+ }
+ v = from.Block.NewValue1I(from.Pos.WithNotStmt(), OpOffPtr, pt, offset, from)
+ offsets[key] = v
+ return v
+ }
+
+ splitSlots := func(ls []LocalSlot, sfx string, offset int64, ty *types.Type) []LocalSlot {
+ var locs []LocalSlot
+ for i := range ls {
+ locs = append(locs, f.fe.SplitSlot(&ls[i], sfx, offset, ty))
+ }
+ return locs
}
// removeTrivialWrapperTypes unwraps layers of
@@ -97,11 +152,16 @@ func expandCalls(f *Func) {
// end in OpSelectN, it does nothing (this can happen depending on compiler phase ordering).
// It emits the code necessary to implement the leaf select operation that leads to the call.
// TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
- var rewriteSelect func(leaf *Value, selector *Value, offset int64)
- rewriteSelect = func(leaf *Value, selector *Value, offset int64) {
+ var rewriteSelect func(leaf *Value, selector *Value, offset int64) []LocalSlot
+ rewriteSelect = func(leaf *Value, selector *Value, offset int64) []LocalSlot {
+ var locs []LocalSlot
+ leafType := leaf.Type
switch selector.Op {
case OpSelectN:
// TODO these may be duplicated. Should memoize. Intermediate selectors will go dead, no worries there.
+ for _, s := range namedSelects[selector] {
+ locs = append(locs, f.Names[s.locIndex])
+ }
call := selector.Args[0]
aux := call.Aux.(*AuxCall)
which := selector.AuxInt
@@ -110,9 +170,13 @@ func expandCalls(f *Func) {
leaf.copyOf(call)
} else {
leafType := removeTrivialWrapperTypes(leaf.Type)
- pt := types.NewPtr(leafType)
if canSSAType(leafType) {
- off := f.ConstOffPtrSP(pt, offset+aux.OffsetOfResult(which), sp)
+ for leafType.Etype == types.TSTRUCT && leafType.NumFields() == 1 {
+ // This may not be adequately general -- consider [1]etc but this is caused by immediate IDATA
+ leafType = leafType.Field(0).Type
+ }
+ pt := types.NewPtr(leafType)
+ off := offsetFrom(sp, offset+aux.OffsetOfResult(which), pt)
// Any selection right out of the arg area/registers has to be same Block as call, use call as mem input.
if leaf.Block == call.Block {
leaf.reset(OpLoad)
@@ -123,46 +187,110 @@ func expandCalls(f *Func) {
leaf.copyOf(w)
}
} else {
- panic("Should not have non-SSA-able OpSelectN")
+ f.Fatalf("Should not have non-SSA-able OpSelectN, selector=%s", selector.LongString())
}
}
case OpStructSelect:
w := selector.Args[0]
+ var ls []LocalSlot
if w.Type.Etype != types.TSTRUCT {
- fmt.Printf("Bad type for w:\nv=%v\nsel=%v\nw=%v\n,f=%s\n", leaf.LongString(), selector.LongString(), w.LongString(), f.Name)
+ f.Fatalf("Bad type for w: v=%v; sel=%v; w=%v; ,f=%s\n", leaf.LongString(), selector.LongString(), w.LongString(), f.Name)
+ // Artifact of immediate interface idata
+ ls = rewriteSelect(leaf, w, offset)
+ } else {
+ ls = rewriteSelect(leaf, w, offset+w.Type.FieldOff(int(selector.AuxInt)))
+ for _, l := range ls {
+ locs = append(locs, f.fe.SplitStruct(l, int(selector.AuxInt)))
+ }
}
- rewriteSelect(leaf, w, offset+w.Type.FieldOff(int(selector.AuxInt)))
+
+ case OpArraySelect:
+ w := selector.Args[0]
+ rewriteSelect(leaf, w, offset+selector.Type.Size()*selector.AuxInt)
case OpInt64Hi:
w := selector.Args[0]
- rewriteSelect(leaf, w, offset+hiOffset)
+ ls := rewriteSelect(leaf, w, offset+hiOffset)
+ locs = splitSlots(ls, ".hi", hiOffset, leafType)
case OpInt64Lo:
w := selector.Args[0]
- rewriteSelect(leaf, w, offset+lowOffset)
+ ls := rewriteSelect(leaf, w, offset+lowOffset)
+ locs = splitSlots(ls, ".lo", lowOffset, leafType)
- case OpArraySelect:
+ case OpStringPtr:
+ ls := rewriteSelect(leaf, selector.Args[0], offset)
+ locs = splitSlots(ls, ".ptr", 0, typ.BytePtr)
+ //for i := range ls {
+ // locs = append(locs, f.fe.SplitSlot(&ls[i], ".ptr", 0, typ.BytePtr))
+ //}
+ case OpSlicePtr:
w := selector.Args[0]
- rewriteSelect(leaf, w, offset+selector.Type.Size()*selector.AuxInt)
+ ls := rewriteSelect(leaf, w, offset)
+ locs = splitSlots(ls, ".ptr", 0, types.NewPtr(w.Type.Elem()))
+
+ case OpITab:
+ w := selector.Args[0]
+ ls := rewriteSelect(leaf, w, offset)
+ sfx := ".itab"
+ if w.Type.IsEmptyInterface() {
+ sfx = ".type"
+ }
+ locs = splitSlots(ls, sfx, 0, typ.Uintptr)
+
+ case OpComplexReal:
+ ls := rewriteSelect(leaf, selector.Args[0], offset)
+ locs = splitSlots(ls, ".real", 0, leafType)
+
+ case OpComplexImag:
+ ls := rewriteSelect(leaf, selector.Args[0], offset+leafType.Width) // result is FloatNN, width of result is offset of imaginary part.
+ locs = splitSlots(ls, ".imag", leafType.Width, leafType)
+
+ case OpStringLen, OpSliceLen:
+ ls := rewriteSelect(leaf, selector.Args[0], offset+ptrSize)
+ locs = splitSlots(ls, ".len", ptrSize, leafType)
+
+ case OpIData:
+ ls := rewriteSelect(leaf, selector.Args[0], offset+ptrSize)
+ locs = splitSlots(ls, ".data", ptrSize, leafType)
+
+ case OpSliceCap:
+ ls := rewriteSelect(leaf, selector.Args[0], offset+2*ptrSize)
+ locs = splitSlots(ls, ".cap", 2*ptrSize, leafType)
+
+ case OpCopy: // If it's an intermediate result, recurse
+ locs = rewriteSelect(leaf, selector.Args[0], offset)
+ for _, s := range namedSelects[selector] {
+ // this copy may have had its own name, preserve that, too.
+ locs = append(locs, f.Names[s.locIndex])
+ }
+
default:
- // Ignore dead ends; on 32-bit, these can occur running before decompose builtins.
+ // Ignore dead ends. These can occur if this phase is run before decompose builtin (which is not intended, but allowed).
}
+
+ return locs
}
// storeArg converts stores of SSA-able aggregate arguments (passed to a call) into a series of stores of
// smaller types into individual parameter slots.
- // TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
var storeArg func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value
storeArg = func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value {
+ if debug {
+ fmt.Printf("\tstoreArg(%s; %s; %v; %d; %s)\n", b, a.LongString(), t, offset, mem.String())
+ }
+
switch a.Op {
case OpArrayMake0, OpStructMake0:
return mem
+
case OpStructMake1, OpStructMake2, OpStructMake3, OpStructMake4:
for i := 0; i < t.NumFields(); i++ {
fld := t.Field(i)
mem = storeArg(pos, b, a.Args[i], fld.Type, offset+fld.Offset, mem)
}
return mem
+
case OpArrayMake1:
return storeArg(pos, b, a.Args[0], t.Elem(), offset, mem)
@@ -170,55 +298,51 @@ func expandCalls(f *Func) {
tHi, tLo := intPairTypes(t.Etype)
mem = storeArg(pos, b, a.Args[0], tHi, offset+hiOffset, mem)
return storeArg(pos, b, a.Args[1], tLo, offset+lowOffset, mem)
+
+ case OpComplexMake:
+ tPart := typ.Float32
+ wPart := t.Width / 2
+ if wPart == 8 {
+ tPart = typ.Float64
+ }
+ mem = storeArg(pos, b, a.Args[0], tPart, offset, mem)
+ return storeArg(pos, b, a.Args[1], tPart, offset+wPart, mem)
+
+ case OpIMake:
+ mem = storeArg(pos, b, a.Args[0], typ.Uintptr, offset, mem)
+ return storeArg(pos, b, a.Args[1], typ.BytePtr, offset+ptrSize, mem)
+
+ case OpStringMake:
+ mem = storeArg(pos, b, a.Args[0], typ.BytePtr, offset, mem)
+ return storeArg(pos, b, a.Args[1], typ.Int, offset+ptrSize, mem)
+
+ case OpSliceMake:
+ mem = storeArg(pos, b, a.Args[0], typ.BytePtr, offset, mem)
+ mem = storeArg(pos, b, a.Args[1], typ.Int, offset+ptrSize, mem)
+ return storeArg(pos, b, a.Args[2], typ.Int, offset+2*ptrSize, mem)
}
- dst := f.ConstOffPtrSP(types.NewPtr(t), offset, sp)
+
+ dst := offsetFrom(sp, offset, types.NewPtr(t))
x := b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, a, mem)
if debug {
- fmt.Printf("storeArg(%v) returns %s\n", a, x.LongString())
+ fmt.Printf("\t\tstoreArg returns %s\n", x.LongString())
}
return x
}
- // offsetFrom creates an offset from a pointer, simplifying chained offsets and offsets from SP
- // TODO should also optimize offsets from SB?
- offsetFrom := func(dst *Value, offset int64, t *types.Type) *Value {
- pt := types.NewPtr(t)
- if offset == 0 && dst.Type == pt { // this is not actually likely
- return dst
- }
- if dst.Op != OpOffPtr {
- return dst.Block.NewValue1I(dst.Pos.WithNotStmt(), OpOffPtr, pt, offset, dst)
- }
- // Simplify OpOffPtr
- from := dst.Args[0]
- offset += dst.AuxInt
- if from == sp {
- return f.ConstOffPtrSP(pt, offset, sp)
- }
- return dst.Block.NewValue1I(dst.Pos.WithNotStmt(), OpOffPtr, pt, offset, from)
- }
-
// splitStore converts a store of an SSA-able aggregate into a series of smaller stores, emitting
// appropriate Struct/Array Select operations (which will soon go dead) to obtain the parts.
- var splitStore func(dst, src, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value
- splitStore = func(dst, src, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value {
- // TODO might be worth commoning up duplicate selectors, but since they go dead, maybe no point.
+ // This has to handle aggregate types that have already been lowered by an earlier phase.
+ var splitStore func(dest, source, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value
+ splitStore = func(dest, source, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value {
+ if debug {
+ fmt.Printf("\tsplitStore(%s; %s; %s; %s; %v; %d; %v)\n", dest.LongString(), source.LongString(), mem.String(), v.LongString(), t, offset, firstStorePos)
+ }
pos := v.Pos.WithNotStmt()
switch t.Etype {
- case types.TINT64, types.TUINT64:
- if t.Width == regSize {
- break
- }
- tHi, tLo := intPairTypes(t.Etype)
- sel := src.Block.NewValue1(pos, OpInt64Hi, tHi, src)
- mem = splitStore(dst, sel, mem, v, tHi, offset+hiOffset, firstStorePos)
- firstStorePos = firstStorePos.WithNotStmt()
- sel = src.Block.NewValue1(pos, OpInt64Lo, tLo, src)
- return splitStore(dst, sel, mem, v, tLo, offset+lowOffset, firstStorePos)
-
case types.TARRAY:
elt := t.Elem()
- if src.Op == OpIData && t.NumElem() == 1 && t.Width == regSize && elt.Width == regSize {
+ if t.NumElem() == 1 && t.Width == regSize && elt.Width == regSize {
t = removeTrivialWrapperTypes(t)
if t.Etype == types.TSTRUCT || t.Etype == types.TARRAY {
f.Fatalf("Did not expect to find IDATA-immediate with non-trivial struct/array in it")
@@ -226,13 +350,14 @@ func expandCalls(f *Func) {
break // handle the leaf type.
}
for i := int64(0); i < t.NumElem(); i++ {
- sel := src.Block.NewValue1I(pos, OpArraySelect, elt, i, src)
- mem = splitStore(dst, sel, mem, v, elt, offset+i*elt.Width, firstStorePos)
+ sel := source.Block.NewValue1I(pos, OpArraySelect, elt, i, source)
+ mem = splitStore(dest, sel, mem, v, elt, offset+i*elt.Width, firstStorePos)
firstStorePos = firstStorePos.WithNotStmt()
}
return mem
+
case types.TSTRUCT:
- if src.Op == OpIData && t.NumFields() == 1 && t.Field(0).Type.Width == t.Width && t.Width == regSize {
+ if t.NumFields() == 1 && t.Field(0).Type.Width == t.Width && t.Width == regSize {
// This peculiar test deals with accesses to immediate interface data.
// It works okay because everything is the same size.
// Example code that triggers this can be found in go/constant/value.go, function ToComplex
@@ -240,26 +365,87 @@ func expandCalls(f *Func) {
// v121 (+882) = StaticLECall {AuxCall{"".itof([intVal,0])[floatVal,8]}} [16] v119 v1
// This corresponds to the generic rewrite rule "(StructSelect [0] (IData x)) => (IData x)"
// Guard against "struct{struct{*foo}}"
+ // Other rewriting phases create minor glitches when they transform IData, for instance the
+ // interface-typed Arg "x" of ToFloat in go/constant/value.go
+ // v6 (858) = Arg {x} (x[Value], x[Value])
+ // is rewritten by decomposeArgs into
+ // v141 (858) = Arg {x}
+ // v139 (858) = Arg <*uint8> {x} [8]
+ // because of a type case clause on line 862 of go/constant/value.go
+ // case intVal:
+ // return itof(x)
+ // v139 is later stored as an intVal == struct{val *big.Int} which naively requires the fields of
+ // of a *uint8, which does not succeed.
t = removeTrivialWrapperTypes(t)
- if t.Etype == types.TSTRUCT || t.Etype == types.TARRAY {
- f.Fatalf("Did not expect to find IDATA-immediate with non-trivial struct/array in it")
- }
- break // handle the leaf type.
+
+ // it could be a leaf type, but the "leaf" could be complex64 (for example)
+ return splitStore(dest, source, mem, v, t, offset, firstStorePos)
}
+
for i := 0; i < t.NumFields(); i++ {
fld := t.Field(i)
- sel := src.Block.NewValue1I(pos, OpStructSelect, fld.Type, int64(i), src)
- mem = splitStore(dst, sel, mem, v, fld.Type, offset+fld.Offset, firstStorePos)
+ sel := source.Block.NewValue1I(pos, OpStructSelect, fld.Type, int64(i), source)
+ mem = splitStore(dest, sel, mem, v, fld.Type, offset+fld.Offset, firstStorePos)
firstStorePos = firstStorePos.WithNotStmt()
}
return mem
+
+ case types.TINT64, types.TUINT64:
+ if t.Width == regSize {
+ break
+ }
+ tHi, tLo := intPairTypes(t.Etype)
+ sel := source.Block.NewValue1(pos, OpInt64Hi, tHi, source)
+ mem = splitStore(dest, sel, mem, v, tHi, offset+hiOffset, firstStorePos)
+ firstStorePos = firstStorePos.WithNotStmt()
+ sel = source.Block.NewValue1(pos, OpInt64Lo, tLo, source)
+ return splitStore(dest, sel, mem, v, tLo, offset+lowOffset, firstStorePos)
+
+ case types.TINTER:
+ sel := source.Block.NewValue1(pos, OpITab, typ.BytePtr, source)
+ mem = splitStore(dest, sel, mem, v, typ.BytePtr, offset, firstStorePos)
+ firstStorePos = firstStorePos.WithNotStmt()
+ sel = source.Block.NewValue1(pos, OpIData, typ.BytePtr, source)
+ return splitStore(dest, sel, mem, v, typ.BytePtr, offset+ptrSize, firstStorePos)
+
+ case types.TSTRING:
+ sel := source.Block.NewValue1(pos, OpStringPtr, typ.BytePtr, source)
+ mem = splitStore(dest, sel, mem, v, typ.BytePtr, offset, firstStorePos)
+ firstStorePos = firstStorePos.WithNotStmt()
+ sel = source.Block.NewValue1(pos, OpStringLen, typ.Int, source)
+ return splitStore(dest, sel, mem, v, typ.Int, offset+ptrSize, firstStorePos)
+
+ case types.TSLICE:
+ et := types.NewPtr(t.Elem())
+ sel := source.Block.NewValue1(pos, OpSlicePtr, et, source)
+ mem = splitStore(dest, sel, mem, v, et, offset, firstStorePos)
+ firstStorePos = firstStorePos.WithNotStmt()
+ sel = source.Block.NewValue1(pos, OpSliceLen, typ.Int, source)
+ mem = splitStore(dest, sel, mem, v, typ.Int, offset+ptrSize, firstStorePos)
+ sel = source.Block.NewValue1(pos, OpSliceCap, typ.Int, source)
+ return splitStore(dest, sel, mem, v, typ.Int, offset+2*ptrSize, firstStorePos)
+
+ case types.TCOMPLEX64:
+ sel := source.Block.NewValue1(pos, OpComplexReal, typ.Float32, source)
+ mem = splitStore(dest, sel, mem, v, typ.Float32, offset, firstStorePos)
+ firstStorePos = firstStorePos.WithNotStmt()
+ sel = source.Block.NewValue1(pos, OpComplexImag, typ.Float32, source)
+ return splitStore(dest, sel, mem, v, typ.Float32, offset+4, firstStorePos)
+
+ case types.TCOMPLEX128:
+ sel := source.Block.NewValue1(pos, OpComplexReal, typ.Float64, source)
+ mem = splitStore(dest, sel, mem, v, typ.Float64, offset, firstStorePos)
+ firstStorePos = firstStorePos.WithNotStmt()
+ sel = source.Block.NewValue1(pos, OpComplexImag, typ.Float64, source)
+ return splitStore(dest, sel, mem, v, typ.Float64, offset+8, firstStorePos)
}
// Default, including for aggregates whose single element exactly fills their container
// TODO this will be a problem for cast interfaces containing floats when we move to registers.
- x := v.Block.NewValue3A(firstStorePos, OpStore, types.TypeMem, t, offsetFrom(dst, offset, t), src, mem)
+ x := v.Block.NewValue3A(firstStorePos, OpStore, types.TypeMem, t, offsetFrom(dest, offset, types.NewPtr(t)), source, mem)
if debug {
- fmt.Printf("splitStore(%v, %v, %v, %v) returns %s\n", dst, src, mem, v, x.LongString())
+ fmt.Printf("\t\tsplitStore returns %s\n", x.LongString())
}
+
return x
}
@@ -286,21 +472,24 @@ func expandCalls(f *Func) {
}
// "Dereference" of addressed (probably not-SSA-eligible) value becomes Move
// TODO this will be more complicated with registers in the picture.
- src := a.Args[0]
- dst := f.ConstOffPtrSP(src.Type, aux.OffsetOfArg(auxI), sp)
+ source := a.Args[0]
+ dst := f.ConstOffPtrSP(source.Type, aux.OffsetOfArg(auxI), sp)
if a.Uses == 1 && a.Block == v.Block {
a.reset(OpMove)
a.Pos = pos
a.Type = types.TypeMem
a.Aux = aux.TypeOfArg(auxI)
a.AuxInt = aux.SizeOfArg(auxI)
- a.SetArgs3(dst, src, mem)
+ a.SetArgs3(dst, source, mem)
mem = a
} else {
- mem = v.Block.NewValue3A(pos, OpMove, types.TypeMem, aux.TypeOfArg(auxI), dst, src, mem)
+ mem = v.Block.NewValue3A(pos, OpMove, types.TypeMem, aux.TypeOfArg(auxI), dst, source, mem)
mem.AuxInt = aux.SizeOfArg(auxI)
}
} else {
+ if debug {
+ fmt.Printf("storeArg %s, %v, %d\n", a.LongString(), aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI))
+ }
mem = storeArg(pos, v.Block, a, aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI), mem)
}
}
@@ -308,6 +497,8 @@ func expandCalls(f *Func) {
return mem
}
+ // TODO if too slow, whole program iteration can be replaced w/ slices of appropriate values, accumulated in first loop here.
+
// Step 0: rewrite the calls to convert incoming args to stores.
for _, b := range f.Blocks {
for _, v := range b.Values {
@@ -328,15 +519,40 @@ func expandCalls(f *Func) {
}
}
+ for i, name := range f.Names {
+ t := name.Type
+ if isAlreadyExpandedAggregateType(t) {
+ for j, v := range f.NamedValues[name] {
+ if v.Op == OpSelectN {
+ ns := namedSelects[v]
+ namedSelects[v] = append(ns, namedVal{locIndex: i, valIndex: j})
+ }
+ }
+ }
+ }
+
// Step 1: any stores of aggregates remaining are believed to be sourced from call results.
// Decompose those stores into a series of smaller stores, adding selection ops as necessary.
for _, b := range f.Blocks {
for _, v := range b.Values {
if v.Op == OpStore {
t := v.Aux.(*types.Type)
- if isAlreadyExpandedAggregateType(t) {
- dst, src, mem := v.Args[0], v.Args[1], v.Args[2]
- mem = splitStore(dst, src, mem, v, t, 0, v.Pos)
+ iAEATt := isAlreadyExpandedAggregateType(t)
+ if !iAEATt {
+ // guarding against store immediate struct into interface data field -- store type is *uint8
+ // TODO can this happen recursively?
+ tSrc := v.Args[1].Type
+ iAEATt = isAlreadyExpandedAggregateType(tSrc)
+ if iAEATt {
+ t = tSrc
+ }
+ }
+ if iAEATt {
+ if debug {
+ fmt.Printf("Splitting store %s\n", v.LongString())
+ }
+ dst, source, mem := v.Args[0], v.Args[1], v.Args[2]
+ mem = splitStore(dst, source, mem, v, t, 0, v.Pos)
v.copyOf(mem)
}
}
@@ -345,23 +561,32 @@ func expandCalls(f *Func) {
val2Preds := make(map[*Value]int32) // Used to accumulate dependency graph of selection operations for topological ordering.
- // Step 2: accumulate selection operations for rewrite in topological order.
+ // Step 2: transform or accumulate selection operations for rewrite in topological order.
+ //
+ // Aggregate types that have already (in earlier phases) been transformed must be lowered comprehensively to finish
+ // the transformation (user-defined structs and arrays, slices, strings, interfaces, complex, 64-bit on 32-bit architectures),
+ //
// Any select-for-addressing applied to call results can be transformed directly.
- // TODO this is overkill; with the transformation of aggregate references into series of leaf references, it is only necessary to remember and recurse on the leaves.
for _, b := range f.Blocks {
for _, v := range b.Values {
// Accumulate chains of selectors for processing in topological order
switch v.Op {
- case OpStructSelect, OpArraySelect, OpInt64Hi, OpInt64Lo:
+ case OpStructSelect, OpArraySelect,
+ OpIData, OpITab,
+ OpStringPtr, OpStringLen,
+ OpSlicePtr, OpSliceLen, OpSliceCap,
+ OpComplexReal, OpComplexImag,
+ OpInt64Hi, OpInt64Lo:
w := v.Args[0]
switch w.Op {
- case OpStructSelect, OpArraySelect, OpInt64Hi, OpInt64Lo, OpSelectN:
+ case OpStructSelect, OpArraySelect, OpSelectN:
val2Preds[w] += 1
if debug {
fmt.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w])
}
}
fallthrough
+
case OpSelectN:
if _, ok := val2Preds[v]; !ok {
val2Preds[v] = 0
@@ -369,53 +594,153 @@ func expandCalls(f *Func) {
fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
}
}
+
case OpSelectNAddr:
// Do these directly, there are no chains of selectors.
call := v.Args[0]
which := v.AuxInt
aux := call.Aux.(*AuxCall)
pt := v.Type
- off := f.ConstOffPtrSP(pt, aux.OffsetOfResult(which), sp)
+ off := offsetFrom(sp, aux.OffsetOfResult(which), pt)
v.copyOf(off)
}
}
}
- // Compilation must be deterministic
- var ordered []*Value
- less := func(i, j int) bool { return ordered[i].ID < ordered[j].ID }
+ // Step 3: Compute topological order of selectors,
+ // then process it in reverse to eliminate duplicates,
+ // then forwards to rewrite selectors.
+ //
+ // All chains of selectors end up in same block as the call.
+ sdom := f.Sdom()
+
+ // Compilation must be deterministic, so sort after extracting first zeroes from map.
+ // Sorting allows dominators-last order within each batch,
+ // so that the backwards scan for duplicates will most often find copies from dominating blocks (it is best-effort).
+ var toProcess []*Value
+ less := func(i, j int) bool {
+ vi, vj := toProcess[i], toProcess[j]
+ bi, bj := vi.Block, vj.Block
+ if bi == bj {
+ return vi.ID < vj.ID
+ }
+ return sdom.domorder(bi) > sdom.domorder(bj) // reverse the order to put dominators last.
+ }
- // Step 3: Rewrite in topological order. All chains of selectors end up in same block as the call.
+ // Accumulate order in allOrdered
+ var allOrdered []*Value
+ for v, n := range val2Preds {
+ if n == 0 {
+ allOrdered = append(allOrdered, v)
+ }
+ }
+ last := 0 // allOrdered[0:last] has been top-sorted and processed
for len(val2Preds) > 0 {
- ordered = ordered[:0]
- for v, n := range val2Preds {
- if n == 0 {
- ordered = append(ordered, v)
+ toProcess = allOrdered[last:]
+ last = len(allOrdered)
+ sort.SliceStable(toProcess, less)
+ for _, v := range toProcess {
+ w := v.Args[0]
+ delete(val2Preds, v)
+ n, ok := val2Preds[w]
+ if !ok {
+ continue
}
+ if n == 1 {
+ allOrdered = append(allOrdered, w)
+ delete(val2Preds, w)
+ continue
+ }
+ val2Preds[w] = n - 1
}
- sort.Slice(ordered, less)
- for _, v := range ordered {
- for {
- w := v.Args[0]
- if debug {
- fmt.Printf("About to rewrite %s, args[0]=%s\n", v.LongString(), w.LongString())
- }
- delete(val2Preds, v)
- rewriteSelect(v, v, 0)
- v = w
- n, ok := val2Preds[v]
- if !ok {
- break
- }
- if n != 1 {
- val2Preds[v] = n - 1
- break
- }
- // Loop on new v; val2Preds[v] == 1 will be deleted in that iteration, no need to store zero.
+ }
+
+ common := make(map[selKey]*Value)
+ // Rewrite duplicate selectors as copies where possible.
+ for i := len(allOrdered) - 1; i >= 0; i-- {
+ v := allOrdered[i]
+ w := v.Args[0]
+ for w.Op == OpCopy {
+ w = w.Args[0]
+ }
+ typ := v.Type
+ if typ.IsMemory() {
+ continue // handled elsewhere, not an indexable result
+ }
+ size := typ.Width
+ offset := int64(0)
+ switch v.Op {
+ case OpStructSelect:
+ if w.Type.Etype == types.TSTRUCT {
+ offset = w.Type.FieldOff(int(v.AuxInt))
+ } else { // Immediate interface data artifact, offset is zero.
+ f.Fatalf("Expand calls interface data problem, func %s, v=%s, w=%s\n", f.Name, v.LongString(), w.LongString())
}
+ case OpArraySelect:
+ offset = size * v.AuxInt
+ case OpSelectN:
+ offset = w.Aux.(*AuxCall).OffsetOfResult(v.AuxInt)
+ case OpInt64Hi:
+ offset = hiOffset
+ case OpInt64Lo:
+ offset = lowOffset
+ case OpStringLen, OpSliceLen, OpIData:
+ offset = ptrSize
+ case OpSliceCap:
+ offset = 2 * ptrSize
+ case OpComplexImag:
+ offset = size
+ }
+ sk := selKey{from: w, size: size, offset: offset, typ: typ.Etype}
+ dupe := common[sk]
+ if dupe == nil {
+ common[sk] = v
+ } else if sdom.IsAncestorEq(dupe.Block, v.Block) {
+ v.copyOf(dupe)
+ } else {
+ // Because values are processed in dominator order, the old common[s] will never dominate after a miss is seen.
+ // Installing the new value might match some future values.
+ common[sk] = v
}
}
+ // Indices of entries in f.Names that need to be deleted.
+ var toDelete []namedVal
+
+ // Rewrite selectors.
+ for i, v := range allOrdered {
+ if debug {
+ b := v.Block
+ fmt.Printf("allOrdered[%d] = b%d, %s, uses=%d\n", i, b.ID, v.LongString(), v.Uses)
+ }
+ if v.Uses == 0 {
+ v.reset(OpInvalid)
+ continue
+ }
+ if v.Op == OpCopy {
+ continue
+ }
+ locs := rewriteSelect(v, v, 0)
+ // Install new names.
+ if v.Type.IsMemory() {
+ continue
+ }
+ // Leaf types may have debug locations
+ if !isAlreadyExpandedAggregateType(v.Type) {
+ for _, l := range locs {
+ f.NamedValues[l] = append(f.NamedValues[l], v)
+ }
+ f.Names = append(f.Names, locs...)
+ continue
+ }
+ // Not-leaf types that had debug locations need to lose them.
+ if ns, ok := namedSelects[v]; ok {
+ toDelete = append(toDelete, ns...)
+ }
+ }
+
+ deleteNamedVals(f, toDelete)
+
// Step 4: rewrite the calls themselves, correcting the type
for _, b := range f.Blocks {
for _, v := range b.Values {
diff --git a/src/cmd/compile/internal/ssa/export_test.go b/src/cmd/compile/internal/ssa/export_test.go
index 51665c60e2..b4c3e5cfdf 100644
--- a/src/cmd/compile/internal/ssa/export_test.go
+++ b/src/cmd/compile/internal/ssa/export_test.go
@@ -125,6 +125,10 @@ func (d DummyFrontend) SplitStruct(s LocalSlot, i int) LocalSlot {
func (d DummyFrontend) SplitArray(s LocalSlot) LocalSlot {
return LocalSlot{N: s.N, Type: s.Type.Elem(), Off: s.Off}
}
+
+func (d DummyFrontend) SplitSlot(parent *LocalSlot, suffix string, offset int64, t *types.Type) LocalSlot {
+ return LocalSlot{N: parent.N, Type: t, Off: offset}
+}
func (DummyFrontend) Line(_ src.XPos) string {
return "unknown.go:0"
}
diff --git a/src/cmd/compile/internal/ssa/gen/dec64.rules b/src/cmd/compile/internal/ssa/gen/dec64.rules
index 4f9e863f90..07607960fa 100644
--- a/src/cmd/compile/internal/ssa/gen/dec64.rules
+++ b/src/cmd/compile/internal/ssa/gen/dec64.rules
@@ -9,7 +9,6 @@
(Int64Hi (Int64Make hi _)) => hi
(Int64Lo (Int64Make _ lo)) => lo
-
(Load ptr mem) && is64BitInt(t) && !config.BigEndian && t.IsSigned() =>
(Int64Make
(Load (OffPtr [4] ptr) mem)
@@ -143,6 +142,10 @@
(Trunc64to32 (Int64Make _ lo)) => lo
(Trunc64to16 (Int64Make _ lo)) => (Trunc32to16 lo)
(Trunc64to8 (Int64Make _ lo)) => (Trunc32to8 lo)
+// Most general
+(Trunc64to32 x) => (Int64Lo x)
+(Trunc64to16 x) => (Trunc32to16 (Int64Lo x))
+(Trunc64to8 x) => (Trunc32to8 (Int64Lo x))
(Lsh32x64 _ (Int64Make (Const32 [c]) _)) && c != 0 => (Const32 [0])
(Rsh32x64 x (Int64Make (Const32 [c]) _)) && c != 0 => (Signmask x)
@@ -175,156 +178,174 @@
// turn x64 non-constant shifts to x32 shifts
// if high 32-bit of the shift is nonzero, make a huge shift
(Lsh64x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Lsh64x32 x (Or32 (Zeromask hi) lo))
+ (Lsh64x32 x (Or32 (Zeromask hi) lo))
(Rsh64x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Rsh64x32 x (Or32 (Zeromask hi) lo))
+ (Rsh64x32 x (Or32 (Zeromask hi) lo))
(Rsh64Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Rsh64Ux32 x (Or32 (Zeromask hi) lo))
+ (Rsh64Ux32 x (Or32 (Zeromask hi) lo))
(Lsh32x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Lsh32x32 x (Or32 (Zeromask hi) lo))
+ (Lsh32x32 x (Or32 (Zeromask hi) lo))
(Rsh32x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Rsh32x32 x (Or32 (Zeromask hi) lo))
+ (Rsh32x32 x (Or32 (Zeromask hi) lo))
(Rsh32Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Rsh32Ux32 x (Or32 (Zeromask hi) lo))
+ (Rsh32Ux32 x (Or32 (Zeromask hi) lo))
(Lsh16x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Lsh16x32 x (Or32 (Zeromask hi) lo))
+ (Lsh16x32 x (Or32 (Zeromask hi) lo))
(Rsh16x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Rsh16x32 x (Or32 (Zeromask hi) lo))
+ (Rsh16x32 x (Or32 (Zeromask hi) lo))
(Rsh16Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Rsh16Ux32 x (Or32 (Zeromask hi) lo))
+ (Rsh16Ux32 x (Or32 (Zeromask hi) lo))
(Lsh8x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Lsh8x32 x (Or32 (Zeromask hi) lo))
+ (Lsh8x32 x (Or32 (Zeromask hi) lo))
(Rsh8x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Rsh8x32 x (Or32 (Zeromask hi) lo))
+ (Rsh8x32 x (Or32 (Zeromask hi) lo))
(Rsh8Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Rsh8Ux32 x (Or32 (Zeromask hi) lo))
+ (Rsh8Ux32 x (Or32 (Zeromask hi) lo))
+
+// Most general
+(Lsh64x64 x y) => (Lsh64x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh64x64 x y) => (Rsh64x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh64Ux64 x y) => (Rsh64Ux32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Lsh32x64 x y) => (Lsh32x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh32x64 x y) => (Rsh32x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh32Ux64 x y) => (Rsh32Ux32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Lsh16x64 x y) => (Lsh16x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh16x64 x y) => (Rsh16x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh16Ux64 x y) => (Rsh16Ux32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Lsh8x64 x y) => (Lsh8x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh8x64 x y) => (Rsh8x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh8Ux64 x y) => (Rsh8Ux32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y)))
+
+// Clean up constants a little
+(Or32 (Zeromask (Const32 [c])) y) && c == 0 => y
+(Or32 (Zeromask (Const32 [c])) y) && c != 0 => (Const32 [-1])
// 64x left shift
// result.hi = hi<>(32-s) | lo<<(s-32) // >> is unsigned, large shifts result 0
// result.lo = lo<
+(Lsh64x32 x s) =>
(Int64Make
(Or32
(Or32
- (Lsh32x32 hi s)
+ (Lsh32x32 (Int64Hi x) s)
(Rsh32Ux32
- lo
+ (Int64Lo x)
(Sub32 (Const32 [32]) s)))
(Lsh32x32
- lo
+ (Int64Lo x)
(Sub32 s (Const32 [32]))))
- (Lsh32x32 lo s))
-(Lsh64x16 (Int64Make hi lo) s) =>
+ (Lsh32x32 (Int64Lo x) s))
+(Lsh64x16 x s) =>
(Int64Make
(Or32
(Or32
- (Lsh32x16 hi s)
+ (Lsh32x16 (Int64Hi x) s)
(Rsh32Ux16
- lo
+ (Int64Lo x)
(Sub16 (Const16 [32]) s)))
(Lsh32x16
- lo
+ (Int64Lo x)
(Sub16 s (Const16 [32]))))
- (Lsh32x16 lo s))
-(Lsh64x8 (Int64Make hi lo) s) =>
+ (Lsh32x16 (Int64Lo x) s))
+(Lsh64x8 x s) =>
(Int64Make
(Or32
(Or32
- (Lsh32x8 hi s)
+ (Lsh32x8 (Int64Hi x) s)
(Rsh32Ux8
- lo
+ (Int64Lo x)
(Sub8 (Const8 [32]) s)))
(Lsh32x8
- lo
+ (Int64Lo x)
(Sub8 s (Const8 [32]))))
- (Lsh32x8 lo s))
+ (Lsh32x8 (Int64Lo x) s))
// 64x unsigned right shift
// result.hi = hi>>s
// result.lo = lo>>s | hi<<(32-s) | hi>>(s-32) // >> is unsigned, large shifts result 0
-(Rsh64Ux32 (Int64Make hi lo) s) =>
+(Rsh64Ux32 x s) =>
(Int64Make
- (Rsh32Ux32 hi s)
+ (Rsh32Ux32 (Int64Hi x) s)
(Or32
(Or32
- (Rsh32Ux32 lo s)
+ (Rsh32Ux32 (Int64Lo x) s)
(Lsh32x32
- hi
+ (Int64Hi x)
(Sub32 (Const32 [32]) s)))
(Rsh32Ux32
- hi
+ (Int64Hi x)
(Sub32 s (Const32 [32])))))
-(Rsh64Ux16 (Int64Make hi lo) s) =>
+(Rsh64Ux16 x s) =>
(Int64Make
- (Rsh32Ux16 hi s)
+ (Rsh32Ux16 (Int64Hi x) s)
(Or32
(Or32
- (Rsh32Ux16 lo s)
+ (Rsh32Ux16 (Int64Lo x) s)
(Lsh32x16
- hi
+ (Int64Hi x)
(Sub16 (Const16 [32]) s)))
(Rsh32Ux16
- hi
+ (Int64Hi x)
(Sub16 s (Const16 [32])))))
-(Rsh64Ux8 (Int64Make hi lo) s) =>
+(Rsh64Ux8 x s) =>
(Int64Make
- (Rsh32Ux8 hi s)
+ (Rsh32Ux8 (Int64Hi x) s)
(Or32
(Or32
- (Rsh32Ux8 lo s)
+ (Rsh32Ux8 (Int64Lo x) s)
(Lsh32x8
- hi
+ (Int64Hi x)
(Sub8 (Const8 [32]) s)))
(Rsh32Ux8
- hi
+ (Int64Hi x)
(Sub8 s (Const8 [32])))))
// 64x signed right shift
// result.hi = hi>>s
// result.lo = lo>>s | hi<<(32-s) | (hi>>(s-32))&zeromask(s>>5) // hi>>(s-32) is signed, large shifts result 0/-1
-(Rsh64x32 (Int64Make hi lo) s) =>
+(Rsh64x32 x s) =>
(Int64Make
- (Rsh32x32 hi s)
+ (Rsh32x32 (Int64Hi x) s)
(Or32
(Or32
- (Rsh32Ux32 lo s)
+ (Rsh32Ux32 (Int64Lo x) s)
(Lsh32x32
- hi
+ (Int64Hi x)
(Sub32 (Const32 [32]) s)))
(And32
(Rsh32x32
- hi
+ (Int64Hi x)
(Sub32 s (Const32 [32])))
(Zeromask
(Rsh32Ux32 s (Const32 [5]))))))
-(Rsh64x16 (Int64Make hi lo) s) =>
+(Rsh64x16 x s) =>
(Int64Make
- (Rsh32x16 hi s)
+ (Rsh32x16 (Int64Hi x) s)
(Or32
(Or32
- (Rsh32Ux16 lo s)
+ (Rsh32Ux16 (Int64Lo x) s)
(Lsh32x16
- hi
+ (Int64Hi x)
(Sub16 (Const16 [32]) s)))
(And32
(Rsh32x16
- hi
+ (Int64Hi x)
(Sub16 s (Const16 [32])))
(Zeromask
(ZeroExt16to32
(Rsh16Ux32 s (Const32 [5])))))))
-(Rsh64x8 (Int64Make hi lo) s) =>
+(Rsh64x8 x s) =>
(Int64Make
- (Rsh32x8 hi s)
+ (Rsh32x8 (Int64Hi x) s)
(Or32
(Or32
- (Rsh32Ux8 lo s)
+ (Rsh32Ux8 (Int64Lo x) s)
(Lsh32x8
- hi
+ (Int64Hi x)
(Sub8