From ac1e188f3bf09eac9a3ab5e0ab7431a2c3db58d8 Mon Sep 17 00:00:00 2001 From: John Kessenich <cepheus@frii.com> Date: Fri, 31 Jan 2014 02:40:19 +0000 Subject: [PATCH] GL_ARB_enhanced_layouts, part 6: Numerical side of uniform offset and align semantics. Included - moving offset calculations for std140/std430 from reflection to linkValidate.cpp - applying the offset/align rules on top of std140/std430 - removing caching the structure's number of components (and correcting that this is components, not size) git-svn-id: https://cvs.khronos.org/svn/repos/ogl/trunk/ecosystem/public/sdk/tools/glslang@25174 e7fa87d3-cd2b-0410-9028-fcbf551c1848 --- Test/440.frag | 51 +++++- Test/baseResults/300layout.vert.out | 14 +- Test/baseResults/440.frag.out | 18 +- Test/baseResults/specExamples.vert.out | 2 +- glslang/Include/Types.h | 30 ++-- glslang/Include/revision.h | 4 +- glslang/MachineIndependent/Constant.cpp | 70 ++++---- glslang/MachineIndependent/Intermediate.cpp | 2 +- glslang/MachineIndependent/ParseHelper.cpp | 58 ++++++- glslang/MachineIndependent/SymbolTable.cpp | 15 -- glslang/MachineIndependent/intermOut.cpp | 2 +- glslang/MachineIndependent/linkValidate.cpp | 159 +++++++++++++++++- .../MachineIndependent/localintermediate.h | 2 + glslang/MachineIndependent/parseConst.cpp | 12 +- glslang/MachineIndependent/reflection.cpp | 147 +++------------- 15 files changed, 367 insertions(+), 219 deletions(-) diff --git a/Test/440.frag b/Test/440.frag index 22d46f56b..a32706c04 100644 --- a/Test/440.frag +++ b/Test/440.frag @@ -79,13 +79,58 @@ uniform ubl11 { layout(std140) uniform block { vec4 a; // a takes offsets 0-15 - layout(offset = 20) vec3 b; // b takes offsets 32-43 + layout(offset = 32) vec3 b; // b takes offsets 32-43 layout(offset = 40) vec2 c; // ERROR, lies within previous member + layout(align = 6) double g; // ERROR, 6 is not a power of 2 + layout(offset=68) double h; // ERROR, offset not aligned +} specExampleErrors; + +layout(std140) uniform block2 { + vec4 a; // 
a takes offsets 0-15 + layout(offset = 32) vec3 b; // b takes offsets 32-43 layout(offset = 48) vec2 d; // d takes offsets 48-55 layout(align = 16) float e; // e takes offsets 64-67 layout(align = 2) double f; // f takes offsets 72-79 - layout(align = 6) double g; // ERROR, 6 is not a power of 2 layout(offset = 80) float h; // h takes offsets 80-83 layout(align = 64) dvec3 i; // i takes offsets 128-151 - layout(offset = 153, align = 8) float j; // j takes offsets 160-163 + layout(offset = 164, align = 8) float j; // j takes offsets 168-171 } specExample; + +layout(std430) uniform block430 { + vec4 a; // a takes offsets 0-15 + layout(offset = 32) vec3 b; // b takes offsets 32-43 + layout(offset = 40) vec2 c; // ERROR, lies within previous member + layout(align = 6) double g; // ERROR, 6 is not a power of 2 + layout(offset=68) double h; // ERROR, offset not aligned +} specExampleErrors430; + +layout(std430) uniform block2430 { + vec4 a; // a takes offsets 0-15 + layout(offset = 32) vec3 b; // b takes offsets 32-43 + layout(offset = 48) vec2 d; // d takes offsets 48-55 + layout(align = 16) float e; // e takes offsets 64-67 + layout(align = 2) double f; // f takes offsets 72-79 + layout(offset = 80) float h; // h takes offsets 80-83 + layout(align = 64) dvec3 i; // i takes offsets 128-151 + layout(offset = 164, align = 8) float j; // j takes offsets 168-171 +} specExample430; + +layout(std430, align = 128) uniform block24300 { + vec4 a; + vec3 b; + vec2 d; + float e; + double f; + float h; + dvec3 i; +} specExample4300; + +layout(std430, align = 128) uniform block24301 { + vec4 a; + vec3 b; + vec2 d; + layout(offset=388) float e; + layout(align=8) double f; + float h; + dvec3 i; +} specExample4301; diff --git a/Test/baseResults/300layout.vert.out b/Test/baseResults/300layout.vert.out index 0ce4b3c79..4d2c3a5b9 100644 --- a/Test/baseResults/300layout.vert.out +++ b/Test/baseResults/300layout.vert.out @@ -32,12 +32,12 @@ ERROR: node is still EOpNull! 
0:46 add (highp 4X4 matrix of float) 0:46 add (highp 4X4 matrix of float) 0:46 add (highp 4X4 matrix of float) -0:46 M1: direct index for structure (layout(row_major std140 ) uniform highp 4X4 matrix of float) -0:46 'tblock' (layout(row_major std140 ) uniform block{layout(row_major std140 ) uniform highp 4X4 matrix of float M1, layout(column_major std140 ) uniform highp 4X4 matrix of float M2, layout(row_major std140 ) uniform highp 3X3 matrix of float N1, layout(row_major std140 ) centroid uniform highp float badf, layout(row_major std140 ) uniform highp float badg, layout(row_major std140 ) uniform highp float bad1, layout(row_major shared ) uniform highp float bad2, layout(row_major packed ) uniform highp float bad3}) +0:46 M1: direct index for structure (layout(row_major std140 offset=0 ) uniform highp 4X4 matrix of float) +0:46 'tblock' (layout(row_major std140 ) uniform block{layout(row_major std140 offset=0 ) uniform highp 4X4 matrix of float M1, layout(column_major std140 offset=64 ) uniform highp 4X4 matrix of float M2, layout(row_major std140 offset=128 ) uniform highp 3X3 matrix of float N1, layout(row_major std140 offset=176 ) centroid uniform highp float badf, layout(row_major std140 offset=180 ) uniform highp float badg, layout(row_major std140 offset=184 ) uniform highp float bad1, layout(row_major shared offset=188 ) uniform highp float bad2, layout(row_major packed offset=192 ) uniform highp float bad3}) 0:46 Constant: 0:46 0 (const int) -0:46 M2: direct index for structure (layout(column_major std140 ) uniform highp 4X4 matrix of float) -0:46 'tblock' (layout(row_major std140 ) uniform block{layout(row_major std140 ) uniform highp 4X4 matrix of float M1, layout(column_major std140 ) uniform highp 4X4 matrix of float M2, layout(row_major std140 ) uniform highp 3X3 matrix of float N1, layout(row_major std140 ) centroid uniform highp float badf, layout(row_major std140 ) uniform highp float badg, layout(row_major std140 ) uniform highp float bad1, 
layout(row_major shared ) uniform highp float bad2, layout(row_major packed ) uniform highp float bad3}) +0:46 M2: direct index for structure (layout(column_major std140 offset=64 ) uniform highp 4X4 matrix of float) +0:46 'tblock' (layout(row_major std140 ) uniform block{layout(row_major std140 offset=0 ) uniform highp 4X4 matrix of float M1, layout(column_major std140 offset=64 ) uniform highp 4X4 matrix of float M2, layout(row_major std140 offset=128 ) uniform highp 3X3 matrix of float N1, layout(row_major std140 offset=176 ) centroid uniform highp float badf, layout(row_major std140 offset=180 ) uniform highp float badg, layout(row_major std140 offset=184 ) uniform highp float bad1, layout(row_major shared offset=188 ) uniform highp float bad2, layout(row_major packed offset=192 ) uniform highp float bad3}) 0:46 Constant: 0:46 1 (const int) 0:46 M4: direct index for structure (layout(row_major shared ) uniform highp 4X4 matrix of float) @@ -56,8 +56,8 @@ ERROR: node is still EOpNull! 
0:47 'color' (smooth out highp 3-component vector of float) 0:47 vector-times-matrix (highp 3-component vector of float) 0:47 'c' (layout(location=7 ) in highp 3-component vector of float) -0:47 N1: direct index for structure (layout(row_major std140 ) uniform highp 3X3 matrix of float) -0:47 'tblock' (layout(row_major std140 ) uniform block{layout(row_major std140 ) uniform highp 4X4 matrix of float M1, layout(column_major std140 ) uniform highp 4X4 matrix of float M2, layout(row_major std140 ) uniform highp 3X3 matrix of float N1, layout(row_major std140 ) centroid uniform highp float badf, layout(row_major std140 ) uniform highp float badg, layout(row_major std140 ) uniform highp float bad1, layout(row_major shared ) uniform highp float bad2, layout(row_major packed ) uniform highp float bad3}) +0:47 N1: direct index for structure (layout(row_major std140 offset=128 ) uniform highp 3X3 matrix of float) +0:47 'tblock' (layout(row_major std140 ) uniform block{layout(row_major std140 offset=0 ) uniform highp 4X4 matrix of float M1, layout(column_major std140 offset=64 ) uniform highp 4X4 matrix of float M2, layout(row_major std140 offset=128 ) uniform highp 3X3 matrix of float N1, layout(row_major std140 offset=176 ) centroid uniform highp float badf, layout(row_major std140 offset=180 ) uniform highp float badg, layout(row_major std140 offset=184 ) uniform highp float bad1, layout(row_major shared offset=188 ) uniform highp float bad2, layout(row_major packed offset=192 ) uniform highp float bad3}) 0:47 Constant: 0:47 2 (const int) 0:? Linker Objects @@ -68,7 +68,7 @@ ERROR: node is still EOpNull! 0:? 'pos' (smooth out highp 4-component vector of float) 0:? 'color' (smooth out highp 3-component vector of float) 0:? 'badm4' (layout(column_major shared ) uniform highp 4X4 matrix of float) -0:? 
'tblock' (layout(row_major std140 ) uniform block{layout(row_major std140 ) uniform highp 4X4 matrix of float M1, layout(column_major std140 ) uniform highp 4X4 matrix of float M2, layout(row_major std140 ) uniform highp 3X3 matrix of float N1, layout(row_major std140 ) centroid uniform highp float badf, layout(row_major std140 ) uniform highp float badg, layout(row_major std140 ) uniform highp float bad1, layout(row_major shared ) uniform highp float bad2, layout(row_major packed ) uniform highp float bad3}) +0:? 'tblock' (layout(row_major std140 ) uniform block{layout(row_major std140 offset=0 ) uniform highp 4X4 matrix of float M1, layout(column_major std140 offset=64 ) uniform highp 4X4 matrix of float M2, layout(row_major std140 offset=128 ) uniform highp 3X3 matrix of float N1, layout(row_major std140 offset=176 ) centroid uniform highp float badf, layout(row_major std140 offset=180 ) uniform highp float badg, layout(row_major std140 offset=184 ) uniform highp float bad1, layout(row_major shared offset=188 ) uniform highp float bad2, layout(row_major packed offset=192 ) uniform highp float bad3}) 0:? '__anon__0' (layout(row_major shared ) uniform block{layout(row_major shared ) uniform bool b, layout(row_major shared ) uniform highp 4X4 matrix of float t2m}) 0:? '__anon__2' (out block{out highp float f}) 0:? 'badoutA' (layout(location=10 ) smooth out highp 4-component vector of float) diff --git a/Test/baseResults/440.frag.out b/Test/baseResults/440.frag.out index b81870cb1..c020f070d 100644 --- a/Test/baseResults/440.frag.out +++ b/Test/baseResults/440.frag.out @@ -39,8 +39,13 @@ ERROR: 0:58: 'align' : can only be used with std140 or std430 layout packing ERROR: 0:63: 'align' : can only be used with std140 or std430 layout packing ERROR: 0:62: 'layout' : offset/align can only be used on a uniform or buffer ERROR: 0:63: 'layout' : offset/align can only be used on a uniform or buffer -ERROR: 0:87: 'align' : must be a power of 2 -ERROR: 40 compilation errors. 
No code generated. +ERROR: 0:84: 'align' : must be a power of 2 +ERROR: 0:83: 'offset' : cannot lie in previous members +ERROR: 0:85: 'offset' : must be a multiple of the member's alignment +ERROR: 0:103: 'align' : must be a power of 2 +ERROR: 0:102: 'offset' : cannot lie in previous members +ERROR: 0:104: 'offset' : must be a multiple of the member's alignment +ERROR: 45 compilation errors. No code generated. ERROR: node is still EOpNull! @@ -56,7 +61,7 @@ ERROR: node is still EOpNull! 0:? 'inst1' (layout(column_major shared offset=12 ) uniform block{layout(column_major shared ) uniform int a}) 0:? 'inst2' (layout(offset=12 ) in block{in int a}) 0:? 'inst3' (layout(offset=12 ) out block{out int a}) -0:? 'inst4' (layout(column_major std140 align=16 ) uniform block{layout(column_major std140 align=16 ) uniform int a}) +0:? 'inst4' (layout(column_major std140 align=16 ) uniform block{layout(column_major std140 offset=0 align=16 ) uniform int a}) 0:? 'inst8' (layout(column_major shared align=16 ) uniform block{layout(column_major shared ) uniform int a}) 0:? 'inst5' (layout(align=16 ) in block{in int a}) 0:? 'inst6' (layout(align=16 ) out block{out int a}) @@ -71,7 +76,12 @@ ERROR: node is still EOpNull! 0:? 'inst10' (in block{layout(offset=12 ) in float f, layout(align=4 ) in float g}) 0:? 'inst9' (layout(column_major std430 align=32 ) uniform block{layout(column_major std430 align=32 ) uniform float e, layout(column_major std430 offset=12 align=4 ) uniform float f, layout(column_major std430 offset=20 align=32 ) uniform float g, layout(column_major std430 align=32 ) uniform float h}) 0:? 'inst11' (layout(column_major std430 ) uniform block{layout(column_major std430 offset=12 align=4 ) uniform float f, layout(column_major std430 ) uniform float g}) -0:? 
'specExample' (layout(column_major std140 ) uniform block{layout(column_major std140 ) uniform 4-component vector of float a, layout(column_major std140 offset=20 ) uniform 3-component vector of float b, layout(column_major std140 offset=40 ) uniform 2-component vector of float c, layout(column_major std140 offset=48 ) uniform 2-component vector of float d, layout(column_major std140 align=16 ) uniform float e, layout(column_major std140 align=2 ) uniform double f, layout(column_major std140 ) uniform double g, layout(column_major std140 offset=80 ) uniform float h, layout(column_major std140 align=64 ) uniform 3-component vector of double i, layout(column_major std140 offset=153 align=8 ) uniform float j}) +0:? 'specExampleErrors' (layout(column_major std140 ) uniform block{layout(column_major std140 offset=0 ) uniform 4-component vector of float a, layout(column_major std140 offset=32 ) uniform 3-component vector of float b, layout(column_major std140 offset=48 ) uniform 2-component vector of float c, layout(column_major std140 offset=56 ) uniform double g, layout(column_major std140 offset=72 ) uniform double h}) +0:? 'specExample' (layout(column_major std140 ) uniform block{layout(column_major std140 offset=0 ) uniform 4-component vector of float a, layout(column_major std140 offset=32 ) uniform 3-component vector of float b, layout(column_major std140 offset=48 ) uniform 2-component vector of float d, layout(column_major std140 offset=64 align=16 ) uniform float e, layout(column_major std140 offset=72 align=2 ) uniform double f, layout(column_major std140 offset=80 ) uniform float h, layout(column_major std140 offset=128 align=64 ) uniform 3-component vector of double i, layout(column_major std140 offset=168 align=8 ) uniform float j}) +0:? 
'specExampleErrors430' (layout(column_major std430 ) uniform block{layout(column_major std430 offset=0 ) uniform 4-component vector of float a, layout(column_major std430 offset=32 ) uniform 3-component vector of float b, layout(column_major std430 offset=48 ) uniform 2-component vector of float c, layout(column_major std430 offset=56 ) uniform double g, layout(column_major std430 offset=72 ) uniform double h}) +0:? 'specExample430' (layout(column_major std430 ) uniform block{layout(column_major std430 offset=0 ) uniform 4-component vector of float a, layout(column_major std430 offset=32 ) uniform 3-component vector of float b, layout(column_major std430 offset=48 ) uniform 2-component vector of float d, layout(column_major std430 offset=64 align=16 ) uniform float e, layout(column_major std430 offset=72 align=2 ) uniform double f, layout(column_major std430 offset=80 ) uniform float h, layout(column_major std430 offset=128 align=64 ) uniform 3-component vector of double i, layout(column_major std430 offset=168 align=8 ) uniform float j}) +0:? 'specExample4300' (layout(column_major std430 align=128 ) uniform block{layout(column_major std430 offset=0 align=128 ) uniform 4-component vector of float a, layout(column_major std430 offset=128 align=128 ) uniform 3-component vector of float b, layout(column_major std430 offset=256 align=128 ) uniform 2-component vector of float d, layout(column_major std430 offset=384 align=128 ) uniform float e, layout(column_major std430 offset=512 align=128 ) uniform double f, layout(column_major std430 offset=640 align=128 ) uniform float h, layout(column_major std430 offset=768 align=128 ) uniform 3-component vector of double i}) +0:? 
'specExample4301' (layout(column_major std430 align=128 ) uniform block{layout(column_major std430 offset=0 align=128 ) uniform 4-component vector of float a, layout(column_major std430 offset=128 align=128 ) uniform 3-component vector of float b, layout(column_major std430 offset=256 align=128 ) uniform 2-component vector of float d, layout(column_major std430 offset=512 align=128 ) uniform float e, layout(column_major std430 offset=520 align=8 ) uniform double f, layout(column_major std430 offset=640 align=128 ) uniform float h, layout(column_major std430 offset=768 align=128 ) uniform 3-component vector of double i}) Linked fragment stage: diff --git a/Test/baseResults/specExamples.vert.out b/Test/baseResults/specExamples.vert.out index b69695a9c..0a7c408d9 100644 --- a/Test/baseResults/specExamples.vert.out +++ b/Test/baseResults/specExamples.vert.out @@ -289,7 +289,7 @@ ERROR: node is still EOpNull! 0:? 'var5' (smooth out 4-component vector of float) 0:? '__anon__2' (out block{out 4-component vector of float var6}) 0:? 'var7' (smooth out 4-component vector of float) -0:? '__anon__3' (layout(row_major std140 ) uniform block{layout(row_major std140 ) uniform 4X4 matrix of float M1, layout(column_major std140 ) uniform 4X4 matrix of float M2, layout(row_major std140 ) uniform 3X3 matrix of float N1}) +0:? '__anon__3' (layout(row_major std140 ) uniform block{layout(row_major std140 offset=0 ) uniform 4X4 matrix of float M1, layout(column_major std140 offset=64 ) uniform 4X4 matrix of float M2, layout(row_major std140 offset=128 ) uniform 3X3 matrix of float N1}) 0:? '__anon__4' (layout(column_major shared ) uniform block{layout(column_major shared ) uniform 4X4 matrix of float M13, layout(row_major shared ) uniform 4X4 matrix of float m14, layout(column_major shared ) uniform 3X3 matrix of float N12}) 0:? 's17' (layout(binding=3 ) uniform sampler2D) 0:? 
'a2' (layout(binding=2 offset=4 ) uniform int) diff --git a/glslang/Include/Types.h b/glslang/Include/Types.h index a7435d00c..3a712cbbc 100644 --- a/glslang/Include/Types.h +++ b/glslang/Include/Types.h @@ -42,7 +42,7 @@ namespace glslang { -const int GlslangMaxTypeLength = 200; +const int GlslangMaxTypeLength = 200; // TODO: need to print block/struct one member per line, so this can stay bounded // // Details within a sampler type @@ -643,7 +643,7 @@ public: // for "empty" type (no args) or simple scalar/vector/matrix explicit TType(TBasicType t = EbtVoid, TStorageQualifier q = EvqTemporary, int vs = 1, int mc = 0, int mr = 0) : basicType(t), vectorSize(vs), matrixCols(mc), matrixRows(mr), arraySizes(0), - structure(0), structureSize(0), fieldName(0), typeName(0) + structure(0), fieldName(0), typeName(0) { sampler.clear(); qualifier.clear(); @@ -652,7 +652,7 @@ public: // for explicit precision qualifier TType(TBasicType t, TStorageQualifier q, TPrecisionQualifier p, int vs = 1, int mc = 0, int mr = 0) : basicType(t), vectorSize(vs), matrixCols(mc), matrixRows(mr), arraySizes(0), - structure(0), structureSize(0), fieldName(0), typeName(0) + structure(0), fieldName(0), typeName(0) { sampler.clear(); qualifier.clear(); @@ -663,7 +663,7 @@ public: // for turning a TPublicType into a TType explicit TType(const TPublicType& p) : basicType(p.basicType), vectorSize(p.vectorSize), matrixCols(p.matrixCols), matrixRows(p.matrixRows), arraySizes(p.arraySizes), - structure(0), structureSize(0), fieldName(0), typeName(0) + structure(0), fieldName(0), typeName(0) { if (basicType == EbtSampler) sampler = p.sampler; @@ -723,7 +723,6 @@ public: matrixRows = copyOf.matrixRows; arraySizes = copyOf.arraySizes; structure = copyOf.structure; - structureSize = copyOf.structureSize; fieldName = copyOf.fieldName; typeName = copyOf.typeName; } @@ -1015,24 +1014,25 @@ public: TTypeList* getStruct() { return structure; } TTypeList* getStruct() const { return structure; } - int 
getObjectSize() const + int computeNumComponents() const { - int totalSize; + int components = 0; - if (getBasicType() == EbtStruct || getBasicType() == EbtBlock) - totalSize = getStructSize(); - else if (matrixCols) - totalSize = matrixCols * matrixRows; + if (getBasicType() == EbtStruct || getBasicType() == EbtBlock) { + for (TTypeList::iterator tl = getStruct()->begin(); tl != getStruct()->end(); tl++) + components += ((*tl).type)->computeNumComponents(); + } else if (matrixCols) + components = matrixCols * matrixRows; else - totalSize = vectorSize; + components = vectorSize; if (isArray()) { // this function can only be used in paths that don't allow unsized arrays assert(getArraySize() > 0); - totalSize *= getArraySize(); + components *= getArraySize(); } - return totalSize; + return components; } // append this type's mangled name to the passed in 'name' @@ -1117,7 +1117,6 @@ protected: TType& operator=(const TType& type); void buildMangledName(TString&); - int getStructSize() const; TBasicType basicType : 8; int vectorSize : 4; @@ -1129,7 +1128,6 @@ protected: TArraySizes* arraySizes; TTypeList* structure; // 0 unless this is a struct - mutable int structureSize; // a cache, updated on first access TString *fieldName; // for structure field names TString *typeName; // for structure type name }; diff --git a/glslang/Include/revision.h b/glslang/Include/revision.h index 1a2b8fe55..a8b56f3c3 100644 --- a/glslang/Include/revision.h +++ b/glslang/Include/revision.h @@ -9,5 +9,5 @@ // source have to figure out how to create revision.h just to get a build // going. However, if it is not updated, it can be a version behind. 
-#define GLSLANG_REVISION "25043" -#define GLSLANG_DATE "2014/01/27 13:02:12" +#define GLSLANG_REVISION "25092" +#define GLSLANG_DATE "2014/01/28 14:13:59" diff --git a/glslang/MachineIndependent/Constant.cpp b/glslang/MachineIndependent/Constant.cpp index 801cf1d10..4648b559e 100644 --- a/glslang/MachineIndependent/Constant.cpp +++ b/glslang/MachineIndependent/Constant.cpp @@ -101,50 +101,52 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TIntermTyped* const TConstUnionArray rightUnionArray = node->getConstArray(); // Figure out the size of the result - int objectSize; + int newComps; + int constComps; switch(op) { case EOpMatrixTimesMatrix: - objectSize = getMatrixRows() * node->getMatrixCols(); + newComps = getMatrixRows() * node->getMatrixCols(); break; case EOpMatrixTimesVector: - objectSize = getMatrixRows(); + newComps = getMatrixRows(); break; case EOpVectorTimesMatrix: - objectSize = node->getMatrixCols(); + newComps = node->getMatrixCols(); break; default: - objectSize = getType().getObjectSize(); - if (constantNode->getType().getObjectSize() == 1 && getType().getObjectSize() > 1) { + newComps = getType().computeNumComponents(); + constComps = constantNode->getType().computeNumComponents(); + if (constComps == 1 && newComps > 1) { // for a case like vec4 f = vec4(2,3,4,5) + 1.2; - TConstUnionArray smearedArray(objectSize, node->getConstArray()[0]); + TConstUnionArray smearedArray(newComps, node->getConstArray()[0]); rightUnionArray = smearedArray; - } else if (constantNode->getType().getObjectSize() > 1 && getType().getObjectSize() == 1) { + } else if (constComps > 1 && newComps == 1) { // for a case like vec4 f = 1.2 + vec4(2,3,4,5); - objectSize = constantNode->getType().getObjectSize(); + newComps = constComps; rightUnionArray = node->getConstArray(); - TConstUnionArray smearedArray(objectSize, getConstArray()[0]); + TConstUnionArray smearedArray(newComps, getConstArray()[0]); unionArray = smearedArray; 
returnType.shallowCopy(node->getType()); } break; } - TConstUnionArray newConstArray(objectSize); + TConstUnionArray newConstArray(newComps); switch(op) { case EOpAdd: - for (int i = 0; i < objectSize; i++) + for (int i = 0; i < newComps; i++) newConstArray[i] = unionArray[i] + rightUnionArray[i]; break; case EOpSub: - for (int i = 0; i < objectSize; i++) + for (int i = 0; i < newComps; i++) newConstArray[i] = unionArray[i] - rightUnionArray[i]; break; case EOpMul: case EOpVectorTimesScalar: case EOpMatrixTimesScalar: - for (int i = 0; i < objectSize; i++) + for (int i = 0; i < newComps; i++) newConstArray[i] = unionArray[i] * rightUnionArray[i]; break; case EOpMatrixTimesMatrix: @@ -159,7 +161,7 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TIntermTyped* const returnType.shallowCopy(TType(getType().getBasicType(), EvqConst, 0, getMatrixRows(), node->getMatrixCols())); break; case EOpDiv: - for (int i = 0; i < objectSize; i++) { + for (int i = 0; i < newComps; i++) { switch (getType().getBasicType()) { case EbtDouble: case EbtFloat: @@ -211,7 +213,7 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TIntermTyped* const break; case EOpMod: - for (int i = 0; i < objectSize; i++) { + for (int i = 0; i < newComps; i++) { if (rightUnionArray[i] == 0) newConstArray[i] = unionArray[i]; else @@ -220,40 +222,40 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TIntermTyped* const break; case EOpRightShift: - for (int i = 0; i < objectSize; i++) + for (int i = 0; i < newComps; i++) newConstArray[i] = unionArray[i] >> rightUnionArray[i]; break; case EOpLeftShift: - for (int i = 0; i < objectSize; i++) + for (int i = 0; i < newComps; i++) newConstArray[i] = unionArray[i] << rightUnionArray[i]; break; case EOpAnd: - for (int i = 0; i < objectSize; i++) + for (int i = 0; i < newComps; i++) newConstArray[i] = unionArray[i] & rightUnionArray[i]; break; case EOpInclusiveOr: - for (int i = 0; i < objectSize; i++) + for (int i = 0; i < 
newComps; i++) newConstArray[i] = unionArray[i] | rightUnionArray[i]; break; case EOpExclusiveOr: - for (int i = 0; i < objectSize; i++) + for (int i = 0; i < newComps; i++) newConstArray[i] = unionArray[i] ^ rightUnionArray[i]; break; case EOpLogicalAnd: // this code is written for possible future use, will not get executed currently - for (int i = 0; i < objectSize; i++) + for (int i = 0; i < newComps; i++) newConstArray[i] = unionArray[i] && rightUnionArray[i]; break; case EOpLogicalOr: // this code is written for possible future use, will not get executed currently - for (int i = 0; i < objectSize; i++) + for (int i = 0; i < newComps; i++) newConstArray[i] = unionArray[i] || rightUnionArray[i]; break; case EOpLogicalXor: - for (int i = 0; i < objectSize; i++) { + for (int i = 0; i < newComps; i++) { switch (getType().getBasicType()) { case EbtBool: newConstArray[i].setBConst((unionArray[i] == rightUnionArray[i]) ? false : true); break; default: assert(false && "Default missing"); @@ -309,6 +311,7 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TType& returnType) int resultSize; bool componentWise = true; + int objectSize = getType().computeNumComponents(); switch (op) { case EOpDeterminant: case EOpAny: @@ -339,18 +342,17 @@ TIntermTyped* TIntermConstantUnion::fold(TOperator op, const TType& returnType) case EOpNormalize: componentWise = false; - resultSize = getType().getObjectSize(); + resultSize = objectSize; break; default: - resultSize = getType().getObjectSize(); + resultSize = objectSize; break; } // Set up for processing TConstUnionArray newConstArray(resultSize); const TConstUnionArray& unionArray = getConstArray(); - int objectSize = getType().getObjectSize(); // Process non-component-wise operations switch (op) { @@ -593,13 +595,13 @@ TIntermTyped* TIntermediate::fold(TIntermAggregate* aggrNode) case EOpVectorEqual: case EOpVectorNotEqual: componentwise = true; - objectSize = 
children[0]->getAsConstantUnion()->getType().getObjectSize(); + objectSize = children[0]->getAsConstantUnion()->getType().computeNumComponents(); break; case EOpCross: case EOpReflect: case EOpRefract: case EOpFaceForward: - objectSize = children[0]->getAsConstantUnion()->getType().getObjectSize(); + objectSize = children[0]->getAsConstantUnion()->getType().computeNumComponents(); break; case EOpDistance: case EOpDot: @@ -726,7 +728,7 @@ TIntermTyped* TIntermediate::fold(TIntermAggregate* aggrNode) } else { // Non-componentwise... - int numComps = children[0]->getAsConstantUnion()->getType().getObjectSize(); + int numComps = children[0]->getAsConstantUnion()->getType().computeNumComponents(); double dot; switch (aggrNode->getOp()) { @@ -788,7 +790,7 @@ TIntermTyped* TIntermediate::fold(TIntermAggregate* aggrNode) case EOpOuterProduct: { int numRows = numComps; - int numCols = children[1]->getAsConstantUnion()->getType().getObjectSize(); + int numCols = children[1]->getAsConstantUnion()->getType().computeNumComponents(); for (int row = 0; row < numRows; ++row) for (int col = 0; col < numCols; ++col) newConstArray[col * numRows + row] = childConstUnions[0][row] * childConstUnions[1][col]; @@ -828,7 +830,7 @@ TIntermTyped* TIntermediate::foldConstructor(TIntermAggregate* aggrNode) { bool error = false; - TConstUnionArray unionArray(aggrNode->getType().getObjectSize()); + TConstUnionArray unionArray(aggrNode->getType().computeNumComponents()); if (aggrNode->getSequence().size() == 1) error = parseConstTree(aggrNode, unionArray, aggrNode->getOp(), aggrNode->getType(), true); else @@ -850,13 +852,13 @@ TIntermTyped* TIntermediate::foldDereference(TIntermTyped* node, int index, TSou TType dereferencedType(node->getType(), index); dereferencedType.getQualifier().storage = EvqConst; TIntermTyped* result = 0; - int size = dereferencedType.getObjectSize(); + int size = dereferencedType.computeNumComponents(); int start; if (node->isStruct()) { start = 0; for (int i = 0; i < 
index; ++i) - start += (*node->getType().getStruct())[i].type->getObjectSize(); + start += (*node->getType().getStruct())[i].type->computeNumComponents(); } else start = size * index; diff --git a/glslang/MachineIndependent/Intermediate.cpp b/glslang/MachineIndependent/Intermediate.cpp index 952d103e7..80c98f1be 100644 --- a/glslang/MachineIndependent/Intermediate.cpp +++ b/glslang/MachineIndependent/Intermediate.cpp @@ -1415,7 +1415,7 @@ void TIntermTyped::propagatePrecision(TPrecisionQualifier newPrecision) TIntermTyped* TIntermediate::promoteConstantUnion(TBasicType promoteTo, TIntermConstantUnion* node) { const TConstUnionArray& rightUnionArray = node->getConstArray(); - int size = node->getType().getObjectSize(); + int size = node->getType().computeNumComponents(); TConstUnionArray leftUnionArray(size); diff --git a/glslang/MachineIndependent/ParseHelper.cpp b/glslang/MachineIndependent/ParseHelper.cpp index 34699c543..91ef2dd2a 100644 --- a/glslang/MachineIndependent/ParseHelper.cpp +++ b/glslang/MachineIndependent/ParseHelper.cpp @@ -1602,13 +1602,13 @@ bool TParseContext::constructorError(TSourceLoc loc, TIntermNode* node, TFunctio bool matrixInMatrix = false; bool arrayArg = false; for (int i = 0; i < function.getParamCount(); ++i) { - size += function[i].type->getObjectSize(); + size += function[i].type->computeNumComponents(); if (constructingMatrix && function[i].type->isMatrix()) matrixInMatrix = true; if (full) overFull = true; - if (op != EOpConstructStruct && ! type.isArray() && size >= type.getObjectSize()) + if (op != EOpConstructStruct && ! 
type.isArray() && size >= type.computeNumComponents()) full = true; if (function[i].type->getQualifier().storage != EvqConst) constType = false; @@ -1649,8 +1649,8 @@ bool TParseContext::constructorError(TSourceLoc loc, TIntermNode* node, TFunctio return true; } - if ((op != EOpConstructStruct && size != 1 && size < type.getObjectSize()) || - (op == EOpConstructStruct && size < type.getObjectSize())) { + if ((op != EOpConstructStruct && size != 1 && size < type.computeNumComponents()) || + (op == EOpConstructStruct && size < type.computeNumComponents())) { error(loc, "not enough data provided for construction", "constructor", ""); return true; } @@ -4097,10 +4097,58 @@ void TParseContext::fixBlockXfbOffsets(TSourceLoc loc, TQualifier& qualifier, TT qualifier.layoutXfbOffset = TQualifier::layoutXfbOffsetEnd; } +// Calculate and save the offset of each block member, using the recursively +// defined block offset rules and the user-provided offset and align. +// +// Also, compute and save the total size of the block. For the block's size, arrayness +// is not taken into account, as each element is backed by a separate buffer. +// void TParseContext::fixBlockUniformOffsets(TSourceLoc loc, TQualifier& qualifier, TTypeList& typeList) { - if (qualifier.storage != EvqUniform || qualifier.storage != EvqBuffer) + if (qualifier.storage != EvqUniform && qualifier.storage != EvqBuffer) return; + if (qualifier.layoutPacking != ElpStd140 && qualifier.layoutPacking != ElpStd430) + return; + + int offset = 0; + int memberSize; + for (unsigned int member = 0; member < typeList.size(); ++member) { + TQualifier& memberQualifier = typeList[member].type->getQualifier(); + TSourceLoc memberLoc = typeList[member].loc; + + // "When align is applied to an array, it effects only the start of the array, not the array's internal stride." 
+ + int memberAlignment = intermediate.getBaseAlignment(*typeList[member].type, memberSize, qualifier.layoutPacking == ElpStd140); + if (memberQualifier.hasOffset()) { + // "The specified offset must be a multiple + // of the base alignment of the type of the block member it qualifies, or a compile-time error results." + if (! IsMultipleOfPow2(memberQualifier.layoutOffset, memberAlignment)) + error(memberLoc, "must be a multiple of the member's alignment", "offset", ""); + + // "It is a compile-time error to specify an offset that is smaller than the offset of the previous + // member in the block or that lies within the previous member of the block" + if (memberQualifier.layoutOffset < offset) + error(memberLoc, "cannot lie in previous members", "offset", ""); + + // "The offset qualifier forces the qualified member to start at or after the specified + // integral-constant expression, which will be its byte offset from the beginning of the buffer. + // "The actual offset of a member is computed as + // follows: If offset was declared, start with that offset, otherwise start with the next available offset." + offset = std::max(offset, memberQualifier.layoutOffset); + } + + // "The actual alignment of a member will be the greater of the specified align alignment and the standard + // (e.g., std140) base alignment for the member's type." + if (memberQualifier.hasAlign()) + memberAlignment = std::max(memberAlignment, memberQualifier.layoutAlign); + + // "If the resulting offset is not a multiple of the actual alignment, + // increase it to the first offset that is a multiple of + // the actual alignment." + RoundToPow2(offset, memberAlignment); + typeList[member].type->getQualifier().layoutOffset = offset; + offset += memberSize; + } } // For an identifier that is already declared, add more qualification to it. 
diff --git a/glslang/MachineIndependent/SymbolTable.cpp b/glslang/MachineIndependent/SymbolTable.cpp index 1384c4660..9a2f3b25b 100644 --- a/glslang/MachineIndependent/SymbolTable.cpp +++ b/glslang/MachineIndependent/SymbolTable.cpp @@ -118,20 +118,6 @@ void TType::buildMangledName(TString& mangledName) } } -int TType::getStructSize() const -{ - if (! isStruct()) { - assert(false && "Not a struct"); - return 0; - } - - if (structureSize == 0) - for (TTypeList::iterator tl = getStruct()->begin(); tl != getStruct()->end(); tl++) - structureSize += ((*tl).type)->getObjectSize(); - - return structureSize; -} - // // Dump functions. // @@ -256,7 +242,6 @@ TVariable::TVariable(const TVariable& copyOf) : TSymbol(copyOf) if (! copyOf.unionArray.empty()) { assert(! copyOf.type.isStruct()); - assert(copyOf.type.getObjectSize() == 1); TConstUnionArray newArray(1); newArray[0] = copyOf.unionArray[0]; unionArray = newArray; diff --git a/glslang/MachineIndependent/intermOut.cpp b/glslang/MachineIndependent/intermOut.cpp index f51035340..9f89cadba 100644 --- a/glslang/MachineIndependent/intermOut.cpp +++ b/glslang/MachineIndependent/intermOut.cpp @@ -411,7 +411,7 @@ bool TOutputTraverser::visitSelection(TVisit /* visit */, TIntermSelection* node void OutputConstantUnion(TInfoSink& out, const TIntermTyped* node, const TConstUnionArray& constUnion, int depth) { - int size = node->getType().getObjectSize(); + int size = node->getType().computeNumComponents(); for (int i = 0; i < size; i++) { OutputTreeText(out, node, depth); diff --git a/glslang/MachineIndependent/linkValidate.cpp b/glslang/MachineIndependent/linkValidate.cpp index 491b637aa..ec32c9009 100644 --- a/glslang/MachineIndependent/linkValidate.cpp +++ b/glslang/MachineIndependent/linkValidate.cpp @@ -60,6 +60,10 @@ void TIntermediate::error(TInfoSink& infoSink, const char* message) ++numErrors; } +// TODO: 4.4 offset/align: "Two blocks linked together in the same program with the same block +// name must have the exact 
same set of members qualified with offset and their integral-constant +// expression values must be the same, or a link-time error results." + // // Merge the information from 'unit' into 'this' // @@ -266,7 +270,9 @@ void TIntermediate::mergeErrorCheck(TInfoSink& infoSink, const TIntermSymbol& sy } // Layouts... - // TODO: 4.4 enhanced layouts: generalize to include offset/align + // TODO: 4.4 enhanced layouts: Generalize to include offset/align: current spec + // requires separate user-supplied offset from actual computed offset, but + // current implementation only has one offset. if (symbol.getQualifier().layoutMatrix != unitSymbol.getQualifier().layoutMatrix || symbol.getQualifier().layoutPacking != unitSymbol.getQualifier().layoutPacking || symbol.getQualifier().layoutLocation != unitSymbol.getQualifier().layoutLocation || @@ -321,7 +327,7 @@ void TIntermediate::finalCheck(TInfoSink& infoSink) // "It is a compile-time or link-time error to have // any xfb_offset that overflows xfb_stride, whether stated on declarations before or after the xfb_stride, or - // in different compilation units. While xfb_stridecan be declared multiple times for the same buffer, it is a + // in different compilation units. While xfb_stride can be declared multiple times for the same buffer, it is a // compile-time or link-time error to have different values specified for the stride for the same buffer." if (xfbBuffers[b].stride != TQualifier::layoutXfbStrideEnd && xfbBuffers[b].implicitStride > xfbBuffers[b].stride) { error(infoSink, "xfb_stride is too small to hold all buffer entries:"); @@ -740,4 +746,153 @@ unsigned int TIntermediate::computeTypeXfbSize(const TType& type, bool& contains return 4 * numComponents; } +const int baseAlignmentVec4Std140 = 16; + +// Return the size and alignment of a scalar. +// The size is returned in the 'size' parameter +// Return value is the alignment of the type. 
+int TIntermediate::getBaseAlignmentScalar(const TType& type, int& size) const +{ + switch (type.getBasicType()) { + case EbtDouble: size = 8; return 8; + default: size = 4; return 4; + } +} + +// Implement base-alignment and size rules from section 7.6.2.2 Standard Uniform Block Layout +// Operates recursively. +// +// If std140 is true, it does the rounding up to vec4 size required by std140, +// otherwise it does not, yielding std430 rules. +// +// The size is returned in the 'size' parameter +// Return value is the alignment of the type. +int TIntermediate::getBaseAlignment(const TType& type, int& size, bool std140) const +{ + int alignment; + + // When using the std140 storage layout, structures will be laid out in buffer + // storage with its members stored in monotonically increasing order based on their + // location in the declaration. A structure and each structure member have a base + // offset and a base alignment, from which an aligned offset is computed by rounding + // the base offset up to a multiple of the base alignment. The base offset of the first + // member of a structure is taken from the aligned offset of the structure itself. The + // base offset of all other structure members is derived by taking the offset of the + // last basic machine unit consumed by the previous member and adding one. Each + // structure member is stored in memory at its aligned offset. The members of a top- + // level uniform block are laid out in buffer storage by treating the uniform block as + // a structure with a base offset of zero. + // + // 1. If the member is a scalar consuming N basic machine units, the base alignment is N. + // + // 2. If the member is a two- or four-component vector with components consuming N basic + // machine units, the base alignment is 2N or 4N, respectively. + // + // 3. If the member is a three-component vector with components consuming N + // basic machine units, the base alignment is 4N. + // + // 4. 
If the member is an array of scalars or vectors, the base alignment and array + // stride are set to match the base alignment of a single array element, according + // to rules (1), (2), and (3), and rounded up to the base alignment of a vec4. The + // array may have padding at the end; the base offset of the member following + // the array is rounded up to the next multiple of the base alignment. + // + // 5. If the member is a column-major matrix with C columns and R rows, the + // matrix is stored identically to an array of C column vectors with R + // components each, according to rule (4). + // + // 6. If the member is an array of S column-major matrices with C columns and + // R rows, the matrix is stored identically to a row of S C column vectors + // with R components each, according to rule (4). + // + // 7. If the member is a row-major matrix with C columns and R rows, the matrix + // is stored identically to an array of R row vectors with C components each, + // according to rule (4). + // + // 8. If the member is an array of S row-major matrices with C columns and R + // rows, the matrix is stored identically to a row of S R row vectors with C + // components each, according to rule (4). + // + // 9. If the member is a structure, the base alignment of the structure is N , where + // N is the largest base alignment value of any of its members, and rounded + // up to the base alignment of a vec4. The individual members of this substructure + // are then assigned offsets by applying this set of rules recursively, + // where the base offset of the first member of the sub-structure is equal to the + // aligned offset of the structure. The structure may have padding at the end; + // the base offset of the member following the sub-structure is rounded up to + // the next multiple of the base alignment of the structure. + // + // 10. If the member is an array of S structures, the S elements of the array are laid + // out in order, according to rule (9). 
+ + // rules 4, 6, and 8 + if (type.isArray()) { + TType derefType(type, 0); + alignment = getBaseAlignment(derefType, size, std140); + if (std140) + alignment = std::max(baseAlignmentVec4Std140, alignment); + RoundToPow2(size, alignment); + size *= type.getArraySize(); + return alignment; + } + + // rule 9 + if (type.getBasicType() == EbtStruct) { + const TTypeList& memberList = *type.getStruct(); + + size = 0; + int maxAlignment = std140 ? baseAlignmentVec4Std140 : 0; + for (size_t m = 0; m < memberList.size(); ++m) { + int memberSize; + int memberAlignment = getBaseAlignment(*memberList[m].type, memberSize, std140); + maxAlignment = std::max(maxAlignment, memberAlignment); + RoundToPow2(size, memberAlignment); + size += memberSize; + } + + return maxAlignment; + } + + // rule 1 + if (type.isScalar()) + return getBaseAlignmentScalar(type, size); + + // rules 2 and 3 + if (type.isVector()) { + int scalarAlign = getBaseAlignmentScalar(type, size); + switch (type.getVectorSize()) { + case 2: + size *= 2; + return 2 * scalarAlign; + default: + size *= type.getVectorSize(); + return 4 * scalarAlign; + } + } + + // rules 5 and 7 + if (type.isMatrix()) { + TType derefType(type, 0); + + // rule 7: deref to row, not to column, meaning the size of vector is num columns instead of num rows + if (type.getQualifier().layoutMatrix == ElmRowMajor) + derefType.setElementType(derefType.getBasicType(), type.getMatrixCols(), 0, 0, 0); + + alignment = getBaseAlignment(derefType, size, std140); + if (std140) + alignment = std::max(baseAlignmentVec4Std140, alignment); + RoundToPow2(size, alignment); + if (type.getQualifier().layoutMatrix == ElmRowMajor) + size *= type.getMatrixRows(); + else + size *= type.getMatrixCols(); + + return alignment; + } + + assert(0); // all cases should be covered above + size = baseAlignmentVec4Std140; + return baseAlignmentVec4Std140; +} + } // end namespace glslang diff --git a/glslang/MachineIndependent/localintermediate.h 
b/glslang/MachineIndependent/localintermediate.h index c3d7588a8..9bb17099e 100644 --- a/glslang/MachineIndependent/localintermediate.h +++ b/glslang/MachineIndependent/localintermediate.h @@ -239,6 +239,7 @@ public: } int addXfbBufferOffset(const TType&); unsigned int computeTypeXfbSize(const TType&, bool& containsDouble) const; + int getBaseAlignment(const TType&, int& size, bool std140) const; protected: void error(TInfoSink& infoSink, const char*); @@ -249,6 +250,7 @@ protected: void inOutLocationCheck(TInfoSink&); TIntermSequence& findLinkerObjects() const; bool userOutputUsed() const; + int getBaseAlignmentScalar(const TType&, int& size) const; protected: const EShLanguage language; diff --git a/glslang/MachineIndependent/parseConst.cpp b/glslang/MachineIndependent/parseConst.cpp index ac643b680..978dc8a41 100644 --- a/glslang/MachineIndependent/parseConst.cpp +++ b/glslang/MachineIndependent/parseConst.cpp @@ -82,7 +82,7 @@ bool TConstTraverser::visitAggregate(TVisit /* visit */, TIntermAggregate* node) if (flag) { singleConstantParam = true; constructorType = node->getOp(); - size = node->getType().getObjectSize(); + size = node->getType().computeNumComponents(); if (node->getType().isMatrix()) { isMatrix = true; @@ -115,13 +115,13 @@ bool TConstTraverser::visitAggregate(TVisit /* visit */, TIntermAggregate* node) void TConstTraverser::visitConstantUnion(TIntermConstantUnion* node) { TConstUnionArray leftUnionArray(unionArray); - int instanceSize = type.getObjectSize(); + int instanceSize = type.computeNumComponents(); if (index >= instanceSize) return; if (! 
singleConstantParam) { - int rightUnionSize = node->getType().getObjectSize(); + int rightUnionSize = node->getType().computeNumComponents(); const TConstUnionArray& rightUnionArray = node->getConstArray(); for (int i = 0; i < rightUnionSize; i++) { @@ -136,6 +136,7 @@ void TConstTraverser::visitConstantUnion(TIntermConstantUnion* node) const TConstUnionArray& rightUnionArray = node->getConstArray(); if (! isMatrix) { int count = 0; + int nodeComps = node->getType().computeNumComponents(); for (int i = index; i < endIndex; i++) { if (i >= instanceSize) return; @@ -144,7 +145,7 @@ void TConstTraverser::visitConstantUnion(TIntermConstantUnion* node) (index)++; - if (node->getType().getObjectSize() > 1) + if (nodeComps > 1) count++; } } else { @@ -169,6 +170,7 @@ void TConstTraverser::visitConstantUnion(TIntermConstantUnion* node) // matrix from vector int count = 0; const int startIndex = index; + int nodeComps = node->getType().computeNumComponents(); for (int i = startIndex; i < endIndex; i++) { if (i >= instanceSize) return; @@ -179,7 +181,7 @@ void TConstTraverser::visitConstantUnion(TIntermConstantUnion* node) index++; - if (node->getType().getObjectSize() > 1) + if (nodeComps > 1) count++; } } diff --git a/glslang/MachineIndependent/reflection.cpp b/glslang/MachineIndependent/reflection.cpp index 164ced57b..709fc45ec 100644 --- a/glslang/MachineIndependent/reflection.cpp +++ b/glslang/MachineIndependent/reflection.cpp @@ -108,139 +108,42 @@ public: } } - static const int baseAlignmentVec4Std140; - - // align a value: if 'value' is not aligned to 'alignment', move it up to a multiple of alignment - void align(int& value, int alignment) - { - int error = value % alignment; - if (error) - value += alignment - error; - } - - // return the size and alignment of a scalar - int getBaseAlignmentScalar(const TType& type, int& size) - { - switch (type.getBasicType()) { - case EbtDouble: size = 8; return 8; - default: size = 4; return 4; - } - } - - // Implement 
base-alignment and size rules from section 7.6.2.2 Standard Uniform Block Layout - // Operates recursively. - // If std140 is true, it does the rounding up to vec4 size required by std140, - // otherwise it does not, yielding std430 rules. - // - // Returns the size of the type. - int getBaseAlignment(const TType& type, int& size, bool std140) + // Lookup or calculate the offset of a block member, using the recursively + // defined block offset rules. + int getOffset(const TType& type, int index) { - int alignment; - - // rules 4, 6, and 8 - if (type.isArray()) { - TType derefType(type, 0); - alignment = getBaseAlignment(derefType, size, std140); - if (std140) - alignment = std::max(baseAlignmentVec4Std140, alignment); - align(size, alignment); - size *= type.getArraySize(); - return alignment; - } - - // rule 9 - if (type.getBasicType() == EbtStruct) { - const TTypeList& memberList = *type.getStruct(); - - size = 0; - int maxAlignment = std140 ? baseAlignmentVec4Std140 : 0; - for (size_t m = 0; m < memberList.size(); ++m) { - int memberSize; - int memberAlignment = getBaseAlignment(*memberList[m].type, memberSize, std140); - maxAlignment = std::max(maxAlignment, memberAlignment); - align(size, memberAlignment); - size += memberSize; - } - - return maxAlignment; - } - - // rule 1 - if (type.isScalar()) - return getBaseAlignmentScalar(type, size); - - // rules 2 and 3 - if (type.isVector()) { - int scalarAlign = getBaseAlignmentScalar(type, size); - switch (type.getVectorSize()) { - case 2: - size *= 2; - return 2 * scalarAlign; - default: - size *= type.getVectorSize(); - return 4 * scalarAlign; - } - } - - // rules 5 and 7 - if (type.isMatrix()) { - TType derefType(type, 0); - - // rule 5: deref to row, not to column, meaning the size of vector is num columns instead of num rows - if (type.getQualifier().layoutMatrix == ElmRowMajor) - derefType.setElementType(derefType.getBasicType(), type.getMatrixCols(), 0, 0, 0); - - alignment = getBaseAlignment(derefType, 
size, std140); - if (std140) - alignment = std::max(baseAlignmentVec4Std140, alignment); - align(size, alignment); - if (type.getQualifier().layoutMatrix == ElmRowMajor) - size *= type.getMatrixRows(); - else - size *= type.getMatrixCols(); - - return alignment; - } + const TTypeList& memberList = *type.getStruct(); - assert(0); // all cases should be covered above - size = baseAlignmentVec4Std140; - return baseAlignmentVec4Std140; - } - - // Calculate the offset of a block member, using the recursively defined - // block offset rules. - int getBlockMemberOffset(const TType& blockType, int index) - { - // TODO: reflection performance: cache intermediate results instead of recomputing them + // Don't calculate offset if one is present, it could be user supplied + // and different than what would be calculated. That is, this is faster, + // but not just an optimization. + if (memberList[index].type->getQualifier().hasOffset()) + return memberList[index].type->getQualifier().layoutOffset; - int offset = 0; - const TTypeList& memberList = *blockType.getStruct(); int memberSize; - for (int m = 0; m < index; ++m) { - int memberAlignment = getBaseAlignment(*memberList[m].type, memberSize, blockType.getQualifier().layoutPacking == ElpStd140); - align(offset, memberAlignment); - offset += memberSize; + int offset = 0; + for (int m = 0; m <= index; ++m) { + int memberAlignment = intermediate.getBaseAlignment(*memberList[m].type, memberSize, type.getQualifier().layoutPacking == ElpStd140); + RoundToPow2(offset, memberAlignment); + if (m < index) + offset += memberSize; } - int memberAlignment = getBaseAlignment(*memberList[index].type, memberSize, blockType.getQualifier().layoutPacking == ElpStd140); - align(offset, memberAlignment); return offset; } // Calculate the block data size. - // Arrayness is not taken into account, each element is backed by a separate buffer. + // Block arrayness is not taken into account, each element is backed by a separate buffer. 
int getBlockSize(const TType& blockType) { - int size = 0; - const TTypeList& memberList = *blockType.getStruct(); - int memberSize; - for (size_t m = 0; m < memberList.size(); ++m) { - int memberAlignment = getBaseAlignment(*memberList[m].type, memberSize, blockType.getQualifier().layoutPacking == ElpStd140); - align(size, memberAlignment); - size += memberSize; - } + const TTypeList& memberList = *blockType.getStruct(); + int lastIndex = memberList.size() - 1; + int lastOffset = getOffset(blockType, lastIndex); - return size; + int lastMemberSize; + intermediate.getBaseAlignment(*memberList[lastIndex].type, lastMemberSize, blockType.getQualifier().layoutPacking == ElpStd140); + + return lastOffset + lastMemberSize; } // Traverse the provided deref chain, including the base, and @@ -283,7 +186,7 @@ public: case EOpIndexDirectStruct: index = visitNode->getRight()->getAsConstantUnion()->getConstArray()[0].getIConst(); if (offset >= 0) - offset += getBlockMemberOffset(visitNode->getLeft()->getType(), index); + offset += getOffset(visitNode->getLeft()->getType(), index); if (name.size() > 0) name.append("."); name.append((*visitNode->getLeft()->getType().getStruct())[index].type->getFieldName()); @@ -715,8 +618,6 @@ public: std::set<const TIntermNode*> processedDerefs; }; -const int TLiveTraverser::baseAlignmentVec4Std140 = 16; - // // Implement the traversal functions of interest. // -- GitLab