From c7776ec3fd2e702c74ef5090542affc0e07da476 Mon Sep 17 00:00:00 2001 From: John Kessenich <cepheus@frii.com> Date: Sun, 26 Jan 2014 01:37:13 +0000 Subject: [PATCH] GL_ARB_enhanced_layouts, part 4: Numerical side of xfb_*: offset computation, size computation, alias detection, paddings, overflow, implicit strides, gl_Max* checks, etc. git-svn-id: https://cvs.khronos.org/svn/repos/ogl/trunk/ecosystem/public/sdk/tools/glslang@25014 e7fa87d3-cd2b-0410-9028-fcbf551c1848 --- StandAlone/StandAlone.cpp | 6 + Test/400.tesc | 4 +- Test/440.vert | 77 ++++++++- Test/baseResults/440.vert.out | 47 ++++- Test/baseResults/test.conf | 2 + glslang/Include/Common.h | 14 ++ glslang/Include/ResourceLimits.h | 2 + glslang/Include/Types.h | 54 ++++-- glslang/Include/revision.h | 4 +- glslang/MachineIndependent/ParseHelper.cpp | 100 ++++++++++- glslang/MachineIndependent/ParseHelper.h | 1 + glslang/MachineIndependent/linkValidate.cpp | 161 ++++++++++++++++-- .../MachineIndependent/localintermediate.h | 92 +++++++--- 13 files changed, 486 insertions(+), 78 deletions(-) diff --git a/StandAlone/StandAlone.cpp b/StandAlone/StandAlone.cpp index 570054ff1..e917c20b5 100644 --- a/StandAlone/StandAlone.cpp +++ b/StandAlone/StandAlone.cpp @@ -192,6 +192,8 @@ const char* DefaultConfig = "MaxFragmentAtomicCounterBuffers 1\n" "MaxCombinedAtomicCounterBuffers 1\n" "MaxAtomicCounterBufferSize 16384\n" + "MaxTransformFeedbackBuffers 4\n" + "MaxTransformFeedbackInterleavedComponents 64\n" "nonInductiveForLoops 1\n" "whileLoops 1\n" @@ -390,6 +392,10 @@ void ProcessConfigFile() Resources.maxCombinedAtomicCounterBuffers = value; else if (strcmp(token, "MaxAtomicCounterBufferSize") == 0) Resources.maxAtomicCounterBufferSize = value; + else if (strcmp(token, "MaxTransformFeedbackBuffers") == 0) + Resources.maxTransformFeedbackBuffers = value; + else if (strcmp(token, "MaxTransformFeedbackInterleavedComponents") == 0) + Resources.maxTransformFeedbackInterleavedComponents = value; else if (strcmp(token, 
"nonInductiveForLoops") == 0) Resources.limits.nonInductiveForLoops = (value != 0); diff --git a/Test/400.tesc b/Test/400.tesc index b7aea869c..d167d5d79 100644 --- a/Test/400.tesc +++ b/Test/400.tesc @@ -48,8 +48,8 @@ in vec2 ina; // ERROR, not array in vec2 inb[]; in vec2 inc[18]; // ERROR, wrong size in vec2 ind[gl_MaxPatchVertices]; - -#extension GL_ARB_separate_shader_objects : enable + +#extension GL_ARB_separate_shader_objects : enable layout(location = 3) in vec4 ivla[]; layout(location = 4) in vec4 ivlb[]; diff --git a/Test/440.vert b/Test/440.vert index d02e7d38a..04276690e 100644 --- a/Test/440.vert +++ b/Test/440.vert @@ -68,31 +68,94 @@ out bblck1 { } bbinst1; out bblck2 { - layout(xfb_offset=16) vec4 bbv; + layout(xfb_offset=64) vec4 bbv; } bbinst2; -layout(xfb_buffer = 3, xfb_stride = 16) out; +layout(xfb_buffer = 3, xfb_stride = 64) out; // default buffer is 3 out bblck3 { - layout(xfb_offset=16) vec4 bbv; + layout(xfb_offset=16) vec4 bbv; // in xfb_buffer 3 } bbinst3; uniform ubblck3 { - layout(xfb_offset=16) vec4 bbv; // ERROR + layout(xfb_offset=16) vec4 bbv; // ERROR, not in a uniform } ubbinst3; -layout(xfb_buffer=2, xfb_offset=32, xfb_stride=64) out vec4 bg; +layout(xfb_buffer=2, xfb_offset=48, xfb_stride=80) out vec4 bg; layout( xfb_offset=32, xfb_stride=64) out vec4 bh; layout(xfb_offset=48) out; // ERROR -layout(xfb_stride=32, xfb_buffer=2, xfb_offset=16) out bblck4 { +layout(xfb_stride=80, xfb_buffer=2, xfb_offset=16) out bblck4 { vec4 bbv1; vec4 bbv2; } bbinst4; out bblck5 { layout(xfb_offset=0) vec4 bbv1; - layout(xfb_stride=32, xfb_buffer=3, xfb_offset=16) vec4 bbv2; + layout(xfb_stride=64, xfb_buffer=3, xfb_offset=48) vec4 bbv2; layout(xfb_buffer=2) vec4 bbv3; // ERROR, wrong buffer } bbinst5; + +out layout(xfb_buffer=2) bblck6 { + layout(xfb_offset=0) vec4 bbv1; + layout(xfb_stride=64, xfb_buffer=3, xfb_offset=32) vec4 bbv2; // ERROR, overlap 32 from bh, and buffer contradiction + layout(xfb_buffer=2, xfb_offset=0) vec4 bbv3; // 
ERROR, overlap 0 from bbinst5 + layout(xfb_buffer=2) vec4 bbv5; + layout(xfb_offset=24) float bbf6; // ERROR, overlap 24 from bbv1 in bbinst4 +} bbinst6; + +layout(xfb_stride=48) out; // ERROR, stride of buffer 3 + +layout(xfb_buffer=1) out; // default buffer is 1 +layout(xfb_offset=4) out float bj; +layout(xfb_offset=0) out ivec2 bk; // ERROR, overlap 4 + +layout(xfb_buffer=3, xfb_stride=48) out; // ERROR, stride of buffer 3 (default is now 3) +layout(xfb_stride=48) out float bl; // ERROR, stride of buffer 3 + +layout(xfb_stride=48) out bblck7 { // ERROR, stride of buffer 3 + layout(xfb_stride=64) vec4 bbv1; + layout(xfb_stride=32) vec4 bbv2; // ERROR, stride of buffer 3 +} bbinst7; + +struct S5 { + int i; // 4 bytes plus 4 byte hole + double d; // 8 bytes + float f; // 4 bytes +}; // total size = 20 + +struct T { + bool b; // 4 plus 4 byte hole + S5 s; // 20 + vec2 v2; // 8 +}; // total size = 36 + +out layout(xfb_buffer=0, xfb_offset=0, xfb_stride=92) bblck8 { // ERROR, stride not multiple of 8 + bool b; // offset 0 + T t; // offset 8, size 40 + int i; // offset 40 + 4 = 48 + mat3x3 m3; // offset 52 + float f; // offset 52 + 9*4 = 88 + float g; // ERROR, overflow stride +} bbinst8; + +out layout(xfb_buffer=4) bblck9 { + layout(xfb_offset=1) bool b; // ERROR + layout(xfb_offset=12) T t; // ERROR + layout(xfb_offset=52) mat3x3 m3; // non-multiple of 8 okay + layout(xfb_offset=90) int i; // ERROR + layout(xfb_offset=98) double d; // ERROR + layout(xfb_offset=108) S s; // non-multiple of 8 okay +} bbinst9; + +layout(xfb_buffer=5, xfb_stride=6) out; // link ERROR, stride not multiple of 4 +layout(xfb_offset=0) out float bm; + +layout(xfb_buffer=6, xfb_stride=2000) out; // ERROR, stride too big + +out layout(xfb_buffer=7, xfb_offset=0) bblck10 { // link ERROR, implicit stride too big + dmat4x4 m1; + dmat4x4 m2; + float f; +} bbinst10; diff --git a/Test/baseResults/440.vert.out b/Test/baseResults/440.vert.out index 6eef4abda..c385efa73 100644 --- 
a/Test/baseResults/440.vert.out +++ b/Test/baseResults/440.vert.out @@ -22,7 +22,27 @@ ERROR: 0:61: 'location' : cannot declare a default, use a full declaration ERROR: 0:81: 'xfb layout qualifier' : can only be used on an output ERROR: 0:87: 'xfb_offset' : cannot declare a default, use a full declaration ERROR: 0:97: 'xfb_buffer' : member cannot contradict block (or what block inherited from global) -ERROR: 22 compilation errors. No code generated. +ERROR: 0:102: 'xfb_buffer' : member cannot contradict block (or what block inherited from global) +ERROR: 0:102: 'xfb_offset' : overlapping offsets at offset 32 in buffer 3 +ERROR: 0:103: 'xfb_offset' : overlapping offsets at offset 0 in buffer 2 +ERROR: 0:105: 'xfb_offset' : overlapping offsets at offset 24 in buffer 2 +ERROR: 0:108: 'xfb_stride' : all stride settings must match for xfb buffer 15 +ERROR: 0:112: 'xfb_offset' : overlapping offsets at offset 4 in buffer 1 +ERROR: 0:114: 'xfb_stride' : all stride settings must match for xfb buffer 3 +ERROR: 0:115: 'xfb_stride' : all stride settings must match for xfb buffer 3 +ERROR: 0:119: 'xfb_stride' : all stride settings must match for xfb buffer 3 +ERROR: 0:117: 'xfb_stride' : all stride settings must match for xfb buffer 3 +ERROR: 0:138: 'xfb_offset' : overlapping offsets at offset 64 in buffer 0 +ERROR: 0:143: 'xfb_buffer' : buffer is too large: gl_MaxTransformFeedbackBuffers is 4 +ERROR: 0:144: 'xfb_offset' : must be a multiple of size of first component +ERROR: 0:145: 'xfb_offset' : type contains double; xfb_offset must be a multiple of 8 +ERROR: 0:147: 'xfb_offset' : must be a multiple of size of first component +ERROR: 0:148: 'xfb_offset' : type contains double; xfb_offset must be a multiple of 8 +ERROR: 0:152: 'xfb_buffer' : buffer is too large: gl_MaxTransformFeedbackBuffers is 4 +ERROR: 0:155: 'xfb_buffer' : buffer is too large: gl_MaxTransformFeedbackBuffers is 4 +ERROR: 0:155: 'xfb_stride' : 1/4 stride is too large: 
gl_MaxTransformFeedbackInterleavedComponents is 64 +ERROR: 0:157: 'xfb_buffer' : buffer is too large: gl_MaxTransformFeedbackBuffers is 4 +ERROR: 42 compilation errors. No code generated. in xfb mode @@ -60,13 +80,22 @@ ERROR: node is still EOpNull! 0:? 'be' (layout(location=50 component=3 ) smooth out int) 0:? 'bf' (layout(location=50 component=0 ) smooth out 3-component vector of float) 0:? 'bbinst1' (out block{out 4-component vector of float bbv}) -0:? 'bbinst2' (out block{layout(xfb_buffer=0 xfb_offset=16 ) out 4-component vector of float bbv}) +0:? 'bbinst2' (out block{layout(xfb_buffer=0 xfb_offset=64 ) out 4-component vector of float bbv}) 0:? 'bbinst3' (out block{layout(xfb_buffer=3 xfb_offset=16 ) out 4-component vector of float bbv}) 0:? 'ubbinst3' (layout(column_major shared ) uniform block{layout(column_major shared xfb_offset=16 ) uniform 4-component vector of float bbv}) -0:? 'bg' (layout(xfb_buffer=2 xfb_offset=32 xfb_stride=64 ) smooth out 4-component vector of float) +0:? 'bg' (layout(xfb_buffer=2 xfb_offset=48 xfb_stride=80 ) smooth out 4-component vector of float) 0:? 'bh' (layout(xfb_buffer=3 xfb_offset=32 xfb_stride=64 ) smooth out 4-component vector of float) -0:? 'bbinst4' (layout(xfb_buffer=2 xfb_offset=16 xfb_stride=32 ) out block{layout(xfb_buffer=2 xfb_offset=16 ) out 4-component vector of float bbv1, layout(xfb_buffer=2 xfb_offset=16 ) out 4-component vector of float bbv2}) -0:? 'bbinst5' (out block{layout(xfb_buffer=3 xfb_offset=0 ) out 4-component vector of float bbv1, layout(xfb_buffer=3 xfb_offset=16 xfb_stride=32 ) out 4-component vector of float bbv2, out 4-component vector of float bbv3}) +0:? 'bbinst4' (layout(xfb_stride=80 ) out block{layout(xfb_buffer=2 xfb_offset=16 ) out 4-component vector of float bbv1, layout(xfb_buffer=2 xfb_offset=32 ) out 4-component vector of float bbv2}) +0:? 
'bbinst5' (out block{layout(xfb_buffer=3 xfb_offset=0 ) out 4-component vector of float bbv1, layout(xfb_buffer=3 xfb_offset=48 xfb_stride=64 ) out 4-component vector of float bbv2, out 4-component vector of float bbv3}) +0:? 'bbinst6' (out block{layout(xfb_buffer=2 xfb_offset=0 ) out 4-component vector of float bbv1, layout(xfb_buffer=3 xfb_offset=32 xfb_stride=64 ) out 4-component vector of float bbv2, layout(xfb_buffer=2 xfb_offset=0 ) out 4-component vector of float bbv3, out 4-component vector of float bbv5, layout(xfb_buffer=2 xfb_offset=24 ) out float bbf6}) +0:? 'bj' (layout(xfb_buffer=1 xfb_offset=4 ) smooth out float) +0:? 'bk' (layout(xfb_buffer=1 xfb_offset=0 ) smooth out 2-component vector of int) +0:? 'bl' (layout(xfb_stride=48 ) smooth out float) +0:? 'bbinst7' (layout(xfb_stride=48 ) out block{layout(xfb_stride=64 ) out 4-component vector of float bbv1, layout(xfb_stride=32 ) out 4-component vector of float bbv2}) +0:? 'bbinst8' (layout(xfb_stride=92 ) out block{layout(xfb_buffer=0 xfb_offset=0 ) out bool b, layout(xfb_buffer=0 xfb_offset=8 ) out structure{bool b, structure{int i, double d, float f} s, 2-component vector of float v2} t, layout(xfb_buffer=0 xfb_offset=48 ) out int i, layout(xfb_buffer=0 xfb_offset=52 ) out 3X3 matrix of float m3, layout(xfb_buffer=0 xfb_offset=88 ) out float f, layout(xfb_buffer=0 xfb_offset=92 ) out float g}) +0:? 'bbinst9' (out block{layout(xfb_buffer=4 xfb_offset=1 ) out bool b, layout(xfb_buffer=4 xfb_offset=12 ) out structure{bool b, structure{int i, double d, float f} s, 2-component vector of float v2} t, layout(xfb_buffer=4 xfb_offset=52 ) out 3X3 matrix of float m3, layout(xfb_buffer=4 xfb_offset=90 ) out int i, layout(xfb_buffer=4 xfb_offset=98 ) out double d, layout(xfb_buffer=4 xfb_offset=108 ) out structure{int a} s}) +0:? 'bm' (layout(xfb_buffer=5 xfb_offset=0 ) smooth out float) +0:? 
'bbinst10' (out block{layout(xfb_buffer=7 xfb_offset=0 ) out 4X4 matrix of double m1, layout(xfb_buffer=7 xfb_offset=128 ) out 4X4 matrix of double m2, layout(xfb_buffer=7 xfb_offset=256 ) out float f}) 0:? 'gl_VertexID' (gl_VertexId int) 0:? 'gl_InstanceID' (gl_InstanceId int) @@ -74,6 +103,14 @@ ERROR: node is still EOpNull! Linked vertex stage: ERROR: Linking vertex stage: Missing entry point: Each stage requires one "void main()" entry point +ERROR: Linking vertex stage: xfb_stride is too small to hold all buffer entries: +ERROR: xfb_buffer 0, xfb_stride 92, minimum stride needed: 96 +ERROR: Linking vertex stage: xfb_stride must be multiple of 8 for buffer holding a double: +ERROR: xfb_buffer 0, xfb_stride 92 +ERROR: Linking vertex stage: xfb_stride must be multiple of 4: +ERROR: xfb_buffer 5, xfb_stride 6 +ERROR: Linking vertex stage: xfb_stride is too large: +ERROR: xfb_buffer 7, components (1/4 stride) needed are 66, gl_MaxTransformFeedbackInterleavedComponents is 64 in xfb mode diff --git a/Test/baseResults/test.conf b/Test/baseResults/test.conf index d77c9cc28..8fc933656 100644 --- a/Test/baseResults/test.conf +++ b/Test/baseResults/test.conf @@ -75,6 +75,8 @@ MaxGeometryAtomicCounterBuffers 0 MaxFragmentAtomicCounterBuffers 1 MaxCombinedAtomicCounterBuffers 1 MaxAtomicCounterBufferSize 16384 +MaxTransformFeedbackBuffers 4 +MaxTransformFeedbackInterleavedComponents 64 nonInductiveForLoops 1 whileLoops 1 doWhileLoops 1 diff --git a/glslang/Include/Common.h b/glslang/Include/Common.h index df07d645d..46e61d402 100644 --- a/glslang/Include/Common.h +++ b/glslang/Include/Common.h @@ -185,6 +185,20 @@ typedef TMap<TString, TString>::tAllocator TPragmaTableAllocator; const int GlslangMaxTokenLength = 1024; +// Round number up to a multiple of the given powerOf2, which is not +// a power, just a number that must be a power of 2. 
+template <class T> void RoundToPow2(T& number, int powerOf2) +{ + assert((powerOf2 & (powerOf2 - 1)) == 0); + number = (number + powerOf2 - 1) & ~(powerOf2 - 1); +} + +template <class T> bool IsMultipleOfPow2(T number, int powerOf2) +{ + assert((powerOf2 & (powerOf2 - 1)) == 0); + return ! (number & (powerOf2 - 1)); +} + } // end namespace glslang #endif // _COMMON_INCLUDED_ diff --git a/glslang/Include/ResourceLimits.h b/glslang/Include/ResourceLimits.h index 322031f12..03a39fd1d 100644 --- a/glslang/Include/ResourceLimits.h +++ b/glslang/Include/ResourceLimits.h @@ -127,6 +127,8 @@ struct TBuiltInResource { int maxFragmentAtomicCounterBuffers; int maxCombinedAtomicCounterBuffers; int maxAtomicCounterBufferSize; + int maxTransformFeedbackBuffers; + int maxTransformFeedbackInterleavedComponents; TLimits limits; }; diff --git a/glslang/Include/Types.h b/glslang/Include/Types.h index 56d70e455..08ffd990e 100644 --- a/glslang/Include/Types.h +++ b/glslang/Include/Types.h @@ -373,24 +373,32 @@ public: hasStream() || hasXfb(); } - TLayoutMatrix layoutMatrix : 3; - TLayoutPacking layoutPacking : 4; + TLayoutMatrix layoutMatrix : 3; + TLayoutPacking layoutPacking : 4; int layoutOffset; int layoutAlign; - unsigned int layoutLocation : 7; - static const unsigned int layoutLocationEnd = 0x3F; - unsigned int layoutComponent : 3; - static const unsigned int layoutComponentEnd = 4; - unsigned int layoutBinding : 8; - static const unsigned int layoutBindingEnd = 0xFF; - unsigned int layoutStream : 8; - static const unsigned int layoutStreamEnd = 0xFF; - unsigned int layoutXfbBuffer : 4; - static const unsigned int layoutXfbBufferEnd = 0xF; - unsigned int layoutXfbStride : 8; - static const unsigned int layoutXfbStrideEnd = 0xFF; - unsigned int layoutXfbOffset : 8; - static const unsigned int layoutXfbOffsetEnd = 0xFF; + + unsigned int layoutLocation : 7; + static const unsigned int layoutLocationEnd = 0x3F; + + unsigned int layoutComponent : 3; + static const unsigned int 
layoutComponentEnd = 4; + + unsigned int layoutBinding : 8; + static const unsigned int layoutBindingEnd = 0xFF; + + unsigned int layoutStream : 8; + static const unsigned int layoutStreamEnd = 0xFF; + + unsigned int layoutXfbBuffer : 4; + static const unsigned int layoutXfbBufferEnd = 0xF; + + unsigned int layoutXfbStride : 10; + static const unsigned int layoutXfbStrideEnd = 0x3FF; + + unsigned int layoutXfbOffset : 10; + static const unsigned int layoutXfbOffsetEnd = 0x3FF; + bool hasUniformLayout() const { return layoutMatrix != ElmNone || @@ -805,6 +813,20 @@ public: virtual bool isArray() const { return arraySizes != 0; } virtual bool isStruct() const { return structure != 0; } + // Recursively checks if the type contains the given basic type + virtual bool containsBasicType(TBasicType checkType) const + { + if (basicType == checkType) + return true; + if (! structure) + return false; + for (unsigned int i = 0; i < structure->size(); ++i) { + if ((*structure)[i].type->containsBasicType(checkType)) + return true; + } + return false; + } + // Recursively check the structure for any arrays, needed for some error checks virtual bool containsArray() const { diff --git a/glslang/Include/revision.h b/glslang/Include/revision.h index 2aa77a024..a471382bf 100644 --- a/glslang/Include/revision.h +++ b/glslang/Include/revision.h @@ -9,5 +9,5 @@ // source have to figure out how to create revision.h just to get a build // going. However, if it is not updated, it can be a version behind. 
-#define GLSLANG_REVISION "24964" -#define GLSLANG_DATE "2014/01/22 17:35:24" +#define GLSLANG_REVISION "24977" +#define GLSLANG_DATE "2014/01/23 14:40:33" diff --git a/glslang/MachineIndependent/ParseHelper.cpp b/glslang/MachineIndependent/ParseHelper.cpp index ce4c8850a..911da93bc 100644 --- a/glslang/MachineIndependent/ParseHelper.cpp +++ b/glslang/MachineIndependent/ParseHelper.cpp @@ -104,11 +104,16 @@ TParseContext::TParseContext(TSymbolTable& symt, TIntermediate& interm, bool pb, globalInputDefaults.clear(); globalOutputDefaults.clear(); + + // "Shaders in the transform + // feedback capturing mode have an initial global default of + // layout(xfb_buffer = 0) out;" if (language == EShLangVertex || language == EShLangTessControl || language == EShLangTessEvaluation || language == EShLangGeometry) globalOutputDefaults.layoutXfbBuffer = 0; + if (language == EShLangGeometry) globalOutputDefaults.layoutStream = 0; } @@ -123,6 +128,8 @@ void TParseContext::setLimits(const TBuiltInResource& r) ! limits.generalUniformIndexing || ! limits.generalVariableIndexing || ! limits.generalVaryingIndexing; + + intermediate.setLimits(resources); } // @@ -2872,27 +2879,39 @@ void TParseContext::setLayoutQualifier(TSourceLoc loc, TPublicType& publicType, publicType.qualifier.layoutComponent = value; return; } else if (id.compare(0, 4, "xfb_") == 0) { + // "Any shader making any static use (after preprocessing) of any of these + // *xfb_* qualifiers will cause the shader to be in a transform feedback + // capturing mode and hence responsible for describing the transform feedback + // setup." 
intermediate.setXfbMode(); const char* feature = "transform feedback qualifier"; requireStage(loc, (EShLanguageMask)(EShLangVertexMask | EShLangGeometryMask | EShLangTessControlMask | EShLangTessEvaluationMask), feature); requireProfile(loc, ECoreProfile | ECompatibilityProfile, feature); profileRequires(loc, ECoreProfile | ECompatibilityProfile, 440, GL_ARB_enhanced_layouts, feature); if (id == "xfb_buffer") { - if (value >= TQualifier::layoutXfbBufferEnd) // TODO: 4.4 enhanced layouts: also check against gl_MaxTransformFeedbackBuffers - error(loc, "buffer is too large", id.c_str(), ""); + // "It is a compile-time error to specify an *xfb_buffer* that is greater than + // the implementation-dependent constant gl_MaxTransformFeedbackBuffers." + if (value >= resources.maxTransformFeedbackBuffers) + error(loc, "buffer is too large:", id.c_str(), "gl_MaxTransformFeedbackBuffers is %d", resources.maxTransformFeedbackBuffers); + if (value >= TQualifier::layoutXfbBufferEnd) + error(loc, "buffer is too large:", id.c_str(), "internal max is %d", TQualifier::layoutXfbBufferEnd-1); else publicType.qualifier.layoutXfbBuffer = value; return; } else if (id == "xfb_offset") { - if (value >= TQualifier::layoutXfbOffsetEnd) // TODO: 4.4 enhanced layouts: also check against gl_MaxTransformFeedbackInterleavedComponents - error(loc, "offset is too large", id.c_str(), ""); + if (value >= TQualifier::layoutXfbOffsetEnd) + error(loc, "offset is too large:", id.c_str(), "internal max is %d", TQualifier::layoutXfbOffsetEnd-1); else publicType.qualifier.layoutXfbOffset = value; return; } else if (id == "xfb_stride") { - if (value >= TQualifier::layoutXfbStrideEnd) // TODO: 4.4 enhanced layouts: also check against 4*gl_MaxTransformFeedbackInterleavedComponents - error(loc, "stride is too large", id.c_str(), ""); - else + // "The resulting stride (implicit or explicit), when divided by 4, must be less than or equal to the + // implementation-dependent constant 
gl_MaxTransformFeedbackInterleavedComponents." + if (value > 4 * resources.maxTransformFeedbackInterleavedComponents) + error(loc, "1/4 stride is too large:", id.c_str(), "gl_MaxTransformFeedbackInterleavedComponents is %d", resources.maxTransformFeedbackInterleavedComponents); + else if (value >= TQualifier::layoutXfbStrideEnd) + error(loc, "stride is too large:", id.c_str(), "internal max is %d", TQualifier::layoutXfbStrideEnd-1); + if (value < TQualifier::layoutXfbStrideEnd) publicType.qualifier.layoutXfbStride = value; return; } @@ -2956,8 +2975,6 @@ void TParseContext::mergeObjectLayoutQualifiers(TSourceLoc loc, TQualifier& dst, if (src.hasXfbBuffer()) dst.layoutXfbBuffer = src.layoutXfbBuffer; - if (src.hasXfbOffset()) - dst.layoutXfbOffset = src.layoutXfbOffset; if (! inheritOnly) { if (src.layoutLocation != TQualifier::layoutLocationEnd) @@ -2975,6 +2992,8 @@ void TParseContext::mergeObjectLayoutQualifiers(TSourceLoc loc, TQualifier& dst, if (src.hasXfbStride()) dst.layoutXfbStride = src.layoutXfbStride; + if (src.hasXfbOffset()) + dst.layoutXfbOffset = src.layoutXfbOffset; } } @@ -3069,6 +3088,25 @@ void TParseContext::layoutTypeCheck(TSourceLoc loc, const TType& type) error(loc, "fragment outputs sharing the same location must be the same basic type", "location", "%d", repeated); } + if (qualifier.hasXfbOffset() && qualifier.hasXfbBuffer()) { + int repeated = intermediate.addXfbBufferOffset(type); + if (repeated >= 0) + error(loc, "overlapping offsets at", "xfb_offset", "offset %d in buffer %d", repeated, qualifier.layoutXfbBuffer); + + // "The offset must be a multiple of the size of the first component of the first + // qualified variable or block member, or a compile-time error results. Further, if applied to an aggregate + // containing a double, the offset must also be a multiple of 8..." + if (type.containsBasicType(EbtDouble) && ! 
IsMultipleOfPow2(qualifier.layoutXfbOffset, 8)) + error(loc, "type contains double; xfb_offset must be a multiple of 8", "xfb_offset", ""); + else if (! IsMultipleOfPow2(qualifier.layoutXfbOffset, 4)) + error(loc, "must be a multiple of size of first component", "xfb_offset", ""); + } + + if (qualifier.hasXfbStride() && qualifier.hasXfbBuffer()) { + if (! intermediate.setXfbBufferStride(qualifier.layoutXfbBuffer, qualifier.layoutXfbStride)) + error(loc, "all stride settings must match for xfb buffer", "xfb_stride", "%d", qualifier.layoutXfbBuffer); + } + if (qualifier.hasBinding()) { // Binding checking, from the spec: // @@ -3844,10 +3882,16 @@ void TParseContext::declareBlock(TSourceLoc loc, TTypeList& typeList, const TStr if (defaultQualification.layoutStream != memberQualifier.layoutStream) error(memberLoc, "member cannot contradict block", "stream", ""); } + + // "This includes a block's inheritance of the + // current global default buffer, a block member's inheritance of the block's + // buffer, and the requirement that any *xfb_buffer* declared on a block + // member must match the buffer inherited from the block." 
if (memberQualifier.hasXfbBuffer()) { if (defaultQualification.layoutXfbBuffer != memberQualifier.layoutXfbBuffer) error(memberLoc, "member cannot contradict block (or what block inherited from global)", "xfb_buffer", ""); } + if (memberQualifier.layoutPacking != ElpNone) error(memberLoc, "member of block cannot have a packing layout qualifier", typeList[member].type->getFieldName().c_str(), ""); if (memberQualifier.hasLocation()) { @@ -3869,7 +3913,10 @@ void TParseContext::declareBlock(TSourceLoc loc, TTypeList& typeList, const TStr mergeQualifiers(memberLoc, newMemberQualification, memberQualifier, false); memberQualifier = newMemberQualification; } + + // Process the members fixBlockLocations(loc, currentBlockQualifier, typeList, memberWithLocation, memberWithoutLocation); + fixBlockXfbOffsets(loc, currentBlockQualifier, typeList); for (unsigned int member = 0; member < typeList.size(); ++member) layoutTypeCheck(typeList[member].loc, *typeList[member].type); @@ -3983,6 +4030,37 @@ void TParseContext::fixBlockLocations(TSourceLoc loc, TQualifier& qualifier, TTy } } +void TParseContext::fixBlockXfbOffsets(TSourceLoc loc, TQualifier& qualifier, TTypeList& typeList) +{ + // "If a block is qualified with xfb_offset, all its + // members are assigned transform feedback buffer offsets. If a block is not qualified with xfb_offset, any + // members of that block not qualified with an xfb_offset will not be assigned transform feedback buffer + // offsets." + + if (! currentBlockQualifier.hasXfbBuffer() || ! currentBlockQualifier.hasXfbOffset()) + return; + + int nextOffset = currentBlockQualifier.layoutXfbOffset; + for (unsigned int member = 0; member < typeList.size(); ++member) { + TQualifier& memberQualifier = typeList[member].type->getQualifier(); + bool containsDouble = false; + int memberSize = intermediate.computeTypeXfbSize(*typeList[member].type, containsDouble); + // see if we need to auto-assign an offset to this member + if (! 
memberQualifier.hasXfbOffset()) { + // "if applied to an aggregate containing a double, the offset must also be a multiple of 8" + if (containsDouble) + RoundToPow2(nextOffset, 8); + memberQualifier.layoutXfbOffset = nextOffset; + } else + nextOffset = memberQualifier.layoutXfbOffset; + nextOffset += memberSize; + } + + // The above gave all block members an offset, so we can take it off the block now, + // which will avoid double counting the offset usage. + qualifier.layoutXfbOffset = TQualifier::layoutXfbOffsetEnd; +} + // For an identifier that is already declared, add more qualification to it. void TParseContext::addQualifierToExisting(TSourceLoc loc, TQualifier qualifier, const TString& identifier) { @@ -4150,6 +4228,10 @@ void TParseContext::updateStandaloneQualifierDefaults(TSourceLoc loc, const TPub globalOutputDefaults.layoutStream = qualifier.layoutStream; if (qualifier.hasXfbBuffer()) globalOutputDefaults.layoutXfbBuffer = qualifier.layoutXfbBuffer; + if (globalOutputDefaults.hasXfbBuffer() && qualifier.hasXfbStride()) { + if (! 
intermediate.setXfbBufferStride(globalOutputDefaults.layoutXfbBuffer, qualifier.layoutXfbStride)) + error(loc, "all stride settings must match for xfb buffer", "xfb_stride", "%d", qualifier.layoutXfbBuffer); + } break; default: error(loc, "default qualifier requires 'uniform', 'buffer', 'in', or 'out' storage qualification", "", ""); diff --git a/glslang/MachineIndependent/ParseHelper.h b/glslang/MachineIndependent/ParseHelper.h index 4eeb2d383..813f3237d 100644 --- a/glslang/MachineIndependent/ParseHelper.h +++ b/glslang/MachineIndependent/ParseHelper.h @@ -165,6 +165,7 @@ public: TIntermTyped* constructBuiltIn(const TType&, TOperator, TIntermNode*, TSourceLoc, bool subset); void declareBlock(TSourceLoc, TTypeList& typeList, const TString* instanceName = 0, TArraySizes* arraySizes = 0); void fixBlockLocations(TSourceLoc, TQualifier&, TTypeList&, bool memberWithLocation, bool memberWithoutLocation); + void fixBlockXfbOffsets(TSourceLoc, TQualifier&, TTypeList&); void addQualifierToExisting(TSourceLoc, TQualifier, const TString& identifier); void addQualifierToExisting(TSourceLoc, TQualifier, TIdentifierList&); void invariantCheck(TSourceLoc, const TType&, const TString& identifier); diff --git a/glslang/MachineIndependent/linkValidate.cpp b/glslang/MachineIndependent/linkValidate.cpp index cec1729fc..07a2617aa 100644 --- a/glslang/MachineIndependent/linkValidate.cpp +++ b/glslang/MachineIndependent/linkValidate.cpp @@ -112,6 +112,16 @@ void TIntermediate::merge(TInfoSink& infoSink, TIntermediate& unit) if (unit.xfbMode) xfbMode = true; + for (size_t b = 0; b < xfbBuffers.size(); ++b) { + if (xfbBuffers[b].stride == TQualifier::layoutXfbStrideEnd) + xfbBuffers[b].stride = unit.xfbBuffers[b].stride; + else if (xfbBuffers[b].stride != unit.xfbBuffers[b].stride) + error(infoSink, "Contradictory xfb_stride"); + xfbBuffers[b].implicitStride = std::max(xfbBuffers[b].implicitStride, unit.xfbBuffers[b].implicitStride); + if (unit.xfbBuffers[b].containsDouble) + 
xfbBuffers[b].containsDouble = true; + // TODO: 4.4 link: enhanced layouts: compare ranges + } if (unit.treeRoot == 0) return; @@ -305,6 +315,44 @@ void TIntermediate::finalCheck(TInfoSink& infoSink) if (inIoAccessed("gl_FragColor") && inIoAccessed("gl_FragData")) error(infoSink, "Cannot use both gl_FragColor and gl_FragData"); + for (size_t b = 0; b < xfbBuffers.size(); ++b) { + if (xfbBuffers[b].containsDouble) + RoundToPow2(xfbBuffers[b].implicitStride, 8); + + // "It is a compile-time or link-time error to have + // any xfb_offset that overflows xfb_stride, whether stated on declarations before or after the xfb_stride, or + // in different compilation units. While xfb_stride can be declared multiple times for the same buffer, it is a + // compile-time or link-time error to have different values specified for the stride for the same buffer." + if (xfbBuffers[b].stride != TQualifier::layoutXfbStrideEnd && xfbBuffers[b].implicitStride > xfbBuffers[b].stride) { + error(infoSink, "xfb_stride is too small to hold all buffer entries:"); + infoSink.info.prefix(EPrefixError); + infoSink.info << " xfb_buffer " << b << ", xfb_stride " << xfbBuffers[b].stride << ", minimum stride needed: " << xfbBuffers[b].implicitStride << "\n"; + } + if (xfbBuffers[b].stride == TQualifier::layoutXfbStrideEnd) + xfbBuffers[b].stride = xfbBuffers[b].implicitStride; + + // "If the buffer is capturing any + // outputs with double-precision components, the stride must be a multiple of 8, otherwise it must be a + // multiple of 4, or a compile-time or link-time error results." + if (xfbBuffers[b].containsDouble && ! IsMultipleOfPow2(xfbBuffers[b].stride, 8)) { + error(infoSink, "xfb_stride must be multiple of 8 for buffer holding a double:"); + infoSink.info.prefix(EPrefixError); + infoSink.info << " xfb_buffer " << b << ", xfb_stride " << xfbBuffers[b].stride << "\n"; + } else if (! 
IsMultipleOfPow2(xfbBuffers[b].stride, 4)) { + error(infoSink, "xfb_stride must be multiple of 4:"); + infoSink.info.prefix(EPrefixError); + infoSink.info << " xfb_buffer " << b << ", xfb_stride " << xfbBuffers[b].stride << "\n"; + } + + // "The resulting stride (implicit or explicit), when divided by 4, must be less than or equal to the + // implementation-dependent constant gl_MaxTransformFeedbackInterleavedComponents." + if (xfbBuffers[b].stride > (unsigned int)(4 * resources.maxTransformFeedbackInterleavedComponents)) { + error(infoSink, "xfb_stride is too large:"); + infoSink.info.prefix(EPrefixError); + infoSink.info << " xfb_buffer " << b << ", components (1/4 stride) needed are " << xfbBuffers[b].stride/4 << ", gl_MaxTransformFeedbackInterleavedComponents is " << resources.maxTransformFeedbackInterleavedComponents << "\n"; + } + } + switch (language) { case EShLangVertex: break; @@ -510,6 +558,7 @@ int TIntermediate::addUsedLocation(const TQualifier& qualifier, const TType& typ else size = 1; } else { + // Strip off the outer array dimension for those having an extra one. if (type.isArray() && ! qualifier.patch && (language == EShLangGeometry && qualifier.isPipeInput()) || language == EShLangTessControl || @@ -520,36 +569,31 @@ int TIntermediate::addUsedLocation(const TQualifier& qualifier, const TType& typ size = computeTypeLocationSize(type); } - TRange locationRange = { qualifier.layoutLocation, qualifier.layoutLocation + size - 1 }; - TRange componentRange = { 0, 3 }; + TRange locationRange(qualifier.layoutLocation, qualifier.layoutLocation + size - 1); + TRange componentRange(0, 3); if (qualifier.layoutComponent != TQualifier::layoutComponentEnd) { componentRange.start = qualifier.layoutComponent; componentRange.last = componentRange.start + type.getVectorSize() - 1; } + TIoRange range(locationRange, componentRange, type.getBasicType()); // check for collisions, except for vertex inputs on desktop if (! 
(profile != EEsProfile && language == EShLangVertex && qualifier.isPipeInput())) { for (size_t r = 0; r < usedIo[set].size(); ++r) { - if (locationRange.last >= usedIo[set][r].location.start && - locationRange.start <= usedIo[set][r].location.last && - componentRange.last >= usedIo[set][r].component.start && - componentRange.start <= usedIo[set][r].component.last) { + if (range.overlap(usedIo[set][r])) { // there is a collision; pick one return std::max(locationRange.start, usedIo[set][r].location.start); - } else if (locationRange.last >= usedIo[set][r].location.start && - locationRange.start <= usedIo[set][r].location.last && - type.getBasicType() != usedIo[set][r].basicType) { + } else if (locationRange.overlap(usedIo[set][r].location) && type.getBasicType() != usedIo[set][r].basicType) { + // aliased-type mismatch typeCollision = true; return std::max(locationRange.start, usedIo[set][r].location.start); } } } - TIoRange range = { locationRange, componentRange, type.getBasicType() }; - usedIo[set].push_back(range); - return -1; + return -1; // no collision } // Recursively figure out how many locations are used up by an input or output type. @@ -559,7 +603,7 @@ int TIntermediate::computeTypeLocationSize(const TType& type) // "If the declared input is an array of size n and each element takes m locations, it will be assigned m * n // consecutive locations..." if (type.isArray()) { - TType elementType(type, 0); + TType elementType(type, 0); if (type.getArraySize() == 0) { // TODO: are there valid cases of having an unsized array with a location? If so, running this code too early. return computeTypeLocationSize(elementType); @@ -605,4 +649,95 @@ int TIntermediate::computeTypeLocationSize(const TType& type) return 1; } +// Accumulate xfb buffer ranges and check for collisions as the accumulation is done. +// +// Returns < 0 if no collision, >= 0 if collision and the value returned is a colliding value. 
+// +int TIntermediate::addXfbBufferOffset(const TType& type) +{ + const TQualifier& qualifier = type.getQualifier(); + + assert(qualifier.hasXfbOffset() && qualifier.hasXfbBuffer()); + TXfbBuffer& buffer = xfbBuffers[qualifier.layoutXfbBuffer]; + + // compute the range + unsigned int size = computeTypeXfbSize(type, buffer.containsDouble); + buffer.implicitStride = std::max(buffer.implicitStride, qualifier.layoutXfbOffset + size); + TRange range(qualifier.layoutXfbOffset, qualifier.layoutXfbOffset + size - 1); + + // check for collisions + for (size_t r = 0; r < buffer.ranges.size(); ++r) { + if (range.overlap(buffer.ranges[r])) { + // there is a collision; pick an example to return + return std::max(range.start, buffer.ranges[r].start); + } + } + + buffer.ranges.push_back(range); + + return -1; // no collision +} + +// Recursively figure out how many bytes of xfb buffer are used by the given type. +// Return the size of type, in bytes. +// Sets containsDouble to true if the type contains a double. +// N.B. Caller must set containsDouble to false before calling. +unsigned int TIntermediate::computeTypeXfbSize(const TType& type, bool& containsDouble) const +{ + // "...if applied to an aggregate containing a double, the offset must also be a multiple of 8, + // and the space taken in the buffer will be a multiple of 8. + // ...within the qualified entity, subsequent components are each + // assigned, in order, to the next available offset aligned to a multiple of + // that component's size. Aggregate types are flattened down to the component + // level to get this sequence of components." + + if (type.isArray()) { + assert(type.getArraySize() > 0); + TType elementType(type, 0); + return type.getArraySize() * computeTypeXfbSize(elementType, containsDouble); + } + + if (type.isStruct()) { + unsigned int size = 0; + bool structContainsDouble = false; + for (size_t member = 0; member < type.getStruct()->size(); ++member) { + TType memberType(type, member); + // "... 
if applied to + // an aggregate containing a double, the offset must also be a multiple of 8, + // and the space taken in the buffer will be a multiple of 8." + bool memberContainsDouble = false; + int memberSize = computeTypeXfbSize(memberType, memberContainsDouble); + if (memberContainsDouble) { + structContainsDouble = true; + RoundToPow2(size, 8); + } + size += memberSize; + } + + if (structContainsDouble) { + containsDouble = true; + RoundToPow2(size, 8); + } + return size; + } + + int numComponents; + if (type.isScalar()) + numComponents = 1; + else if (type.isVector()) + numComponents = type.getVectorSize(); + else if (type.isMatrix()) + numComponents = type.getMatrixCols() * type.getMatrixRows(); + else { + assert(0); + numComponents = 1; + } + + if (type.getBasicType() == EbtDouble) { + containsDouble = true; + return 8 * numComponents; + } else + return 4 * numComponents; +} + } // end namespace glslang diff --git a/glslang/MachineIndependent/localintermediate.h b/glslang/MachineIndependent/localintermediate.h index 939950a58..c3d7588a8 100644 --- a/glslang/MachineIndependent/localintermediate.h +++ b/glslang/MachineIndependent/localintermediate.h @@ -51,6 +51,55 @@ struct TVectorFields { int num; }; +// +// Some helper structures for TIntermediate. Their contents are encapsulated +// by TIntermediate. +// + +// Used for detecting recursion: A "call" is a pair: <caller, callee>. +struct TCall { + TCall(const TString& pCaller, const TString& pCallee) : caller(pCaller), callee(pCallee) { } + TString caller; + TString callee; + bool visited; + bool currentPath; + bool errorGiven; +}; + +// A generic 1-D range. 
+struct TRange { + TRange(int start, int last) : start(start), last(last) { } + bool overlap(const TRange& rhs) const + { + return last >= rhs.start && start <= rhs.last; + } + int start; + int last; +}; + +// A *location* range is a 2-D rectangle; the set of (location, component) pairs all lying +// both within the location range and the component range. Locations don't alias unless +// both dimensions of their range overlap. +struct TIoRange { + TIoRange(TRange location, TRange component, TBasicType basicType) : location(location), component(component), basicType(basicType) { } + bool overlap(const TIoRange& rhs) const + { + return location.overlap(rhs.location) && component.overlap(rhs.component); + } + TRange location; + TRange component; + TBasicType basicType; +}; + +// Things that need to be tracked per xfb buffer. +struct TXfbBuffer { + TXfbBuffer() : stride(TQualifier::layoutXfbStrideEnd), implicitStride(0), containsDouble(false) { } + std::vector<TRange> ranges; // byte offsets that have already been assigned + unsigned int stride; + unsigned int implicitStride; + bool containsDouble; +}; + class TSymbolTable; class TSymbol; @@ -62,7 +111,12 @@ public: explicit TIntermediate(EShLanguage l, int v = 0, EProfile p = ENoProfile) : language(l), treeRoot(0), profile(p), version(v), numMains(0), numErrors(0), recursive(false), invocations(0), vertices(0), inputPrimitive(ElgNone), outputPrimitive(ElgNone), pixelCenterInteger(false), originUpperLeft(false), - vertexSpacing(EvsNone), vertexOrder(EvoNone), pointMode(false), xfbMode(false) { } + vertexSpacing(EvsNone), vertexOrder(EvoNone), pointMode(false), xfbMode(false) + { + xfbBuffers.resize(TQualifier::layoutXfbBufferEnd); + } + void setLimits(const TBuiltInResource& r) { resources = r; } + bool postProcess(TIntermNode*, EShLanguage); void output(TInfoSink&, bool tree); void removeTree(); @@ -176,6 +230,16 @@ public: int addUsedLocation(const TQualifier&, const TType&, bool& typeCollision); int 
computeTypeLocationSize(const TType&); + bool setXfbBufferStride(int buffer, int stride) + { + if (xfbBuffers[buffer].stride != TQualifier::layoutXfbStrideEnd) + return xfbBuffers[buffer].stride == stride; + xfbBuffers[buffer].stride = stride; + return true; + } + int addXfbBufferOffset(const TType&); + unsigned int computeTypeXfbSize(const TType&, bool& containsDouble) const; + protected: void error(TInfoSink& infoSink, const char*); void mergeBodies(TInfoSink&, TIntermSequence& globals, const TIntermSequence& unitGlobals); @@ -191,6 +255,7 @@ protected: TIntermNode* treeRoot; EProfile profile; int version; + TBuiltInResource resources; int numMains; int numErrors; bool recursive; @@ -205,33 +270,12 @@ protected: bool pointMode; bool xfbMode; - // for detecting recursion: pair is <caller, callee> - struct TCall { - TCall(const TString& pCaller, const TString& pCallee) : caller(pCaller), callee(pCallee) { } - TString caller; - TString callee; - bool visited; - bool currentPath; - bool errorGiven; - }; typedef std::list<TCall> TGraph; TGraph callGraph; - std::set<TString> ioAccessed; // set of names of statically read/written I/O that might need extra checking - - // A location range is a 2-D rectangle; the set of (location, component) pairs all lying - // both within the location range and the component range. - // The following are entirely encapsulated by addUsedLocation(). - struct TRange { - int start; - int last; - }; - struct TIoRange { - TRange location; - TRange component; - TBasicType basicType; - }; + std::set<TString> ioAccessed; // set of names of statically read/written I/O that might need extra checking std::vector<TIoRange> usedIo[3]; // sets of used locations, one for each of in, out, and uniform + std::vector<TXfbBuffer> xfbBuffers; // all the data we need to track per xfb buffer private: void operator=(TIntermediate&); // prevent assignments -- GitLab