From 6e1fe4d9fc308787c7366f46c9dd72004b7b9aca Mon Sep 17 00:00:00 2001 From: CrispyPin Date: Sat, 22 Jul 2023 20:24:12 +0200 Subject: [PATCH] Lenia: remove texture sampler, reduce gpu usage by ~45% --- Assets/automata/Automata.unity | 30 ++++----- Assets/automata/GoL/gol_out.mat | 3 +- Assets/automata/Lenia/lenia.shader | 31 +++++----- .../Lenia/lenia_generated_kernel.cginc | 2 + Assets/automata/Lenia/lenia_out.mat | 6 +- Assets/automata/ca_output.shader | 43 +++++++++++++ ...tput.shader.meta => ca_output.shader.meta} | 2 +- Assets/automata/sim_output.shader | 61 ------------------- lenia-kernel/src/main.rs | 13 +++- 9 files changed, 94 insertions(+), 97 deletions(-) create mode 100644 Assets/automata/ca_output.shader rename Assets/automata/{sim_output.shader.meta => ca_output.shader.meta} (80%) delete mode 100644 Assets/automata/sim_output.shader diff --git a/Assets/automata/Automata.unity b/Assets/automata/Automata.unity index 62ca677..a6a1faa 100644 --- a/Assets/automata/Automata.unity +++ b/Assets/automata/Automata.unity @@ -38,7 +38,7 @@ RenderSettings: m_ReflectionIntensity: 1 m_CustomReflection: {fileID: 0} m_Sun: {fileID: 0} - m_IndirectSpecularColor: {r: 0.18029127, g: 0.22572401, b: 0.3069303, a: 1} + m_IndirectSpecularColor: {r: 0.17288938, g: 0.21605867, b: 0.2980182, a: 1} m_UseRadianceAmbientProbe: 0 --- !u!157 &3 LightmapSettings: @@ -206,13 +206,13 @@ Transform: m_PrefabInstance: {fileID: 0} m_PrefabAsset: {fileID: 0} m_GameObject: {fileID: 88997009} - m_LocalRotation: {x: 0.40821788, y: -0.23456968, z: 0.10938163, w: 0.8754261} + m_LocalRotation: {x: 0.7321534, y: -0.01281865, z: 0.00015350126, w: 0.68101907} m_LocalPosition: {x: 0, y: 3, z: 0} m_LocalScale: {x: 1, y: 1, z: 1} m_Children: [] m_Father: {fileID: 0} m_RootOrder: 1 - m_LocalEulerAnglesHint: {x: 50, y: -30, z: 0} + m_LocalEulerAnglesHint: {x: 85.73, y: -166.615, z: -165.563} --- !u!1 &128608326 GameObject: m_ObjectHideFlags: 0 @@ -830,11 +830,11 @@ Transform: m_PrefabAsset: {fileID: 0} m_GameObject: {fileID: 336599934} m_LocalRotation: {x: 0, y: 0, z: 0, w: 1} - m_LocalPosition: {x: -0.371, y: 0.35, z: 0} - m_LocalScale: {x: 0.1, y: 0.3, z: 0.03} + m_LocalPosition: {x: -8.069, y: 0.308, z: -0.419} + m_LocalScale: {x: 1, y: 1, z: 1} m_Children: [] m_Father: {fileID: 0} - m_RootOrder: 3 + m_RootOrder: 4 m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0} --- !u!1 &413786049 GameObject: @@ -1352,7 +1352,7 @@ Transform: m_PrefabAsset: {fileID: 0} m_GameObject: {fileID: 1170317910} m_LocalRotation: {x: 0, y: 1, z: 0, w: 0} - m_LocalPosition: {x: 0, y: 0.6, z: 2} + m_LocalPosition: {x: 0, y: 0.5, z: 2} m_LocalScale: {x: 1, y: 1, z: 1} m_Children: - {fileID: 1089900912} @@ -1362,7 +1362,7 @@ Transform: - {fileID: 413786050} - {fileID: 134281446} m_Father: {fileID: 0} - m_RootOrder: 8 + m_RootOrder: 3 m_LocalEulerAnglesHint: {x: 0, y: 180, z: 0} --- !u!114 &1170317912 MonoBehaviour: @@ -1503,7 +1503,7 @@ Camera: m_Depth: 0 m_CullingMask: serializedVersion: 2 - m_Bits: 2147946295 + m_Bits: 2147946263 m_RenderingPath: -1 m_TargetTexture: {fileID: 8400000, guid: 97ad7e4fd281e001d9f334026cef6605, type: 2} m_TargetDisplay: 0 @@ -1773,7 +1773,7 @@ GameObject: m_Icon: {fileID: 0} m_NavMeshLayer: 0 m_StaticEditorFlags: 0 - m_IsActive: 0 + m_IsActive: 1 --- !u!65 &1377711729 BoxCollider: m_ObjectHideFlags: 0 @@ -1850,7 +1850,7 @@ Transform: - {fileID: 1534265987} - {fileID: 1235347441} m_Father: {fileID: 0} - m_RootOrder: 5 + m_RootOrder: 6 m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0} --- !u!114 &1377711733 MonoBehaviour: @@ -2412,7 +2412,7 @@ Transform: - {fileID: 289856512} - {fileID: 1759620714} m_Father: {fileID: 0} - m_RootOrder: 7 + m_RootOrder: 8 m_LocalEulerAnglesHint: {x: 0, y: -180, z: 0} --- !u!1 &1929306243 GameObject: @@ -2709,11 +2709,11 @@ Transform: m_PrefabAsset: {fileID: 0} m_GameObject: {fileID: 2044521647} m_LocalRotation: {x: 0, y: 1, z: 0, w: 0} - m_LocalPosition: {x: -2.402, y: 0.506, z: 0.196} + m_LocalPosition: {x: -2.402, y: 0.506, z: 0.277} m_LocalScale: {x: 0.125, y: 0.125, z: 0.125} m_Children: [] m_Father: {fileID: 0} - m_RootOrder: 6 + m_RootOrder: 7 m_LocalEulerAnglesHint: {x: 0, y: 180, z: 0} --- !u!1 &2129299478 GameObject: @@ -2870,7 +2870,7 @@ Transform: m_Children: - {fileID: 1187452030} m_Father: {fileID: 0} - m_RootOrder: 4 + m_RootOrder: 5 m_LocalEulerAnglesHint: {x: 0, y: -180, z: 0} --- !u!114 &2129299486 MonoBehaviour: diff --git a/Assets/automata/GoL/gol_out.mat b/Assets/automata/GoL/gol_out.mat index b0a0f1e..38405fd 100644 --- a/Assets/automata/GoL/gol_out.mat +++ b/Assets/automata/GoL/gol_out.mat @@ -8,7 +8,7 @@ Material: m_PrefabInstance: {fileID: 0} m_PrefabAsset: {fileID: 0} m_Name: gol_out - m_Shader: {fileID: 4800000, guid: 6bd3377429d62992c8fdea6f71c8bcaa, type: 3} + m_Shader: {fileID: 4800000, guid: 2e618ea97d394cb79a97f3f69d29807a, type: 3} m_ShaderKeywords: m_LightmapFlags: 0 m_EnableInstancingVariants: 0 @@ -70,6 +70,7 @@ Material: - _DistortionStrength: 1 - _DistortionStrengthScaled: 0 - _DstBlend: 0 + - _Emission: 0.5 - _EmissionEnabled: 0 - _FlipbookMode: 0 - _GlossMapScale: 1 diff --git a/Assets/automata/Lenia/lenia.shader b/Assets/automata/Lenia/lenia.shader index 359260d..53ffc0b 100644 --- a/Assets/automata/Lenia/lenia.shader +++ b/Assets/automata/Lenia/lenia.shader @@ -20,6 +20,8 @@ #include "UnityCG.cginc" + #define WIDTH 512 + struct appdata { float4 vertex : POSITION; @@ -33,7 +35,7 @@ float4 vertex : SV_POSITION; }; - sampler2D _LastFrame; + texture2D _LastFrame; float _GrowtCenter; float _GrowthWidth; float _Speed; @@ -74,43 +76,42 @@ return exp(-((u-mu) * (u-mu)) / (2 * sigma * sigma)) * 2.0 - 1.0; } - inline half value(float2 center, float x, float y) { - return tex2D(_LastFrame, center + float2(x, y)).r; + inline float value(uint2 p, int dx, int dy) { + const uint x = (p.x + dx) % WIDTH; + const uint y = (p.y + dy) % WIDTH; + return _LastFrame[uint2(x, y)].r; } fixed4 frag (v2f i) : SV_Target { if(_ProjectionParams.z > 1) discard; - const float resolution = 512.0; - const float d = 1.0 / resolution; // Defines RADIUS Kernel total_max #include "lenia_generated_kernel.cginc" + const uint2 p = i.uv * WIDTH; float total = 0.0; [unroll(RADIUS)] for (int y = 0; y < RADIUS; y++) { [unroll(RADIUS)] for (int x = 1; x <= RADIUS; x++) { - const float xx = (float)x * d; - const float yy = (float)y * d; - total += value(i.uv, xx, yy) * Kernel[y][x-1]; - total += value(i.uv, -yy, xx) * Kernel[y][x-1]; - total += value(i.uv, -xx, -yy) * Kernel[y][x-1]; - total += value(i.uv, yy, -xx) * Kernel[y][x-1]; + total += value(p, x, y) * Kernel[y][x - 1]; + total += value(p, -y, x) * Kernel[y][x - 1]; + total += value(p, -x, -y) * Kernel[y][x - 1]; + total += value(p, y, -x) * Kernel[y][x - 1]; } } - float old_state = value(i.uv, 0.0, 0.0) ; + float old_state = value(p, 0, 0) ; float count = total / total_max; const float step = _Speed * unity_DeltaTime.x; float state = activation(count) * step + old_state; state = clamp(state, 0, 1); - // kernel visualization: lookup table (SLOW) + // kernel visualization: lookup table (VERY SLOW) // float k = 0; // { - // float2 p = (i.uv - 0.5 ) * resolution; + // float2 p = (i.uv - 0.5 ) * WIDTH; // p = floor(p); // if (p.x > 0 && p.y >= 0) { // k = Kernel[p.y][p.x-1]; @@ -124,7 +125,7 @@ // } // kernel visualisation: real size - // float2 p = (i.uv - 0.5) * resolution; + // float2 p = (i.uv - 0.5) * WIDTH; // float k = kernel(length(p)) * (max(abs(p.x), abs(p.y)) <= RADIUS); // kernel visualisation: fill square diff --git a/Assets/automata/Lenia/lenia_generated_kernel.cginc b/Assets/automata/Lenia/lenia_generated_kernel.cginc index b98614d..55e4e32 100644 --- a/Assets/automata/Lenia/lenia_generated_kernel.cginc +++ b/Assets/automata/Lenia/lenia_generated_kernel.cginc @@ -24,3 +24,5 @@ const half Kernel[21][20] = { {0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, }, }; const float total_max = 356.63977; +// Total texture lookups: 206 * 4 + 1 = 825 +// (lookups multiplied by 0.0 get optimised away by the shader compiler, and this giant table generally only exists at compile time) diff --git a/Assets/automata/Lenia/lenia_out.mat b/Assets/automata/Lenia/lenia_out.mat index b3b4acd..43f0a43 100644 --- a/Assets/automata/Lenia/lenia_out.mat +++ b/Assets/automata/Lenia/lenia_out.mat @@ -8,15 +8,14 @@ Material: m_PrefabInstance: {fileID: 0} m_PrefabAsset: {fileID: 0} m_Name: lenia_out - m_Shader: {fileID: 211, guid: 0000000000000000f000000000000000, type: 0} + m_Shader: {fileID: 4800000, guid: 2e618ea97d394cb79a97f3f69d29807a, type: 3} m_ShaderKeywords: m_LightmapFlags: 0 m_EnableInstancingVariants: 0 m_DoubleSidedGI: 0 m_CustomRenderQueue: -1 stringTagMap: {} - disabledShaderPasses: - - ALWAYS + disabledShaderPasses: [] m_SavedProperties: serializedVersion: 3 m_TexEnvs: @@ -71,6 +70,7 @@ Material: - _DistortionStrength: 1 - _DistortionStrengthScaled: 0 - _DstBlend: 0 + - _Emission: 0.5 - _EmissionEnabled: 0 - _FlipbookMode: 0 - _GlossMapScale: 1 diff --git a/Assets/automata/ca_output.shader b/Assets/automata/ca_output.shader new file mode 100644 index 0000000..0e82a51 --- /dev/null +++ b/Assets/automata/ca_output.shader @@ -0,0 +1,43 @@ +Shader "CrispyPin/CA Output" +{ + Properties + { + [NoScaleOffset] + _MainTex ("RenderTexture", 2D) = "white" {} + _Emission ("Emission", Range(0, 1)) = 0.5 + } + SubShader + { + Tags { "RenderType"="Opaque" } + LOD 200 + + CGPROGRAM + // Physically based Standard lighting model, and enable shadows on all light types + #pragma surface surf Standard fullforwardshadows + + // Use shader model 3.0 target, to get nicer looking lighting + #pragma target 3.0 + + struct Input + { + float2 uv_MainTex; + }; + + sampler2D _MainTex; + half _Emission; + + void surf (Input IN, inout SurfaceOutputStandard o) + { + // don't interfere with simulation camera + if(_ProjectionParams.z < 1) discard; + + // Albedo comes from a texture tinted by color + fixed4 c = tex2D(_MainTex, IN.uv_MainTex); + o.Albedo = c.rgb; + o.Emission = c.rgb * _Emission; + + } + ENDCG + } + FallBack "Diffuse" +} diff --git a/Assets/automata/sim_output.shader.meta b/Assets/automata/ca_output.shader.meta similarity index 80% rename from Assets/automata/sim_output.shader.meta rename to Assets/automata/ca_output.shader.meta index 25a58da..0aa507d 100644 --- a/Assets/automata/sim_output.shader.meta +++ b/Assets/automata/ca_output.shader.meta @@ -1,5 +1,5 @@ fileFormatVersion: 2 -guid: 6bd3377429d62992c8fdea6f71c8bcaa +guid: 2e618ea97d394cb79a97f3f69d29807a ShaderImporter: externalObjects: {} defaultTextures: [] diff --git a/Assets/automata/sim_output.shader b/Assets/automata/sim_output.shader deleted file mode 100644 index 5db8007..0000000 --- a/Assets/automata/sim_output.shader +++ /dev/null @@ -1,61 +0,0 @@ -Shader "CrispyPin/SimOutput" -{ - Properties - { - _MainTex ("Texture", 2D) = "white" {} - } - SubShader - { - Tags { "RenderType"="Opaque" } - LOD 100 - - Pass - { - CGPROGRAM - #pragma vertex vert - #pragma fragment frag - - #include "UnityCG.cginc" - - struct appdata - { - float4 vertex : POSITION; - float2 uv : TEXCOORD0; - UNITY_VERTEX_INPUT_INSTANCE_ID - }; - - struct v2f - { - float2 uv : TEXCOORD0; - float4 vertex : SV_POSITION; - UNITY_VERTEX_OUTPUT_STEREO - - }; - - sampler2D _MainTex; - float4 _MainTex_ST; - - v2f vert (appdata v) - { - v2f o; - UNITY_SETUP_INSTANCE_ID(v); - UNITY_INITIALIZE_OUTPUT(v2f, o); - UNITY_INITIALIZE_VERTEX_OUTPUT_STEREO(o); - - o.vertex = UnityObjectToClipPos(v.vertex); - o.uv = TRANSFORM_TEX(v.uv, _MainTex); - return o; - } - - fixed4 frag (v2f i) : SV_Target - { - // don't interfere with simulation camera - if(_ProjectionParams.z < 1) discard; - - fixed4 col = tex2D(_MainTex, i.uv); - return col; - } - ENDCG - } - } -} diff --git a/lenia-kernel/src/main.rs b/lenia-kernel/src/main.rs index 6cf0754..9e05e6d 100644 --- a/lenia-kernel/src/main.rs +++ b/lenia-kernel/src/main.rs @@ -10,18 +10,23 @@ fn main() { let radius = args[0].parse().unwrap(); let k_offset = 0.435; let k_sharpness = 28.0; + let precision = 50.0; // for rounding let mut img = image::RgbImage::new(radius * 2 + 1, radius * 2 + 1); let mut total_max = 0.0; + let mut total_lookups = 0; println!("// generated by the rust program"); println!("#define RADIUS {}", radius); println!("const half Kernel[{}][{}] = {{", radius + 1, radius); for y in 0..=radius { print!(" {{"); for x in 1..=radius { - let k = (k(x, y, radius, k_offset, k_sharpness) * 50.0).floor() / 50.0; + let k = (k(x, y, radius, k_offset, k_sharpness) * precision).floor() / precision; total_max += k * 4.0; + if k > 0.0 { + total_lookups += 1; + } print!("{:.2}, ", k); { let pixel = image::Rgb([0, (k * 255.0) as u8, 0]); @@ -36,6 +41,12 @@ fn main() { } println!("}};"); println!("const float total_max = {};", total_max); + println!( + "// Total texture lookups: {} * 4 + 1 = {}", + total_lookups, + total_lookups * 4 + 1 + ); + println!("// (lookups multiplied by 0.0 get optimised away by the shader compiler, and this giant table generally only exists at compile time)"); img.save("kernel.png").unwrap(); }