2024-09-20 20:30:10 +02:00
commit 4fabf1a6fd
29169 changed files with 1706941 additions and 0 deletions

View File

@@ -0,0 +1,68 @@
using UnityEngine.Serialization;
namespace UnityEngine.Rendering
{
/// <summary>
/// The available options for range reduction/tonemapping when outputting to an HDR device.
/// </summary>
[GenerateHLSL]
public enum HDRRangeReduction
{
/// <summary>
/// No range reduction.
/// </summary>
None,
/// <summary>
/// Reinhard tonemapping.
/// </summary>
Reinhard,
/// <summary>
/// BT2390 Hermite spline EETF range reduction.
/// </summary>
BT2390,
/// <summary>
/// ACES tonemapping preset for 1000 nits displays.
/// </summary>
ACES1000Nits,
/// <summary>
/// ACES tonemapping preset for 2000 nits displays.
/// </summary>
ACES2000Nits,
/// <summary>
/// ACES tonemapping preset for 4000 nits displays.
/// </summary>
ACES4000Nits
}
/// <summary>
/// The available options for colorspace when outputting to an HDR device.
/// </summary>
[GenerateHLSL]
public enum HDRColorspace
{
/// <summary>
/// Rec709 color primaries with D65 white point.
/// </summary>
Rec709,
/// <summary>
/// Rec2020 color primaries with D65 white point.
/// </summary>
Rec2020
}
/// <summary>
/// The available options for color encoding when outputting to an HDR device.
/// </summary>
[GenerateHLSL]
public enum HDREncoding
{
/// <summary>
/// Linear OETF.
/// </summary>
Linear = TransferFunction.Linear,
/// <summary>
/// ST 2084 PQ OETF.
/// </summary>
PQ = TransferFunction.PQ
}
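// A minimal illustration (not part of this API; 'maxNitsOfDisplay' is a hypothetical input):
// selecting an ACES range-reduction preset from a display's reported peak luminance.
//
//   HDRRangeReduction reduction = maxNitsOfDisplay >= 4000 ? HDRRangeReduction.ACES4000Nits
//                               : maxNitsOfDisplay >= 2000 ? HDRRangeReduction.ACES2000Nits
//                               : maxNitsOfDisplay >= 1000 ? HDRRangeReduction.ACES1000Nits
//                               : HDRRangeReduction.Reinhard;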
}

View File

@@ -0,0 +1,30 @@
//
// This file was automatically generated. Please don't edit by hand. Execute Editor command [ Edit > Rendering > Generate Shader Includes ] instead
//
#ifndef HDROUTPUTDEFINES_CS_HLSL
#define HDROUTPUTDEFINES_CS_HLSL
//
// UnityEngine.Rendering.HDRRangeReduction: static fields
//
#define HDRRANGEREDUCTION_NONE (0)
#define HDRRANGEREDUCTION_REINHARD (1)
#define HDRRANGEREDUCTION_BT2390 (2)
#define HDRRANGEREDUCTION_ACES1000NITS (3)
#define HDRRANGEREDUCTION_ACES2000NITS (4)
#define HDRRANGEREDUCTION_ACES4000NITS (5)
//
// UnityEngine.Rendering.HDREncoding: static fields
//
#define HDRENCODING_LINEAR (3)
#define HDRENCODING_PQ (2)
//
// UnityEngine.Rendering.HDRColorspace: static fields
//
#define HDRCOLORSPACE_REC709 (0)
#define HDRCOLORSPACE_REC2020 (1)
#endif

View File

@@ -0,0 +1,7 @@
fileFormatVersion: 2
guid: 413c288ef05043d4285cf0407f8188b9
ShaderIncludeImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: d568fc1905aef314a9f36afd557bc4f1
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: 8d87e57545548474c86d357b2d08ee30
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,192 @@
#if UNITY_EDITOR
using UnityEditor;
#endif
using System;
namespace UnityEngine.Rendering
{
/// <summary>
/// Data-driven lens flare that can be added to any GameObject.
/// </summary>
[ExecuteAlways]
[AddComponentMenu("Rendering/Lens Flare (SRP)")]
public sealed class LensFlareComponentSRP : MonoBehaviour
{
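// A minimal usage sketch (hypothetical names such as 'go' and 'myFlareAsset' are illustrative only):
//
//   var flare = go.AddComponent<LensFlareComponentSRP>();
//   flare.lensFlareData = myFlareAsset; // a LensFlareDataSRP asset
//   flare.intensity = 2.0f;
//   flare.useOcclusion = true;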
[SerializeField]
private LensFlareDataSRP m_LensFlareData = null;
/// <summary>
/// Lens flare asset used on this component
/// </summary>
public LensFlareDataSRP lensFlareData
{
get
{
return m_LensFlareData;
}
set
{
m_LensFlareData = value;
OnValidate();
}
}
/// <summary>
/// Intensity
/// </summary>
[Min(0.0f)]
public float intensity = 1.0f;
/// <summary>
/// Distance used to scale the Distance Attenuation Curve
/// </summary>
[Min(1e-5f)]
public float maxAttenuationDistance = 100.0f;
/// <summary>
/// Distance used to scale the Scale Attenuation Curve
/// </summary>
[Min(1e-5f)]
public float maxAttenuationScale = 100.0f;
/// <summary>
/// Attenuation by distance
/// </summary>
public AnimationCurve distanceAttenuationCurve = new AnimationCurve(new Keyframe(0.0f, 1.0f), new Keyframe(1.0f, 0.0f));
/// <summary>
/// Scale by distance, using the same distance range as distanceAttenuationCurve
/// </summary>
public AnimationCurve scaleByDistanceCurve = new AnimationCurve(new Keyframe(0.0f, 1.0f), new Keyframe(1.0f, 0.0f));
/// <summary>
/// If the component is attached to a light, attenuates the lens flare based on the light type
/// </summary>
public bool attenuationByLightShape = true;
/// <summary>
/// Radial attenuation, which allows, for instance, enabling the flare only at the edges of the screen
/// </summary>
public AnimationCurve radialScreenAttenuationCurve = new AnimationCurve(new Keyframe(0.0f, 1.0f), new Keyframe(1.0f, 1.0f));
/// <summary>
/// Enable Occlusion feature
/// </summary>
public bool useOcclusion = false;
/// <summary>
/// Radius around the light used to occlude the flare (value in world space)
/// </summary>
[Min(0.0f)]
public float occlusionRadius = 0.1f;
/// <summary>
/// Enable Occlusion using Background Cloud (for instance: CloudLayer)
/// </summary>
public bool useBackgroundCloudOcclusion = false;
/// <summary>
/// Number of random samples used inside the disk of radius 'occlusionRadius'
/// </summary>
[Range(1, 64)]
public uint sampleCount = 32;
/// <summary>
/// Z Occlusion Offset allows offsetting the plane on which the occlusion disc is placed (closer to the camera), in world space.
/// Useful, for instance, to sample occlusion outside a light bulb when the flare is placed inside the bulb.
/// </summary>
public float occlusionOffset = 0.05f;
/// <summary>
/// Global Scale
/// </summary>
[Min(0.0f)]
public float scale = 1.0f;
/// <summary>
/// If allowOffScreen is true, the flare is still rendered even when its source is outside the screen
/// </summary>
public bool allowOffScreen = false;
/// <summary>
/// If volumetricCloudOcclusion is true, the volumetric clouds (HDRP only) are used for the occlusion
/// </summary>
public bool volumetricCloudOcclusion = false;
/// Our default celestial body will have an angular radius of 3.3 degrees. This is an arbitrary number, but must be kept constant
/// so the occlusion radius for directional lights is consistent regardless of the near / far clip plane configuration.
private static float sCelestialAngularRadius = 3.3f * Mathf.PI / 180.0f;
/// <summary>
/// OcclusionRemapCurve allows the occlusion value (from 0 to 1) to be remapped with any desired shape.
/// </summary>
public TextureCurve occlusionRemapCurve = new TextureCurve(AnimationCurve.Linear(0.0f, 0.0f, 1.0f, 1.0f), 1.0f, false, new Vector2(0.0f, 1.0f));
/// <summary>
/// Retrieves the projected occlusion radius of a celestial body placed on the far plane, given its angular radius.
/// This is used for directional lights, which require a consistent occlusion radius regardless of the near/far plane configuration.
/// </summary>
/// <param name="mainCam">The camera utilized to calculate the occlusion radius</param>
/// <returns>The occlusion radius, in world units.</returns>
public float celestialProjectedOcclusionRadius(Camera mainCam)
{
float projectedRadius = (float)Math.Tan(sCelestialAngularRadius) * mainCam.farClipPlane;
return occlusionRadius * projectedRadius;
}
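// Worked example (illustrative values): with sCelestialAngularRadius = 3.3 degrees and a far clip
// plane of 1000 units, tan(0.0576 rad) * 1000 ~= 57.7 units; with occlusionRadius = 0.1 the
// projected occlusion radius is therefore ~= 5.8 world units.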
/// <summary>
/// Adds the lens flare to the post-process queue, or removes it if no lens flare data is set
/// </summary>
void OnEnable()
{
if (lensFlareData)
LensFlareCommonSRP.Instance.AddData(this);
else
LensFlareCommonSRP.Instance.RemoveData(this);
}
/// <summary>
/// Remove the lens flare from the queue of PostProcess
/// </summary>
void OnDisable()
{
LensFlareCommonSRP.Instance.RemoveData(this);
}
/// <summary>
/// Add or remove the lens flare from the queue of PostProcess
/// </summary>
void OnValidate()
{
if (isActiveAndEnabled && lensFlareData != null)
{
LensFlareCommonSRP.Instance.AddData(this);
}
else
{
LensFlareCommonSRP.Instance.RemoveData(this);
}
}
#if UNITY_EDITOR
private float sDebugClippingSafePercentage = 0.9f; // for the debug gizmo, only push to 90% of the far plane so the debug lines are not clipped.
void OnDrawGizmosSelected()
{
Camera mainCam = Camera.current;
if (mainCam != null && useOcclusion)
{
Vector3 positionWS;
float adjustedOcclusionRadius = occlusionRadius;
Light light = GetComponent<Light>();
if (light != null && light.type == LightType.Directional)
{
positionWS = -transform.forward * (mainCam.farClipPlane * sDebugClippingSafePercentage) + mainCam.transform.position;
adjustedOcclusionRadius = celestialProjectedOcclusionRadius(mainCam);
}
else
{
positionWS = transform.position;
}
Color previousH = Handles.color;
Color previousG = Gizmos.color;
Handles.color = Color.red;
Gizmos.color = Color.red;
Vector3 dir = (mainCam.transform.position - positionWS).normalized;
Handles.DrawWireDisc(positionWS + dir * occlusionOffset, dir, adjustedOcclusionRadius, 1.0f);
Gizmos.DrawWireSphere(positionWS, occlusionOffset);
Gizmos.color = previousG;
Handles.color = previousH;
}
}
#endif
}
}

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: cc4d76f733087744991913c9d19d5274
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {fileID: 2800000, guid: d7b90718ad2a7af4aab8546a0897d5db, type: 3}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,319 @@
using UnityEngine.Serialization;
namespace UnityEngine.Rendering
{
/// <summary>
/// SRPLensFlareBlendMode defines the available blend modes for each LensFlareElement
/// </summary>
[System.Serializable]
public enum SRPLensFlareBlendMode
{
/// <summary>
/// Additive: Blend One One
/// </summary>
Additive,
/// <summary>
/// Screen:
/// Blend One OneMinusSrcColor
/// </summary>
Screen,
/// <summary>
/// Premultiply:
/// Blend One OneMinusSrcAlpha
/// ColorMask RGB
/// </summary>
Premultiply,
/// <summary>
/// Lerp: Blend SrcAlpha OneMinusSrcAlpha
/// </summary>
Lerp
}
/// <summary>
/// SRPLensFlareDistribution defines how the flare elements are spread when count > 1
/// </summary>
[System.Serializable]
public enum SRPLensFlareDistribution
{
/// <summary>
/// Uniformly spread
/// </summary>
Uniform,
/// <summary>
/// Controlled with a curve
/// </summary>
Curve,
/// <summary>
/// Random distribution
/// </summary>
Random
}
/// <summary>
/// SRPLensFlareType, which can be an image or a procedural shape.
/// If the order changes or a new member is added, the preview
/// shader LensFlareDataDrivenPreview.shader must be updated.
/// </summary>
[System.Serializable]
public enum SRPLensFlareType
{
/// <summary>
/// Image from a file or a RenderTexture
/// </summary>
Image,
/// <summary>
/// Procedural Circle
/// </summary>
Circle,
/// <summary>
/// Polygon
/// </summary>
Polygon
}
/// <summary>
/// LensFlareDataElementSRP defines the collection of parameters describing the behavior of a Lens Flare Element.
/// </summary>
[System.Serializable]
public sealed class LensFlareDataElementSRP
{
/// <summary> Initialize default values </summary>
public LensFlareDataElementSRP()
{
visible = true;
localIntensity = 1.0f;
position = 0.0f;
positionOffset = new Vector2(0.0f, 0.0f);
angularOffset = 0.0f;
translationScale = new Vector2(1.0f, 1.0f);
lensFlareTexture = null;
uniformScale = 1.0f;
sizeXY = Vector2.one;
allowMultipleElement = false;
count = 5;
rotation = 0.0f;
tint = new Color(1.0f, 1.0f, 1.0f, 0.5f);
blendMode = SRPLensFlareBlendMode.Additive;
autoRotate = false;
isFoldOpened = true;
flareType = SRPLensFlareType.Circle;
distribution = SRPLensFlareDistribution.Uniform;
lengthSpread = 1f;
colorGradient = new Gradient();
colorGradient.SetKeys(new GradientColorKey[] { new GradientColorKey(Color.white, 0.0f), new GradientColorKey(Color.white, 1.0f) },
new GradientAlphaKey[] { new GradientAlphaKey(1.0f, 0.0f), new GradientAlphaKey(1.0f, 1.0f) });
positionCurve = new AnimationCurve(new Keyframe(0.0f, 0.0f, 1.0f, 1.0f), new Keyframe(1.0f, 1.0f, 1.0f, -1.0f));
scaleCurve = new AnimationCurve(new Keyframe(0.0f, 1.0f), new Keyframe(1.0f, 1.0f));
uniformAngleCurve = new AnimationCurve(new Keyframe(0.0f, 0.0f), new Keyframe(1.0f, 0.0f));
// Random
seed = 0;
intensityVariation = 0.75f;
positionVariation = new Vector2(1.0f, 0.0f);
scaleVariation = 1.0f;
rotationVariation = 180.0f;
// Distortion
enableRadialDistortion = false;
targetSizeDistortion = Vector2.one;
distortionCurve = new AnimationCurve(new Keyframe(0.0f, 0.0f, 1.0f, 1.0f), new Keyframe(1.0f, 1.0f, 1.0f, -1.0f));
distortionRelativeToCenter = false;
// Parameters for Procedural
fallOff = 1.0f;
edgeOffset = 0.1f;
sdfRoundness = 0.0f;
sideCount = 6;
inverseSDF = false;
}
/// <summary> Visibility checker for current element </summary>
public bool visible;
/// <summary> Position </summary>
public float position;
/// <summary> Position offset </summary>
public Vector2 positionOffset;
/// <summary> Angular offset </summary>
public float angularOffset;
/// <summary> Translation Scale </summary>
public Vector2 translationScale;
[Min(0), SerializeField, FormerlySerializedAs("localIntensity")]
float m_LocalIntensity;
/// <summary> Intensity of this element </summary>
public float localIntensity
{
get => m_LocalIntensity;
set => m_LocalIntensity = Mathf.Max(0, value);
}
/// <summary> Texture used for this Lens Flare Element </summary>
public Texture lensFlareTexture;
/// <summary> Uniform scale applied </summary>
public float uniformScale;
/// <summary> Scale size on each dimension </summary>
public Vector2 sizeXY;
/// <summary> Enable multiple elements </summary>
public bool allowMultipleElement;
[Min(1), SerializeField, FormerlySerializedAs("count")]
int m_Count;
/// <summary> Element can be repeated 'count' times </summary>
public int count
{
get => m_Count;
set => m_Count = Mathf.Max(1, value);
}
/// <summary> Preserve Aspect Ratio </summary>
public bool preserveAspectRatio;
/// <summary> Local rotation of the texture </summary>
public float rotation;
/// <summary> Tint of the texture can be modulated by the light we are attached to </summary>
public Color tint;
/// <summary> Blend mode used </summary>
public SRPLensFlareBlendMode blendMode;
/// <summary> Rotate the texture relative to the angle on the screen (the rotation will be added to the parameter 'rotation') </summary>
public bool autoRotate;
/// <summary> FlareType used </summary>
public SRPLensFlareType flareType;
/// <summary> Modulate by light color if the asset is used in a 'SRP Lens Flare Source Override' </summary>
public bool modulateByLightColor;
#pragma warning disable 0414 // never used (editor state)
/// <summary> Internal value used to store the minimized or maximized state of the LensFlareElement </summary>
[SerializeField]
bool isFoldOpened;
#pragma warning restore 0414
/// <summary> SRPLensFlareDistribution defines how the flare elements are spread when count > 1 </summary>
public SRPLensFlareDistribution distribution;
/// <summary> Length over which the flares are distributed; the spread starts at the 'starting position' </summary>
public float lengthSpread;
/// <summary> Curve describing how to place flares distribution (Used only for Uniform and Curve 'distribution') </summary>
public AnimationCurve positionCurve;
/// <summary> Curve describing how to scale flares distribution (Used only for Uniform and Curve 'distribution') </summary>
public AnimationCurve scaleCurve;
/// <summary> Seed used to seed randomness </summary>
public int seed;
/// <summary> Colors sampled along the gradient: uniformly for the Uniform and Curve distributions, randomly when the distribution is 'Random'. </summary>
public Gradient colorGradient;
[Range(0, 1), SerializeField, FormerlySerializedAs("intensityVariation")]
float m_IntensityVariation;
/// <summary> Scale factor applied on the variation of the intensities. </summary>
public float intensityVariation
{
get => m_IntensityVariation;
set => m_IntensityVariation = Mathf.Max(0, value);
}
/// <summary> Scale factor applied on the variation of the positions. </summary>
public Vector2 positionVariation;
/// <summary> Coefficient applied on the variation of the scale (relative to the current scale). </summary>
public float scaleVariation;
/// <summary> Scale factor applied on the variation of the rotation (relative to the current rotation or auto-rotate). </summary>
public float rotationVariation;
/// <summary> True to use or not the radial distortion. </summary>
public bool enableRadialDistortion;
/// <summary> Target size used on the edge of the screen. </summary>
public Vector2 targetSizeDistortion;
/// <summary> Curve blending from screen center to the edges of the screen. </summary>
public AnimationCurve distortionCurve;
/// <summary> If true, the distortion is relative to the center of the screen; otherwise it is relative to the lens flare source's screen position. </summary>
public bool distortionRelativeToCenter;
[Range(0, 1), SerializeField, FormerlySerializedAs("fallOff")]
float m_FallOff;
/// <summary> Falloff of the gradient used for the Procedural Flare. </summary>
public float fallOff
{
get => m_FallOff;
set => m_FallOff = Mathf.Clamp01(value);
}
[Range(0, 1), SerializeField, FormerlySerializedAs("edgeOffset")]
float m_EdgeOffset;
/// <summary> Gradient Offset used for the Procedural Flare. </summary>
public float edgeOffset
{
get => m_EdgeOffset;
set => m_EdgeOffset = Mathf.Clamp01(value);
}
[Min(3), SerializeField, FormerlySerializedAs("sideCount")]
int m_SideCount;
/// <summary> Side count of the regular polygon generated. </summary>
public int sideCount
{
get => m_SideCount;
set => m_SideCount = Mathf.Max(3, value);
}
[Range(0, 1), SerializeField, FormerlySerializedAs("sdfRoundness")]
float m_SdfRoundness;
/// <summary> Roundness of the polygon flare (0: Sharp Polygon, 1: Circle). </summary>
public float sdfRoundness
{
get => m_SdfRoundness;
set => m_SdfRoundness = Mathf.Clamp01(value);
}
/// <summary> Invert the gradient direction. </summary>
public bool inverseSDF;
/// <summary> Uniform angle (in degrees) used when multiple elements are enabled with the Uniform distribution. </summary>
public float uniformAngle;
/// <summary> Uniform angle curve (remapped from -180.0f to 180.0f) used when multiple elements are enabled with the Curve distribution. </summary>
public AnimationCurve uniformAngleCurve;
}
/// <summary> LensFlareDataSRP defines a Lens Flare with a set of LensFlareDataElementSRP </summary>
[System.Serializable]
public sealed class LensFlareDataSRP : ScriptableObject
{
/// <summary> Initialize default value </summary>
public LensFlareDataSRP()
{
elements = null;
}
/// <summary> List of LensFlareDataElementSRP </summary>
public LensFlareDataElementSRP[] elements;
}
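// A minimal usage sketch (names such as 'flareAsset' are illustrative only):
//
//   var flareAsset = ScriptableObject.CreateInstance<LensFlareDataSRP>();
//   var element = new LensFlareDataElementSRP();
//   element.flareType = SRPLensFlareType.Circle;
//   element.allowMultipleElement = true;
//   element.count = 3;
//   flareAsset.elements = new[] { element };
//   // Assign 'flareAsset' to LensFlareComponentSRP.lensFlareData to render it.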
}

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: 877e1507ae8d81c40bf1b94c1a86036f
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {fileID: 2800000, guid: d7b90718ad2a7af4aab8546a0897d5db, type: 3}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,8 @@
fileFormatVersion: 2
guid: 86304ecf756376e4b8ecb6f9d6111b89
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,233 @@
///
/// FidelityFX Super Resolution (FSR) Common Shader Code
///
/// This file provides shader helper functions which are intended to be used with the C# helper functions in
/// FSRUtils.cs. The main purpose of this file is to simplify the usage of the external FSR shader functions within
/// the Unity shader environment.
///
/// Usage:
/// - Call SetXConstants function on a command buffer in C#
/// - Launch shader (via draw or dispatch)
/// - Call associated ApplyX function provided by this file
///
/// The following preprocessor parameters MUST be defined before including this file:
/// - FSR_INPUT_TEXTURE
/// - The texture to use as input for the FSR passes
/// - FSR_INPUT_SAMPLER
/// - The sampler to use for FSR_INPUT_TEXTURE
///
/// The following preprocessor parameters are optional and MAY be defined before including this file:
/// - FSR_ENABLE_ALPHA
/// - Enables alpha pass-through functionality for the RCAS pass
///
// Ensure that the shader including this file is targeting an adequate shader level
#if SHADER_TARGET < 45
#error FidelityFX Super Resolution requires a shader target of 4.5 or greater
#endif
// Indicate that we'll be using the HLSL implementation of FSR
#define A_GPU 1
#define A_HLSL 1
// Enable either the 16-bit or the 32-bit implementation of FSR depending on platform support
// Note: There are known issues relating to the math approximation functions on some DX11 drivers when FP16 is used.
// Due to this issue, we currently prevent the 16-bit implementation from being used when DX11 is detected.
#if REAL_IS_HALF && !defined(SHADER_API_D3D11)
#define A_HALF
#define FSR_EASU_H 1
#define FSR_RCAS_H 1
#else
#define FSR_EASU_F 1
#define FSR_RCAS_F 1
#endif
// Enable the RCAS passthrough alpha feature when the alpha channel is in use
#if FSR_ENABLE_ALPHA
#define FSR_RCAS_PASSTHROUGH_ALPHA 1
#endif
// Include the external FSR HLSL files
#include "Packages/com.unity.render-pipelines.core/Runtime/PostProcessing/Shaders/ffx/ffx_a.hlsl"
#include "Packages/com.unity.render-pipelines.core/Runtime/PostProcessing/Shaders/ffx/ffx_fsr1.hlsl"
/// Bindings for FSR EASU constants provided by the CPU
///
/// Helper functions that set these constants can be found in FSRUtils
float4 _FsrEasuConstants0;
float4 _FsrEasuConstants1;
float4 _FsrEasuConstants2;
float4 _FsrEasuConstants3;
// Unity doesn't currently have a way to bind uint4 types so we reinterpret float4 as uint4 using macros below
#define FSR_EASU_CONSTANTS_0 asuint(_FsrEasuConstants0)
#define FSR_EASU_CONSTANTS_1 asuint(_FsrEasuConstants1)
#define FSR_EASU_CONSTANTS_2 asuint(_FsrEasuConstants2)
#define FSR_EASU_CONSTANTS_3 asuint(_FsrEasuConstants3)
/// EASU glue functions
///
/// These are used by the EASU implementation to access texture data
#if FSR_EASU_H
AH4 FsrEasuRH(AF2 p)
{
return (AH4)GATHER_RED_TEXTURE2D_X(FSR_INPUT_TEXTURE, FSR_INPUT_SAMPLER, p);
}
AH4 FsrEasuGH(AF2 p)
{
return (AH4)GATHER_GREEN_TEXTURE2D_X(FSR_INPUT_TEXTURE, FSR_INPUT_SAMPLER, p);
}
AH4 FsrEasuBH(AF2 p)
{
return (AH4)GATHER_BLUE_TEXTURE2D_X(FSR_INPUT_TEXTURE, FSR_INPUT_SAMPLER, p);
}
void FsrEasuProcessInput(inout AH4 r, inout AH4 g, inout AH4 b)
{
// HDRP only. URP uses an upscaling pass before the EASU pass where this operation can be done.
#ifdef HDR_INPUT
AH3 s0 = FastTonemap(AH3(r.x, g.x, b.x) * FSR_EASU_ONE_OVER_PAPER_WHITE);
AH3 s1 = FastTonemap(AH3(r.y, g.y, b.y) * FSR_EASU_ONE_OVER_PAPER_WHITE);
AH3 s2 = FastTonemap(AH3(r.z, g.z, b.z) * FSR_EASU_ONE_OVER_PAPER_WHITE);
AH3 s3 = FastTonemap(AH3(r.w, g.w, b.w) * FSR_EASU_ONE_OVER_PAPER_WHITE);
r = AH4(s0.r, s1.r, s2.r, s3.r);
g = AH4(s0.g, s1.g, s2.g, s3.g);
b = AH4(s0.b, s1.b, s2.b, s3.b);
#endif
}
#else
AF4 FsrEasuRF(AF2 p)
{
return GATHER_RED_TEXTURE2D_X(FSR_INPUT_TEXTURE, FSR_INPUT_SAMPLER, p);
}
AF4 FsrEasuGF(AF2 p)
{
return GATHER_GREEN_TEXTURE2D_X(FSR_INPUT_TEXTURE, FSR_INPUT_SAMPLER, p);
}
AF4 FsrEasuBF(AF2 p)
{
return GATHER_BLUE_TEXTURE2D_X(FSR_INPUT_TEXTURE, FSR_INPUT_SAMPLER, p);
}
void FsrEasuProcessInput(inout AF4 r, inout AF4 g, inout AF4 b)
{
// HDRP only. URP uses an upscaling pass before the EASU pass where this operation can be done.
#ifdef HDR_INPUT
#ifndef FSR_EASU_ONE_OVER_PAPER_WHITE
#error missing definition of FSR_EASU_ONE_OVER_PAPER_WHITE
#endif
float3 s0 = FastTonemap(float3(r.x, g.x, b.x) * FSR_EASU_ONE_OVER_PAPER_WHITE);
float3 s1 = FastTonemap(float3(r.y, g.y, b.y) * FSR_EASU_ONE_OVER_PAPER_WHITE);
float3 s2 = FastTonemap(float3(r.z, g.z, b.z) * FSR_EASU_ONE_OVER_PAPER_WHITE);
float3 s3 = FastTonemap(float3(r.w, g.w, b.w) * FSR_EASU_ONE_OVER_PAPER_WHITE);
r = float4(s0.r, s1.r, s2.r, s3.r);
g = float4(s0.g, s1.g, s2.g, s3.g);
b = float4(s0.b, s1.b, s2.b, s3.b);
#endif
}
#endif
/// Applies FidelityFX Super Resolution Edge Adaptive Spatial Upsampling at the provided pixel position
///
/// The source texture must be provided before this file is included via the FSR_INPUT_TEXTURE preprocessor symbol
/// Ex: #define FSR_INPUT_TEXTURE _SourceTex
///
/// A valid sampler must also be provided via the FSR_INPUT_SAMPLER preprocessor symbol
/// Ex: #define FSR_INPUT_SAMPLER sampler_LinearClamp
///
/// The color data stored in the source texture should be in gamma 2.0 color space
real3 ApplyEASU(uint2 positionSS)
{
#if FSR_EASU_H
// Execute 16-bit EASU
AH3 color;
FsrEasuH(
#else
// Execute 32-bit EASU
AF3 color;
FsrEasuF(
#endif
color, positionSS, FSR_EASU_CONSTANTS_0, FSR_EASU_CONSTANTS_1, FSR_EASU_CONSTANTS_2, FSR_EASU_CONSTANTS_3
);
return color;
}
/// Bindings for FSR RCAS constants provided by the CPU
///
/// Helper functions that set these constants can be found in FSRUtils
float4 _FsrRcasConstants;
// Unity doesn't currently have a way to bind uint4 types so we reinterpret float4 as uint4 using macros below
#define FSR_RCAS_CONSTANTS asuint(_FsrRcasConstants)
/// RCAS glue functions
///
/// These are used by the RCAS implementation to access texture data and perform color space conversion if necessary
#if FSR_RCAS_H
AH4 FsrRcasLoadH(ASW2 p)
{
return (AH4)LOAD_TEXTURE2D_X(FSR_INPUT_TEXTURE, p);
}
void FsrRcasInputH(inout AH1 r, inout AH1 g, inout AH1 b)
{
// No conversion to linear necessary since it's always performed during EASU output
}
#else
AF4 FsrRcasLoadF(ASU2 p)
{
return LOAD_TEXTURE2D_X(FSR_INPUT_TEXTURE, p);
}
void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b)
{
// No conversion to linear necessary since it's always performed during EASU output
}
#endif
/// Applies FidelityFX Super Resolution Robust Contrast Adaptive Sharpening at the provided pixel position
///
/// The source texture must be provided before this file is included via the FSR_INPUT_TEXTURE preprocessor symbol
/// Ex: #define FSR_INPUT_TEXTURE _SourceTex
///
/// A valid sampler must also be provided via the FSR_INPUT_SAMPLER preprocessor symbol
/// Ex: #define FSR_INPUT_SAMPLER sampler_LinearClamp
///
/// RCAS supports an optional alpha passthrough that can be enabled via the FSR_ENABLE_ALPHA preprocessor symbol
/// When passthrough is enabled, this function will return the input texture's alpha channel unmodified
///
/// The color data stored in the source texture should be in linear color space
#if FSR_ENABLE_ALPHA
real4 ApplyRCAS(uint2 positionSS)
#else
real3 ApplyRCAS(uint2 positionSS)
#endif
{
#if FSR_RCAS_H
// Execute 16-bit RCAS
#if FSR_ENABLE_ALPHA
AH4 color;
#else
AH3 color;
#endif
FsrRcasH(
#else
// Execute 32-bit RCAS
#if FSR_ENABLE_ALPHA
AF4 color;
#else
AF3 color;
#endif
FsrRcasF(
#endif
color.r,
color.g,
color.b,
#if FSR_ENABLE_ALPHA
color.a,
#endif
positionSS,
FSR_RCAS_CONSTANTS
);
return color;
}

View File

@@ -0,0 +1,7 @@
fileFormatVersion: 2
guid: ee30e890854422d4c85fb895d34e2a53
ShaderIncludeImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,363 @@
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Filtering.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Random.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Sampling/Sampling.hlsl"
#if SHADER_API_GLES
struct AttributesLensFlare
{
float4 positionCS : POSITION;
float2 uv : TEXCOORD0;
#ifndef FLARE_PREVIEW
UNITY_VERTEX_INPUT_INSTANCE_ID
#endif
};
#else
struct AttributesLensFlare
{
uint vertexID : SV_VertexID;
#ifndef FLARE_PREVIEW
UNITY_VERTEX_INPUT_INSTANCE_ID
#endif
};
#endif
struct VaryingsLensFlare
{
float4 positionCS : SV_POSITION;
float2 texcoord : TEXCOORD0;
#if defined(FLARE_HAS_OCCLUSION) || defined(FLARE_COMPUTE_OCCLUSION)
float occlusion : TEXCOORD1;
#endif
#ifndef FLARE_PREVIEW
UNITY_VERTEX_OUTPUT_STEREO
#endif
};
TEXTURE2D(_FlareTex);
SAMPLER(sampler_FlareTex);
TEXTURE2D(_FlareOcclusionRemapTex);
SAMPLER(sampler_FlareOcclusionRemapTex);
#if defined(FLARE_HAS_OCCLUSION)
TEXTURE2D_ARRAY(_FlareOcclusionTex);
SAMPLER(sampler_FlareOcclusionTex);
#endif
#ifdef HDRP_FLARE
#if defined(FLARE_CLOUD_BACKGROUND_OCCLUSION)
TEXTURE2D_X(_FlareCloudOpacity);
SAMPLER(sampler_FlareCloudOpacity);
#endif
#if defined(FLARE_VOLUMETRIC_CLOUD_OCCLUSION)
TEXTURE2D_X(_FlareSunOcclusionTex);
SAMPLER(sampler_FlareSunOcclusionTex);
#endif
#endif
float4 _FlareColorValue;
float4 _FlareData0; // x: localCos0, y: localSin0, zw: PositionOffsetXY
float4 _FlareData1; // x: OcclusionRadius, y: OcclusionSampleCount, z: ScreenPosZ, w: ScreenRatio
float4 _FlareData2; // xy: ScreenPos, zw: FlareSize
float4 _FlareData3; // x: Allow Offscreen, y: Edge Offset, z: Falloff, w: invSideCount
float4 _FlareData4; // x: SDF Roundness, y: Poly Radius, z: PolyParam0, w: PolyParam1
#ifdef FLARE_PREVIEW
float4 _FlarePreviewData;
#define _ScreenSize _FlarePreviewData.xy
#define _FlareScreenRatio _FlarePreviewData.z
#endif
float4 _FlareOcclusionIndex;
#define _FlareColor _FlareColorValue
#define _LocalCos0 _FlareData0.x
#define _LocalSin0 _FlareData0.y
#define _PositionTranslate _FlareData0.zw
#define _OcclusionRadius _FlareData1.x
#define _OcclusionSampleCount _FlareData1.y
#define _ScreenPosZ _FlareData1.z
#ifndef _FlareScreenRatio
#define _FlareScreenRatio _FlareData1.w
#endif
#define _ScreenPos _FlareData2.xy
#define _FlareSize _FlareData2.zw
#define _OcclusionOffscreen _FlareData3.x
#define _FlareEdgeOffset _FlareData3.y
#define _FlareFalloff _FlareData3.z
#define _FlareShapeInvSide _FlareData3.w
#define _FlareSDFRoundness _FlareData4.x
#define _FlareSDFPolyRadius _FlareData4.y
#define _FlareSDFPolyParam0 _FlareData4.z
#define _FlareSDFPolyParam1 _FlareData4.w
float2 Rotate(float2 v, float cos0, float sin0)
{
return float2(v.x * cos0 - v.y * sin0,
v.x * sin0 + v.y * cos0);
}
#if defined(FLARE_COMPUTE_OCCLUSION) || defined(FLARE_OPENGL3_OR_OPENGLCORE)
float GetLinearDepthValue(float2 uv)
{
#if defined(HDRP_FLARE) || defined(FLARE_PREVIEW)
float depth = LOAD_TEXTURE2D_X_LOD(_CameraDepthTexture, uint2(uv * _ScreenSize.xy), 0).x;
#else
float depth = LOAD_TEXTURE2D_X_LOD(_CameraDepthTexture, uint2(uv * GetScaledScreenParams().xy), 0).x;
#endif
return LinearEyeDepth(depth, _ZBufferParams);
}
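// Estimates flare visibility in [0..1]: depth-tests '_OcclusionSampleCount' random samples taken on a
// disk of radius '_OcclusionRadius' around the flare's screen position, optionally attenuates visible
// samples by cloud / volumetric-cloud occlusion (HDRP only), counts off-screen samples as visible when
// '_OcclusionOffscreen' is positive, and finally remaps the accumulated value through '_FlareOcclusionRemapTex'.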
float GetOcclusion(float ratio)
{
if (_OcclusionSampleCount == 0.0f)
return 1.0f;
float contrib = 0.0f;
float sample_Contrib = 1.0f / _OcclusionSampleCount;
float2 ratioScale = float2(1.0f / ratio, 1.0);
for (uint i = 0; i < (uint)_OcclusionSampleCount; i++)
{
float2 dir = _OcclusionRadius * SampleDiskUniform(Hash(2 * i + 0), Hash(2 * i + 1));
float2 pos0 = _ScreenPos.xy + dir;
float2 pos = pos0 * 0.5f + 0.5f;
#ifdef UNITY_UV_STARTS_AT_TOP
pos.y = 1.0f - pos.y;
#endif
if (all(pos >= 0) && all(pos <= 1))
{
float depth0 = GetLinearDepthValue(pos);
if (depth0 > _ScreenPosZ)
{
float occlusionValue = 1.0f;
#ifdef HDRP_FLARE
#if defined(FLARE_CLOUD_BACKGROUND_OCCLUSION)
float cloudOpacity = LOAD_TEXTURE2D_X_LOD(_FlareCloudOpacity, uint2(pos.xy * _ScreenSize.xy), 0).x;
occlusionValue *= cloudOpacity;
#endif
#if defined(FLARE_VOLUMETRIC_CLOUD_OCCLUSION)
float volumetricCloudOcclusion = SAMPLE_TEXTURE2D_X_LOD(_FlareSunOcclusionTex, sampler_FlareSunOcclusionTex, pos, 0).w;
occlusionValue *= saturate(volumetricCloudOcclusion);
#endif
#endif
contrib += sample_Contrib * occlusionValue;
}
}
else if (_OcclusionOffscreen > 0.0f)
{
contrib += sample_Contrib;
}
}
contrib = SAMPLE_TEXTURE2D_LOD(_FlareOcclusionRemapTex, sampler_FlareOcclusionRemapTex, float2(saturate(contrib), 0.0f), 0).x;
contrib = saturate(contrib);
return contrib;
}
#endif
#if defined(FLARE_COMPUTE_OCCLUSION)
VaryingsLensFlare vertOcclusion(AttributesLensFlare input, uint instanceID : SV_InstanceID)
{
VaryingsLensFlare output;
UNITY_SETUP_INSTANCE_ID(input);
UNITY_INITIALIZE_VERTEX_OUTPUT_STEREO(output);
#if defined(HDRP_FLARE) || defined(FLARE_PREVIEW)
float screenRatio = _FlareScreenRatio;
#else
float2 screenParam = GetScaledScreenParams().xy;
float screenRatio = screenParam.y / screenParam.x;
#endif
float2 quadPos = 2.0f * GetQuadVertexPosition(input.vertexID).xy - 1.0f;
float2 uv = GetQuadTexCoord(input.vertexID);
uv.x = 1.0f - uv.x;
output.positionCS.xy = quadPos;
output.texcoord.xy = uv;
output.positionCS.z = 1.0f;
output.positionCS.w = 1.0f;
float occlusion = GetOcclusion(screenRatio);
if (_OcclusionOffscreen < 0.0f && // No lens flare off screen
(any(_ScreenPos.xy < -1) || any(_ScreenPos.xy >= 1)))
occlusion = 0.0f;
output.occlusion = occlusion;
return output;
}
float4 fragOcclusion(VaryingsLensFlare input) : SV_Target
{
UNITY_SETUP_STEREO_EYE_INDEX_POST_VERTEX(input);
return float4(input.occlusion.xxx, 1.0f);
}
#else
VaryingsLensFlare vert(AttributesLensFlare input, uint instanceID : SV_InstanceID)
{
VaryingsLensFlare output;
#ifndef FLARE_PREVIEW
UNITY_SETUP_INSTANCE_ID(input);
UNITY_INITIALIZE_VERTEX_OUTPUT_STEREO(output);
#endif
#if defined(HDRP_FLARE) || defined(FLARE_PREVIEW)
float screenRatio = _FlareScreenRatio;
#else
float2 screenParam = GetScaledScreenParams().xy;
float screenRatio = screenParam.y / screenParam.x;
#endif
#if SHADER_API_GLES
float4 posPreScale = input.positionCS;
float2 uv = input.uv;
#else
float4 posPreScale = float4(2.0f, 2.0f, 1.0f, 1.0f) * GetQuadVertexPosition(input.vertexID) - float4(1.0f, 1.0f, 0.0f, 0.0);
float2 uv = GetQuadTexCoord(input.vertexID);
uv.x = 1.0f - uv.x;
#endif
output.texcoord.xy = uv;
posPreScale.xy *= _FlareSize;
float2 local = Rotate(posPreScale.xy, _LocalCos0, _LocalSin0);
local.x *= screenRatio;
output.positionCS.xy = local + _ScreenPos + _PositionTranslate;
output.positionCS.z = 1.0f;
output.positionCS.w = 1.0f;
#if defined(FLARE_HAS_OCCLUSION)
float occlusion;
if (_OcclusionOffscreen < 0.0f && // No lens flare off screen
(any(_ScreenPos.xy < -1) || any(_ScreenPos.xy > 1)))
{
occlusion = 0.0f;
}
else
{
#if defined(FLARE_OPENGL3_OR_OPENGLCORE)
#if defined(HDRP_FLARE) || defined(FLARE_PREVIEW)
float screenRatio = _FlareScreenRatio;
#else
float2 screenParam = GetScaledScreenParams().xy;
float screenRatio = screenParam.y / screenParam.x;
#endif
occlusion = GetOcclusion(screenRatio);
#else
occlusion = LOAD_TEXTURE2D_ARRAY_LOD(_FlareOcclusionTex, uint2(_FlareOcclusionIndex.x, 0), unity_StereoEyeIndex, 0).x;
#endif
}
output.occlusion = occlusion;
#endif
return output;
}
#endif
float InverseGradient(float x)
{
// Do *not* simplify as 1.0f - x
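// For x > 0 this is algebraically (1 - x); the epsilon only changes the limit at x == 0,
// forcing the result to 0 there instead of 1.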
return x * (1.0f - x) / (x + 1e-6f);
}
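// Procedural circle: uv in [0,1]^2 is remapped to [-1,1]^2; the result is 1.0 inside radius _FlareEdgeOffset,
// falls off to 0.0 at the unit circle, and is shaped by pow(sdf, _FlareFalloff) (optionally inverted when
// FLARE_INVERSE_SDF is defined).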
float4 ComputeCircle(float2 uv)
{
float2 v = (uv - 0.5f) * 2.0f;
const float epsilon = 1e-3f;
const float epsCoef = pow(epsilon, 1.0f / _FlareFalloff);
float x = length(v);
float sdf = saturate((x - 1.0f) / ((_FlareEdgeOffset - 1.0f)));
#if defined(FLARE_INVERSE_SDF)
sdf = saturate(sdf);
sdf = InverseGradient(sdf);
#endif
return pow(sdf, _FlareFalloff);
}
// Modified from ref: https://www.shadertoy.com/view/MtKcWW
// https://www.shadertoy.com/view/3tGBDt
float4 ComputePolygon(float2 uv_)
{
float2 p = uv_ * 2.0f - 1.0f;
float r = _FlareSDFPolyRadius;
float an = _FlareSDFPolyParam0;
float he = _FlareSDFPolyParam1;
float bn = an * floor((atan2(p.y, p.x) + 0.5f * an) / an);
float cos0 = cos(bn);
float sin0 = sin(bn);
p = float2(cos0 * p.x + sin0 * p.y,
-sin0 * p.x + cos0 * p.y);
// side of polygon
float sdf = length(p - float2(r, clamp(p.y, -he, he))) * sign(p.x - r) - _FlareSDFRoundness;
sdf *= _FlareEdgeOffset;
#if defined(FLARE_INVERSE_SDF)
sdf = saturate(-sdf);
sdf = InverseGradient(sdf);
#else
sdf = saturate(-sdf);
#endif
return saturate(pow(sdf, _FlareFalloff));
}
float4 GetFlareShape(float2 uv)
{
#ifdef FLARE_CIRCLE
return ComputeCircle(uv);
#elif defined(FLARE_POLYGON)
return ComputePolygon(uv);
#else
return SAMPLE_TEXTURE2D(_FlareTex, sampler_FlareTex, uv);
#endif
}
float4 frag(VaryingsLensFlare input) : SV_Target
{
#ifndef FLARE_PREVIEW
UNITY_SETUP_STEREO_EYE_INDEX_POST_VERTEX(input);
#endif
float4 col = GetFlareShape(input.texcoord);
#if defined(FLARE_HAS_OCCLUSION)
return col * _FlareColor * input.occlusion;
#else
return col * _FlareColor;
#endif
}

View File

@@ -0,0 +1,7 @@
fileFormatVersion: 2
guid: a279154a65cb87e46a9922aeaf9b7bc4
ShaderIncludeImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,8 @@
fileFormatVersion: 2
guid: badda61a05436ff46bc63d5e3182a197
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,7 @@
fileFormatVersion: 2
guid: e3711cae0eaec834eb4ae197be7dd569
ShaderIncludeImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,7 @@
fileFormatVersion: 2
guid: 2a9d148fab2695346824a766e925a31f
ShaderIncludeImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,7 @@
fileFormatVersion: 2
guid: 2a92c8c54bb0f63458be93b670c77c3b
ShaderIncludeImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,854 @@
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
//
// [LPM] LUMA PRESERVING MAPPER 1.20190521
//
//==============================================================================================================================
// LICENSE
// =======
// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved.
// -------
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
// -------
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
// Software.
// -------
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//------------------------------------------------------------------------------------------------------------------------------
// ABOUT
// =====
// For questions and comments, feel free to contact the author directly: timothy.lottes@amd.com
// LPM is an evolution of the concepts in "A Blend of GCN Optimisation and Colour Processing" from GDC 2019.
// This presentation can be found here: https://gpuopen.com/gdc-2019-presentation-links/
// Changes from the GDC 2019 presentation,
// (a.) Switched from using LUTs to doing the full mapping in ALU.
// This was made possible by changing the algorithm used to "walk back in gamut".
// There is no longer any need for "Fast" vs "Quality" modes.
// While the prior "Quality" mode had good precision for luma, chroma precision was relatively poor.
// Specifically having only 32 steps in the LUT wasn't enough to do a great job on the overexposure color shaping.
// The ALU version has great precision, while being much easier to integrate and tune.
// Likewise as most GPU workloads are not ALU-bound, this ALU version should be better for async integration.
// (b.) Moved to only one step of "walk back in gamut" done towards the end of the mapper (not necessary to do twice).
// (c.) Introduced a secondary optional matrix multiply conversion at the end (better for small to large gamut cases, etc).
//------------------------------------------------------------------------------------------------------------------------------
// FP16 PRECISION LIMITS
// =====================
// LPM supports a packed FP16 fast path.
// The lowest normal FP16 value is 2^-14 (or roughly 0.000061035).
// Converting this linear value to sRGB with input range {0 to 1} scaled {0 to 255} on output produces roughly 0.2.
// Maybe enough precision to convert for 8-bit, but not enough working precision for accumulating lighting.
// Converting 2^-14 to Gamma 2.2 with input range {0 to 1} scaled to {0 to 255} on output produces roughly 3.1.
// Not enough precision in normal values; this relies on denormals for near-zero precision.
//------------------------------------------------------------------------------------------------------------------------------
// TUNING THE CROSSTALK VALUE
// ==========================
// The crosstalk value shapes how color over-exposes.
// Suggest tuning the crosstalk value to get the look and feel desired by the content author.
// Here is the intuition on how to tune the crosstalk value passed into LpmSetup().
// Start with {1.0,1.0,1.0} -> this will fully desaturate on over-exposure (not good).
// Start reducing the blue {1.0,1.0,1.0/8.0} -> tint towards yellow on over-exposure (medium maintaining saturation).
// Reduce blue until the red->yellow->white transition is good {1.0,1.0,1.0/16.0} -> maintaining good saturation.
// It is possible to go extreme to {1.0,1.0,1.0/128.0} -> very high saturation on over-exposure (yellow tint).
// Then reduce red a little {1.0/2.0,1.0,1.0/32.0} -> to tint blues towards cyan on over-exposure.
// Or reduce green a little {1.0,1.0/2.0,1.0/32.0} -> to tint blues towards purple on over-exposure.
// If wanting a stronger blue->purple, drop both green and blue {1.0,1.0/4.0,1.0/128.0}.
// Note that crosstalk value should be tuned differently based on the color space.
// Suggest tuning crosstalk separately for Rec.2020 and Rec.709 primaries for example.
//------------------------------------------------------------------------------------------------------------------------------
// SETUP THE MAPPER
// ================
// This would typically be done on the CPU, but showing the GPU code here.
// ...
// // Setup the control block.
// AU4 map0,map1,map2,map3,map4,map5,map6,map7,map8,map9,mapA,mapB,
// AU4 mapC,mapD,mapE,mapF,mapG,mapH,mapI,mapJ,mapK,mapL,mapM,mapN;
// LpmSetup(
// map0,map1,map2,map3,map4,map5,map6,map7,map8,map9,mapA,mapB,
// mapC,mapD,mapE,mapF,mapG,mapH,mapI,mapJ,mapK,mapL,mapM,mapN,
// false,LPM_CONFIG_709_709,LPM_COLORS_709_709, // <-- Using the LPM_ prefabs to make inputs easier.
// 0.0, // softGap
// 256.0, // hdrMax
// 8.0, // exposure
// 0.25, // contrast
// 1.0, // shoulder contrast
// AF3_(0.0), // saturation
// AF3(1.0,1.0/2.0,1.0/32.0)); // crosstalk
// ...
// // Store the control block to ubo/ssbo buffer (this requires 0 through N to be stored).
// constants.map0=map0;
// constants.map1=map1;
// constants.map2=map2;
// ...
// constants.mapM=mapM;
// constants.mapN=mapN;
//------------------------------------------------------------------------------------------------------------------------------
// RUNNING THE MAPPER ON COLOR
// ===========================
// This part would be running per-pixel on the GPU.
// This is showing the no 'shoulder' adjustment fast path.
// ...
// #include "ffx_a.h"
// #include "ffx_lpm.h"
// ...
// // Fetch the control block (this requires 0 through N to be loaded, some of this will get dead-code removed).
// AU4 map0,map1,map2,map3,map4,map5,map6,map7,map8,map9,mapA,mapB,
// AU4 mapC,mapD,mapE,mapF,mapG,mapH,mapI,mapJ,mapK,mapL,mapM,mapN;
// map0=constants.map0;
// map1=constants.map1;
// map2=constants.map2;
// ...
// mapM=constants.mapM;
// mapN=constants.mapN;
// ...
// // Run the tone/gamut-mapper.
// // The 'c.rgb' is the color to map, using the no-shoulder tuning option.
// LpmFilter(c.r,c.g,c.b,false,LPM_CONFIG_709_709, // <-- Using the LPM_CONFIG_ prefab to make inputs easier.
// map0,map1,map2,map3,map4,map5,map6,map7,map8,map9,mapA,mapB,
// mapC,mapD,mapE,mapF,mapG,mapH,mapI,mapJ,mapK,mapL,mapM,mapN);
// ...
// // Do the final linear to non-linear transform.
// c.r=AToSrgbF1(c.r);
// c.g=AToSrgbF1(c.g);
// c.b=AToSrgbF1(c.b);
//------------------------------------------------------------------------------------------------------------------------------
// CHANGE LOG
// ==========
// 20190521 - Updated file naming.
// 20190425 - Initial GPU-only ALU-only prototype code drop (working 32-bit and 16-bit paths).
//==============================================================================================================================
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
//
// CPU
//
//==============================================================================================================================
#ifdef A_CPU
// TODO!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#endif
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
//
// GPU
//
//==============================================================================================================================
#ifdef A_GPU
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
// HELPER CODE
//------------------------------------------------------------------------------------------------------------------------------
// Used by LpmSetup() to build constants for the GPU setup path.
//------------------------------------------------------------------------------------------------------------------------------
// Color math references,
// - http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html
// - https://en.wikipedia.org/wiki/SRGB#The_sRGB_transfer_function_.28.22gamma.22.29
// - http://www.ryanjuckett.com/programming/rgb-color-space-conversion/
//==============================================================================================================================
// Low-precision solution.
void LpmMatInv3x3(out AF3 ox,out AF3 oy,out AF3 oz,in AF3 ix,in AF3 iy,in AF3 iz){
AF1 i=1.0/(ix.x*(iy.y*iz.z-iz.y*iy.z)-ix.y*(iy.x*iz.z-iy.z*iz.x)+ix.z*(iy.x*iz.y-iy.y*iz.x));
ox=AF3((iy.y*iz.z-iz.y*iy.z)*i,(ix.z*iz.y-ix.y*iz.z)*i,(ix.y*iy.z-ix.z*iy.y)*i);
oy=AF3((iy.z*iz.x-iy.x*iz.z)*i,(ix.x*iz.z-ix.z*iz.x)*i,(iy.x*ix.z-ix.x*iy.z)*i);
oz=AF3((iy.x*iz.y-iz.x*iy.y)*i,(iz.x*ix.y-ix.x*iz.y)*i,(ix.x*iy.y-iy.x*ix.y)*i);}
//------------------------------------------------------------------------------------------------------------------------------
// Transpose.
void LpmMatTrn3x3(out AF3 ox,out AF3 oy,out AF3 oz,in AF3 ix,in AF3 iy,in AF3 iz){
ox=AF3(ix.x,iy.x,iz.x);oy=AF3(ix.y,iy.y,iz.y);oz=AF3(ix.z,iy.z,iz.z);}
//------------------------------------------------------------------------------------------------------------------------------
void LpmMatMul3x3(out AF3 ox,out AF3 oy,out AF3 oz,in AF3 ax,in AF3 ay,in AF3 az,in AF3 bx,in AF3 by,in AF3 bz){
AF3 bx2,by2,bz2;LpmMatTrn3x3(bx2,by2,bz2,bx,by,bz);
ox=AF3(dot(ax,bx2),dot(ax,by2),dot(ax,bz2));
oy=AF3(dot(ay,bx2),dot(ay,by2),dot(ay,bz2));
oz=AF3(dot(az,bx2),dot(az,by2),dot(az,bz2));}
//------------------------------------------------------------------------------------------------------------------------------
// D65 xy coordinates.
AF2 lpmColD65=AF2(0.3127,0.3290);
//------------------------------------------------------------------------------------------------------------------------------
// Rec709 xy coordinates, (D65 white point).
AF2 lpmCol709R=AF2(0.64,0.33);
AF2 lpmCol709G=AF2(0.30,0.60);
AF2 lpmCol709B=AF2(0.15,0.06);
//------------------------------------------------------------------------------------------------------------------------------
// DCI-P3 xy coordinates, (D65 white point).
AF2 lpmColP3R=AF2(0.680,0.320);
AF2 lpmColP3G=AF2(0.265,0.690);
AF2 lpmColP3B=AF2(0.150,0.060);
//------------------------------------------------------------------------------------------------------------------------------
// Rec2020 xy coordinates, (D65 white point).
AF2 lpmCol2020R=AF2(0.708,0.292);
AF2 lpmCol2020G=AF2(0.170,0.797);
AF2 lpmCol2020B=AF2(0.131,0.046);
//------------------------------------------------------------------------------------------------------------------------------
// Computes z from xy, returns xyz.
AF3 LpmColXyToZ(AF2 a){return AF3(a.x,a.y,1.0-(a.x+a.y));}
//------------------------------------------------------------------------------------------------------------------------------
// Returns conversion matrix, rgbw inputs are xy chroma coordinates.
void LpmColRgbToXyz(out AF3 ox,out AF3 oy,out AF3 oz,AF2 r,AF2 g,AF2 b,AF2 w){
// Expand from xy to xyz.
AF3 r3,g3,b3;LpmMatTrn3x3(r3,g3,b3,LpmColXyToZ(r),LpmColXyToZ(g),LpmColXyToZ(b));
// Convert white xyz to XYZ.
AF3 w3=LpmColXyToZ(w)*(1.0/w.y);
// Compute xyz to XYZ scalars for primaries.
AF3 rv,gv,bv;LpmMatInv3x3(rv,gv,bv,r3,g3,b3);
AF3 s=AF3(dot(rv,w3),dot(gv,w3),dot(bv,w3));
// Scale.
ox=r3*s;oy=g3*s;oz=b3*s;}
//==============================================================================================================================
// Visualize difference between two values, by bits of precision.
// This is useful when doing approximation to reference comparisons.
AP1 LpmD(AF1 a,AF1 b){return abs(a-b)<1.0;}
//------------------------------------------------------------------------------------------------------------------------------
AF1 LpmC(AF1 a,AF1 b){AF1 c=1.0; // 6-bits or less (the color)
if(LpmD(a* 127.0,b* 127.0))c=0.875; // 7-bits
if(LpmD(a* 255.0,b* 255.0))c=0.5; // 8-bits
if(LpmD(a* 512.0,b* 512.0))c=0.125; // 9-bits
if(LpmD(a*1024.0,b*1024.0))c=0.0; // 10-bits or better (black)
return c;}
//------------------------------------------------------------------------------------------------------------------------------
AF3 LpmViewDiff(AF3 a,AF3 b){return AF3(LpmC(a.r,b.r),LpmC(a.g,b.g),LpmC(a.b,b.b));}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
// HDR10 RANGE LIMITING SCALAR
//------------------------------------------------------------------------------------------------------------------------------
// As of 2019, HDR10 supporting TVs typically have PQ tonal curves with near clipping long before getting to the peak 10K nits.
// Unfortunately this clipping point changes per TV (requires some amount of user calibration).
// Some examples,
// https://youtu.be/M7OsbpU4oCQ?t=875
// https://youtu.be/8mlTElC2z2A?t=1159
// https://youtu.be/B5V5hCVXBAI?t=975
// For this reason it can be useful to manually limit peak HDR10 output to some point before the clipping point.
// The following functions are useful to compute the scaling factor 'hdr10S' to use with LpmSetup() to manually limit peak.
//==============================================================================================================================
// Compute 'hdr10S' for raw HDR10 output, pass in peak nits (typically somewhere around 1000.0 to 2000.0).
AF1 LpmHdr10RawScalar(AF1 peakNits){return peakNits*(1.0/10000.0);}
//------------------------------------------------------------------------------------------------------------------------------
// Compute 'hdr10S' for scRGB based HDR10 output, pass in peak nits (typically somewhere around 1000.0 to 2000.0).
AF1 LpmHdr10ScrgbScalar(AF1 peakNits){return peakNits*(1.0/10000.0)*(10000.0/80.0);}
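//------------------------------------------------------------------------------------------------------------------------------
// Example (illustrative only), assuming a display that clips near 1000 nits:
//   hdr10S = LpmHdr10RawScalar(1000.0);   // raw HDR10 output  -> 0.1
//   hdr10S = LpmHdr10ScrgbScalar(1000.0); // scRGB-based HDR10 -> 12.5
// Pass the resulting 'hdr10S' into LpmSetup() via the LPM_COLORS_HDR10<RAW|SCRGB>_* prefabs below.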
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
// FREESYNC2 SCRGB SCALAR
//------------------------------------------------------------------------------------------------------------------------------
// The more expensive scRGB mode for FreeSync2 requires a complex scale factor based on display properties.
//==============================================================================================================================
// TODO: Validate this is correct!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// This computes the 'fs2S' factor used in LpmSetup().
AF1 LpmFs2ScrgbScalar(
bool localDimming, // Is local dimming on?
AF1 minLuma,AF1 medLuma){ // Queried display properties.
if(localDimming)return 0.0; // TODO!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
return ((medLuma-minLuma)+minLuma)*(1.0/80.0);}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
// CONFIGURATION PREFABS
//------------------------------------------------------------------------------------------------------------------------------
// Use these to simplify some of the input(s) to the LpmSetup() and LpmFilter() functions.
// The 'LPM_CONFIG_<destination>_<source>' defines are used for the path control.
// The 'LPM_COLORS_<destination>_<source>' defines are used for the gamut control.
// This contains expected common configurations, anything else will need to be made by the user.
//------------------------------------------------------------------------------------------------------------------------------
// WORKING COLOR SPACE
// ===================
// 2020 ......... Rec.2020
// 709 .......... Rec.709
// P3 ........... DCI-P3 with D65 white-point
// --------------
// OUTPUT COLOR SPACE
// ==================
// FS2RAW ....... Faster 32-bit/pixel FreeSync2 raw gamma 2.2 output (native display primaries)
// FS2SCRGB ..... Slower 64-bit/pixel FreeSync2 via the scRGB option (Rec.709 primaries with possible negative color)
// HDR10RAW ..... Faster 32-bit/pixel HDR10 raw (10:10:10:2 PQ output with Rec.2020 primaries)
// HDR10SCRGB ... Slower 64-bit/pixel scRGB (linear FP16, Rec.709 primaries with possible negative color)
// 709 .......... Rec.709, sRGB, Gamma 2.2, or traditional displays with Rec.709-like primaries
//------------------------------------------------------------------------------------------------------------------------------
// FREESYNC2 VARIABLES
// ===================
// fs2R ..... Queried xy coordinates for display red
// fs2G ..... Queried xy coordinates for display green
// fs2B ..... Queried xy coordinates for display blue
// fs2W ..... Queried xy coordinates for display white point
// fs2S ..... Computed by LpmFs2ScrgbScalar()
//------------------------------------------------------------------------------------------------------------------------------
// HDR10 VARIABLES
// ===============
// hdr10S ... Use LpmHdr10<Raw|Scrgb>Scalar() to compute this value
//==============================================================================================================================
// CON SOFT CON2 CLIP SCALEONLY
#define LPM_CONFIG_FS2RAW_709 false,false,true, true, false
#define LPM_COLORS_FS2RAW_709 lpmCol709R,lpmCol709G,lpmCol709B,lpmColD65,\
fs2R,fs2G,fs2B,fs2W,\
fs2R,fs2G,fs2B,fs2W,1.0
//------------------------------------------------------------------------------------------------------------------------------
// FreeSync2 min-spec is larger than sRGB, so using 709 primaries all the way through as an optimization.
// CON SOFT CON2 CLIP SCALEONLY
#define LPM_CONFIG_FS2SCRGB_709 false,false,false,false,true
#define LPM_COLORS_FS2SCRGB_709 lpmCol709R,lpmCol709G,lpmCol709B,lpmColD65,\
lpmCol709R,lpmCol709G,lpmCol709B,lpmColD65,\
lpmCol709R,lpmCol709G,lpmCol709B,lpmColD65,fs2S
//------------------------------------------------------------------------------------------------------------------------------
// CON SOFT CON2 CLIP SCALEONLY
#define LPM_CONFIG_HDR10RAW_709 false,false,true, true, false
#define LPM_COLORS_HDR10RAW_709 lpmCol709R,lpmCol709G,lpmCol709B,lpmColD65,\
lpmCol709R,lpmCol709G,lpmCol709B,lpmColD65,\
lpmCol2020R,lpmCol2020G,lpmCol2020B,lpmColD65,hdr10S
//------------------------------------------------------------------------------------------------------------------------------
// CON SOFT CON2 CLIP SCALEONLY
#define LPM_CONFIG_HDR10SCRGB_709 false,false,false,false,true
#define LPM_COLORS_HDR10SCRGB_709 lpmCol709R,lpmCol709G,lpmCol709B,lpmColD65,\
lpmCol709R,lpmCol709G,lpmCol709B,lpmColD65,\
lpmCol709R,lpmCol709G,lpmCol709B,lpmColD65,hdr10S
//------------------------------------------------------------------------------------------------------------------------------
// CON SOFT CON2 CLIP SCALEONLY
#define LPM_CONFIG_709_709 false,false,false,false,false
#define LPM_COLORS_709_709 lpmCol709R,lpmCol709G,lpmCol709B,lpmColD65,\
lpmCol709R,lpmCol709G,lpmCol709B,lpmColD65,\
lpmCol709R,lpmCol709G,lpmCol709B,lpmColD65,1.0
//==============================================================================================================================
// CON SOFT CON2 CLIP SCALEONLY
#define LPM_CONFIG_FS2RAW_P3 true, true, false,false,false
#define LPM_COLORS_FS2RAW_P3 lpmColP3R,lpmColP3G,lpmColP3B,lpmColD65,\
fs2R,fs2G,fs2B,fs2W,\
fs2R,fs2G,fs2B,fs2W,1.0
//------------------------------------------------------------------------------------------------------------------------------
// FreeSync2 gamut can be smaller than P3.
// CON SOFT CON2 CLIP SCALEONLY
#define LPM_CONFIG_FS2SCRGB_P3 true, true, true, false,false
#define LPM_COLORS_FS2SCRGB_P3 lpmColP3R,lpmColP3G,lpmColP3B,lpmColD65,\
fs2R,fs2G,fs2B,fs2W,\
lpmCol709R,lpmCol709G,lpmCol709B,lpmColD65,fs2S
//------------------------------------------------------------------------------------------------------------------------------
// CON SOFT CON2 CLIP SCALEONLY
#define LPM_CONFIG_HDR10RAW_P3 false,false,true, true, false
#define LPM_COLORS_HDR10RAW_P3 lpmColP3R,lpmColP3G,lpmColP3B,lpmColD65,\
lpmColP3R,lpmColP3G,lpmColP3B,lpmColD65,\
lpmCol2020R,lpmCol2020G,lpmCol2020B,lpmColD65,hdr10S
//------------------------------------------------------------------------------------------------------------------------------
// CON SOFT CON2 CLIP SCALEONLY
#define LPM_CONFIG_HDR10SCRGB_P3 false,false,true, false,false
#define LPM_COLORS_HDR10SCRGB_P3 lpmColP3R,lpmColP3G,lpmColP3B,lpmColD65,\
lpmCol2020R,lpmCol2020G,lpmCol2020B,lpmColD65,\
lpmCol709R,lpmCol709G,lpmCol709B,lpmColD65,hdr10S
//------------------------------------------------------------------------------------------------------------------------------
// CON SOFT CON2 CLIP SCALEONLY
#define LPM_CONFIG_709_P3 true, true, false,false,false
#define LPM_COLORS_709_P3 lpmColP3R,lpmColP3G,lpmColP3B,lpmColD65,\
lpmCol709R,lpmCol709G,lpmCol709B,lpmColD65,\
lpmCol709R,lpmCol709G,lpmCol709B,lpmColD65,1.0
//==============================================================================================================================
// CON SOFT CON2 CLIP SCALEONLY
#define LPM_CONFIG_FS2RAW_2020 true, true, false,false,false
#define LPM_COLORS_FS2RAW_2020 lpmCol2020R,lpmCol2020G,lpmCol2020B,lpmColD65,\
fs2R,fs2G,fs2B,fs2W,\
fs2R,fs2G,fs2B,fs2W,1.0
//------------------------------------------------------------------------------------------------------------------------------
// CON SOFT CON2 CLIP SCALEONLY
#define LPM_CONFIG_FS2SCRGB_2020 true, true, true, false,false
#define LPM_COLORS_FS2SCRGB_2020 lpmCol2020R,lpmCol2020G,lpmCol2020B,lpmColD65,\
fs2R,fs2G,fs2B,fs2W,\
lpmCol709R,lpmCol709G,lpmCol709B,lpmColD65,fs2S
//------------------------------------------------------------------------------------------------------------------------------
// CON SOFT CON2 CLIP SCALEONLY
#define LPM_CONFIG_HDR10RAW_2020 false,false,false,false,false
#define LPM_COLORS_HDR10RAW_2020 lpmCol2020R,lpmCol2020G,lpmCol2020B,lpmColD65,\
lpmCol2020R,lpmCol2020G,lpmCol2020B,lpmColD65,\
lpmCol2020R,lpmCol2020G,lpmCol2020B,lpmColD65,hdr10S
//------------------------------------------------------------------------------------------------------------------------------
// CON SOFT CON2 CLIP SCALEONLY
#define LPM_CONFIG_HDR10SCRGB_2020 false,false,true, false,false
#define LPM_COLORS_HDR10SCRGB_2020 lpmCol2020R,lpmCol2020G,lpmCol2020B,lpmColD65,\
lpmCol2020R,lpmCol2020G,lpmCol2020B,lpmColD65,\
lpmCol709R,lpmCol709G,lpmCol709B,lpmColD65,hdr10S
//------------------------------------------------------------------------------------------------------------------------------
// CON SOFT CON2 CLIP SCALEONLY
#define LPM_CONFIG_709_2020 true, true, false,false,false
#define LPM_COLORS_709_2020 lpmCol2020R,lpmCol2020G,lpmCol2020B,lpmColD65,\
lpmCol709R,lpmCol709G,lpmCol709B,lpmColD65,\
lpmCol709R,lpmCol709G,lpmCol709B,lpmColD65,1.0
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
// SETUP CONTROL BLOCK
//------------------------------------------------------------------------------------------------------------------------------
// This is used to control LpmFilter*() functions.
//------------------------------------------------------------------------------------------------------------------------------
// CONTROL BLOCK
// =============
// LPM has an optimized constant|literal control block of 384 bytes.
// This control block should be 128-byte aligned (future-proof in case constant cache lines end up at 128-bytes/line).
// Much of this block is reserved for future usage, and to establish good alignment.
// The compiler will dead-code eliminate anything not used (no extra overhead).
// Content ordered and grouped for best performance in the common cases.
// Control block has both 32-bit and 16-bit values so that optimizations are possible on platforms supporting faster 16-bit.
//------------------------------------------------------------------------------------------------------------------------------
// 32-BIT PART
// _______R________ _______G________ _______B________ _______A________
// map0 saturation.r saturation.g saturation.b contrast
// map1 toneScaleBias.x toneScaleBias.y lumaT.r lumaT.g
// map2 lumaT.b crosstalk.r crosstalk.g crosstalk.b
// map3 rcpLumaT.r rcpLumaT.g rcpLumaT.b con2R.r
// --
// map4 con2R.g con2R.b con2G.r con2G.g
// map5 con2G.b con2B.r con2B.g con2B.b
// map6 shoulderContrast lumaW.r lumaW.g lumaW.b
// map7 softGap.x softGap.y conR.r conR.g
// --
// map8 conR.b conG.r conG.g conG.b
// map9 conB.r conB.g conB.b (reserved)
// mapA (reserved) (reserved) (reserved) (reserved)
// mapB (reserved) (reserved) (reserved) (reserved)
// --
// mapC (reserved) (reserved) (reserved) (reserved)
// mapD (reserved) (reserved) (reserved) (reserved)
// mapE (reserved) (reserved) (reserved) (reserved)
// mapF (reserved) (reserved) (reserved) (reserved)
// --
// PACKED 16-BIT PART
// _______X________ _______Y________ _______X________ _______Y________
// mapG.rg saturation.r saturation.g saturation.b contrast
// mapG.ba toneScaleBias.x toneScaleBias.y lumaT.r lumaT.g
// mapH.rg lumaT.b crosstalk.r crosstalk.g crosstalk.b
// mapH.ba rcpLumaT.r rcpLumaT.g rcpLumaT.b con2R.r
// mapI.rg con2R.g con2R.b con2G.r con2G.g
// mapI.ba con2G.b con2B.r con2B.g con2B.b
// mapJ.rg shoulderContrast lumaW.r lumaW.g lumaW.b
// mapJ.ba softGap.x softGap.y conR.r conR.g
// --
// mapK.rg conR.b conG.r conG.g conG.b
// mapK.ba conB.r conB.g conB.b (reserved)
// mapL.rb (reserved) (reserved) (reserved) (reserved)
// mapL.ba (reserved) (reserved) (reserved) (reserved)
// mapM.rb (reserved) (reserved) (reserved) (reserved)
// mapM.ba (reserved) (reserved) (reserved) (reserved)
// mapN.rb (reserved) (reserved) (reserved) (reserved)
// mapN.ba (reserved) (reserved) (reserved) (reserved)
//------------------------------------------------------------------------------------------------------------------------------
// IDEAS
// =====
// - Some of this might benefit from double precision on the GPU.
// - Can scaling factor in con2 be used to improve FP16 precision?
// - Verify lumaW stuff!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
//==============================================================================================================================
void LpmSetup(
// Control block output.
out AU4 map0,out AU4 map1,out AU4 map2,out AU4 map3,out AU4 map4,out AU4 map5,out AU4 map6,out AU4 map7,
out AU4 map8,out AU4 map9,out AU4 mapA,out AU4 mapB,out AU4 mapC,out AU4 mapD,out AU4 mapE,out AU4 mapF,
out AU4 mapG,out AU4 mapH,out AU4 mapI,out AU4 mapJ,out AU4 mapK,out AU4 mapL,out AU4 mapM,out AU4 mapN,
// Path control.
AP1 shoulder, // Use optional extra shoulderContrast tuning (set to false if shoulderContrast is 1.0).
// Prefab start, "LPM_CONFIG_".
AP1 con, // Use first RGB conversion matrix, if 'soft' then 'con' must be true also.
AP1 soft, // Use soft gamut mapping.
AP1 con2, // Use last RGB conversion matrix.
AP1 clip, // Use clipping in last conversion matrix.
AP1 scaleOnly, // Scale only for last conversion matrix (used for 709 HDR to scRGB).
// Gamut control, "LPM_COLORS_".
AF2 xyRedW,AF2 xyGreenW,AF2 xyBlueW,AF2 xyWhiteW, // Chroma coordinates for working color space.
AF2 xyRedO,AF2 xyGreenO,AF2 xyBlueO,AF2 xyWhiteO, // For the output color space.
AF2 xyRedC,AF2 xyGreenC,AF2 xyBlueC,AF2 xyWhiteC,AF1 scaleC, // For the output container color space (if con2).
// Prefab end.
 AF1 softGap, // Range of 0 to a little over zero; controls the size of the feather region in out-of-gamut mapping, 0=clip.
// Tonemapping control.
AF1 hdrMax, // Maximum input value.
AF1 exposure, // Number of stops between 'hdrMax' and 18% mid-level on input.
AF1 contrast, // Input range {0.0 (no extra contrast) to 1.0 (maximum contrast)}.
AF1 shoulderContrast, // Shoulder shaping, 1.0 = no change (fast path).
 AF3 saturation, // A per-channel adjustment: <0 decreases, 0=no change, >0 increases.
AF3 crosstalk){ // One channel must be 1.0, the rest can be <= 1.0 but not zero.
//-----------------------------------------------------------------------------------------------------------------------------
 // Convert to 1.0-based contrast (1.0 = no extra contrast).
contrast+=1.0;
// Saturation is based on contrast.
saturation+=contrast;
//-----------------------------------------------------------------------------------------------------------------------------
AF1 midIn=hdrMax*0.18/exp2(exposure);
AF1 midOut=0.18;
//-----------------------------------------------------------------------------------------------------------------------------
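 // The scale and bias below solve the tone curve
 //   T(x) = x^contrast / (x^(contrast*shoulderContrast)*toneScaleBias.x + toneScaleBias.y)
 // so that T(midIn) = midOut and T(hdrMax) = 1.0.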
AF2 toneScaleBias;
AF1 cs=contrast*shoulderContrast;
AF1 z0=-pow(midIn,contrast);
AF1 z1=pow(hdrMax,cs)*pow(midIn,contrast);
AF1 z2=pow(hdrMax,contrast)*pow(midIn,cs)*midOut;
AF1 z3=pow(hdrMax,cs)*midOut;
AF1 z4=pow(midIn,cs)*midOut;
toneScaleBias.x=-((z0+(midOut*(z1-z2))/(z3-z4))/z4);
//-----------------------------------------------------------------------------------------------------------------------------
AF1 w0=pow(hdrMax,cs)*pow(midIn,contrast);
AF1 w1=pow(hdrMax,contrast)*pow(midIn,cs)*midOut;
AF1 w2=pow(hdrMax,cs)*midOut;
AF1 w3=pow(midIn,cs)*midOut;
toneScaleBias.y=(w0-w1)/(w2-w3);
//-----------------------------------------------------------------------------------------------------------------------------
AF3 lumaW;AF3 rgbToXyzXW;AF3 rgbToXyzYW;AF3 rgbToXyzZW;
LpmColRgbToXyz(rgbToXyzXW,rgbToXyzYW,rgbToXyzZW,xyRedW,xyGreenW,xyBlueW,xyWhiteW);
// Use the Y vector of the matrix for the associated luma coef.
// For safety, make sure the vector sums to 1.0.
lumaW=rgbToXyzYW;
lumaW*=ARcpF1(lumaW.r+lumaW.g+lumaW.b);
//-----------------------------------------------------------------------------------------------------------------------------
 // The 'lumaT' for crosstalk mapping is based on the output color space when soft conversion is used, otherwise on the working color space.
AF3 lumaT;AF3 rgbToXyzXO;AF3 rgbToXyzYO;AF3 rgbToXyzZO;
LpmColRgbToXyz(rgbToXyzXO,rgbToXyzYO,rgbToXyzZO,xyRedO,xyGreenO,xyBlueO,xyWhiteO);
if(soft)lumaT=rgbToXyzYO;else lumaT=rgbToXyzYW;
lumaT*=ARcpF1(lumaT.r+lumaT.g+lumaT.b);
AF3 rcpLumaT=ARcpF3(lumaT);
//-----------------------------------------------------------------------------------------------------------------------------
AF2 softGap2;
if(soft)softGap2=AF2(softGap,(1.0-softGap)/(softGap*0.693147180559));
#ifdef A_HLSL
else softGap2=AF2_(0.0);
#endif
//-----------------------------------------------------------------------------------------------------------------------------
// First conversion is always working to output.
AF3 conR,conG,conB;
if(con){AF3 xyzToRgbRO;AF3 xyzToRgbGO;AF3 xyzToRgbBO;
LpmMatInv3x3(xyzToRgbRO,xyzToRgbGO,xyzToRgbBO,rgbToXyzXO,rgbToXyzYO,rgbToXyzZO);
LpmMatMul3x3(conR,conG,conB,xyzToRgbRO,xyzToRgbGO,xyzToRgbBO,rgbToXyzXW,rgbToXyzYW,rgbToXyzZW);}
#ifdef A_HLSL
else{conR=conG=conB=AF3_(0.0);}
#endif
//-----------------------------------------------------------------------------------------------------------------------------
// The last conversion is always output to container.
AF3 con2R,con2G,con2B;
if(con2){AF3 rgbToXyzXC;AF3 rgbToXyzYC;AF3 rgbToXyzZC;
LpmColRgbToXyz(rgbToXyzXC,rgbToXyzYC,rgbToXyzZC,xyRedC,xyGreenC,xyBlueC,xyWhiteC);
AF3 xyzToRgbRC;AF3 xyzToRgbGC;AF3 xyzToRgbBC;
LpmMatInv3x3(xyzToRgbRC,xyzToRgbGC,xyzToRgbBC,rgbToXyzXC,rgbToXyzYC,rgbToXyzZC);
LpmMatMul3x3(con2R,con2G,con2B,xyzToRgbRC,xyzToRgbGC,xyzToRgbBC,rgbToXyzXO,rgbToXyzYO,rgbToXyzZO);
con2R*=scaleC;con2G*=scaleC;con2B*=scaleC;}
#ifdef A_HLSL
else{con2R=con2G=con2B=AF3_(0.0);}
#endif
if(scaleOnly)con2R.r=scaleC;
//-----------------------------------------------------------------------------------------------------------------------------
// Debug force 16-bit precision for the 32-bit inputs.
#ifdef LPM_DEBUG_FORCE_16BIT_PRECISION
saturation=AF3(AH3(saturation));
contrast=AF1(AH1(contrast));
toneScaleBias=AF2(AH2(toneScaleBias));
lumaT=AF3(AH3(lumaT));
crosstalk=AF3(AH3(crosstalk));
rcpLumaT=AF3(AH3(rcpLumaT));
con2R=AF3(AH3(con2R));
con2G=AF3(AH3(con2G));
con2B=AF3(AH3(con2B));
shoulderContrast=AF1(AH1(shoulderContrast));
lumaW=AF3(AH3(lumaW));
softGap2=AF2(AH2(softGap2));
conR=AF3(AH3(conR));
conG=AF3(AH3(conG));
conB=AF3(AH3(conB));
#endif
//-----------------------------------------------------------------------------------------------------------------------------
// Pack into control block.
map0.rgb=AU3_AF3(saturation);map0.a=AU1_AF1(contrast);
map1.rg=AU2_AF2(toneScaleBias);map1.ba=AU2_AF2(lumaT.rg);
map2.r=AU1_AF1(lumaT.b);map2.gba=AU3_AF3(crosstalk);
map3.rgb=AU3_AF3(rcpLumaT);map3.a=AU1_AF1(con2R.r);
map4.rg=AU2_AF2(con2R.gb);map4.ba=AU2_AF2(con2G.rg);
map5.r=AU1_AF1(con2R.b);map5.gba=AU3_AF3(con2B);
map6.r=AU1_AF1(shoulderContrast);map6.gba=AU3_AF3(lumaW);
map7.rg=AU2_AF2(softGap2);map7.ba=AU2_AF2(conR.rg);
map8.r=AU1_AF1(conR.b);map8.gba=AU3_AF3(conG);
map9.rgb=AU3_AF3(conB);
#ifdef A_HLSL
map9.a=0.0;mapA=mapB=mapC=mapD=mapE=mapF=AU4_(0);
#endif
//-----------------------------------------------------------------------------------------------------------------------------
#ifdef A_HALF
// Packed 16-bit part of control block.
mapG.rg=AU2_AH4(AH4(AH1(saturation.r),AH1(saturation.g),AH1(saturation.b),AH1(contrast)));
mapG.ba=AU2_AH4(AH4(AH1(toneScaleBias.x),AH1(toneScaleBias.y),AH1(lumaT.r),AH1(lumaT.g)));
mapH.rg=AU2_AH4(AH4(AH1(lumaT.b),AH1(crosstalk.r),AH1(crosstalk.g),AH1(crosstalk.b)));
mapH.ba=AU2_AH4(AH4(AH1(rcpLumaT.r),AH1(rcpLumaT.g),AH1(rcpLumaT.b),AH1(con2R.r)));
mapI.rg=AU2_AH4(AH4(AH1(con2R.g),AH1(con2R.b),AH1(con2G.r),AH1(con2G.g)));
mapI.ba=AU2_AH4(AH4(AH1(con2G.b),AH1(con2B.r),AH1(con2B.g),AH1(con2B.b)));
mapJ.rg=AU2_AH4(AH4(AH1(shoulderContrast),AH1(lumaW.r),AH1(lumaW.g),AH1(lumaW.b)));
mapJ.ba=AU2_AH4(AH4(AH1(softGap2.x),AH1(softGap2.y),AH1(conR.r),AH1(conR.g)));
mapK.rg=AU2_AH4(AH4(AH1(conR.b),AH1(conG.r),AH1(conG.g),AH1(conG.b)));
mapK.ba=AU2_AH4(AH4(AH1(conB.r),AH1(conB.g),AH1(conB.b),AH1(0.0)));
#ifdef A_HLSL
mapL=mapM=mapN=AU4_(0);
#endif
#endif
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
// MAPPER
//------------------------------------------------------------------------------------------------------------------------------
// Do not call this directly; call the LpmFilter*() functions instead.
// This gets reconfigured based on inputs for all the various usage cases.
// Some of this has been explicitly ordered to increase precision.
//------------------------------------------------------------------------------------------------------------------------------
// IDEAS
// =====
// - Use med3() for soft falloff and for [A] color conversions.
// - Retry FP16 PQ conversion with different input range.
// - Possibly skip some work if entire wave is in gamut.
//==============================================================================================================================
// Use LpmFilter() instead of this.
void LpmMap(inout AF1 colorR,inout AF1 colorG,inout AF1 colorB,// Input and output color.
AF3 lumaW, // Luma coef for RGB working space.
AF3 lumaT, // Luma coef for crosstalk mapping (can be working or output color-space depending on usage case).
AF3 rcpLumaT, // 1/lumaT.
AF3 saturation, // Saturation powers.
AF1 contrast, // Contrast power.
AP1 shoulder, // Using shoulder tuning (should be a compile-time immediate).
AF1 shoulderContrast, // Shoulder power.
AF2 toneScaleBias, // Other tonemapping parameters.
AF3 crosstalk, // Crosstalk scaling for over-exposure color shaping.
AP1 con, // Use first RGB conversion matrix (should be a compile-time immediate), if 'soft' then 'con' must be true also.
AF3 conR,AF3 conG,AF3 conB, // RGB conversion matrix (working to output space conversion).
AP1 soft, // Use soft gamut mapping (should be a compile-time immediate).
AF2 softGap, // {x,(1-x)/(x*0.693147180559)}, where 'x' is gamut mapping soft fall-off amount.
AP1 con2, // Use last RGB conversion matrix (should be a compile-time immediate).
AP1 clip, // Use clipping on last conversion matrix.
AP1 scaleOnly, // Do scaling only (special case for 709 HDR to scRGB).
AF3 con2R,AF3 con2G,AF3 con2B){ // Secondary RGB conversion matrix.
//------------------------------------------------------------------------------------------------------------------------------
// Grab original RGB ratio (RCP, 3x MUL, MAX3).
AF1 rcpMax=ARcpF1(AMax3F1(colorR,colorG,colorB));AF1 ratioR=colorR*rcpMax;AF1 ratioG=colorG*rcpMax;AF1 ratioB=colorB*rcpMax;
// Apply saturation, ratio must be max 1.0 for this to work right (3x EXP2, 3x LOG2, 3x MUL).
ratioR=pow(ratioR,AF1_(saturation.r));ratioG=pow(ratioG,AF1_(saturation.g));ratioB=pow(ratioB,AF1_(saturation.b));
//------------------------------------------------------------------------------------------------------------------------------
// Tonemap luma, note this uses the original color, so saturation is luma preserving.
 // If 'soft' is not used, 'lumaT' then holds the working-space luma and is reused here to avoid extra constants.
// Note 'soft' should be a compile-time immediate (so no branch) (3x MAD).
AF1 luma;if(soft)luma=colorG*AF1_(lumaW.g)+(colorR*AF1_(lumaW.r)+(colorB*AF1_(lumaW.b)));
else luma=colorG*AF1_(lumaT.g)+(colorR*AF1_(lumaT.r)+(colorB*AF1_(lumaT.b)));
luma=pow(luma,AF1_(contrast)); // (EXP2, LOG2, MUL).
AF1 lumaShoulder=shoulder?pow(luma,AF1_(shoulderContrast)):luma; // Optional (EXP2, LOG2, MUL).
luma=luma*ARcpF1(lumaShoulder*AF1_(toneScaleBias.x)+AF1_(toneScaleBias.y)); // (MAD, MUL, RCP).
//------------------------------------------------------------------------------------------------------------------------------
// If running soft clipping (this should be a compile-time immediate so branch will not exist).
if(soft){
// The 'con' should be a compile-time immediate so branch will not exist.
  // Use of 'con' is implied when soft-falloff is enabled, but the check is kept here to make bugs easier to find.
if(con){
// Converting ratio instead of color. Change of primaries (9x MAD).
colorR=ratioR;colorG=ratioG;colorB=ratioB;
ratioR=colorR*AF1_(conR.r)+(colorG*AF1_(conR.g)+(colorB*AF1_(conR.b)));
ratioG=colorG*AF1_(conG.g)+(colorR*AF1_(conG.r)+(colorB*AF1_(conG.b)));
ratioB=colorB*AF1_(conB.b)+(colorG*AF1_(conB.g)+(colorR*AF1_(conB.r)));
// Convert ratio to max 1 again (RCP, 3x MUL, MAX3).
rcpMax=ARcpF1(AMax3F1(ratioR,ratioG,ratioB));ratioR*=rcpMax;ratioG*=rcpMax;ratioB*=rcpMax;}
//------------------------------------------------------------------------------------------------------------------------------
// Absolute gamut mapping converted to soft falloff (maintains max 1 property).
  // g = gap: outputs {0 to g} are used for the {-inf to 0} input range,
  //          outputs {g to 1} are used for the {0 to 1} input range.
// x >= 0 := y = x * (1-g) + g
// x < 0 := g * 2^(x*h)
// Where h=(1-g)/(g*log(2)) --- where log() is the natural log
// The {g,h} above is passed in as softGap.
// Soft falloff (3x MIN, 3x MAX, 9x MAD, 3x EXP2).
ratioR=min(max(AF1_(softGap.x),ASatF1(ratioR*AF1_(-softGap.x)+ratioR)),
ASatF1(AF1_(softGap.x)*exp2(ratioR*AF1_(softGap.y))));
ratioG=min(max(AF1_(softGap.x),ASatF1(ratioG*AF1_(-softGap.x)+ratioG)),
ASatF1(AF1_(softGap.x)*exp2(ratioG*AF1_(softGap.y))));
ratioB=min(max(AF1_(softGap.x),ASatF1(ratioB*AF1_(-softGap.x)+ratioB)),
ASatF1(AF1_(softGap.x)*exp2(ratioB*AF1_(softGap.y))));}
//------------------------------------------------------------------------------------------------------------------------------
// Compute ratio scaler required to hit target luma (4x MAD, 1 RCP).
AF1 lumaRatio=ratioR*AF1_(lumaT.r)+ratioG*AF1_(lumaT.g)+ratioB*AF1_(lumaT.b);
 // This is limited so that it does not clip.
AF1 ratioScale=ASatF1(luma*ARcpF1(lumaRatio));
// Assume in gamut, compute output color (3x MAD).
colorR=ASatF1(ratioR*ratioScale);colorG=ASatF1(ratioG*ratioScale);colorB=ASatF1(ratioB*ratioScale);
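 // The remaining steps restore the tonemapped luma: channels that clipped give up luma, and 'crosstalk' controls how far
 // the other channels may rise toward white to make up for it (over-exposure desaturation shaping).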
 // Per-channel capacity to increase the value (3x MAD).
 // This folds in the crosstalk factor to avoid multiplies later.
// '(1.0-ratio)*crosstalk' optimized to '-crosstalk*ratio+crosstalk'
AF1 capR=AF1_(-crosstalk.r)*colorR+AF1_(crosstalk.r);
AF1 capG=AF1_(-crosstalk.g)*colorG+AF1_(crosstalk.g);
AF1 capB=AF1_(-crosstalk.b)*colorB+AF1_(crosstalk.b);
// Compute amount of luma needed to add to non-clipped channels to make up for clipping (3x MAD).
AF1 lumaAdd=ASatF1((-colorB)*AF1_(lumaT.b)+((-colorR)*AF1_(lumaT.r)+((-colorG)*AF1_(lumaT.g)+luma)));
// Amount to increase keeping over-exposure ratios constant and possibly exceeding clipping point (4x MAD, 1 RCP).
AF1 t=lumaAdd*ARcpF1(capG*AF1_(lumaT.g)+(capR*AF1_(lumaT.r)+(capB*AF1_(lumaT.b))));
// Add amounts to base color but clip (3x MAD).
colorR=ASatF1(t*capR+colorR);colorG=ASatF1(t*capG+colorG);colorB=ASatF1(t*capB+colorB);
// Compute amount of luma needed to add to non-clipped channel to make up for clipping (3x MAD).
lumaAdd=ASatF1((-colorB)*AF1_(lumaT.b)+((-colorR)*AF1_(lumaT.r)+((-colorG)*AF1_(lumaT.g)+luma)));
// Add to last channel (3x MAD).
colorR=ASatF1(lumaAdd*AF1_(rcpLumaT.r)+colorR);
colorG=ASatF1(lumaAdd*AF1_(rcpLumaT.g)+colorG);
colorB=ASatF1(lumaAdd*AF1_(rcpLumaT.b)+colorB);
//------------------------------------------------------------------------------------------------------------------------------
// The 'con2' should be a compile-time immediate so branch will not exist.
// Last optional place to convert from smaller to larger gamut (or do clipped conversion).
 // For the non-soft-falloff case, doing this after all other mapping avoids an intermediate re-scaling of the ratio to max 1.0.
if(con2){
// Change of primaries (9x MAD).
ratioR=colorR;ratioG=colorG;ratioB=colorB;
if(clip){
colorR=ASatF1(ratioR*AF1_(con2R.r)+(ratioG*AF1_(con2R.g)+(ratioB*AF1_(con2R.b))));
colorG=ASatF1(ratioG*AF1_(con2G.g)+(ratioR*AF1_(con2G.r)+(ratioB*AF1_(con2G.b))));
colorB=ASatF1(ratioB*AF1_(con2B.b)+(ratioG*AF1_(con2B.g)+(ratioR*AF1_(con2B.r))));}
else{
colorR=ratioR*AF1_(con2R.r)+(ratioG*AF1_(con2R.g)+(ratioB*AF1_(con2R.b)));
colorG=ratioG*AF1_(con2G.g)+(ratioR*AF1_(con2G.r)+(ratioB*AF1_(con2G.b)));
colorB=ratioB*AF1_(con2B.b)+(ratioG*AF1_(con2B.g)+(ratioR*AF1_(con2B.r)));}}
//------------------------------------------------------------------------------------------------------------------------------
if(scaleOnly){colorR*=AF1_(con2R.r);colorG*=AF1_(con2R.r);colorB*=AF1_(con2R.r);}}
//==============================================================================================================================
// Packed FP16 version, see non-packed version above for all comments.
// Use LpmFilterH() instead of this.
void LpmMapH(inout AH2 colorR,inout AH2 colorG,inout AH2 colorB,AH3 lumaW,AH3 lumaT,AH3 rcpLumaT,AH3 saturation,AH1 contrast,
AP1 shoulder,AH1 shoulderContrast,AH2 toneScaleBias,AH3 crosstalk,AP1 con,AH3 conR,AH3 conG,AH3 conB,AP1 soft,AH2 softGap,
AP1 con2,AP1 clip,AP1 scaleOnly,AH3 con2R,AH3 con2G,AH3 con2B){
//------------------------------------------------------------------------------------------------------------------------------
AH2 rcpMax=ARcpH2(AMax3H2(colorR,colorG,colorB));AH2 ratioR=colorR*rcpMax;AH2 ratioG=colorG*rcpMax;AH2 ratioB=colorB*rcpMax;
ratioR=pow(ratioR,AH2_(saturation.r));ratioG=pow(ratioG,AH2_(saturation.g));ratioB=pow(ratioB,AH2_(saturation.b));
//------------------------------------------------------------------------------------------------------------------------------
AH2 luma;if(soft)luma=colorG*AH2_(lumaW.g)+(colorR*AH2_(lumaW.r)+(colorB*AH2_(lumaW.b)));
else luma=colorG*AH2_(lumaT.g)+(colorR*AH2_(lumaT.r)+(colorB*AH2_(lumaT.b)));
luma=pow(luma,AH2_(contrast));
AH2 lumaShoulder=shoulder?pow(luma,AH2_(shoulderContrast)):luma;
luma=luma*ARcpH2(lumaShoulder*AH2_(toneScaleBias.x)+AH2_(toneScaleBias.y));
//------------------------------------------------------------------------------------------------------------------------------
if(soft){
if(con){
colorR=ratioR;colorG=ratioG;colorB=ratioB;
ratioR=colorR*AH2_(conR.r)+(colorG*AH2_(conR.g)+(colorB*AH2_(conR.b)));
ratioG=colorG*AH2_(conG.g)+(colorR*AH2_(conG.r)+(colorB*AH2_(conG.b)));
ratioB=colorB*AH2_(conB.b)+(colorG*AH2_(conB.g)+(colorR*AH2_(conB.r)));
rcpMax=ARcpH2(AMax3H2(ratioR,ratioG,ratioB));ratioR*=rcpMax;ratioG*=rcpMax;ratioB*=rcpMax;}
//------------------------------------------------------------------------------------------------------------------------------
ratioR=min(max(AH2_(softGap.x),ASatH2(ratioR*AH2_(-softGap.x)+ratioR)),
ASatH2(AH2_(softGap.x)*exp2(ratioR*AH2_(softGap.y))));
ratioG=min(max(AH2_(softGap.x),ASatH2(ratioG*AH2_(-softGap.x)+ratioG)),
ASatH2(AH2_(softGap.x)*exp2(ratioG*AH2_(softGap.y))));
ratioB=min(max(AH2_(softGap.x),ASatH2(ratioB*AH2_(-softGap.x)+ratioB)),
ASatH2(AH2_(softGap.x)*exp2(ratioB*AH2_(softGap.y))));}
//------------------------------------------------------------------------------------------------------------------------------
AH2 lumaRatio=ratioR*AH2_(lumaT.r)+ratioG*AH2_(lumaT.g)+ratioB*AH2_(lumaT.b);
AH2 ratioScale=ASatH2(luma*ARcpH2(lumaRatio));
colorR=ASatH2(ratioR*ratioScale);colorG=ASatH2(ratioG*ratioScale);colorB=ASatH2(ratioB*ratioScale);
AH2 capR=AH2_(-crosstalk.r)*colorR+AH2_(crosstalk.r);
AH2 capG=AH2_(-crosstalk.g)*colorG+AH2_(crosstalk.g);
AH2 capB=AH2_(-crosstalk.b)*colorB+AH2_(crosstalk.b);
AH2 lumaAdd=ASatH2((-colorB)*AH2_(lumaT.b)+((-colorR)*AH2_(lumaT.r)+((-colorG)*AH2_(lumaT.g)+luma)));
AH2 t=lumaAdd*ARcpH2(capG*AH2_(lumaT.g)+(capR*AH2_(lumaT.r)+(capB*AH2_(lumaT.b))));
colorR=ASatH2(t*capR+colorR);colorG=ASatH2(t*capG+colorG);colorB=ASatH2(t*capB+colorB);
lumaAdd=ASatH2((-colorB)*AH2_(lumaT.b)+((-colorR)*AH2_(lumaT.r)+((-colorG)*AH2_(lumaT.g)+luma)));
colorR=ASatH2(lumaAdd*AH2_(rcpLumaT.r)+colorR);
colorG=ASatH2(lumaAdd*AH2_(rcpLumaT.g)+colorG);
colorB=ASatH2(lumaAdd*AH2_(rcpLumaT.b)+colorB);
//------------------------------------------------------------------------------------------------------------------------------
if(con2){
ratioR=colorR;ratioG=colorG;ratioB=colorB;
if(clip){
colorR=ASatH2(ratioR*AH2_(con2R.r)+(ratioG*AH2_(con2R.g)+(ratioB*AH2_(con2R.b))));
colorG=ASatH2(ratioG*AH2_(con2G.g)+(ratioR*AH2_(con2G.r)+(ratioB*AH2_(con2G.b))));
colorB=ASatH2(ratioB*AH2_(con2B.b)+(ratioG*AH2_(con2B.g)+(ratioR*AH2_(con2B.r))));}
else{
colorR=ratioR*AH2_(con2R.r)+(ratioG*AH2_(con2R.g)+(ratioB*AH2_(con2R.b)));
colorG=ratioG*AH2_(con2G.g)+(ratioR*AH2_(con2G.r)+(ratioB*AH2_(con2G.b)));
colorB=ratioB*AH2_(con2B.b)+(ratioG*AH2_(con2B.g)+(ratioR*AH2_(con2B.r)));}}
//------------------------------------------------------------------------------------------------------------------------------
if(scaleOnly){colorR*=AH2_(con2R.r);colorG*=AH2_(con2R.r);colorB*=AH2_(con2R.r);}}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
// FILTER
//------------------------------------------------------------------------------------------------------------------------------
// Entry point for per-pixel color tone+gamut mapping.
// Input is linear color {0 to hdrMax} ranged.
// Output is linear color {0 to 1} ranged, except for scRGB where outputs can end up negative or larger than one.
//==============================================================================================================================
// 32-bit entry point.
void LpmFilter(
// Input and output color.
inout AF1 colorR,inout AF1 colorG,inout AF1 colorB,
// Path control should all be compile-time immediates.
AP1 shoulder, // Using shoulder tuning.
// Prefab "LPM_CONFIG_" start, use the same as used for LpmSetup().
AP1 con, // Use first RGB conversion matrix, if 'soft' then 'con' must be true also.
AP1 soft, // Use soft gamut mapping.
AP1 con2, // Use last RGB conversion matrix.
AP1 clip, // Use clipping in last conversion matrix.
AP1 scaleOnly, // Scale only for last conversion matrix (used for 709 HDR to scRGB).
// Prefab end.
// Control block.
AU4 map0,AU4 map1,AU4 map2,AU4 map3,AU4 map4,AU4 map5,AU4 map6,AU4 map7,
AU4 map8,AU4 map9,AU4 mapA,AU4 mapB,AU4 mapC,AU4 mapD,AU4 mapE,AU4 mapF,
AU4 mapG,AU4 mapH,AU4 mapI,AU4 mapJ,AU4 mapK,AU4 mapL,AU4 mapM,AU4 mapN){
LpmMap(colorR,colorG,colorB,
AF3(AF4_AU4(map6).g,AF4_AU4(map6).b,AF4_AU4(map6).a), // lumaW
AF3(AF4_AU4(map1).b,AF4_AU4(map1).a,AF4_AU4(map2).r), // lumaT
AF3(AF4_AU4(map3).r,AF4_AU4(map3).g,AF4_AU4(map3).b), // rcpLumaT
AF3(AF4_AU4(map0).r,AF4_AU4(map0).g,AF4_AU4(map0).b), // saturation
AF4_AU4(map0).a, // contrast
shoulder,
AF4_AU4(map6).r, // shoulderContrast
AF2(AF4_AU4(map1).r,AF4_AU4(map1).g), // toneScaleBias
AF3(AF4_AU4(map2).g,AF4_AU4(map2).b,AF4_AU4(map2).a),// crosstalk
con,
AF3(AF4_AU4(map7).b,AF4_AU4(map7).a,AF4_AU4(map8).r), // conR
AF3(AF4_AU4(map8).g,AF4_AU4(map8).b,AF4_AU4(map8).a), // conG
AF3(AF4_AU4(map9).r,AF4_AU4(map9).g,AF4_AU4(map9).b), // conB
soft,
AF2(AF4_AU4(map7).r,AF4_AU4(map7).g), // softGap
con2,clip,scaleOnly,
AF3(AF4_AU4(map3).a,AF4_AU4(map4).r,AF4_AU4(map4).g), // con2R
AF3(AF4_AU4(map4).b,AF4_AU4(map4).a,AF4_AU4(map5).r), // con2G
AF3(AF4_AU4(map5).g,AF4_AU4(map5).b,AF4_AU4(map5).a));} // con2B
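//==============================================================================================================================
// USAGE SKETCH
//------------------------------------------------------------------------------------------------------------------------------
// A minimal illustrative sketch (not a prescribed setup): shows how a "LPM_CONFIG_"/"LPM_COLORS_" prefab pair, LpmSetup()
// and LpmFilter() fit together for a Rec.709 working space output to an HDR10 scRGB swapchain.
// The function name and all tuning values (peak nits, hdrMax, exposure, contrast) are assumptions chosen for illustration;
// real usage would run the setup once and pass the control block in as constants rather than recomputing it per pixel.
//==============================================================================================================================
void LpmFilterExampleHdr10Scrgb709(inout AF1 colorR,inout AF1 colorG,inout AF1 colorB){
 AU4 map0,map1,map2,map3,map4,map5,map6,map7,map8,map9,mapA,mapB,mapC,mapD,mapE,mapF;
 AU4 mapG,mapH,mapI,mapJ,mapK,mapL,mapM,mapN;
 AF1 hdr10S=LpmHdr10ScrgbScalar(1000.0); // Assumed ~1000 nit peak display.
 LpmSetup(map0,map1,map2,map3,map4,map5,map6,map7,map8,map9,mapA,mapB,mapC,mapD,mapE,mapF,
  mapG,mapH,mapI,mapJ,mapK,mapL,mapM,mapN,
  false,                     // shoulder: no extra shoulder tuning (shoulderContrast is 1.0).
  LPM_CONFIG_HDR10SCRGB_709, // Path control prefab: con,soft,con2,clip,scaleOnly.
  LPM_COLORS_HDR10SCRGB_709, // Gamut control prefab, the last argument expands to 'hdr10S'.
  0.0,                       // softGap, unused because 'soft' is false in this config.
  256.0,                     // hdrMax, assumed maximum linear input value.
  8.0,                       // exposure, assumed stops between hdrMax and the 18% mid-level.
  0.25,                      // contrast, assumed extra contrast.
  1.0,                       // shoulderContrast, 1.0 = no change (fast path).
  AF3_(0.0),                 // saturation, no per-channel change.
  AF3_(1.0));                // crosstalk, all channels at 1.0.
 LpmFilter(colorR,colorG,colorB,
  false,                     // shoulder, must match the value passed to LpmSetup().
  LPM_CONFIG_HDR10SCRGB_709, // Path control prefab, must match the one passed to LpmSetup().
  map0,map1,map2,map3,map4,map5,map6,map7,map8,map9,mapA,mapB,mapC,mapD,mapE,mapF,
  mapG,mapH,mapI,mapJ,mapK,mapL,mapM,mapN);}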
//------------------------------------------------------------------------------------------------------------------------------
#ifdef A_HALF
// Packed 16-bit entry point (maps 2 colors at the same time).
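// Each AH2 lane carries one of the two colors being mapped, e.g. colorR=AH2(pixel0.r,pixel1.r).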
void LpmFilterH(
inout AH2 colorR,inout AH2 colorG,inout AH2 colorB,
AP1 shoulder,AP1 con,AP1 soft,AP1 con2,AP1 clip,AP1 scaleOnly,
AU4 map0,AU4 map1,AU4 map2,AU4 map3,AU4 map4,AU4 map5,AU4 map6,AU4 map7,
AU4 map8,AU4 map9,AU4 mapA,AU4 mapB,AU4 mapC,AU4 mapD,AU4 mapE,AU4 mapF,
AU4 mapG,AU4 mapH,AU4 mapI,AU4 mapJ,AU4 mapK,AU4 mapL,AU4 mapM,AU4 mapN){
LpmMapH(colorR,colorG,colorB,
AH3(AH2_AU1(mapJ.r).y,AH2_AU1(mapJ.g).x,AH2_AU1(mapJ.g).y), // lumaW
AH3(AH2_AU1(mapG.a).x,AH2_AU1(mapG.a).y,AH2_AU1(mapH.r).x), // lumaT
AH3(AH2_AU1(mapH.b).x,AH2_AU1(mapH.b).y,AH2_AU1(mapH.a).x), // rcpLumaT
AH3(AH2_AU1(mapG.r).x,AH2_AU1(mapG.r).y,AH2_AU1(mapG.g).x), // saturation
AH2_AU1(mapG.g).y, // contrast
shoulder,
AH2_AU1(mapJ.r).x, // shoulderContrast
AH2_AU1(mapG.b), // toneScaleBias
AH3(AH2_AU1(mapH.r).y,AH2_AU1(mapH.g).x,AH2_AU1(mapH.g).y), // crosstalk
con,
AH3(AH2_AU1(mapJ.a).x,AH2_AU1(mapJ.a).y,AH2_AU1(mapK.r).x), // conR
AH3(AH2_AU1(mapK.r).y,AH2_AU1(mapK.g).x,AH2_AU1(mapK.g).y), // conG
AH3(AH2_AU1(mapK.b).x,AH2_AU1(mapK.b).y,AH2_AU1(mapK.a).x), // conB
soft,
AH2_AU1(mapJ.b), // softGap
con2,clip,scaleOnly,
AH3(AH2_AU1(mapH.a).y,AH2_AU1(mapI.r).x,AH2_AU1(mapI.r).y), // con2R
AH3(AH2_AU1(mapI.g).x,AH2_AU1(mapI.g).y,AH2_AU1(mapI.b).x), // con2G
AH3(AH2_AU1(mapI.b).y,AH2_AU1(mapI.a).x,AH2_AU1(mapI.a).y));} // con2B
#endif
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________
//==============================================================================================================================
// END OF GPU CODE
//==============================================================================================================================
#endif

View File

@@ -0,0 +1,7 @@
fileFormatVersion: 2
guid: 8b2264ffe26ac3e42b4077bfbc35941b
ShaderIncludeImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant: