git-svn-id: svn://db.shs.com.ru/libs@663 a8b55f48-bf90-11e4-a774-851b48703e85
This commit is contained in:
@@ -28,10 +28,7 @@ static int _count = 0;
|
||||
|
||||
Mesh::Mesh(GLenum geom_type_): geom_type(geom_type_),
|
||||
buffer_geom(GL_ARRAY_BUFFER, GL_STATIC_DRAW),
|
||||
buffer_ind (GL_ELEMENT_ARRAY_BUFFER, GL_STATIC_DRAW),
|
||||
buffer_obj (GL_ARRAY_BUFFER, GL_STREAM_DRAW),
|
||||
buffer_sel (GL_ARRAY_BUFFER, GL_STREAM_DRAW) {
|
||||
vao = 0;
|
||||
buffer_ind (GL_ELEMENT_ARRAY_BUFFER, GL_STATIC_DRAW) {
|
||||
hash_ = 0;
|
||||
changed = hash_changed = objects_changed = selected_changed = true;
|
||||
//qDebug() << "Mesh, now" << ++_count;
|
||||
@@ -60,26 +57,17 @@ Mesh * Mesh::clone() {
|
||||
|
||||
|
||||
void Mesh::init(QOpenGLExtraFunctions * f) {
|
||||
if (!isInit()) {
|
||||
buffer_geom.init(f);
|
||||
buffer_ind .init(f);
|
||||
buffer_obj .init(f);
|
||||
buffer_sel .init(f);
|
||||
f->glGenVertexArrays(1, &vao);
|
||||
}
|
||||
vao.bindBuffers(f, buffer_geom, buffer_ind);
|
||||
changed = true;
|
||||
}
|
||||
|
||||
|
||||
void Mesh::destroy(QOpenGLExtraFunctions * f) {
|
||||
if (vao != 0) {
|
||||
f->glDeleteVertexArrays(1, &vao);
|
||||
buffer_geom.destroy(f);
|
||||
buffer_ind .destroy(f);
|
||||
buffer_obj .destroy(f);
|
||||
buffer_sel .destroy(f);
|
||||
}
|
||||
vao = 0;
|
||||
vao.destroy(f);
|
||||
}
|
||||
|
||||
|
||||
@@ -131,21 +119,6 @@ void Mesh::calculateTangents() {
|
||||
}
|
||||
|
||||
|
||||
// Uploads `size` bytes starting at `data` into `buf`.
// The mesh is initialised on demand first; nothing is uploaded when the
// buffer is still uninitialised afterwards or when `data` is null.
void Mesh::loadBuffer(QOpenGLExtraFunctions * f, Buffer & buf, const void * data, int size) {
	if (!isInit())
		init(f);

	const bool can_upload = buf.isInit() && data;
	if (!can_upload)
		return;

	// Bind, (re)allocate to the requested size, then copy the payload.
	buf.bind(f);
	buf.resize(f, size);
	buf.load(f, data, size);
}
|
||||
|
||||
|
||||
bool Mesh::rebuffer(QOpenGLExtraFunctions * f) {
|
||||
changed = false;
|
||||
if (vertices_.isEmpty()) return true;
|
||||
@@ -168,12 +141,10 @@ bool Mesh::rebuffer(QOpenGLExtraFunctions * f) {
|
||||
int gsize = data_.size() * sizeof(Vertex);
|
||||
int tsize = triangles_.size() * sizeof(Vector3i);
|
||||
int lsize = lines_.size() * sizeof(Vector2i);
|
||||
f->glBindVertexArray(vao);
|
||||
|
||||
buffer_geom.bind(f);
|
||||
buffer_geom.resize(f, gsize);
|
||||
buffer_geom.load(f, data_.constData(), gsize);
|
||||
prepareDrawGeom(f);
|
||||
|
||||
buffer_ind.bind(f);
|
||||
if (geom_type == GL_TRIANGLES) {
|
||||
@@ -184,86 +155,19 @@ bool Mesh::rebuffer(QOpenGLExtraFunctions * f) {
|
||||
buffer_ind.load(f, lines_.constData(), lsize);
|
||||
}
|
||||
|
||||
buffer_obj.bind(f);
|
||||
prepareDrawObj(f);
|
||||
|
||||
buffer_sel.bind(f);
|
||||
prepareDrawSel(f);
|
||||
|
||||
f->glBindVertexArray(0);
|
||||
return !isEmpty();
|
||||
}
|
||||
|
||||
|
||||
// Enables the per-vertex attributes (position, normal, tangent, bitangent,
// texture coordinate) and describes their layout inside a Vertex record.
// The calls act on whatever vertex array / array buffer is bound by the caller.
void Mesh::prepareDrawGeom(QOpenGLExtraFunctions * f) {
	// Distance in bytes between two consecutive Vertex records.
	const GLsizei stride = sizeof(Vertex);

	f->glEnableVertexAttribArray(pos_loc);
	f->glVertexAttribPointer(pos_loc, 3, GL_FLOAT, GL_FALSE, stride, (const void *)pos_offset);

	f->glEnableVertexAttribArray(normal_loc);
	f->glVertexAttribPointer(normal_loc, 3, GL_FLOAT, GL_FALSE, stride, (const void *)normal_offset);

	f->glEnableVertexAttribArray(tangent_loc);
	f->glVertexAttribPointer(tangent_loc, 3, GL_FLOAT, GL_FALSE, stride, (const void *)tangent_offset);

	f->glEnableVertexAttribArray(bitangent_loc);
	f->glVertexAttribPointer(bitangent_loc, 3, GL_FLOAT, GL_FALSE, stride, (const void *)bitangent_offset);

	// Texture coordinates are the only two-component attribute.
	f->glEnableVertexAttribArray(tex_loc);
	f->glVertexAttribPointer(tex_loc, 2, GL_FLOAT, GL_FALSE, stride, (const void *)tex_offset);
}
|
||||
|
||||
|
||||
// Enables the per-instance attributes stored in an Object record
// (material index, object id, colour, model matrix) and gives each of them
// a divisor of 1 so they advance once per drawn instance.
// The calls act on whatever vertex array / array buffer is bound by the caller.
void Mesh::prepareDrawObj(QOpenGLExtraFunctions * f) {
	// Distance in bytes between two consecutive Object records.
	const GLsizei stride = sizeof(Object);

	// Integer attributes go through the "I" variant so they are not converted to float.
	f->glEnableVertexAttribArray(material_loc);
	f->glVertexAttribIPointer(material_loc, 1, GL_UNSIGNED_INT, stride, (const void *)material_offset);
	f->glVertexAttribDivisor(material_loc, 1);

	f->glEnableVertexAttribArray(object_id_loc);
	f->glVertexAttribIPointer(object_id_loc, 1, GL_UNSIGNED_INT, stride, (const void *)object_id_offset);
	f->glVertexAttribDivisor(object_id_loc, 1);

	f->glEnableVertexAttribArray(color_loc);
	f->glVertexAttribPointer(color_loc, 4, GL_FLOAT, GL_FALSE, stride, (const void *)color_offset);
	f->glVertexAttribDivisor(color_loc, 1);

	// The model matrix occupies four consecutive 4-float attribute slots,
	// one QVector4D-sized step apart in the record.
	for (int col = 0; col < 4; ++col) {
		f->glEnableVertexAttribArray(modelmatrix_loc + col);
		f->glVertexAttribPointer(modelmatrix_loc + col, 4, GL_FLOAT, GL_FALSE, stride, (const void *)(modelmatrix_offset + sizeof(QVector4D)*col));
		f->glVertexAttribDivisor(modelmatrix_loc + col, 1);
	}
}
|
||||
|
||||
|
||||
// Enables the per-instance "is selected" attribute: one unsigned byte per
// instance, tightly packed, advanced once per drawn instance.
// The calls act on whatever vertex array / array buffer is bound by the caller.
void Mesh::prepareDrawSel(QOpenGLExtraFunctions * f) {
	// Each selection flag is a single byte, so records are one byte apart.
	const GLsizei stride = 1;

	f->glEnableVertexAttribArray(is_selected_loc);
	f->glVertexAttribIPointer(is_selected_loc, 1, GL_UNSIGNED_BYTE, stride, (const void *)is_selected_offset);
	f->glVertexAttribDivisor(is_selected_loc, 1);
}
|
||||
|
||||
|
||||
void Mesh::draw(QOpenGLExtraFunctions * f, int count) {
|
||||
if (isEmpty()) return;
|
||||
if (!isInit()) init(f);
|
||||
if (changed) rebuffer(f);
|
||||
//qDebug() << "draw" << geom_type << vert_count << count;
|
||||
|
||||
f->glBindVertexArray(vao);
|
||||
if (geom_type == GL_TRIANGLES)
|
||||
f->glDrawElementsInstanced(geom_type, triangles_.size() * 3, GL_UNSIGNED_INT, 0, count);
|
||||
vao.draw(f, geom_type, triangles_.size() * 3, count);
|
||||
else
|
||||
f->glDrawElementsInstanced(geom_type, lines_.size() * 2, GL_UNSIGNED_INT, 0, count);
|
||||
f->glBindVertexArray(0);
|
||||
vao.draw(f, geom_type, lines_.size() * 2, count);
|
||||
}
|
||||
|
||||
|
||||
@@ -281,18 +185,18 @@ void Mesh::clear() {
|
||||
|
||||
|
||||
void Mesh::loadObject(QOpenGLExtraFunctions * f, const Object & object) {
|
||||
loadBuffer(f, buffer_obj, &object, sizeof(Object));
|
||||
vao.loadObject(f, object);
|
||||
}
|
||||
|
||||
|
||||
void Mesh::loadObjects(QOpenGLExtraFunctions * f, const QVector<Object> & objects) {
|
||||
loadBuffer(f, buffer_obj, objects.constData(), objects.size() * sizeof(Object));
|
||||
vao.loadObjects(f, objects);
|
||||
}
|
||||
|
||||
|
||||
void Mesh::loadSelections(QOpenGLExtraFunctions * f, const QVector<uchar> & sels) {
|
||||
//qDebug() << "loadSelections" << sels;
|
||||
loadBuffer(f, buffer_sel, sels.constData(), sels.size());
|
||||
vao.loadSelections(f, sels);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -20,8 +20,7 @@
|
||||
#define GLMESH_H
|
||||
|
||||
#include <chunkstream.h>
|
||||
#include "glbuffer.h"
|
||||
#include "glshaders_types.h"
|
||||
#include "glvertexobject.h"
|
||||
|
||||
|
||||
class Mesh
|
||||
@@ -51,7 +50,7 @@ public:
|
||||
int verticesCount() const {return vertices_.size();}
|
||||
int trianglesCount() const {return triangles_.size();}
|
||||
int linesCount() const {return lines_.size();}
|
||||
bool isInit() const {return vao != 0;}
|
||||
bool isInit() const {return vao.isInit();}
|
||||
bool isEmpty() const {return vertices_.isEmpty();}
|
||||
uint hash() const;
|
||||
|
||||
@@ -76,14 +75,9 @@ public:
|
||||
|
||||
Box3D boundingBox() const;
|
||||
|
||||
static void prepareDrawGeom(QOpenGLExtraFunctions * f);
|
||||
static void prepareDrawObj (QOpenGLExtraFunctions * f);
|
||||
static void prepareDrawSel (QOpenGLExtraFunctions * f);
|
||||
|
||||
private:
|
||||
void calculateNormals();
|
||||
void calculateTangents();
|
||||
void loadBuffer(QOpenGLExtraFunctions * f, Buffer & buf, const void * data, int size);
|
||||
|
||||
QVector<QVector3D> vertices_, normals_, tangents_, bitangents_;
|
||||
QVector<QVector2D> texcoords_;
|
||||
@@ -91,8 +85,9 @@ private:
|
||||
QVector< Vector2i> lines_;
|
||||
|
||||
QVector<QGLEngineShaders::Vertex> data_;
|
||||
GLenum vao, geom_type;
|
||||
Buffer buffer_geom, buffer_ind, buffer_obj, buffer_sel;
|
||||
GLenum geom_type;
|
||||
Buffer buffer_geom, buffer_ind;
|
||||
VertexObject vao;
|
||||
mutable uint hash_;
|
||||
mutable bool hash_changed;
|
||||
int vert_count;
|
||||
|
||||
@@ -117,70 +117,30 @@ bool QGLEngineShaders::loadShadersMulti(QOpenGLShaderProgram *& prog, const QStr
|
||||
}
|
||||
|
||||
|
||||
void QGLEngineShaders::setUniformMatrices(QOpenGLShaderProgram * prog, QMatrix4x4 proj, QMatrix4x4 view, QMatrix4x4 prevproj, QMatrix4x4 prevview) {
|
||||
if (!prog) return;
|
||||
if (!prog->isLinked()) return;
|
||||
QMatrix4x4 mvpm = proj * view;
|
||||
QMatrix4x4 pmvpm = prevproj * prevview;
|
||||
QMatrix3x3 nm = view.normalMatrix();
|
||||
//nm.in;
|
||||
prog->setUniformValue("qgl_ModelViewMatrix", view);
|
||||
prog->setUniformValue("qgl_ProjectionMatrix", proj);
|
||||
prog->setUniformValue("prev_ModelViewProjectioMatrix", pmvpm);
|
||||
prog->setUniformValue("prev_ModelViewMatrix", prevview);
|
||||
prog->setUniformValue("qgl_ModelViewProjectionMatrix", mvpm);
|
||||
prog->setUniformValue("qgl_NormalMatrix", nm);
|
||||
//prog->setUniformValue("qgl_BumpMatrix", nm.);
|
||||
prog->setUniformValue("qgl_ModelViewMatrixTranspose", view.transposed());
|
||||
prog->setUniformValue("qgl_ProjectionMatrixTranspose", proj.transposed());
|
||||
prog->setUniformValue("qgl_ModelViewProjectionMatrixTranspose", mvpm.transposed());
|
||||
bool QGLEngineShaders::loadShaders(QOpenGLShaderProgram *& prog, const QStringList & files, bool add_qgl, const QStringList & defines) {
|
||||
if (!prog)
|
||||
prog = new QOpenGLShaderProgram();
|
||||
prog->removeAllShaders();
|
||||
QString cur_shader, defs = prepareDefines(defines);
|
||||
foreach (QString f, files) {
|
||||
QFileInfo fi(f);
|
||||
QOpenGLShader::ShaderType type = 0;
|
||||
if (fi.suffix().toLower() == "vert") type = QOpenGLShader::Vertex ;
|
||||
if (fi.suffix().toLower() == "frag") type = QOpenGLShader::Fragment;
|
||||
if (fi.suffix().toLower() == "geom") type = QOpenGLShader::Geometry;
|
||||
if (type == 0) continue;
|
||||
QFile file(f);
|
||||
if (!file.open(QIODevice::ReadOnly)) {
|
||||
qDebug() << "[QGLView] Shader" << f << "Error: can`t open file!";
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
void QGLEngineShaders::setUniformLights(QOpenGLShaderProgram * prog, const QVector<Light*> & lights, const QMatrix4x4 & mat, int shadow_start) {
|
||||
for (int i = 0; i < lights.size(); ++i)
|
||||
;//setUniformLight(prog, lights[i], QString("qgl_Light[%1]").arg(i), mat, shadow_start + i);
|
||||
cur_shader = file.readAll();
|
||||
if (!addShader(prog, type, cur_shader, f, add_qgl, defs)) return false;
|
||||
}
|
||||
/*
|
||||
" vec3 position;\n"
|
||||
" vec3 direction;\n"
|
||||
" vec4 color;\n"
|
||||
" float intensity;\n"
|
||||
" float startAngle;\n"
|
||||
" float endAngle;\n"
|
||||
" float constantAttenuation;\n"
|
||||
" float linearAttenuation;\n"
|
||||
" float quadraticAttenuation;\n"
|
||||
" sampler2DShadow shadow;\n"
|
||||
" mat4 shadowMatrix;\n"
|
||||
*/
|
||||
void QGLEngineShaders::setUniformLight(QOpenGLShaderProgram * prog, Light * light, QString ulightn, const QMatrix4x4 & mat, int shadow) {
|
||||
if (!prog) return;
|
||||
if (!prog->isLinked()) return;
|
||||
QMatrix4x4 m = mat * light->worldTransform();
|
||||
QVector4D pos(0, 0, 0, 1.), dir(light->direction(), 1);//, dir0(light->dir0), dir1(light->dir1);
|
||||
pos = m * pos;
|
||||
dir = ((m * dir) - pos).normalized();
|
||||
float ang_start = light->angle_start / 2.f, ang_end = light->angle_end / 2.f;
|
||||
if (light->light_type == Light::Omni)
|
||||
ang_start = ang_end = 180.;
|
||||
//qDebug() << "light" << light->name() << ulightn << pos;
|
||||
prog->setUniformValue((ulightn + ".position").toLatin1().constData(), pos);
|
||||
prog->setUniformValue((ulightn + ".direction").toLatin1().constData(), dir);
|
||||
prog->setUniformValue((ulightn + ".intensity").toLatin1().constData(), GLfloat(light->intensity));
|
||||
prog->setUniformValue((ulightn + ".startAngle").toLatin1().constData(), GLfloat(ang_start));
|
||||
prog->setUniformValue((ulightn + ".startAngleCos").toLatin1().constData(), GLfloat(cosf(ang_start * deg2rad)));
|
||||
prog->setUniformValue((ulightn + ".endAngle").toLatin1().constData(), GLfloat(ang_end));
|
||||
prog->setUniformValue((ulightn + ".endAngleCos").toLatin1().constData(), GLfloat(cosf(ang_end * deg2rad)));
|
||||
//prog->setUniformValue((ulightn + ".color").toLatin1().constData(), light->color());
|
||||
prog->setUniformValue((ulightn + ".constantAttenuation").toLatin1().constData(), GLfloat(light->decay_const));
|
||||
prog->setUniformValue((ulightn + ".linearAttenuation").toLatin1().constData(), GLfloat(light->decay_linear));
|
||||
prog->setUniformValue((ulightn + ".quadraticAttenuation").toLatin1().constData(), GLfloat(light->decay_quadratic));
|
||||
prog->setUniformValue((ulightn + ".shadow").toLatin1().constData(), shadow);
|
||||
prog->setUniformValue((ulightn + ".shadowColor").toLatin1().constData(), shadow);
|
||||
prog->setUniformValue((ulightn + ".shadowMatrix").toLatin1().constData(), light->shadow_matrix);
|
||||
//qDebug() << light->shadow_matrix;
|
||||
//prog->setUniformValue((ulightn + ".shadowDir0").toLatin1().constData(), (mat * dir0));
|
||||
//prog->setUniformValue((ulightn + ".shadowDir1").toLatin1().constData(), (mat * dir1));
|
||||
//qDebug() << light->direction << light->dir0 << light->dir1;
|
||||
if (!prog->link()) {
|
||||
qDebug() << "[QGLView] Shader" << files << "Link error:\n" << prog->log();
|
||||
return false;
|
||||
}
|
||||
qDebug() << "[QGLView] Shader" << files << "ok";
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -24,10 +24,7 @@
|
||||
namespace QGLEngineShaders {
|
||||
|
||||
bool loadShadersMulti(QOpenGLShaderProgram *& prog, const QString & file, bool add_qgl = true, const QStringList & defines = QStringList());
|
||||
|
||||
void setUniformMatrices(QOpenGLShaderProgram * prog, QMatrix4x4 proj, QMatrix4x4 view, QMatrix4x4 prevproj = QMatrix4x4(), QMatrix4x4 prevview = QMatrix4x4());
|
||||
void setUniformLights(QOpenGLShaderProgram * prog, const QVector<Light*> & lights, const QMatrix4x4 & mat, int shadow_start);
|
||||
void setUniformLight(QOpenGLShaderProgram * prog, Light * light, QString ulightn, const QMatrix4x4 & mat = QMatrix4x4(), int shadow = 0);
|
||||
bool loadShaders(QOpenGLShaderProgram *& prog, const QStringList & files, bool add_qgl = true, const QStringList & defines = QStringList());
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -39,3 +39,60 @@ QGLEngineShaders::QGLMaterial::QGLMaterial() {
|
||||
map[mtNormal].map_index = emrBlue;
|
||||
map[mtRoughness].amount = 0.75;
|
||||
}
|
||||
|
||||
|
||||
// Enables the per-vertex attributes (position, normal, tangent, bitangent,
// texture coordinate) and describes their layout inside a Vertex record.
// The calls act on whatever vertex array / array buffer is bound by the caller.
void QGLEngineShaders::prepareDrawGeom(QOpenGLExtraFunctions * f) {
	// Distance in bytes between two consecutive Vertex records.
	const GLsizei stride = sizeof(Vertex);

	f->glEnableVertexAttribArray(pos_loc);
	f->glVertexAttribPointer(pos_loc, 3, GL_FLOAT, GL_FALSE, stride, (const void *)pos_offset);

	f->glEnableVertexAttribArray(normal_loc);
	f->glVertexAttribPointer(normal_loc, 3, GL_FLOAT, GL_FALSE, stride, (const void *)normal_offset);

	f->glEnableVertexAttribArray(tangent_loc);
	f->glVertexAttribPointer(tangent_loc, 3, GL_FLOAT, GL_FALSE, stride, (const void *)tangent_offset);

	f->glEnableVertexAttribArray(bitangent_loc);
	f->glVertexAttribPointer(bitangent_loc, 3, GL_FLOAT, GL_FALSE, stride, (const void *)bitangent_offset);

	// Texture coordinates are the only two-component attribute.
	f->glEnableVertexAttribArray(tex_loc);
	f->glVertexAttribPointer(tex_loc, 2, GL_FLOAT, GL_FALSE, stride, (const void *)tex_offset);
}
|
||||
|
||||
|
||||
// Enables the per-instance attributes stored in an Object record
// (material index, object id, colour, model matrix) and gives each of them
// a divisor of 1 so they advance once per drawn instance.
// The calls act on whatever vertex array / array buffer is bound by the caller.
void QGLEngineShaders::prepareDrawObj(QOpenGLExtraFunctions * f) {
	// Distance in bytes between two consecutive Object records.
	const GLsizei stride = sizeof(Object);

	// Integer attributes go through the "I" variant so they are not converted to float.
	f->glEnableVertexAttribArray(material_loc);
	f->glVertexAttribIPointer(material_loc, 1, GL_UNSIGNED_INT, stride, (const void *)material_offset);
	f->glVertexAttribDivisor(material_loc, 1);

	f->glEnableVertexAttribArray(object_id_loc);
	f->glVertexAttribIPointer(object_id_loc, 1, GL_UNSIGNED_INT, stride, (const void *)object_id_offset);
	f->glVertexAttribDivisor(object_id_loc, 1);

	f->glEnableVertexAttribArray(color_loc);
	f->glVertexAttribPointer(color_loc, 4, GL_FLOAT, GL_FALSE, stride, (const void *)color_offset);
	f->glVertexAttribDivisor(color_loc, 1);

	// The model matrix occupies four consecutive 4-float attribute slots,
	// one QVector4D-sized step apart in the record.
	for (int col = 0; col < 4; ++col) {
		f->glEnableVertexAttribArray(modelmatrix_loc + col);
		f->glVertexAttribPointer(modelmatrix_loc + col, 4, GL_FLOAT, GL_FALSE, stride, (const void *)(modelmatrix_offset + sizeof(QVector4D)*col));
		f->glVertexAttribDivisor(modelmatrix_loc + col, 1);
	}
}
|
||||
|
||||
|
||||
// Enables the per-instance "is selected" attribute: one unsigned byte per
// instance, tightly packed, advanced once per drawn instance.
// The calls act on whatever vertex array / array buffer is bound by the caller.
void QGLEngineShaders::prepareDrawSel(QOpenGLExtraFunctions * f) {
	// Each selection flag is a single byte, so records are one byte apart.
	const GLsizei stride = 1;

	f->glEnableVertexAttribArray(is_selected_loc);
	f->glVertexAttribIPointer(is_selected_loc, 1, GL_UNSIGNED_BYTE, stride, (const void *)is_selected_offset);
	f->glVertexAttribDivisor(is_selected_loc, 1);
}
|
||||
|
||||
@@ -142,6 +142,10 @@ struct QGLLightPosition {
|
||||
};
|
||||
#pragma pack(pop)
|
||||
|
||||
void prepareDrawGeom(QOpenGLExtraFunctions * f);
|
||||
void prepareDrawObj (QOpenGLExtraFunctions * f);
|
||||
void prepareDrawSel (QOpenGLExtraFunctions * f);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -176,6 +176,7 @@ class TextureManager;
|
||||
class Texture2DArray;
|
||||
class Framebuffer;
|
||||
class FramebufferMipmap;
|
||||
class VertexObject;
|
||||
class Mesh;
|
||||
class Scene;
|
||||
class RendererBase;
|
||||
|
||||
114
qglengine/core/glvertexobject.cpp
Normal file
114
qglengine/core/glvertexobject.cpp
Normal file
@@ -0,0 +1,114 @@
|
||||
/*
|
||||
QGLView
|
||||
Copyright (C) 2019 Ivan Pelipenko peri4ko@yandex.ru
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#define GL_GLEXT_PROTOTYPES
|
||||
#include <QOpenGLExtraFunctions>
|
||||
#include "glvertexobject.h"
|
||||
|
||||
using namespace QGLEngineShaders;
|
||||
|
||||
|
||||
// Builds an object with no GL vertex array yet (handle 0) and two
// array buffers declared with the GL_STREAM_DRAW usage hint: one for
// per-object data, one for selection flags.
VertexObject::VertexObject():
	vao_(0),
	buffer_obj(GL_ARRAY_BUFFER, GL_STREAM_DRAW),
	buffer_sel(GL_ARRAY_BUFFER, GL_STREAM_DRAW) {
}
|
||||
|
||||
|
||||
// Intentionally empty: no GL call is made here, so the vertex array and
// buffers are only released by an explicit destroy(f).
VertexObject::~VertexObject() {}
|
||||
|
||||
|
||||
// Creates the GL resources on first use; later calls are no-ops while
// the vertex array handle is non-zero.
void VertexObject::init(QOpenGLExtraFunctions * f) {
	if (isInit())
		return;

	buffer_obj.init(f);
	buffer_sel.init(f);
	f->glGenVertexArrays(1, &vao_);
}
|
||||
|
||||
|
||||
// Releases every GL resource owned by this object and resets the vertex
// array handle to 0, so isInit() reports false afterwards.
//
// Each resource is checked on its own state rather than on vao_ alone:
// loadObject()/loadObjects()/loadSelections() initialise the instance
// buffers on demand even when the vertex array was never generated, and
// those buffers would otherwise be leaked here.
void VertexObject::destroy(QOpenGLExtraFunctions * f) {
	if (buffer_obj.isInit())
		buffer_obj.destroy(f);
	if (buffer_sel.isInit())
		buffer_sel.destroy(f);

	if (vao_ != 0)
		f->glDeleteVertexArrays(1, &vao_);
	vao_ = 0;
}
|
||||
|
||||
|
||||
// Makes this object's vertex array the current one.
void VertexObject::bind(QOpenGLExtraFunctions * f) {
	f->glBindVertexArray(vao_);
}
|
||||
|
||||
|
||||
// Unbinds any vertex array by binding handle 0.
void VertexObject::release(QOpenGLExtraFunctions * f) {
	f->glBindVertexArray(0);
}
|
||||
|
||||
|
||||
// Records the complete attribute setup in this vertex array: geometry
// attributes read from `geom`, indices from `elem`, per-instance object and
// selection attributes from the two internal buffers.
// Statement order matters: every prepareDraw*() call describes attributes
// for the buffer that was bound immediately before it.
void VertexObject::bindBuffers(QOpenGLExtraFunctions * f, Buffer & geom, Buffer & elem) {
	init(f); // create the GL objects on first use
	bind(f);

	// Per-vertex attributes.
	geom.bind(f);
	QGLEngineShaders::prepareDrawGeom(f);

	// Index buffer; bound while the vertex array is current.
	elem.bind(f);

	// Per-instance object attributes.
	buffer_obj.bind(f);
	QGLEngineShaders::prepareDrawObj(f);

	// Per-instance selection flag.
	buffer_sel.bind(f);
	QGLEngineShaders::prepareDrawSel(f);

	release(f);
}
|
||||
|
||||
|
||||
// Uploads a single Object record into the per-object buffer.
void VertexObject::loadObject(QOpenGLExtraFunctions * f, const Object & object) {
	const void * record = &object;
	loadBuffer(f, buffer_obj, record, sizeof(Object));
}
|
||||
|
||||
|
||||
// Uploads the whole `objects` array into the per-object buffer.
void VertexObject::loadObjects(QOpenGLExtraFunctions * f, const QVector<Object> & objects) {
	// Payload size in bytes: one Object record per element.
	const int byte_count = objects.size() * sizeof(Object);
	loadBuffer(f, buffer_obj, objects.constData(), byte_count);
}
|
||||
|
||||
|
||||
// Uploads the selection flags (one byte per element) into the selection buffer.
void VertexObject::loadSelections(QOpenGLExtraFunctions * f, const QVector<uchar> & sels) {
	const int byte_count = sels.size();
	loadBuffer(f, buffer_sel, sels.constData(), byte_count);
}
|
||||
|
||||
|
||||
// Issues one instanced, indexed draw call with this vertex array bound.
//   geom_type - primitive mode passed to GL
//   vert_cout - number of indices to read (unsigned int indices, from offset 0)
//   obj_count - number of instances to draw
// The vertex array is unbound again afterwards.
void VertexObject::draw(QOpenGLExtraFunctions * f, GLenum geom_type, int vert_cout, int obj_count) {
	bind(f);
	f->glDrawElementsInstanced(geom_type, vert_cout, GL_UNSIGNED_INT, 0, obj_count);
	release(f);
}
|
||||
|
||||
|
||||
// Uploads `size` bytes starting at `data` into `buf`.
// buf.init(f) is called first on every invocation; a null `data` then
// leaves the buffer contents untouched.
void VertexObject::loadBuffer(QOpenGLExtraFunctions * f, Buffer & buf, const void * data, int size) {
	buf.init(f);

	if (data) {
		// Bind, (re)allocate to the requested size, then copy the payload.
		buf.bind(f);
		buf.resize(f, size);
		buf.load(f, data, size);
	}
}
|
||||
58
qglengine/core/glvertexobject.h
Normal file
58
qglengine/core/glvertexobject.h
Normal file
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
QGLView
|
||||
Copyright (C) 2019 Ivan Pelipenko peri4ko@yandex.ru
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef GLVERTEXOBJECT_H
|
||||
#define GLVERTEXOBJECT_H
|
||||
|
||||
#include "glbuffer.h"
|
||||
#include "glshaders_types.h"
|
||||
|
||||
|
||||
class VertexObject
|
||||
{
|
||||
friend class Mesh;
|
||||
public:
|
||||
VertexObject();
|
||||
~VertexObject();
|
||||
|
||||
void init (QOpenGLExtraFunctions * f);
|
||||
void destroy (QOpenGLExtraFunctions * f);
|
||||
|
||||
void bind (QOpenGLExtraFunctions * f);
|
||||
void release (QOpenGLExtraFunctions * f);
|
||||
|
||||
void bindBuffers (QOpenGLExtraFunctions * f, Buffer & geom, Buffer & elem);
|
||||
void loadObject (QOpenGLExtraFunctions * f, const QGLEngineShaders::Object & object);
|
||||
void loadObjects (QOpenGLExtraFunctions * f, const QVector<QGLEngineShaders::Object> & objects);
|
||||
void loadSelections(QOpenGLExtraFunctions * f, const QVector<uchar> & sels);
|
||||
|
||||
void draw(QOpenGLExtraFunctions * f, GLenum geom_type, int vert_cout, int obj_count);
|
||||
|
||||
GLuint ID() const {return vao_;}
|
||||
bool isInit() const {return vao_ != 0;}
|
||||
|
||||
private:
|
||||
void loadBuffer(QOpenGLExtraFunctions * f, Buffer & buf, const void * data, int size);
|
||||
|
||||
GLuint vao_;
|
||||
Buffer buffer_obj, buffer_sel;
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif // GLVERTEXOBJECT_H
|
||||
@@ -67,6 +67,7 @@ Renderer::Renderer(QGLView * view_): RendererBase(view_),
|
||||
<< ShaderPair("ssao_blur", &shader_ssao_blur)
|
||||
<< ShaderPair("ssao_merge", &shader_ssao_merge)
|
||||
<< ShaderPair("dof", &shader_dof);*/
|
||||
shader_fxaa = 0;
|
||||
exposure_ = 1.;
|
||||
edit_mode = need_init_shaders = is_camera_light = true;
|
||||
}
|
||||
@@ -76,6 +77,7 @@ Renderer::~Renderer() {
|
||||
delete quad;
|
||||
delete cam_light;
|
||||
qDeleteAll(shaders.values());
|
||||
if (shader_fxaa) delete shader_fxaa;
|
||||
}
|
||||
|
||||
|
||||
@@ -109,6 +111,9 @@ void Renderer::reloadShaders() {
|
||||
loadShadersMulti(shaders[it.key()], dir + it.value(), true, shader_defines.value(it.key()));
|
||||
}
|
||||
loadShadersMulti(tone_proc.shader_sum, dir + "sum.glsl", false);
|
||||
QStringList fxaa_defs;
|
||||
fxaa_defs << "FXAA_PC 1" << "FXAA_GLSL_130 1" << "FXAA_QUALITY__PRESET 20";
|
||||
loadShaders(shader_fxaa, QStringList() << (dir + "fxaa.vert") << (dir + "fxaa.frag"), true, fxaa_defs);
|
||||
need_init_shaders = true;
|
||||
view->scene()->setLightsChanged();
|
||||
view->scene()->setTreeStructChanged();
|
||||
@@ -126,6 +131,14 @@ bool Renderer::bindShader(Renderer::ShaderRole role, QOpenGLShaderProgram ** ret
|
||||
}
|
||||
|
||||
|
||||
// Binds `sp` as the current shader program.
// Returns false only when the program exists, is linked, and bind() fails.
// A null or not-yet-linked program is reported as success without binding.
// NOTE(review): callers that go on to set uniforms or draw after a `true`
// result will then run without `sp` bound — confirm this is intended.
bool Renderer::bindShader(QOpenGLShaderProgram * sp) {
	if (!sp || !sp->isLinked())
		return true;
	return sp->bind();
}
|
||||
|
||||
|
||||
void Renderer::initShaders() {
|
||||
if (!need_init_shaders) return;
|
||||
need_init_shaders = false;
|
||||
|
||||
@@ -94,6 +94,7 @@ protected:
|
||||
void renderObjects(Scene & scene, RenderPass pass);
|
||||
|
||||
bool bindShader(ShaderRole role, QOpenGLShaderProgram ** ret = 0);
|
||||
bool bindShader(QOpenGLShaderProgram * sp);
|
||||
void initShaders();
|
||||
void releaseShader();
|
||||
|
||||
@@ -108,6 +109,7 @@ private:
|
||||
QMap<ShaderRole, QString> shader_files;
|
||||
QMap<ShaderRole, QStringList> shader_defines;
|
||||
QMap<ShaderRole, QOpenGLShaderProgram*> shaders;
|
||||
QOpenGLShaderProgram * shader_fxaa;
|
||||
|
||||
RendererMaterial rend_mat;
|
||||
RendererService rend_service;
|
||||
|
||||
@@ -292,8 +292,9 @@ void RendererBase::initQuad(Mesh * mesh, QMatrix4x4 mat) {
|
||||
}
|
||||
|
||||
|
||||
void RendererBase::renderQuad(QOpenGLShaderProgram * prog, Mesh * mesh, Camera * cam) {
|
||||
void RendererBase::renderQuad(QOpenGLShaderProgram * prog, Mesh * mesh, Camera * cam, bool uniforms) {
|
||||
glDisableDepth();
|
||||
if (uniforms)
|
||||
setUniformCamera(prog, cam, false);
|
||||
mesh->draw(view, 1);
|
||||
}
|
||||
|
||||
@@ -44,7 +44,7 @@ protected:
|
||||
void markReloadTextures();
|
||||
void setMapsSize(QSize sz);
|
||||
void initQuad(Mesh * mesh, QMatrix4x4 mat = QMatrix4x4());
|
||||
void renderQuad(QOpenGLShaderProgram * prog, Mesh * mesh, Camera * cam = 0);
|
||||
void renderQuad(QOpenGLShaderProgram * prog, Mesh * mesh, Camera * cam = 0, bool uniforms = true);
|
||||
|
||||
QGLView * view;
|
||||
TextureManager * textures_manager;
|
||||
|
||||
@@ -27,7 +27,7 @@ using namespace QGLEngineShaders;
|
||||
|
||||
|
||||
RendererSelection::RendererSelection(Renderer * r_): r(r_),
|
||||
fbo_selection(r->view, 4) {
|
||||
fbo_selection(r->view, 6) {
|
||||
sel_frame = Primitive::plane(2., 2.);
|
||||
id_hover = 0;
|
||||
line_thick_ = 2.;
|
||||
@@ -138,7 +138,7 @@ void RendererSelection::renderSelection(Scene & scene) {
|
||||
//qDebug() << id_hover;
|
||||
fbo_selection.bindColorTexture(sbrSrcHover , sbrSrcHover );
|
||||
fbo_selection.bindColorTexture(sbrSrcSelect, sbrSrcSelect);
|
||||
int wind[2] = {2, 3};
|
||||
int wind[2] = {sbrHovered, sbrSelected};
|
||||
fbo_selection.setWriteBuffers(wind, 2);
|
||||
if (!view->hoverHalo_ && !view->selectionHalo_)
|
||||
glClearFramebuffer(QColor(0,0,0,0), false);
|
||||
@@ -157,6 +157,17 @@ void RendererSelection::renderSelection(Scene & scene) {
|
||||
r->renderQuad(prog, r->quad, view->camera());
|
||||
}
|
||||
|
||||
prog = r->shader_fxaa;
|
||||
if (r->bindShader(prog)) {
|
||||
r->setUniformCamera(prog, 0, true, fbo_selection.size());
|
||||
fbo_selection.bindColorTexture(sbrHovered);
|
||||
fbo_selection.setWriteBuffer(sbrHoveredFXAA);
|
||||
r->renderQuad(prog, r->quad, 0, false);
|
||||
fbo_selection.bindColorTexture(sbrSelected);
|
||||
fbo_selection.setWriteBuffer(sbrSelectedFXAA);
|
||||
r->renderQuad(prog, r->quad, 0, false);
|
||||
}
|
||||
|
||||
fbo_selection.release();
|
||||
}
|
||||
}
|
||||
@@ -187,8 +198,8 @@ void RendererSelection::drawSelection(Framebuffer & fbo_out, int index_out) {
|
||||
fbo_selection.bindColorTextures();
|
||||
fbo_out.bindColorTexture(index_out);
|
||||
prog->setUniformValue("fb_out" , 0);
|
||||
prog->setUniformValue("fb_hover" , (int)sbrHovered );
|
||||
prog->setUniformValue("fb_select", (int)sbrSelected);
|
||||
prog->setUniformValue("fb_hover" , (int)sbrHoveredFXAA );
|
||||
prog->setUniformValue("fb_select", (int)sbrSelectedFXAA);
|
||||
r->renderQuad(prog, r->quad, r->view->camera());
|
||||
if (!r->mouse_rect.isNull()) {
|
||||
renderSelectionFrame();
|
||||
|
||||
@@ -41,7 +41,9 @@ protected:
|
||||
sbrSrcHover,
|
||||
sbrSrcSelect,
|
||||
sbrHovered,
|
||||
sbrSelected
|
||||
sbrSelected,
|
||||
sbrHoveredFXAA,
|
||||
sbrSelectedFXAA,
|
||||
};
|
||||
|
||||
void generateObjectsID(Scene & scene);
|
||||
|
||||
2223
qglengine/shaders/fxaa.frag
Normal file
2223
qglengine/shaders/fxaa.frag
Normal file
@@ -0,0 +1,2223 @@
|
||||
/*============================================================================
|
||||
|
||||
|
||||
NVIDIA FXAA 3.11 by TIMOTHY LOTTES
|
||||
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
COPYRIGHT (C) 2010, 2011 NVIDIA CORPORATION. ALL RIGHTS RESERVED.
|
||||
------------------------------------------------------------------------------
|
||||
TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED
|
||||
*AS IS* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS
|
||||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA
|
||||
OR ITS SUPPLIERS BE LIABLE FOR ANY SPECIAL, INCIDENTAL, INDIRECT, OR
|
||||
CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR
|
||||
LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION,
|
||||
OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR INABILITY TO USE
|
||||
THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||
DAMAGES.
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
INTEGRATION CHECKLIST
|
||||
------------------------------------------------------------------------------
|
||||
(1.)
|
||||
In the shader source, setup defines for the desired configuration.
|
||||
When providing multiple shaders (for different presets),
|
||||
simply setup the defines differently in multiple files.
|
||||
Example,
|
||||
|
||||
#define FXAA_PC 1
|
||||
#define FXAA_HLSL_5 1
|
||||
#define FXAA_QUALITY__PRESET 12
|
||||
|
||||
Or,
|
||||
|
||||
#define FXAA_360 1
|
||||
|
||||
Or,
|
||||
|
||||
#define FXAA_PS3 1
|
||||
|
||||
Etc.
|
||||
|
||||
(2.)
|
||||
Then include this file,
|
||||
|
||||
#include "Fxaa3_11.h"
|
||||
|
||||
(3.)
|
||||
Then call the FXAA pixel shader from within your desired shader.
|
||||
Look at the FXAA Quality FxaaPixelShader() for docs on inputs.
|
||||
As for FXAA 3.11 all inputs for all shaders are the same
|
||||
to enable easy porting between platforms.
|
||||
|
||||
return FxaaPixelShader(...);
|
||||
|
||||
(4.)
|
||||
Insure pass prior to FXAA outputs RGBL (see next section).
|
||||
Or use,
|
||||
|
||||
#define FXAA_GREEN_AS_LUMA 1
|
||||
|
||||
(5.)
|
||||
Setup engine to provide the following constants
|
||||
which are used in the FxaaPixelShader() inputs,
|
||||
|
||||
FxaaFloat2 fxaaQualityRcpFrame,
|
||||
FxaaFloat4 fxaaConsoleRcpFrameOpt,
|
||||
FxaaFloat4 fxaaConsoleRcpFrameOpt2,
|
||||
FxaaFloat4 fxaaConsole360RcpFrameOpt2,
|
||||
FxaaFloat fxaaQualitySubpix,
|
||||
FxaaFloat fxaaQualityEdgeThreshold,
|
||||
FxaaFloat fxaaQualityEdgeThresholdMin,
|
||||
FxaaFloat fxaaConsoleEdgeSharpness,
|
||||
FxaaFloat fxaaConsoleEdgeThreshold,
|
||||
FxaaFloat fxaaConsoleEdgeThresholdMin,
|
||||
FxaaFloat4 fxaaConsole360ConstDir
|
||||
|
||||
Look at the FXAA Quality FxaaPixelShader() for docs on inputs.
|
||||
|
||||
(6.)
|
||||
Have FXAA vertex shader run as a full screen triangle,
|
||||
and output "pos" and "fxaaConsolePosPos"
|
||||
such that inputs in the pixel shader provide,
|
||||
|
||||
// {xy} = center of pixel
|
||||
FxaaFloat2 pos,
|
||||
|
||||
// {xy__} = upper left of pixel
|
||||
// {__zw} = lower right of pixel
|
||||
FxaaFloat4 fxaaConsolePosPos,
|
||||
|
||||
(7.)
|
||||
Ensure the texture sampler(s) used by FXAA are set to bilinear filtering.
|
||||
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
INTEGRATION - RGBL AND COLORSPACE
|
||||
------------------------------------------------------------------------------
|
||||
FXAA3 requires RGBL as input unless the following is set,
|
||||
|
||||
#define FXAA_GREEN_AS_LUMA 1
|
||||
|
||||
In which case the engine uses green in place of luma,
|
||||
and requires that the RGB input is in a non-linear colorspace.
|
||||
|
||||
RGB should be LDR (low dynamic range).
|
||||
Specifically do FXAA after tonemapping.
|
||||
|
||||
RGB data as returned by a texture fetch can be non-linear,
|
||||
or linear when FXAA_GREEN_AS_LUMA is not set.
|
||||
Note an "sRGB format" texture counts as linear,
|
||||
because the result of a texture fetch is linear data.
|
||||
Regular "RGBA8" textures in the sRGB colorspace are non-linear.
|
||||
|
||||
If FXAA_GREEN_AS_LUMA is not set,
|
||||
luma must be stored in the alpha channel prior to running FXAA.
|
||||
This luma should be in a perceptual space (could be gamma 2.0).
|
||||
Example pass before FXAA where output is gamma 2.0 encoded,
|
||||
|
||||
color.rgb = ToneMap(color.rgb); // linear color output
|
||||
color.rgb = sqrt(color.rgb); // gamma 2.0 color output
|
||||
return color;
|
||||
|
||||
To use FXAA,
|
||||
|
||||
color.rgb = ToneMap(color.rgb); // linear color output
|
||||
color.rgb = sqrt(color.rgb); // gamma 2.0 color output
|
||||
color.a = dot(color.rgb, FxaaFloat3(0.299, 0.587, 0.114)); // compute luma
|
||||
return color;
|
||||
|
||||
Another example where output is linear encoded,
|
||||
say for instance writing to an sRGB formatted render target,
|
||||
where the render target does the conversion back to sRGB after blending,
|
||||
|
||||
color.rgb = ToneMap(color.rgb); // linear color output
|
||||
return color;
|
||||
|
||||
To use FXAA,
|
||||
|
||||
color.rgb = ToneMap(color.rgb); // linear color output
|
||||
color.a = sqrt(dot(color.rgb, FxaaFloat3(0.299, 0.587, 0.114))); // compute luma
|
||||
return color;
|
||||
|
||||
Getting luma correct is required for the algorithm to work correctly.
|
||||
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
BEING LINEARLY CORRECT?
|
||||
------------------------------------------------------------------------------
|
||||
Applying FXAA to a framebuffer with linear RGB color will look worse.
|
||||
This is very counterintuitive, but happens to be true in this case.
|
||||
The reason is that dithering artifacts will be more visible
|
||||
in a linear colorspace.
|
||||
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
COMPLEX INTEGRATION
|
||||
------------------------------------------------------------------------------
|
||||
Q. What if the engine is blending into RGB before wanting to run FXAA?
|
||||
|
||||
A. In the last opaque pass prior to FXAA,
|
||||
have the pass write out luma into alpha.
|
||||
Then blend into RGB only.
|
||||
FXAA should be able to run ok
|
||||
assuming the blending pass did not add any aliasing.
|
||||
This should be the common case for particles and common blending passes.
|
||||
|
||||
A. Or use FXAA_GREEN_AS_LUMA.
|
||||
|
||||
============================================================================*/
|
||||
|
||||
/*============================================================================
|
||||
|
||||
INTEGRATION KNOBS
|
||||
|
||||
============================================================================*/
|
||||
//
|
||||
// FXAA_PS3 and FXAA_360 choose the console algorithm (FXAA3 CONSOLE).
|
||||
// FXAA_360_OPT is a prototype for the new optimized 360 version.
|
||||
//
|
||||
// 1 = Use API.
|
||||
// 0 = Don't use API.
|
||||
//
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_PS3
|
||||
#define FXAA_PS3 0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_360
|
||||
#define FXAA_360 0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_360_OPT
|
||||
#define FXAA_360_OPT 0
|
||||
#endif
|
||||
/*==========================================================================*/
|
||||
#ifndef FXAA_PC
|
||||
//
|
||||
// FXAA Quality
|
||||
// The high quality PC algorithm.
|
||||
//
|
||||
#define FXAA_PC 0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_PC_CONSOLE
|
||||
//
|
||||
// The console algorithm for PC is included
|
||||
// for developers targeting really low spec machines.
|
||||
// Likely better to just run FXAA_PC, and use a really low preset.
|
||||
//
|
||||
#define FXAA_PC_CONSOLE 0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_GLSL_120
|
||||
#define FXAA_GLSL_120 0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_GLSL_130
|
||||
#define FXAA_GLSL_130 0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_HLSL_3
|
||||
#define FXAA_HLSL_3 0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_HLSL_4
|
||||
#define FXAA_HLSL_4 0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_HLSL_5
|
||||
#define FXAA_HLSL_5 0
|
||||
#endif
|
||||
/*==========================================================================*/
|
||||
#ifndef FXAA_GREEN_AS_LUMA
|
||||
//
|
||||
// For those using non-linear color,
|
||||
// and either not able to get luma in alpha, or not wanting to,
|
||||
// this enables FXAA to run using green as a proxy for luma.
|
||||
// So with this enabled, no need to pack luma in alpha.
|
||||
//
|
||||
// This will turn off AA on anything which lacks some amount of green.
|
||||
// Pure red and blue or combination of only R and B, will get no AA.
|
||||
//
|
||||
// Might want to lower the settings for both,
|
||||
// fxaaConsoleEdgeThresholdMin
|
||||
// fxaaQualityEdgeThresholdMin
|
||||
// In order to ensure AA does not get turned off on colors
|
||||
// which contain a minor amount of green.
|
||||
//
|
||||
// 1 = On.
|
||||
// 0 = Off.
|
||||
//
|
||||
#define FXAA_GREEN_AS_LUMA 0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_EARLY_EXIT
|
||||
//
|
||||
// Controls algorithm's early exit path.
|
||||
// On PS3 turning this ON adds 2 cycles to the shader.
|
||||
// On 360 turning this OFF adds 10ths of a millisecond to the shader.
|
||||
// Turning this off on console will result in a more blurry image.
|
||||
// So this defaults to on.
|
||||
//
|
||||
// 1 = On.
|
||||
// 0 = Off.
|
||||
//
|
||||
#define FXAA_EARLY_EXIT 1
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_DISCARD
|
||||
//
|
||||
// Only valid for PC OpenGL currently.
|
||||
// Probably will not work when FXAA_GREEN_AS_LUMA = 1.
|
||||
//
|
||||
// 1 = Use discard on pixels which don't need AA.
|
||||
// For APIs which enable concurrent TEX+ROP from same surface.
|
||||
// 0 = Return unchanged color on pixels which don't need AA.
|
||||
//
|
||||
#define FXAA_DISCARD 0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_FAST_PIXEL_OFFSET
|
||||
//
|
||||
// Used for GLSL 120 only.
|
||||
//
|
||||
// 1 = GL API supports fast pixel offsets
|
||||
// 0 = do not use fast pixel offsets
|
||||
//
|
||||
#ifdef GL_EXT_gpu_shader4
|
||||
#define FXAA_FAST_PIXEL_OFFSET 1
|
||||
#endif
|
||||
#ifdef GL_NV_gpu_shader5
|
||||
#define FXAA_FAST_PIXEL_OFFSET 1
|
||||
#endif
|
||||
#ifdef GL_ARB_gpu_shader5
|
||||
#define FXAA_FAST_PIXEL_OFFSET 1
|
||||
#endif
|
||||
#ifndef FXAA_FAST_PIXEL_OFFSET
|
||||
#define FXAA_FAST_PIXEL_OFFSET 0
|
||||
#endif
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_GATHER4_ALPHA
|
||||
//
|
||||
// 1 = API supports gather4 on alpha channel.
|
||||
// 0 = API does not support gather4 on alpha channel.
|
||||
//
|
||||
#if (FXAA_HLSL_5 == 1)
|
||||
#define FXAA_GATHER4_ALPHA 1
|
||||
#endif
|
||||
#ifdef GL_ARB_gpu_shader5
|
||||
#define FXAA_GATHER4_ALPHA 1
|
||||
#endif
|
||||
#ifdef GL_NV_gpu_shader5
|
||||
#define FXAA_GATHER4_ALPHA 1
|
||||
#endif
|
||||
#ifndef FXAA_GATHER4_ALPHA
|
||||
#define FXAA_GATHER4_ALPHA 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*============================================================================
|
||||
FXAA CONSOLE PS3 - TUNING KNOBS
|
||||
============================================================================*/
|
||||
#ifndef FXAA_CONSOLE__PS3_EDGE_SHARPNESS
|
||||
//
|
||||
// Controls the sharpness of edges on PS3 only.
|
||||
// Non-PS3 tuning is done with shader input.
|
||||
//
|
||||
// Due to the PS3 being ALU bound,
|
||||
// there are only two safe values here: 4 and 8.
|
||||
// These options use the shader's ability to do a free *|/ by 2|4|8.
|
||||
//
|
||||
// 8.0 is sharper
|
||||
// 4.0 is softer
|
||||
// 2.0 is really soft (good for vector graphics inputs)
|
||||
//
|
||||
#if 1
|
||||
#define FXAA_CONSOLE__PS3_EDGE_SHARPNESS 8.0
|
||||
#endif
|
||||
#if 0
|
||||
#define FXAA_CONSOLE__PS3_EDGE_SHARPNESS 4.0
|
||||
#endif
|
||||
#if 0
|
||||
#define FXAA_CONSOLE__PS3_EDGE_SHARPNESS 2.0
|
||||
#endif
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_CONSOLE__PS3_EDGE_THRESHOLD
|
||||
//
|
||||
// Only affects PS3.
|
||||
// Non-PS3 tuning is done with shader input.
|
||||
//
|
||||
// The minimum amount of local contrast required to apply algorithm.
|
||||
// The console setting has a different mapping than the quality setting.
|
||||
//
|
||||
// This only applies when FXAA_EARLY_EXIT is 1.
|
||||
//
|
||||
// Due to the PS3 being ALU bound,
|
||||
// there are only two safe values here: 0.25 and 0.125.
|
||||
// These options use the shader's ability to do a free *|/ by 2|4|8.
|
||||
//
|
||||
// 0.125 leaves less aliasing, but is softer
|
||||
// 0.25 leaves more aliasing, and is sharper
|
||||
//
|
||||
#if 1
|
||||
#define FXAA_CONSOLE__PS3_EDGE_THRESHOLD 0.125
|
||||
#else
|
||||
#define FXAA_CONSOLE__PS3_EDGE_THRESHOLD 0.25
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*============================================================================
|
||||
FXAA QUALITY - TUNING KNOBS
|
||||
------------------------------------------------------------------------------
|
||||
NOTE the other tuning knobs are now in the shader function inputs!
|
||||
============================================================================*/
|
||||
#ifndef FXAA_QUALITY__PRESET
|
||||
//
|
||||
// Choose the quality preset.
|
||||
// This needs to be compiled into the shader as it affects code.
|
||||
// Best option to include multiple presets is to
|
||||
// in each shader define the preset, then include this file.
|
||||
//
|
||||
// OPTIONS
|
||||
// -----------------------------------------------------------------------
|
||||
// 10 to 15 - default medium dither (10=fastest, 15=highest quality)
|
||||
// 20 to 29 - less dither, more expensive (20=fastest, 29=highest quality)
|
||||
// 39 - no dither, very expensive
|
||||
//
|
||||
// NOTES
|
||||
// -----------------------------------------------------------------------
|
||||
// 12 = slightly faster than FXAA 3.9 and higher edge quality (default)
|
||||
// 13 = about same speed as FXAA 3.9 and better than 12
|
||||
// 23 = closest to FXAA 3.9 visually and performance wise
|
||||
// _ = the lowest digit is directly related to performance
|
||||
// _ = the highest digit is directly related to style
|
||||
//
|
||||
#define FXAA_QUALITY__PRESET 12
|
||||
#endif
|
||||
|
||||
|
||||
/*============================================================================
|
||||
|
||||
FXAA QUALITY - PRESETS
|
||||
|
||||
============================================================================*/
|
||||
|
||||
/*============================================================================
|
||||
FXAA QUALITY - MEDIUM DITHER PRESETS
|
||||
============================================================================*/
|
||||
#if (FXAA_QUALITY__PRESET == 10)
|
||||
#define FXAA_QUALITY__PS 3
|
||||
#define FXAA_QUALITY__P0 1.5
|
||||
#define FXAA_QUALITY__P1 3.0
|
||||
#define FXAA_QUALITY__P2 12.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 11)
|
||||
#define FXAA_QUALITY__PS 4
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 3.0
|
||||
#define FXAA_QUALITY__P3 12.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 12)
|
||||
#define FXAA_QUALITY__PS 5
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 4.0
|
||||
#define FXAA_QUALITY__P4 12.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 13)
|
||||
#define FXAA_QUALITY__PS 6
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 4.0
|
||||
#define FXAA_QUALITY__P5 12.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 14)
|
||||
#define FXAA_QUALITY__PS 7
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 2.0
|
||||
#define FXAA_QUALITY__P5 4.0
|
||||
#define FXAA_QUALITY__P6 12.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 15)
|
||||
#define FXAA_QUALITY__PS 8
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 2.0
|
||||
#define FXAA_QUALITY__P5 2.0
|
||||
#define FXAA_QUALITY__P6 4.0
|
||||
#define FXAA_QUALITY__P7 12.0
|
||||
#endif
|
||||
|
||||
/*============================================================================
|
||||
FXAA QUALITY - LOW DITHER PRESETS
|
||||
============================================================================*/
|
||||
#if (FXAA_QUALITY__PRESET == 20)
|
||||
#define FXAA_QUALITY__PS 3
|
||||
#define FXAA_QUALITY__P0 1.5
|
||||
#define FXAA_QUALITY__P1 2.0
|
||||
#define FXAA_QUALITY__P2 8.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 21)
|
||||
#define FXAA_QUALITY__PS 4
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 8.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 22)
|
||||
#define FXAA_QUALITY__PS 5
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 8.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 23)
|
||||
#define FXAA_QUALITY__PS 6
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 2.0
|
||||
#define FXAA_QUALITY__P5 8.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 24)
|
||||
#define FXAA_QUALITY__PS 7
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 2.0
|
||||
#define FXAA_QUALITY__P5 3.0
|
||||
#define FXAA_QUALITY__P6 8.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 25)
|
||||
#define FXAA_QUALITY__PS 8
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 2.0
|
||||
#define FXAA_QUALITY__P5 2.0
|
||||
#define FXAA_QUALITY__P6 4.0
|
||||
#define FXAA_QUALITY__P7 8.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 26)
|
||||
#define FXAA_QUALITY__PS 9
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 2.0
|
||||
#define FXAA_QUALITY__P5 2.0
|
||||
#define FXAA_QUALITY__P6 2.0
|
||||
#define FXAA_QUALITY__P7 4.0
|
||||
#define FXAA_QUALITY__P8 8.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 27)
|
||||
#define FXAA_QUALITY__PS 10
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 2.0
|
||||
#define FXAA_QUALITY__P5 2.0
|
||||
#define FXAA_QUALITY__P6 2.0
|
||||
#define FXAA_QUALITY__P7 2.0
|
||||
#define FXAA_QUALITY__P8 4.0
|
||||
#define FXAA_QUALITY__P9 8.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 28)
|
||||
#define FXAA_QUALITY__PS 11
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 2.0
|
||||
#define FXAA_QUALITY__P5 2.0
|
||||
#define FXAA_QUALITY__P6 2.0
|
||||
#define FXAA_QUALITY__P7 2.0
|
||||
#define FXAA_QUALITY__P8 2.0
|
||||
#define FXAA_QUALITY__P9 4.0
|
||||
#define FXAA_QUALITY__P10 8.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 29)
|
||||
#define FXAA_QUALITY__PS 12
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 2.0
|
||||
#define FXAA_QUALITY__P5 2.0
|
||||
#define FXAA_QUALITY__P6 2.0
|
||||
#define FXAA_QUALITY__P7 2.0
|
||||
#define FXAA_QUALITY__P8 2.0
|
||||
#define FXAA_QUALITY__P9 2.0
|
||||
#define FXAA_QUALITY__P10 4.0
|
||||
#define FXAA_QUALITY__P11 8.0
|
||||
#endif
|
||||
|
||||
/*============================================================================
|
||||
FXAA QUALITY - EXTREME QUALITY
|
||||
============================================================================*/
|
||||
#if (FXAA_QUALITY__PRESET == 39)
|
||||
#define FXAA_QUALITY__PS 12
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.0
|
||||
#define FXAA_QUALITY__P2 1.0
|
||||
#define FXAA_QUALITY__P3 1.0
|
||||
#define FXAA_QUALITY__P4 1.0
|
||||
#define FXAA_QUALITY__P5 1.5
|
||||
#define FXAA_QUALITY__P6 2.0
|
||||
#define FXAA_QUALITY__P7 2.0
|
||||
#define FXAA_QUALITY__P8 2.0
|
||||
#define FXAA_QUALITY__P9 2.0
|
||||
#define FXAA_QUALITY__P10 4.0
|
||||
#define FXAA_QUALITY__P11 8.0
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*============================================================================
|
||||
|
||||
API PORTING
|
||||
|
||||
============================================================================*/
|
||||
#if (FXAA_GLSL_120 == 1) || (FXAA_GLSL_130 == 1)
|
||||
#define FxaaBool bool
|
||||
#define FxaaDiscard discard
|
||||
#define FxaaFloat float
|
||||
#define FxaaFloat2 vec2
|
||||
#define FxaaFloat3 vec3
|
||||
#define FxaaFloat4 vec4
|
||||
#define FxaaHalf float
|
||||
#define FxaaHalf2 vec2
|
||||
#define FxaaHalf3 vec3
|
||||
#define FxaaHalf4 vec4
|
||||
#define FxaaInt2 ivec2
|
||||
#define FxaaSat(x) clamp(x, 0.0, 1.0)
|
||||
#define FxaaTex sampler2D
|
||||
#else
|
||||
#define FxaaBool bool
|
||||
#define FxaaDiscard clip(-1)
|
||||
#define FxaaFloat float
|
||||
#define FxaaFloat2 float2
|
||||
#define FxaaFloat3 float3
|
||||
#define FxaaFloat4 float4
|
||||
#define FxaaHalf half
|
||||
#define FxaaHalf2 half2
|
||||
#define FxaaHalf3 half3
|
||||
#define FxaaHalf4 half4
|
||||
#define FxaaSat(x) saturate(x)
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_GLSL_120 == 1)
|
||||
// Requires,
|
||||
// #version 120
|
||||
// And at least,
|
||||
// #extension GL_EXT_gpu_shader4 : enable
|
||||
// (or set FXAA_FAST_PIXEL_OFFSET 0 to work like DX9)
|
||||
#define FxaaTexTop(t, p) texture2DLod(t, p, 0.0)
|
||||
#if (FXAA_FAST_PIXEL_OFFSET == 1)
|
||||
#define FxaaTexOff(t, p, o, r) texture2DLodOffset(t, p, 0.0, o)
|
||||
#else
|
||||
#define FxaaTexOff(t, p, o, r) texture2DLod(t, p + (o * r), 0.0)
|
||||
#endif
|
||||
#if (FXAA_GATHER4_ALPHA == 1)
|
||||
// use #extension GL_ARB_gpu_shader5 : enable
|
||||
#define FxaaTexAlpha4(t, p) textureGather(t, p, 3)
|
||||
#define FxaaTexOffAlpha4(t, p, o) textureGatherOffset(t, p, o, 3)
|
||||
#define FxaaTexGreen4(t, p) textureGather(t, p, 1)
|
||||
#define FxaaTexOffGreen4(t, p, o) textureGatherOffset(t, p, o, 1)
|
||||
#endif
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_GLSL_130 == 1)
|
||||
// Requires "#version 130" or better
|
||||
#define FxaaTexTop(t, p) textureLod(t, p, 0.0)
|
||||
#define FxaaTexOff(t, p, o, r) textureLodOffset(t, p, 0.0, o)
|
||||
#if (FXAA_GATHER4_ALPHA == 1)
|
||||
// use #extension GL_ARB_gpu_shader5 : enable
|
||||
#define FxaaTexAlpha4(t, p) textureGather(t, p, 3)
|
||||
#define FxaaTexOffAlpha4(t, p, o) textureGatherOffset(t, p, o, 3)
|
||||
#define FxaaTexGreen4(t, p) textureGather(t, p, 1)
|
||||
#define FxaaTexOffGreen4(t, p, o) textureGatherOffset(t, p, o, 1)
|
||||
#endif
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_HLSL_3 == 1) || (FXAA_360 == 1) || (FXAA_PS3 == 1)
|
||||
#define FxaaInt2 float2
|
||||
#define FxaaTex sampler2D
|
||||
#define FxaaTexTop(t, p) tex2Dlod(t, float4(p, 0.0, 0.0))
|
||||
#define FxaaTexOff(t, p, o, r) tex2Dlod(t, float4(p + (o * r), 0, 0))
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_HLSL_4 == 1)
|
||||
#define FxaaInt2 int2
|
||||
struct FxaaTex { SamplerState smpl; Texture2D tex; };
|
||||
#define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0)
|
||||
#define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o)
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_HLSL_5 == 1)
|
||||
#define FxaaInt2 int2
|
||||
struct FxaaTex { SamplerState smpl; Texture2D tex; };
|
||||
#define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0)
|
||||
#define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o)
|
||||
#define FxaaTexAlpha4(t, p) t.tex.GatherAlpha(t.smpl, p)
|
||||
#define FxaaTexOffAlpha4(t, p, o) t.tex.GatherAlpha(t.smpl, p, o)
|
||||
#define FxaaTexGreen4(t, p) t.tex.GatherGreen(t.smpl, p)
|
||||
#define FxaaTexOffGreen4(t, p, o) t.tex.GatherGreen(t.smpl, p, o)
|
||||
#endif
|
||||
|
||||
|
||||
/*============================================================================
|
||||
GREEN AS LUMA OPTION SUPPORT FUNCTION
|
||||
============================================================================*/
|
||||
#if (FXAA_GREEN_AS_LUMA == 0)
|
||||
//FxaaFloat FxaaLuma(FxaaFloat4 rgba) { return rgba.w; }
|
||||
// Luma for the default (non green-as-luma) configuration: a weighted sum of
// the R, G and B channels (0.299 / 0.587 / 0.114). The alpha channel of the
// input is not consulted.
FxaaFloat FxaaLuma(FxaaFloat4 rgba) {
    FxaaFloat3 lumaWeights = FxaaFloat3(0.299, 0.587, 0.114);
    return dot(rgba.rgb, lumaWeights);
}
|
||||
#else
|
||||
// Luma for the green-as-luma configuration: the green channel of the input
// is returned unchanged and used in place of a computed luma.
FxaaFloat FxaaLuma(FxaaFloat4 rgba) {
    FxaaFloat greenAsLuma = rgba.y;
    return greenAsLuma;
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
/*============================================================================
|
||||
|
||||
FXAA3 QUALITY - PC
|
||||
|
||||
============================================================================*/
|
||||
#if (FXAA_PC == 1)
|
||||
/*--------------------------------------------------------------------------*/
|
||||
FxaaFloat4 FxaaPixelShader(
|
||||
//
|
||||
// Use noperspective interpolation here (turn off perspective interpolation).
|
||||
// {xy} = center of pixel
|
||||
FxaaFloat2 pos,
|
||||
//
|
||||
// Used only for FXAA Console, and not used on the 360 version.
|
||||
// Use noperspective interpolation here (turn off perspective interpolation).
|
||||
// {xy__} = upper left of pixel
|
||||
// {__zw} = lower right of pixel
|
||||
FxaaFloat4 fxaaConsolePosPos,
|
||||
//
|
||||
// Input color texture.
|
||||
// {rgb_} = color in linear or perceptual color space
|
||||
// if (FXAA_GREEN_AS_LUMA == 0)
|
||||
// {___a} = luma in perceptual color space (not linear)
|
||||
FxaaTex tex,
|
||||
//
|
||||
// Only used on the optimized 360 version of FXAA Console.
|
||||
// For everything but 360, just use the same input here as for "tex".
|
||||
// For 360, same texture, just alias with a 2nd sampler.
|
||||
// This sampler needs to have an exponent bias of -1.
|
||||
FxaaTex fxaaConsole360TexExpBiasNegOne,
|
||||
//
|
||||
// Only used on the optimized 360 version of FXAA Console.
|
||||
// For everything but 360, just use the same input here as for "tex".
|
||||
// For 360, same texture, just alias with a 3rd sampler.
|
||||
// This sampler needs to have an exponent bias of -2.
|
||||
FxaaTex fxaaConsole360TexExpBiasNegTwo,
|
||||
//
|
||||
// Only used on FXAA Quality.
|
||||
// This must be from a constant/uniform.
|
||||
// {x_} = 1.0/screenWidthInPixels
|
||||
// {_y} = 1.0/screenHeightInPixels
|
||||
FxaaFloat2 fxaaQualityRcpFrame,
|
||||
//
|
||||
// Only used on FXAA Console.
|
||||
// This must be from a constant/uniform.
|
||||
// This affects sub-pixel AA quality and inversely sharpness.
|
||||
// Where N ranges between,
|
||||
// N = 0.50 (default)
|
||||
// N = 0.33 (sharper)
|
||||
// {x___} = -N/screenWidthInPixels
|
||||
// {_y__} = -N/screenHeightInPixels
|
||||
// {__z_} = N/screenWidthInPixels
|
||||
// {___w} = N/screenHeightInPixels
|
||||
FxaaFloat4 fxaaConsoleRcpFrameOpt,
|
||||
//
|
||||
// Only used on FXAA Console.
|
||||
// Not used on 360, but used on PS3 and PC.
|
||||
// This must be from a constant/uniform.
|
||||
// {x___} = -2.0/screenWidthInPixels
|
||||
// {_y__} = -2.0/screenHeightInPixels
|
||||
// {__z_} = 2.0/screenWidthInPixels
|
||||
// {___w} = 2.0/screenHeightInPixels
|
||||
FxaaFloat4 fxaaConsoleRcpFrameOpt2,
|
||||
//
|
||||
// Only used on FXAA Console.
|
||||
// Only used on 360 in place of fxaaConsoleRcpFrameOpt2.
|
||||
// This must be from a constant/uniform.
|
||||
// {x___} = 8.0/screenWidthInPixels
|
||||
// {_y__} = 8.0/screenHeightInPixels
|
||||
// {__z_} = -4.0/screenWidthInPixels
|
||||
// {___w} = -4.0/screenHeightInPixels
|
||||
FxaaFloat4 fxaaConsole360RcpFrameOpt2,
|
||||
//
|
||||
// Only used on FXAA Quality.
|
||||
// This used to be the FXAA_QUALITY__SUBPIX define.
|
||||
// It is here now to allow easier tuning.
|
||||
// Choose the amount of sub-pixel aliasing removal.
|
||||
// This can affect sharpness.
|
||||
// 1.00 - upper limit (softer)
|
||||
// 0.75 - default amount of filtering
|
||||
// 0.50 - lower limit (sharper, less sub-pixel aliasing removal)
|
||||
// 0.25 - almost off
|
||||
// 0.00 - completely off
|
||||
FxaaFloat fxaaQualitySubpix,
|
||||
//
|
||||
// Only used on FXAA Quality.
|
||||
// This used to be the FXAA_QUALITY__EDGE_THRESHOLD define.
|
||||
// It is here now to allow easier tuning.
|
||||
// The minimum amount of local contrast required to apply algorithm.
|
||||
// 0.333 - too little (faster)
|
||||
// 0.250 - low quality
|
||||
// 0.166 - default
|
||||
// 0.125 - high quality
|
||||
// 0.063 - overkill (slower)
|
||||
FxaaFloat fxaaQualityEdgeThreshold,
|
||||
//
|
||||
// Only used on FXAA Quality.
|
||||
// This used to be the FXAA_QUALITY__EDGE_THRESHOLD_MIN define.
|
||||
// It is here now to allow easier tuning.
|
||||
// Trims the algorithm from processing darks.
|
||||
// 0.0833 - upper limit (default, the start of visible unfiltered edges)
|
||||
// 0.0625 - high quality (faster)
|
||||
// 0.0312 - visible limit (slower)
|
||||
// Special notes when using FXAA_GREEN_AS_LUMA,
|
||||
// Likely want to set this to zero.
|
||||
// As colors that are mostly not-green
|
||||
// will appear very dark in the green channel!
|
||||
// Tune by looking at mostly non-green content,
|
||||
// then start at zero and increase until aliasing is a problem.
|
||||
FxaaFloat fxaaQualityEdgeThresholdMin,
|
||||
//
|
||||
// Only used on FXAA Console.
|
||||
// This used to be the FXAA_CONSOLE__EDGE_SHARPNESS define.
|
||||
// It is here now to allow easier tuning.
|
||||
// This does not affect PS3, as this needs to be compiled in.
|
||||
// Use FXAA_CONSOLE__PS3_EDGE_SHARPNESS for PS3.
|
||||
// Due to the PS3 being ALU bound,
|
||||
// there are only three safe values here: 2 and 4 and 8.
|
||||
// These options use the shader's ability to do a free *|/ by 2|4|8.
|
||||
// For all other platforms can be a non-power of two.
|
||||
// 8.0 is sharper (default!!!)
|
||||
// 4.0 is softer
|
||||
// 2.0 is really soft (good only for vector graphics inputs)
|
||||
FxaaFloat fxaaConsoleEdgeSharpness,
|
||||
//
|
||||
// Only used on FXAA Console.
|
||||
// This used to be the FXAA_CONSOLE__EDGE_THRESHOLD define.
|
||||
// It is here now to allow easier tuning.
|
||||
// This does not affect PS3, as this needs to be compiled in.
|
||||
// Use FXAA_CONSOLE__PS3_EDGE_THRESHOLD for PS3.
|
||||
// Due to the PS3 being ALU bound,
|
||||
// there are only two safe values here: 1/4 and 1/8.
|
||||
// These options use the shader's ability to do a free *|/ by 2|4|8.
|
||||
// The console setting has a different mapping than the quality setting.
|
||||
// Other platforms can use other values.
|
||||
// 0.125 leaves less aliasing, but is softer (default!!!)
|
||||
// 0.25 leaves more aliasing, and is sharper
|
||||
FxaaFloat fxaaConsoleEdgeThreshold,
|
||||
//
|
||||
// Only used on FXAA Console.
|
||||
// This used to be the FXAA_CONSOLE__EDGE_THRESHOLD_MIN define.
|
||||
// It is here now to allow easier tuning.
|
||||
// Trims the algorithm from processing darks.
|
||||
// The console setting has a different mapping than the quality setting.
|
||||
// This only applies when FXAA_EARLY_EXIT is 1.
|
||||
// This does not apply to PS3,
|
||||
// PS3 was simplified to avoid more shader instructions.
|
||||
// 0.06 - faster but more aliasing in darks
|
||||
// 0.05 - default
|
||||
// 0.04 - slower and less aliasing in darks
|
||||
// Special notes when using FXAA_GREEN_AS_LUMA,
|
||||
// Likely want to set this to zero.
|
||||
// As colors that are mostly not-green
|
||||
// will appear very dark in the green channel!
|
||||
// Tune by looking at mostly non-green content,
|
||||
// then start at zero and increase until aliasing is a problem.
|
||||
FxaaFloat fxaaConsoleEdgeThresholdMin,
|
||||
//
|
||||
// Extra constants for 360 FXAA Console only.
|
||||
// Use zeros or anything else for other platforms.
|
||||
// These must be in physical constant registers and NOT immediates.
|
||||
// Immediates will result in compiler un-optimizing.
|
||||
// {xyzw} = float4(1.0, -1.0, 0.25, -0.25)
|
||||
FxaaFloat4 fxaaConsole360ConstDir
|
||||
) {
|
||||
/*--------------------------------------------------------------------------*/
|
||||
FxaaFloat2 posM;
|
||||
posM.x = pos.x;
|
||||
posM.y = pos.y;
|
||||
#if (FXAA_GATHER4_ALPHA == 1)
|
||||
#if (FXAA_DISCARD == 0)
|
||||
FxaaFloat4 rgbyM = FxaaTexTop(tex, posM);
|
||||
#if (FXAA_GREEN_AS_LUMA == 0)
|
||||
#define lumaM dot(rgbyM.rgb, FxaaFloat3(0.299, 0.587, 0.114))
|
||||
#else
|
||||
#define lumaM rgbyM.y
|
||||
#endif
|
||||
#endif
|
||||
#if (FXAA_GREEN_AS_LUMA == 0)
|
||||
FxaaFloat4 luma4A = FxaaTexAlpha4(tex, posM);
|
||||
FxaaFloat4 luma4B = FxaaTexOffAlpha4(tex, posM, FxaaInt2(-1, -1));
|
||||
#else
|
||||
FxaaFloat4 luma4A = FxaaTexGreen4(tex, posM);
|
||||
FxaaFloat4 luma4B = FxaaTexOffGreen4(tex, posM, FxaaInt2(-1, -1));
|
||||
#endif
|
||||
#if (FXAA_DISCARD == 1)
|
||||
#define lumaM luma4A.w
|
||||
#endif
|
||||
#define lumaE luma4A.z
|
||||
#define lumaS luma4A.x
|
||||
#define lumaSE luma4A.y
|
||||
#define lumaNW luma4B.w
|
||||
#define lumaN luma4B.z
|
||||
#define lumaW luma4B.x
|
||||
#else
|
||||
FxaaFloat4 rgbyM = FxaaTexTop(tex, posM);
|
||||
#if (FXAA_GREEN_AS_LUMA == 0)
|
||||
#define lumaM dot(rgbyM.rgb, FxaaFloat3(0.299, 0.587, 0.114))
|
||||
#else
|
||||
#define lumaM rgbyM.y
|
||||
#endif
|
||||
FxaaFloat lumaS = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 0, 1), fxaaQualityRcpFrame.xy));
|
||||
FxaaFloat lumaE = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 1, 0), fxaaQualityRcpFrame.xy));
|
||||
FxaaFloat lumaN = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 0,-1), fxaaQualityRcpFrame.xy));
|
||||
FxaaFloat lumaW = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(-1, 0), fxaaQualityRcpFrame.xy));
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
FxaaFloat maxSM = max(lumaS, lumaM);
|
||||
FxaaFloat minSM = min(lumaS, lumaM);
|
||||
FxaaFloat maxESM = max(lumaE, maxSM);
|
||||
FxaaFloat minESM = min(lumaE, minSM);
|
||||
FxaaFloat maxWN = max(lumaN, lumaW);
|
||||
FxaaFloat minWN = min(lumaN, lumaW);
|
||||
FxaaFloat rangeMax = max(maxWN, maxESM);
|
||||
FxaaFloat rangeMin = min(minWN, minESM);
|
||||
FxaaFloat rangeMaxScaled = rangeMax * fxaaQualityEdgeThreshold;
|
||||
FxaaFloat range = rangeMax - rangeMin;
|
||||
FxaaFloat rangeMaxClamped = max(fxaaQualityEdgeThresholdMin, rangeMaxScaled);
|
||||
FxaaBool earlyExit = range < rangeMaxClamped;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
if(earlyExit)
|
||||
#if (FXAA_DISCARD == 1)
|
||||
FxaaDiscard;
|
||||
#else
|
||||
return rgbyM;
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_GATHER4_ALPHA == 0)
|
||||
FxaaFloat lumaNW = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(-1,-1), fxaaQualityRcpFrame.xy));
|
||||
FxaaFloat lumaSE = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 1, 1), fxaaQualityRcpFrame.xy));
|
||||
FxaaFloat lumaNE = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 1,-1), fxaaQualityRcpFrame.xy));
|
||||
FxaaFloat lumaSW = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(-1, 1), fxaaQualityRcpFrame.xy));
|
||||
#else
|
||||
FxaaFloat lumaNE = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(1, -1), fxaaQualityRcpFrame.xy));
|
||||
FxaaFloat lumaSW = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(-1, 1), fxaaQualityRcpFrame.xy));
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
FxaaFloat lumaNS = lumaN + lumaS;
|
||||
FxaaFloat lumaWE = lumaW + lumaE;
|
||||
FxaaFloat subpixRcpRange = 1.0/range;
|
||||
FxaaFloat subpixNSWE = lumaNS + lumaWE;
|
||||
FxaaFloat edgeHorz1 = (-2.0 * lumaM) + lumaNS;
|
||||
FxaaFloat edgeVert1 = (-2.0 * lumaM) + lumaWE;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
FxaaFloat lumaNESE = lumaNE + lumaSE;
|
||||
FxaaFloat lumaNWNE = lumaNW + lumaNE;
|
||||
FxaaFloat edgeHorz2 = (-2.0 * lumaE) + lumaNESE;
|
||||
FxaaFloat edgeVert2 = (-2.0 * lumaN) + lumaNWNE;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
FxaaFloat lumaNWSW = lumaNW + lumaSW;
|
||||
FxaaFloat lumaSWSE = lumaSW + lumaSE;
|
||||
FxaaFloat edgeHorz4 = (abs(edgeHorz1) * 2.0) + abs(edgeHorz2);
|
||||
FxaaFloat edgeVert4 = (abs(edgeVert1) * 2.0) + abs(edgeVert2);
|
||||
FxaaFloat edgeHorz3 = (-2.0 * lumaW) + lumaNWSW;
|
||||
FxaaFloat edgeVert3 = (-2.0 * lumaS) + lumaSWSE;
|
||||
FxaaFloat edgeHorz = abs(edgeHorz3) + edgeHorz4;
|
||||
FxaaFloat edgeVert = abs(edgeVert3) + edgeVert4;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
FxaaFloat subpixNWSWNESE = lumaNWSW + lumaNESE;
|
||||
FxaaFloat lengthSign = fxaaQualityRcpFrame.x;
|
||||
FxaaBool horzSpan = edgeHorz >= edgeVert;
|
||||
FxaaFloat subpixA = subpixNSWE * 2.0 + subpixNWSWNESE;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
if(!horzSpan) lumaN = lumaW;
|
||||
if(!horzSpan) lumaS = lumaE;
|
||||
if(horzSpan) lengthSign = fxaaQualityRcpFrame.y;
|
||||
FxaaFloat subpixB = (subpixA * (1.0/12.0)) - lumaM;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
FxaaFloat gradientN = lumaN - lumaM;
|
||||
FxaaFloat gradientS = lumaS - lumaM;
|
||||
FxaaFloat lumaNN = lumaN + lumaM;
|
||||
FxaaFloat lumaSS = lumaS + lumaM;
|
||||
FxaaBool pairN = abs(gradientN) >= abs(gradientS);
|
||||
FxaaFloat gradient = max(abs(gradientN), abs(gradientS));
|
||||
if(pairN) lengthSign = -lengthSign;
|
||||
FxaaFloat subpixC = FxaaSat(abs(subpixB) * subpixRcpRange);
|
||||
/*--------------------------------------------------------------------------*/
|
||||
FxaaFloat2 posB;
|
||||
posB.x = posM.x;
|
||||
posB.y = posM.y;
|
||||
FxaaFloat2 offNP;
|
||||
offNP.x = (!horzSpan) ? 0.0 : fxaaQualityRcpFrame.x;
|
||||
offNP.y = ( horzSpan) ? 0.0 : fxaaQualityRcpFrame.y;
|
||||
if(!horzSpan) posB.x += lengthSign * 0.5;
|
||||
if( horzSpan) posB.y += lengthSign * 0.5;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
FxaaFloat2 posN;
|
||||
posN.x = posB.x - offNP.x * FXAA_QUALITY__P0;
|
||||
posN.y = posB.y - offNP.y * FXAA_QUALITY__P0;
|
||||
FxaaFloat2 posP;
|
||||
posP.x = posB.x + offNP.x * FXAA_QUALITY__P0;
|
||||
posP.y = posB.y + offNP.y * FXAA_QUALITY__P0;
|
||||
FxaaFloat subpixD = ((-2.0)*subpixC) + 3.0;
|
||||
FxaaFloat lumaEndN = FxaaLuma(FxaaTexTop(tex, posN));
|
||||
FxaaFloat subpixE = subpixC * subpixC;
|
||||
FxaaFloat lumaEndP = FxaaLuma(FxaaTexTop(tex, posP));
|
||||
/*--------------------------------------------------------------------------*/
|
||||
if(!pairN) lumaNN = lumaSS;
|
||||
FxaaFloat gradientScaled = gradient * 1.0/4.0;
|
||||
FxaaFloat lumaMM = lumaM - lumaNN * 0.5;
|
||||
FxaaFloat subpixF = subpixD * subpixE;
|
||||
FxaaBool lumaMLTZero = lumaMM < 0.0;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
lumaEndN -= lumaNN * 0.5;
|
||||
lumaEndP -= lumaNN * 0.5;
|
||||
FxaaBool doneN = abs(lumaEndN) >= gradientScaled;
|
||||
FxaaBool doneP = abs(lumaEndP) >= gradientScaled;
|
||||
if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P1;
|
||||
if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P1;
|
||||
FxaaBool doneNP = (!doneN) || (!doneP);
|
||||
if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P1;
|
||||
if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P1;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
if(doneNP) {
|
||||
if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
|
||||
if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
|
||||
if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
|
||||
if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
|
||||
doneN = abs(lumaEndN) >= gradientScaled;
|
||||
doneP = abs(lumaEndP) >= gradientScaled;
|
||||
if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P2;
|
||||
if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P2;
|
||||
doneNP = (!doneN) || (!doneP);
|
||||
if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P2;
|
||||
if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P2;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PS > 3)
|
||||
if(doneNP) {
|
||||
if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
|
||||
if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
|
||||
if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
|
||||
if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
|
||||
doneN = abs(lumaEndN) >= gradientScaled;
|
||||
doneP = abs(lumaEndP) >= gradientScaled;
|
||||
if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P3;
|
||||
if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P3;
|
||||
doneNP = (!doneN) || (!doneP);
|
||||
if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P3;
|
||||
if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P3;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PS > 4)
|
||||
if(doneNP) {
|
||||
if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
|
||||
if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
|
||||
if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
|
||||
if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
|
||||
doneN = abs(lumaEndN) >= gradientScaled;
|
||||
doneP = abs(lumaEndP) >= gradientScaled;
|
||||
if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P4;
|
||||
if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P4;
|
||||
doneNP = (!doneN) || (!doneP);
|
||||
if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P4;
|
||||
if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P4;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PS > 5)
|
||||
if(doneNP) {
|
||||
if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
|
||||
if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
|
||||
if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
|
||||
if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
|
||||
doneN = abs(lumaEndN) >= gradientScaled;
|
||||
doneP = abs(lumaEndP) >= gradientScaled;
|
||||
if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P5;
|
||||
if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P5;
|
||||
doneNP = (!doneN) || (!doneP);
|
||||
if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P5;
|
||||
if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P5;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PS > 6)
|
||||
if(doneNP) {
|
||||
if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
|
||||
if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
|
||||
if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
|
||||
if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
|
||||
doneN = abs(lumaEndN) >= gradientScaled;
|
||||
doneP = abs(lumaEndP) >= gradientScaled;
|
||||
if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P6;
|
||||
if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P6;
|
||||
doneNP = (!doneN) || (!doneP);
|
||||
if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P6;
|
||||
if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P6;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PS > 7)
|
||||
if(doneNP) {
|
||||
if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
|
||||
if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
|
||||
if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
|
||||
if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
|
||||
doneN = abs(lumaEndN) >= gradientScaled;
|
||||
doneP = abs(lumaEndP) >= gradientScaled;
|
||||
if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P7;
|
||||
if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P7;
|
||||
doneNP = (!doneN) || (!doneP);
|
||||
if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P7;
|
||||
if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P7;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PS > 8)
|
||||
if(doneNP) {
|
||||
if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
|
||||
if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
|
||||
if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
|
||||
if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
|
||||
doneN = abs(lumaEndN) >= gradientScaled;
|
||||
doneP = abs(lumaEndP) >= gradientScaled;
|
||||
if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P8;
|
||||
if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P8;
|
||||
doneNP = (!doneN) || (!doneP);
|
||||
if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P8;
|
||||
if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P8;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PS > 9)
|
||||
if(doneNP) {
|
||||
if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
|
||||
if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
|
||||
if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
|
||||
if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
|
||||
doneN = abs(lumaEndN) >= gradientScaled;
|
||||
doneP = abs(lumaEndP) >= gradientScaled;
|
||||
if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P9;
|
||||
if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P9;
|
||||
doneNP = (!doneN) || (!doneP);
|
||||
if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P9;
|
||||
if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P9;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PS > 10)
|
||||
if(doneNP) {
|
||||
if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
|
||||
if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
|
||||
if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
|
||||
if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
|
||||
doneN = abs(lumaEndN) >= gradientScaled;
|
||||
doneP = abs(lumaEndP) >= gradientScaled;
|
||||
if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P10;
|
||||
if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P10;
|
||||
doneNP = (!doneN) || (!doneP);
|
||||
if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P10;
|
||||
if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P10;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PS > 11)
|
||||
if(doneNP) {
|
||||
if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
|
||||
if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
|
||||
if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
|
||||
if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
|
||||
doneN = abs(lumaEndN) >= gradientScaled;
|
||||
doneP = abs(lumaEndP) >= gradientScaled;
|
||||
if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P11;
|
||||
if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P11;
|
||||
doneNP = (!doneN) || (!doneP);
|
||||
if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P11;
|
||||
if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P11;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PS > 12)
|
||||
if(doneNP) {
|
||||
if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
|
||||
if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
|
||||
if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
|
||||
if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
|
||||
doneN = abs(lumaEndN) >= gradientScaled;
|
||||
doneP = abs(lumaEndP) >= gradientScaled;
|
||||
if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P12;
|
||||
if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P12;
|
||||
doneNP = (!doneN) || (!doneP);
|
||||
if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P12;
|
||||
if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P12;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
}
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
}
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
}
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
}
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
}
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
}
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
}
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
}
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
}
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
}
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
}
|
||||
/*--------------------------------------------------------------------------*/
|
||||
FxaaFloat dstN = posM.x - posN.x;
|
||||
FxaaFloat dstP = posP.x - posM.x;
|
||||
if(!horzSpan) dstN = posM.y - posN.y;
|
||||
if(!horzSpan) dstP = posP.y - posM.y;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
FxaaBool goodSpanN = (lumaEndN < 0.0) != lumaMLTZero;
|
||||
FxaaFloat spanLength = (dstP + dstN);
|
||||
FxaaBool goodSpanP = (lumaEndP < 0.0) != lumaMLTZero;
|
||||
FxaaFloat spanLengthRcp = 1.0/spanLength;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
FxaaBool directionN = dstN < dstP;
|
||||
FxaaFloat dst = min(dstN, dstP);
|
||||
FxaaBool goodSpan = directionN ? goodSpanN : goodSpanP;
|
||||
FxaaFloat subpixG = subpixF * subpixF;
|
||||
FxaaFloat pixelOffset = (dst * (-spanLengthRcp)) + 0.5;
|
||||
FxaaFloat subpixH = subpixG * fxaaQualitySubpix;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
FxaaFloat pixelOffsetGood = goodSpan ? pixelOffset : 0.0;
|
||||
FxaaFloat pixelOffsetSubpix = max(pixelOffsetGood, subpixH);
|
||||
if(!horzSpan) posM.x += pixelOffsetSubpix * lengthSign;
|
||||
if( horzSpan) posM.y += pixelOffsetSubpix * lengthSign;
|
||||
#if (FXAA_DISCARD == 1)
|
||||
return FxaaTexTop(tex, posM);
|
||||
#else
|
||||
//return FxaaFloat4(FxaaTexTop(tex, posM).xyz, lumaM);
|
||||
return FxaaFloat4(FxaaTexTop(tex, posM));
|
||||
#endif
|
||||
}
|
||||
/*==========================================================================*/
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
/*============================================================================
|
||||
|
||||
FXAA3 CONSOLE - PC VERSION
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
Instead of using this on PC, I'd suggest just using FXAA Quality with
|
||||
#define FXAA_QUALITY__PRESET 10
|
||||
Or
|
||||
#define FXAA_QUALITY__PRESET 20
|
||||
Either is higher quality and almost as fast as this on modern PC GPUs.
|
||||
============================================================================*/
|
||||
#if (FXAA_PC_CONSOLE == 1)
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// FXAA3 Console pixel shader, PC variant.
//
// Samples four diagonal neighbours plus the centre texel, leaves the pixel
// untouched when the local luma contrast is below the threshold, and otherwise
// blends taps taken along the detected edge direction.
//
// Inputs actually read by this variant:
//   pos                         - centre sample position
//   fxaaConsolePosPos           - packed corner positions (.xy, .xw, .zy, .zw)
//   tex                         - input colour texture
//   fxaaConsoleRcpFrameOpt.zw   - step scale for the near pair of taps
//   fxaaConsoleRcpFrameOpt2.zw  - step scale for the far pair of taps
//   fxaaConsoleEdgeSharpness, fxaaConsoleEdgeThreshold,
//   fxaaConsoleEdgeThresholdMin - tuning values
// The remaining parameters are not referenced in this body; they only keep the
// signature identical across the FxaaPixelShader() variants.
//
// Returns the unmodified centre sample on early exit, otherwise the blended
// four-tap result (or the two-tap average when the four-tap luma leaves the
// corner luma range).
FxaaFloat4 FxaaPixelShader(
    // See FXAA Quality FxaaPixelShader() source for docs on Inputs!
    FxaaFloat2 pos,
    FxaaFloat4 fxaaConsolePosPos,
    FxaaTex tex,
    FxaaTex fxaaConsole360TexExpBiasNegOne,
    FxaaTex fxaaConsole360TexExpBiasNegTwo,
    FxaaFloat2 fxaaQualityRcpFrame,
    FxaaFloat4 fxaaConsoleRcpFrameOpt,
    FxaaFloat4 fxaaConsoleRcpFrameOpt2,
    FxaaFloat4 fxaaConsole360RcpFrameOpt2,
    FxaaFloat fxaaQualitySubpix,
    FxaaFloat fxaaQualityEdgeThreshold,
    FxaaFloat fxaaQualityEdgeThresholdMin,
    FxaaFloat fxaaConsoleEdgeSharpness,
    FxaaFloat fxaaConsoleEdgeThreshold,
    FxaaFloat fxaaConsoleEdgeThresholdMin,
    FxaaFloat4 fxaaConsole360ConstDir
) {
/*--------------------------------------------------------------------------*/
    // Luma at the four corner positions packed into fxaaConsolePosPos.
    FxaaFloat cornerNw = FxaaLuma(FxaaTexTop(tex, fxaaConsolePosPos.xy));
    FxaaFloat cornerSw = FxaaLuma(FxaaTexTop(tex, fxaaConsolePosPos.xw));
    FxaaFloat cornerNe = FxaaLuma(FxaaTexTop(tex, fxaaConsolePosPos.zy));
    FxaaFloat cornerSe = FxaaLuma(FxaaTexTop(tex, fxaaConsolePosPos.zw));
    // Small constant bias on the NE sample.
    // NOTE(review): presumably keeps the edge direction from collapsing to
    // exactly zero on symmetric neighbourhoods - confirm.
    cornerNe += 1.0/384.0;
/*--------------------------------------------------------------------------*/
    // Centre texel; its luma comes from alpha, or green when FXAA_GREEN_AS_LUMA.
    FxaaFloat4 centerRgby = FxaaTexTop(tex, pos.xy);
    #if (FXAA_GREEN_AS_LUMA == 0)
        FxaaFloat centerLuma = centerRgby.w;
    #else
        FxaaFloat centerLuma = centerRgby.y;
    #endif
/*--------------------------------------------------------------------------*/
    // Luma range over the four corners only (centre excluded).
    FxaaFloat cornerMax = max(max(cornerNe, cornerSe), max(cornerNw, cornerSw));
    FxaaFloat cornerMin = min(min(cornerNe, cornerSe), min(cornerNw, cornerSw));
/*--------------------------------------------------------------------------*/
    // Early exit: contrast including the centre is below the scaled threshold.
    FxaaFloat contrastFloor = max(fxaaConsoleEdgeThresholdMin, cornerMax * fxaaConsoleEdgeThreshold);
    FxaaFloat localContrast = max(cornerMax, centerLuma) - min(cornerMin, centerLuma);
    if(localContrast < contrastFloor) return centerRgby;
/*--------------------------------------------------------------------------*/
    // Edge direction from the two diagonal luma differences.
    FxaaFloat diagSwNe = cornerSw - cornerNe;
    FxaaFloat diagSeNw = cornerSe - cornerNw;
    FxaaFloat2 edgeDir = FxaaFloat2(diagSwNe + diagSeNw, diagSwNe - diagSeNw);
    FxaaFloat2 unitDir = normalize(edgeDir.xy);
/*--------------------------------------------------------------------------*/
    // Near pair of taps, one on each side of the centre along the direction.
    FxaaFloat2 nearStep = unitDir * fxaaConsoleRcpFrameOpt.zw;
    FxaaFloat4 nearSum = FxaaTexTop(tex, pos.xy - nearStep) + FxaaTexTop(tex, pos.xy + nearStep);
/*--------------------------------------------------------------------------*/
    // Far pair of taps: direction rescaled by its smaller component times the
    // sharpness, then clamped to [-2, 2] per component.
    FxaaFloat sharpScale = min(abs(unitDir.x), abs(unitDir.y)) * fxaaConsoleEdgeSharpness;
    FxaaFloat2 farStep = clamp(unitDir.xy / sharpScale, -2.0, 2.0) * fxaaConsoleRcpFrameOpt2.zw;
    FxaaFloat4 farSum = FxaaTexTop(tex, pos.xy - farStep) + FxaaTexTop(tex, pos.xy + farStep);
/*--------------------------------------------------------------------------*/
    // Four-tap average of both pairs.
    FxaaFloat4 blended = (farSum * 0.25) + (nearSum * 0.25);
/*--------------------------------------------------------------------------*/
    // If the four-tap luma falls outside the corner range, use only the near pair.
    #if (FXAA_GREEN_AS_LUMA == 0)
        FxaaBool useTwoTap = (blended.w < cornerMin) || (blended.w > cornerMax);
    #else
        FxaaBool useTwoTap = (blended.y < cornerMin) || (blended.y > cornerMax);
    #endif
    if(useTwoTap) blended.xyz = nearSum.xyz * 0.5;
    return blended;
}
|
||||
/*==========================================================================*/
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*============================================================================
|
||||
|
||||
FXAA3 CONSOLE - 360 PIXEL SHADER
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
This optimized version thanks to suggestions from Andy Luedke.
|
||||
Should be fully tex bound in all cases.
|
||||
As of the FXAA 3.11 release, I have still not tested this code,
|
||||
however I fixed a bug which was in both FXAA 3.9 and FXAA 3.10.
|
||||
And note this is replacing the old unoptimized version.
|
||||
If it does not work, please let me know so I can fix it.
|
||||
============================================================================*/
|
||||
#if (FXAA_360 == 1)
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// FXAA3 Console pixel shader, Xbox 360 variant.
//
// Fetches four diagonal luma samples around pos, returns the centre texel
// unchanged when the local luma contrast is below the threshold, and otherwise
// combines taps taken along the detected edge direction from the two
// exponent-biased samplers.
//
// Inputs actually read by this body: pos, tex, fxaaConsole360TexExpBiasNegOne,
// fxaaConsole360TexExpBiasNegTwo, fxaaConsoleRcpFrameOpt,
// fxaaConsole360RcpFrameOpt2, fxaaConsoleEdgeSharpness,
// fxaaConsoleEdgeThreshold, fxaaConsoleEdgeThresholdMin and
// fxaaConsole360ConstDir. The remaining parameters are not referenced here.
//
// Returns rgbyM on early exit, otherwise either the two-tap sum rgbyA or the
// four-tap sum rgbyB depending on where the luma of rgbyB falls relative to
// the corner luma range.
[reduceTempRegUsage(4)]
|
||||
float4 FxaaPixelShader(
|
||||
// See FXAA Quality FxaaPixelShader() source for docs on Inputs!
|
||||
FxaaFloat2 pos,
|
||||
FxaaFloat4 fxaaConsolePosPos,
|
||||
FxaaTex tex,
|
||||
FxaaTex fxaaConsole360TexExpBiasNegOne,
|
||||
FxaaTex fxaaConsole360TexExpBiasNegTwo,
|
||||
FxaaFloat2 fxaaQualityRcpFrame,
|
||||
FxaaFloat4 fxaaConsoleRcpFrameOpt,
|
||||
FxaaFloat4 fxaaConsoleRcpFrameOpt2,
|
||||
FxaaFloat4 fxaaConsole360RcpFrameOpt2,
|
||||
FxaaFloat fxaaQualitySubpix,
|
||||
FxaaFloat fxaaQualityEdgeThreshold,
|
||||
FxaaFloat fxaaQualityEdgeThresholdMin,
|
||||
FxaaFloat fxaaConsoleEdgeSharpness,
|
||||
FxaaFloat fxaaConsoleEdgeThreshold,
|
||||
FxaaFloat fxaaConsoleEdgeThresholdMin,
|
||||
FxaaFloat4 fxaaConsole360ConstDir
|
||||
) {
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// Four luma samples fetched at half-texel diagonal offsets from pos.
// NOTE(review): going by the variable name and the swizzle masks, the
// components are presumably x=NW, y=NE, z=SW, w=SE - confirm against the
// Xbox 360 tfetch2D documentation.
float4 lumaNwNeSwSe;
|
||||
// Luma is taken from alpha by default, or from green with FXAA_GREEN_AS_LUMA.
#if (FXAA_GREEN_AS_LUMA == 0)
|
||||
asm {
|
||||
tfetch2D lumaNwNeSwSe.w___, tex, pos.xy, OffsetX = -0.5, OffsetY = -0.5, UseComputedLOD=false
|
||||
tfetch2D lumaNwNeSwSe._w__, tex, pos.xy, OffsetX = 0.5, OffsetY = -0.5, UseComputedLOD=false
|
||||
tfetch2D lumaNwNeSwSe.__w_, tex, pos.xy, OffsetX = -0.5, OffsetY = 0.5, UseComputedLOD=false
|
||||
tfetch2D lumaNwNeSwSe.___w, tex, pos.xy, OffsetX = 0.5, OffsetY = 0.5, UseComputedLOD=false
|
||||
};
|
||||
#else
|
||||
asm {
|
||||
tfetch2D lumaNwNeSwSe.y___, tex, pos.xy, OffsetX = -0.5, OffsetY = -0.5, UseComputedLOD=false
|
||||
tfetch2D lumaNwNeSwSe._y__, tex, pos.xy, OffsetX = 0.5, OffsetY = -0.5, UseComputedLOD=false
|
||||
tfetch2D lumaNwNeSwSe.__y_, tex, pos.xy, OffsetX = -0.5, OffsetY = 0.5, UseComputedLOD=false
|
||||
tfetch2D lumaNwNeSwSe.___y, tex, pos.xy, OffsetX = 0.5, OffsetY = 0.5, UseComputedLOD=false
|
||||
};
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// Small constant bias on the second component.
// NOTE(review): presumably keeps the direction below from being exactly
// zero on symmetric neighbourhoods - confirm.
lumaNwNeSwSe.y += 1.0/384.0;
|
||||
// Min/max over the four fetched lumas, reduced pairwise.
float2 lumaMinTemp = min(lumaNwNeSwSe.xy, lumaNwNeSwSe.zw);
|
||||
float2 lumaMaxTemp = max(lumaNwNeSwSe.xy, lumaNwNeSwSe.zw);
|
||||
float lumaMin = min(lumaMinTemp.x, lumaMinTemp.y);
|
||||
float lumaMax = max(lumaMaxTemp.x, lumaMaxTemp.y);
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// Centre texel; its luma extends the min/max range for the contrast test.
float4 rgbyM = tex2Dlod(tex, float4(pos.xy, 0.0, 0.0));
|
||||
#if (FXAA_GREEN_AS_LUMA == 0)
|
||||
float lumaMinM = min(lumaMin, rgbyM.w);
|
||||
float lumaMaxM = max(lumaMax, rgbyM.w);
|
||||
#else
|
||||
float lumaMinM = min(lumaMin, rgbyM.y);
|
||||
float lumaMaxM = max(lumaMax, rgbyM.y);
|
||||
#endif
|
||||
// Early exit: local contrast is below the scaled threshold (floored at
// fxaaConsoleEdgeThresholdMin), so the centre texel is returned unchanged.
if((lumaMaxM - lumaMinM) < max(fxaaConsoleEdgeThresholdMin, lumaMax * fxaaConsoleEdgeThreshold)) return rgbyM;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// Edge direction as two signed sums of the four lumas. With the documented
// constant {xyzw} = (1.0, -1.0, 0.25, -0.25), .yyxx weights the lumas by
// (-1, -1, +1, +1) and .xyxy by (+1, -1, +1, -1).
float2 dir;
|
||||
dir.x = dot(lumaNwNeSwSe, fxaaConsole360ConstDir.yyxx);
|
||||
dir.y = dot(lumaNwNeSwSe, fxaaConsole360ConstDir.xyxy);
|
||||
dir = normalize(dir);
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// Offsets for the near pair of taps (.xy and .zw are used as two offsets).
float4 dir1 = dir.xyxy * fxaaConsoleRcpFrameOpt.xyzw;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// Offsets for the far pair: direction divided by its smaller component times
// the sharpness, scaled by ConstDir.zzww, biased by 0.5 and saturated to
// [0, 1], then remapped with the scale (.xy) and bias (.zw) packed into
// fxaaConsole360RcpFrameOpt2.
float4 dir2;
|
||||
float dirAbsMinTimesC = min(abs(dir.x), abs(dir.y)) * fxaaConsoleEdgeSharpness;
|
||||
dir2 = saturate(fxaaConsole360ConstDir.zzww * dir.xyxy / dirAbsMinTimesC + 0.5);
|
||||
dir2 = dir2 * fxaaConsole360RcpFrameOpt2.xyxy + fxaaConsole360RcpFrameOpt2.zwzw;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// Near taps read the NegOne sampler, far taps read the NegTwo sampler.
// NOTE(review): the sums below carry no explicit 0.5/0.25 weights on the
// fetched values, so these samplers presumably pre-scale them through their
// exponent bias - confirm against the sampler setup.
float4 rgbyN1 = tex2Dlod(fxaaConsole360TexExpBiasNegOne, float4(pos.xy + dir1.xy, 0.0, 0.0));
|
||||
float4 rgbyP1 = tex2Dlod(fxaaConsole360TexExpBiasNegOne, float4(pos.xy + dir1.zw, 0.0, 0.0));
|
||||
float4 rgbyN2 = tex2Dlod(fxaaConsole360TexExpBiasNegTwo, float4(pos.xy + dir2.xy, 0.0, 0.0));
|
||||
float4 rgbyP2 = tex2Dlod(fxaaConsole360TexExpBiasNegTwo, float4(pos.xy + dir2.zw, 0.0, 0.0));
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// rgbyA combines the near pair; rgbyB adds the far pair to half of rgbyA.
float4 rgbyA = rgbyN1 + rgbyP1;
|
||||
float4 rgbyB = rgbyN2 + rgbyP2 + rgbyA * 0.5;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// Use rgbyB only when its luma is above lumaMin and not above lumaMax;
// otherwise use rgbyA.
float4 rgbyR = ((FxaaLuma(rgbyB) - lumaMax) > 0.0) ? rgbyA : rgbyB;
|
||||
rgbyR = ((FxaaLuma(rgbyB) - lumaMin) > 0.0) ? rgbyR : rgbyA;
|
||||
return rgbyR; }
|
||||
/*==========================================================================*/
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*============================================================================
|
||||
|
||||
FXAA3 CONSOLE - OPTIMIZED PS3 PIXEL SHADER (NO EARLY EXIT)
|
||||
|
||||
==============================================================================
|
||||
The code below does not exactly match the assembly.
|
||||
I have a feeling that 12 cycles is possible, but was not able to get there.
|
||||
Might have to increase register count to get full performance.
|
||||
Note this shader does not use perspective interpolation.
|
||||
|
||||
Use the following cgc options,
|
||||
|
||||
--fenable-bx2 --fastmath --fastprecision --nofloatbindings
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
NVSHADERPERF OUTPUT
|
||||
------------------------------------------------------------------------------
|
||||
For reference and to aid in debug, output of NVShaderPerf should match this,
|
||||
|
||||
Shader to schedule:
|
||||
0: texpkb h0.w(TRUE), v5.zyxx, #0
|
||||
2: addh h2.z(TRUE), h0.w, constant(0.001953, 0.000000, 0.000000, 0.000000).x
|
||||
4: texpkb h0.w(TRUE), v5.xwxx, #0
|
||||
6: addh h0.z(TRUE), -h2, h0.w
|
||||
7: texpkb h1.w(TRUE), v5, #0
|
||||
9: addh h0.x(TRUE), h0.z, -h1.w
|
||||
10: addh h3.w(TRUE), h0.z, h1
|
||||
11: texpkb h2.w(TRUE), v5.zwzz, #0
|
||||
13: addh h0.z(TRUE), h3.w, -h2.w
|
||||
14: addh h0.x(TRUE), h2.w, h0
|
||||
15: nrmh h1.xz(TRUE), h0_n
|
||||
16: minh_m8 h0.x(TRUE), |h1|, |h1.z|
|
||||
17: maxh h4.w(TRUE), h0, h1
|
||||
18: divx h2.xy(TRUE), h1_n.xzzw, h0_n
|
||||
19: movr r1.zw(TRUE), v4.xxxy
|
||||
20: madr r2.xz(TRUE), -h1, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zzww, r1.zzww
|
||||
22: minh h5.w(TRUE), h0, h1
|
||||
23: texpkb h0(TRUE), r2.xzxx, #0
|
||||
25: madr r0.zw(TRUE), h1.xzxz, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w), r1
|
||||
27: maxh h4.x(TRUE), h2.z, h2.w
|
||||
28: texpkb h1(TRUE), r0.zwzz, #0
|
||||
30: addh_d2 h1(TRUE), h0, h1
|
||||
31: madr r0.xy(TRUE), -h2, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz
|
||||
33: texpkb h0(TRUE), r0, #0
|
||||
35: minh h4.z(TRUE), h2, h2.w
|
||||
36: fenct TRUE
|
||||
37: madr r1.xy(TRUE), h2, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz
|
||||
39: texpkb h2(TRUE), r1, #0
|
||||
41: addh_d2 h0(TRUE), h0, h2
|
||||
42: maxh h2.w(TRUE), h4, h4.x
|
||||
43: minh h2.x(TRUE), h5.w, h4.z
|
||||
44: addh_d2 h0(TRUE), h0, h1
|
||||
45: slth h2.x(TRUE), h0.w, h2
|
||||
46: sgth h2.w(TRUE), h0, h2
|
||||
47: movh h0(TRUE), h0
|
||||
48: addx.c0 rc(TRUE), h2, h2.w
|
||||
49: movh h0(c0.NE.x), h1
|
||||
|
||||
IPU0 ------ Simplified schedule: --------
|
||||
Pass | Unit | uOp | PC: Op
|
||||
-----+--------+------+-------------------------
|
||||
1 | SCT0/1 | mov | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;
|
||||
| TEX | txl | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;
|
||||
| SCB1 | add | 2: ADDh h2.z, h0.--w-, const.--x-;
|
||||
| | |
|
||||
2 | SCT0/1 | mov | 4: TXLr h0.w, g[TEX1].xwxx, const.xxxx, TEX0;
|
||||
| TEX | txl | 4: TXLr h0.w, g[TEX1].xwxx, const.xxxx, TEX0;
|
||||
| SCB1 | add | 6: ADDh h0.z,-h2, h0.--w-;
|
||||
| | |
|
||||
3 | SCT0/1 | mov | 7: TXLr h1.w, g[TEX1], const.xxxx, TEX0;
|
||||
| TEX | txl | 7: TXLr h1.w, g[TEX1], const.xxxx, TEX0;
|
||||
| SCB0 | add | 9: ADDh h0.x, h0.z---,-h1.w---;
|
||||
| SCB1 | add | 10: ADDh h3.w, h0.---z, h1;
|
||||
| | |
|
||||
4 | SCT0/1 | mov | 11: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;
|
||||
| TEX | txl | 11: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;
|
||||
| SCB0 | add | 14: ADDh h0.x, h2.w---, h0;
|
||||
| SCB1 | add | 13: ADDh h0.z, h3.--w-,-h2.--w-;
|
||||
| | |
|
||||
5 | SCT1 | mov | 15: NRMh h1.xz, h0;
|
||||
| SRB | nrm | 15: NRMh h1.xz, h0;
|
||||
| SCB0 | min | 16: MINh*8 h0.x, |h1|, |h1.z---|;
|
||||
| SCB1 | max | 17: MAXh h4.w, h0, h1;
|
||||
| | |
|
||||
6 | SCT0 | div | 18: DIVx h2.xy, h1.xz--, h0;
|
||||
| SCT1 | mov | 19: MOVr r1.zw, g[TEX0].--xy;
|
||||
| SCB0 | mad | 20: MADr r2.xz,-h1, const.z-w-, r1.z-w-;
|
||||
| SCB1 | min | 22: MINh h5.w, h0, h1;
|
||||
| | |
|
||||
7 | SCT0/1 | mov | 23: TXLr h0, r2.xzxx, const.xxxx, TEX0;
|
||||
| TEX | txl | 23: TXLr h0, r2.xzxx, const.xxxx, TEX0;
|
||||
| SCB0 | max | 27: MAXh h4.x, h2.z---, h2.w---;
|
||||
| SCB1 | mad | 25: MADr r0.zw, h1.--xz, const, r1;
|
||||
| | |
|
||||
8 | SCT0/1 | mov | 28: TXLr h1, r0.zwzz, const.xxxx, TEX0;
|
||||
| TEX | txl | 28: TXLr h1, r0.zwzz, const.xxxx, TEX0;
|
||||
| SCB0/1 | add | 30: ADDh/2 h1, h0, h1;
|
||||
| | |
|
||||
9 | SCT0 | mad | 31: MADr r0.xy,-h2, const.xy--, r1.zw--;
|
||||
| SCT1 | mov | 33: TXLr h0, r0, const.zzzz, TEX0;
|
||||
| TEX | txl | 33: TXLr h0, r0, const.zzzz, TEX0;
|
||||
| SCB1 | min | 35: MINh h4.z, h2, h2.--w-;
|
||||
| | |
|
||||
10 | SCT0 | mad | 37: MADr r1.xy, h2, const.xy--, r1.zw--;
|
||||
| SCT1 | mov | 39: TXLr h2, r1, const.zzzz, TEX0;
|
||||
| TEX | txl | 39: TXLr h2, r1, const.zzzz, TEX0;
|
||||
| SCB0/1 | add | 41: ADDh/2 h0, h0, h2;
|
||||
| | |
|
||||
11 | SCT0 | min | 43: MINh h2.x, h5.w---, h4.z---;
|
||||
| SCT1 | max | 42: MAXh h2.w, h4, h4.---x;
|
||||
| SCB0/1 | add | 44: ADDh/2 h0, h0, h1;
|
||||
| | |
|
||||
12 | SCT0 | set | 45: SLTh h2.x, h0.w---, h2;
|
||||
| SCT1 | set | 46: SGTh h2.w, h0, h2;
|
||||
| SCB0/1 | mul | 47: MOVh h0, h0;
|
||||
| | |
|
||||
13 | SCT0 | mad | 48: ADDxc0_s rc, h2, h2.w---;
|
||||
| SCB0/1 | mul | 49: MOVh h0(NE0.xxxx), h1;
|
||||
|
||||
Pass SCT TEX SCB
|
||||
1: 0% 100% 25%
|
||||
2: 0% 100% 25%
|
||||
3: 0% 100% 50%
|
||||
4: 0% 100% 50%
|
||||
5: 0% 0% 50%
|
||||
6: 100% 0% 75%
|
||||
7: 0% 100% 75%
|
||||
8: 0% 100% 100%
|
||||
9: 0% 100% 25%
|
||||
10: 0% 100% 100%
|
||||
11: 50% 0% 100%
|
||||
12: 50% 0% 100%
|
||||
13: 25% 0% 100%
|
||||
|
||||
MEAN: 17% 61% 67%
|
||||
|
||||
Pass SCT0 SCT1 TEX SCB0 SCB1
|
||||
1: 0% 0% 100% 0% 100%
|
||||
2: 0% 0% 100% 0% 100%
|
||||
3: 0% 0% 100% 100% 100%
|
||||
4: 0% 0% 100% 100% 100%
|
||||
5: 0% 0% 0% 100% 100%
|
||||
6: 100% 100% 0% 100% 100%
|
||||
7: 0% 0% 100% 100% 100%
|
||||
8: 0% 0% 100% 100% 100%
|
||||
9: 0% 0% 100% 0% 100%
|
||||
10: 0% 0% 100% 100% 100%
|
||||
11: 100% 100% 0% 100% 100%
|
||||
12: 100% 100% 0% 100% 100%
|
||||
13: 100% 0% 0% 100% 100%
|
||||
|
||||
MEAN: 30% 23% 61% 76% 100%
|
||||
Fragment Performance Setup: Driver RSX Compiler, GPU RSX, Flags 0x5
|
||||
Results 13 cycles, 3 r regs, 923,076,923 pixels/s
|
||||
============================================================================*/
|
||||
#if (FXAA_PS3 == 1) && (FXAA_EARLY_EXIT == 0)
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#pragma regcount 7
|
||||
#pragma disablepc all
|
||||
#pragma option O3
|
||||
#pragma option OutColorPrec=fp16
|
||||
#pragma texformat default RGBA8
|
||||
/*==========================================================================*/
|
||||
/*
 * FXAA3 Console pixel shader, PS3 variant without early exit.
 *
 * Samples the four corner texels around the pixel (NE, SW, NW, SE, addressed
 * through fxaaConsolePosPos), builds an edge direction from their lumas,
 * takes two taps along that direction scaled by fxaaConsoleRcpFrameOpt.zw
 * and two more scaled by fxaaConsoleRcpFrameOpt2.zw, and returns the average
 * of all four taps. When the luma of that four-tap average falls outside
 * the [min, max] luma range of the corners, the two-tap average is returned
 * instead.
 *
 * Inputs read by this variant: pos, fxaaConsolePosPos, tex,
 * fxaaConsoleRcpFrameOpt.zw, fxaaConsoleRcpFrameOpt2.zw. The remaining
 * parameters are not referenced in this body; edge sharpness is taken from
 * the compile-time FXAA_CONSOLE__PS3_EDGE_SHARPNESS macro.
 *
 * Luma is read from .w, or from .y when FXAA_GREEN_AS_LUMA is non-zero.
 *
 * The "(N)" markers are kept from the original source.
 * NOTE(review): they presumably correspond to the passes of the NVShaderPerf
 * schedule listed above this shader - confirm before reordering statements.
 */
half4 FxaaPixelShader(
    // See FXAA Quality FxaaPixelShader() source for docs on Inputs!
    FxaaFloat2 pos,
    FxaaFloat4 fxaaConsolePosPos,
    FxaaTex tex,
    FxaaTex fxaaConsole360TexExpBiasNegOne,
    FxaaTex fxaaConsole360TexExpBiasNegTwo,
    FxaaFloat2 fxaaQualityRcpFrame,
    FxaaFloat4 fxaaConsoleRcpFrameOpt,
    FxaaFloat4 fxaaConsoleRcpFrameOpt2,
    FxaaFloat4 fxaaConsole360RcpFrameOpt2,
    FxaaFloat fxaaQualitySubpix,
    FxaaFloat fxaaQualityEdgeThreshold,
    FxaaFloat fxaaQualityEdgeThresholdMin,
    FxaaFloat fxaaConsoleEdgeSharpness,
    FxaaFloat fxaaConsoleEdgeThreshold,
    FxaaFloat fxaaConsoleEdgeThresholdMin,
    FxaaFloat4 fxaaConsole360ConstDir
) {
/*--------------------------------------------------------------------------*/
// (1) NE corner; a 1/512 bias is added to its luma before it is used.
    half4 dir;
    // Only .x and .z carry the edge direction, but normalize(dir.xyz) below
    // also reads .y, so it must be defined. The early-exit PS3 variant sets
    // it to zero explicitly as well.
    // NOTE(review): re-run NVShaderPerf to confirm the cycle count is unchanged.
    dir.y = 0.0;
    half4 lumaNe = h4tex2Dlod(tex, half4(fxaaConsolePosPos.zy, 0, 0));
    #if (FXAA_GREEN_AS_LUMA == 0)
        lumaNe.w += half(1.0/512.0);
        dir.x = -lumaNe.w;
        dir.z = -lumaNe.w;
    #else
        lumaNe.y += half(1.0/512.0);
        dir.x = -lumaNe.y;
        dir.z = -lumaNe.y;
    #endif
/*--------------------------------------------------------------------------*/
// (2) SW corner.
    half4 lumaSw = h4tex2Dlod(tex, half4(fxaaConsolePosPos.xw, 0, 0));
    #if (FXAA_GREEN_AS_LUMA == 0)
        dir.x += lumaSw.w;
        dir.z += lumaSw.w;
    #else
        dir.x += lumaSw.y;
        dir.z += lumaSw.y;
    #endif
/*--------------------------------------------------------------------------*/
// (3) NW corner.
    half4 lumaNw = h4tex2Dlod(tex, half4(fxaaConsolePosPos.xy, 0, 0));
    #if (FXAA_GREEN_AS_LUMA == 0)
        dir.x -= lumaNw.w;
        dir.z += lumaNw.w;
    #else
        dir.x -= lumaNw.y;
        dir.z += lumaNw.y;
    #endif
/*--------------------------------------------------------------------------*/
// (4) SE corner. Afterwards:
//       dir.x = (SW + SE) - (NE + NW)
//       dir.z = (SW + NW) - (NE + SE)
    half4 lumaSe = h4tex2Dlod(tex, half4(fxaaConsolePosPos.zw, 0, 0));
    #if (FXAA_GREEN_AS_LUMA == 0)
        dir.x += lumaSe.w;
        dir.z -= lumaSe.w;
    #else
        dir.x += lumaSe.y;
        dir.z -= lumaSe.y;
    #endif
/*--------------------------------------------------------------------------*/
// (5) Unit direction (stored in dir1_pos.xy) and its smaller absolute
//     component scaled by the compile-time edge sharpness.
    half4 dir1_pos;
    dir1_pos.xy = normalize(dir.xyz).xz;
    half dirAbsMinTimesC = min(abs(dir1_pos.x), abs(dir1_pos.y)) * half(FXAA_CONSOLE__PS3_EDGE_SHARPNESS);
/*--------------------------------------------------------------------------*/
// (6) Second, longer direction clamped to [-2, 2]; .zw of both vectors
//     carries the pixel position.
    half4 dir2_pos;
    dir2_pos.xy = clamp(dir1_pos.xy / dirAbsMinTimesC, half(-2.0), half(2.0));
    dir1_pos.zw = pos.xy;
    dir2_pos.zw = pos.xy;
    half4 temp1N;
    temp1N.xy = dir1_pos.zw - dir1_pos.xy * fxaaConsoleRcpFrameOpt.zw;
/*--------------------------------------------------------------------------*/
// (7) Negative tap along the first direction.
    temp1N = h4tex2Dlod(tex, half4(temp1N.xy, 0.0, 0.0));
    half4 rgby1;
    rgby1.xy = dir1_pos.zw + dir1_pos.xy * fxaaConsoleRcpFrameOpt.zw;
/*--------------------------------------------------------------------------*/
// (8) Positive tap along the first direction; rgby1 = two-tap average.
    rgby1 = h4tex2Dlod(tex, half4(rgby1.xy, 0.0, 0.0));
    rgby1 = (temp1N + rgby1) * 0.5;
/*--------------------------------------------------------------------------*/
// (9) Negative tap along the second direction.
    half4 temp2N;
    temp2N.xy = dir2_pos.zw - dir2_pos.xy * fxaaConsoleRcpFrameOpt2.zw;
    temp2N = h4tex2Dlod(tex, half4(temp2N.xy, 0.0, 0.0));
/*--------------------------------------------------------------------------*/
// (10) Positive tap along the second direction, averaged with (9).
    half4 rgby2;
    rgby2.xy = dir2_pos.zw + dir2_pos.xy * fxaaConsoleRcpFrameOpt2.zw;
    rgby2 = h4tex2Dlod(tex, half4(rgby2.xy, 0.0, 0.0));
    rgby2 = (temp2N + rgby2) * 0.5;
/*--------------------------------------------------------------------------*/
// (11)
    // compiler moves these scalar ops up to other cycles
    #if (FXAA_GREEN_AS_LUMA == 0)
        half lumaMin = min(min(lumaNw.w, lumaSw.w), min(lumaNe.w, lumaSe.w));
        half lumaMax = max(max(lumaNw.w, lumaSw.w), max(lumaNe.w, lumaSe.w));
    #else
        half lumaMin = min(min(lumaNw.y, lumaSw.y), min(lumaNe.y, lumaSe.y));
        half lumaMax = max(max(lumaNw.y, lumaSw.y), max(lumaNe.y, lumaSe.y));
    #endif
    // rgby2 becomes the average of all four taps.
    rgby2 = (rgby2 + rgby1) * 0.5;
/*--------------------------------------------------------------------------*/
// (12) Does the four-tap luma leave the luma range of the corners?
    #if (FXAA_GREEN_AS_LUMA == 0)
        bool twoTapLt = rgby2.w < lumaMin;
        bool twoTapGt = rgby2.w > lumaMax;
    #else
        bool twoTapLt = rgby2.y < lumaMin;
        bool twoTapGt = rgby2.y > lumaMax;
    #endif
/*--------------------------------------------------------------------------*/
// (13) If so, fall back to the two-tap average.
    if(twoTapLt || twoTapGt) rgby2 = rgby1;
/*--------------------------------------------------------------------------*/
    return rgby2; }
|
||||
/*==========================================================================*/
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*============================================================================
|
||||
|
||||
FXAA3 CONSOLE - OPTIMIZED PS3 PIXEL SHADER (WITH EARLY EXIT)
|
||||
|
||||
==============================================================================
|
||||
The code mostly matches the assembly.
|
||||
I have a feeling that 14 cycles is possible, but was not able to get there.
|
||||
Might have to increase register count to get full performance.
|
||||
Note this shader does not use perspective interpolation.
|
||||
|
||||
Use the following cgc options,
|
||||
|
||||
--fenable-bx2 --fastmath --fastprecision --nofloatbindings
|
||||
|
||||
Use of FXAA_GREEN_AS_LUMA currently adds a cycle (16 clks).
|
||||
Will look at fixing this for FXAA 3.12.
|
||||
------------------------------------------------------------------------------
|
||||
NVSHADERPERF OUTPUT
|
||||
------------------------------------------------------------------------------
|
||||
For reference and to aid in debug, output of NVShaderPerf should match this,
|
||||
|
||||
Shader to schedule:
|
||||
0: texpkb h0.w(TRUE), v5.zyxx, #0
|
||||
2: addh h2.y(TRUE), h0.w, constant(0.001953, 0.000000, 0.000000, 0.000000).x
|
||||
4: texpkb h1.w(TRUE), v5.xwxx, #0
|
||||
6: addh h0.x(TRUE), h1.w, -h2.y
|
||||
7: texpkb h2.w(TRUE), v5.zwzz, #0
|
||||
9: minh h4.w(TRUE), h2.y, h2
|
||||
10: maxh h5.x(TRUE), h2.y, h2.w
|
||||
11: texpkb h0.w(TRUE), v5, #0
|
||||
13: addh h3.w(TRUE), -h0, h0.x
|
||||
14: addh h0.x(TRUE), h0.w, h0
|
||||
15: addh h0.z(TRUE), -h2.w, h0.x
|
||||
16: addh h0.x(TRUE), h2.w, h3.w
|
||||
17: minh h5.y(TRUE), h0.w, h1.w
|
||||
18: nrmh h2.xz(TRUE), h0_n
|
||||
19: minh_m8 h2.w(TRUE), |h2.x|, |h2.z|
|
||||
20: divx h4.xy(TRUE), h2_n.xzzw, h2_n.w
|
||||
21: movr r1.zw(TRUE), v4.xxxy
|
||||
22: maxh h2.w(TRUE), h0, h1
|
||||
23: fenct TRUE
|
||||
24: madr r0.xy(TRUE), -h2.xzzw, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zwzz, r1.zwzz
|
||||
26: texpkb h0(TRUE), r0, #0
|
||||
28: maxh h5.x(TRUE), h2.w, h5
|
||||
29: minh h5.w(TRUE), h5.y, h4
|
||||
30: madr r1.xy(TRUE), h2.xzzw, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zwzz, r1.zwzz
|
||||
32: texpkb h2(TRUE), r1, #0
|
||||
34: addh_d2 h2(TRUE), h0, h2
|
||||
35: texpkb h1(TRUE), v4, #0
|
||||
37: maxh h5.y(TRUE), h5.x, h1.w
|
||||
38: minh h4.w(TRUE), h1, h5
|
||||
39: madr r0.xy(TRUE), -h4, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz
|
||||
41: texpkb h0(TRUE), r0, #0
|
||||
43: addh_m8 h5.z(TRUE), h5.y, -h4.w
|
||||
44: madr r2.xy(TRUE), h4, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz
|
||||
46: texpkb h3(TRUE), r2, #0
|
||||
48: addh_d2 h0(TRUE), h0, h3
|
||||
49: addh_d2 h3(TRUE), h0, h2
|
||||
50: movh h0(TRUE), h3
|
||||
51: slth h3.x(TRUE), h3.w, h5.w
|
||||
52: sgth h3.w(TRUE), h3, h5.x
|
||||
53: addx.c0 rc(TRUE), h3.x, h3
|
||||
54: slth.c0 rc(TRUE), h5.z, h5
|
||||
55: movh h0(c0.NE.w), h2
|
||||
56: movh h0(c0.NE.x), h1
|
||||
|
||||
IPU0 ------ Simplified schedule: --------
|
||||
Pass | Unit | uOp | PC: Op
|
||||
-----+--------+------+-------------------------
|
||||
1 | SCT0/1 | mov | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;
|
||||
| TEX | txl | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;
|
||||
| SCB0 | add | 2: ADDh h2.y, h0.-w--, const.-x--;
|
||||
| | |
|
||||
2 | SCT0/1 | mov | 4: TXLr h1.w, g[TEX1].xwxx, const.xxxx, TEX0;
|
||||
| TEX | txl | 4: TXLr h1.w, g[TEX1].xwxx, const.xxxx, TEX0;
|
||||
| SCB0 | add | 6: ADDh h0.x, h1.w---,-h2.y---;
|
||||
| | |
|
||||
3 | SCT0/1 | mov | 7: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;
|
||||
| TEX | txl | 7: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;
|
||||
| SCB0 | max | 10: MAXh h5.x, h2.y---, h2.w---;
|
||||
| SCB1 | min | 9: MINh h4.w, h2.---y, h2;
|
||||
| | |
|
||||
4 | SCT0/1 | mov | 11: TXLr h0.w, g[TEX1], const.xxxx, TEX0;
|
||||
| TEX | txl | 11: TXLr h0.w, g[TEX1], const.xxxx, TEX0;
|
||||
| SCB0 | add | 14: ADDh h0.x, h0.w---, h0;
|
||||
| SCB1 | add | 13: ADDh h3.w,-h0, h0.---x;
|
||||
| | |
|
||||
5 | SCT0 | mad | 16: ADDh h0.x, h2.w---, h3.w---;
|
||||
| SCT1 | mad | 15: ADDh h0.z,-h2.--w-, h0.--x-;
|
||||
| SCB0 | min | 17: MINh h5.y, h0.-w--, h1.-w--;
|
||||
| | |
|
||||
6 | SCT1 | mov | 18: NRMh h2.xz, h0;
|
||||
| SRB | nrm | 18: NRMh h2.xz, h0;
|
||||
| SCB1 | min | 19: MINh*8 h2.w, |h2.---x|, |h2.---z|;
|
||||
| | |
|
||||
7 | SCT0 | div | 20: DIVx h4.xy, h2.xz--, h2.ww--;
|
||||
| SCT1 | mov | 21: MOVr r1.zw, g[TEX0].--xy;
|
||||
| SCB1 | max | 22: MAXh h2.w, h0, h1;
|
||||
| | |
|
||||
8 | SCT0 | mad | 24: MADr r0.xy,-h2.xz--, const.zw--, r1.zw--;
|
||||
| SCT1 | mov | 26: TXLr h0, r0, const.xxxx, TEX0;
|
||||
| TEX | txl | 26: TXLr h0, r0, const.xxxx, TEX0;
|
||||
| SCB0 | max | 28: MAXh h5.x, h2.w---, h5;
|
||||
| SCB1 | min | 29: MINh h5.w, h5.---y, h4;
|
||||
| | |
|
||||
9 | SCT0 | mad | 30: MADr r1.xy, h2.xz--, const.zw--, r1.zw--;
|
||||
| SCT1 | mov | 32: TXLr h2, r1, const.xxxx, TEX0;
|
||||
| TEX | txl | 32: TXLr h2, r1, const.xxxx, TEX0;
|
||||
| SCB0/1 | add | 34: ADDh/2 h2, h0, h2;
|
||||
| | |
|
||||
10 | SCT0/1 | mov | 35: TXLr h1, g[TEX0], const.xxxx, TEX0;
|
||||
| TEX | txl | 35: TXLr h1, g[TEX0], const.xxxx, TEX0;
|
||||
| SCB0 | max | 37: MAXh h5.y, h5.-x--, h1.-w--;
|
||||
| SCB1 | min | 38: MINh h4.w, h1, h5;
|
||||
| | |
|
||||
11 | SCT0 | mad | 39: MADr r0.xy,-h4, const.xy--, r1.zw--;
|
||||
| SCT1 | mov | 41: TXLr h0, r0, const.zzzz, TEX0;
|
||||
| TEX | txl | 41: TXLr h0, r0, const.zzzz, TEX0;
|
||||
| SCB0 | mad | 44: MADr r2.xy, h4, const.xy--, r1.zw--;
|
||||
| SCB1 | add | 43: ADDh*8 h5.z, h5.--y-,-h4.--w-;
|
||||
| | |
|
||||
12 | SCT0/1 | mov | 46: TXLr h3, r2, const.xxxx, TEX0;
|
||||
| TEX | txl | 46: TXLr h3, r2, const.xxxx, TEX0;
|
||||
| SCB0/1 | add | 48: ADDh/2 h0, h0, h3;
|
||||
| | |
|
||||
13 | SCT0/1 | mad | 49: ADDh/2 h3, h0, h2;
|
||||
| SCB0/1 | mul | 50: MOVh h0, h3;
|
||||
| | |
|
||||
14 | SCT0 | set | 51: SLTh h3.x, h3.w---, h5.w---;
|
||||
| SCT1 | set | 52: SGTh h3.w, h3, h5.---x;
|
||||
| SCB0 | set | 54: SLThc0 rc, h5.z---, h5;
|
||||
| SCB1 | add | 53: ADDxc0_s rc, h3.---x, h3;
|
||||
| | |
|
||||
15 | SCT0/1 | mul | 55: MOVh h0(NE0.wwww), h2;
|
||||
| SCB0/1 | mul | 56: MOVh h0(NE0.xxxx), h1;
|
||||
|
||||
Pass SCT TEX SCB
|
||||
1: 0% 100% 25%
|
||||
2: 0% 100% 25%
|
||||
3: 0% 100% 50%
|
||||
4: 0% 100% 50%
|
||||
5: 50% 0% 25%
|
||||
6: 0% 0% 25%
|
||||
7: 100% 0% 25%
|
||||
8: 0% 100% 50%
|
||||
9: 0% 100% 100%
|
||||
10: 0% 100% 50%
|
||||
11: 0% 100% 75%
|
||||
12: 0% 100% 100%
|
||||
13: 100% 0% 100%
|
||||
14: 50% 0% 50%
|
||||
15: 100% 0% 100%
|
||||
|
||||
MEAN: 26% 60% 56%
|
||||
|
||||
Pass SCT0 SCT1 TEX SCB0 SCB1
|
||||
1: 0% 0% 100% 100% 0%
|
||||
2: 0% 0% 100% 100% 0%
|
||||
3: 0% 0% 100% 100% 100%
|
||||
4: 0% 0% 100% 100% 100%
|
||||
5: 100% 100% 0% 100% 0%
|
||||
6: 0% 0% 0% 0% 100%
|
||||
7: 100% 100% 0% 0% 100%
|
||||
8: 0% 0% 100% 100% 100%
|
||||
9: 0% 0% 100% 100% 100%
|
||||
10: 0% 0% 100% 100% 100%
|
||||
11: 0% 0% 100% 100% 100%
|
||||
12: 0% 0% 100% 100% 100%
|
||||
13: 100% 100% 0% 100% 100%
|
||||
14: 100% 100% 0% 100% 100%
|
||||
15: 100% 100% 0% 100% 100%
|
||||
|
||||
MEAN: 33% 33% 60% 86% 80%
|
||||
Fragment Performance Setup: Driver RSX Compiler, GPU RSX, Flags 0x5
|
||||
Results 15 cycles, 3 r regs, 800,000,000 pixels/s
|
||||
============================================================================*/
|
||||
#if (FXAA_PS3 == 1) && (FXAA_EARLY_EXIT == 1)
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#pragma regcount 7
|
||||
#pragma disablepc all
|
||||
#pragma option O2
|
||||
#pragma option OutColorPrec=fp16
|
||||
#pragma texformat default RGBA8
|
||||
/*==========================================================================*/
|
||||
/*
 * FXAA3 Console pixel shader, PS3 variant with early exit.
 *
 * Samples the four corner texels (NE, SW, NW, SE, addressed through
 * fxaaConsolePosPos) and the centre texel at pos, derives an edge direction
 * from the corner lumas, and takes two taps along it scaled by
 * fxaaConsoleRcpFrameOpt.zw plus two taps scaled by fxaaConsoleRcpFrameOpt2.zw.
 *
 * Result, in order of precedence:
 *   - the unfiltered centre texel when the local luma range (corners and
 *     centre) divided by FXAA_CONSOLE__PS3_EDGE_THRESHOLD is below the
 *     corner luma maximum (early exit),
 *   - the two-tap average when the luma of the four-tap average lies outside
 *     the [min, max] luma range of the corners,
 *   - the four-tap average otherwise.
 *
 * Inputs read by this variant: pos, fxaaConsolePosPos, tex,
 * fxaaConsoleRcpFrameOpt.zw, fxaaConsoleRcpFrameOpt2.zw. The remaining
 * parameters are not referenced in this body; sharpness and threshold come
 * from the compile-time FXAA_CONSOLE__PS3_EDGE_SHARPNESS and
 * FXAA_CONSOLE__PS3_EDGE_THRESHOLD macros.
 *
 * Luma is read from .w, or from .y when FXAA_GREEN_AS_LUMA is non-zero.
 *
 * NOTE(review): the "(N)" markers presumably correspond to the 15 passes of
 * the NVShaderPerf schedule listed above this shader - confirm before
 * reordering any statement.
 */
half4 FxaaPixelShader(
|
||||
// See FXAA Quality FxaaPixelShader() source for docs on Inputs!
|
||||
FxaaFloat2 pos,
|
||||
FxaaFloat4 fxaaConsolePosPos,
|
||||
FxaaTex tex,
|
||||
FxaaTex fxaaConsole360TexExpBiasNegOne,
|
||||
FxaaTex fxaaConsole360TexExpBiasNegTwo,
|
||||
FxaaFloat2 fxaaQualityRcpFrame,
|
||||
FxaaFloat4 fxaaConsoleRcpFrameOpt,
|
||||
FxaaFloat4 fxaaConsoleRcpFrameOpt2,
|
||||
FxaaFloat4 fxaaConsole360RcpFrameOpt2,
|
||||
FxaaFloat fxaaQualitySubpix,
|
||||
FxaaFloat fxaaQualityEdgeThreshold,
|
||||
FxaaFloat fxaaQualityEdgeThresholdMin,
|
||||
FxaaFloat fxaaConsoleEdgeSharpness,
|
||||
FxaaFloat fxaaConsoleEdgeThreshold,
|
||||
FxaaFloat fxaaConsoleEdgeThresholdMin,
|
||||
FxaaFloat4 fxaaConsole360ConstDir
|
||||
) {
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// (1)
// NE corner; a 1/512 bias is added to its luma before it is used.
|
||||
half4 rgbyNe = h4tex2Dlod(tex, half4(fxaaConsolePosPos.zy, 0, 0));
|
||||
#if (FXAA_GREEN_AS_LUMA == 0)
|
||||
half lumaNe = rgbyNe.w + half(1.0/512.0);
|
||||
#else
|
||||
half lumaNe = rgbyNe.y + half(1.0/512.0);
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// (2)
// SW corner; SW - NE is shared by both direction components.
|
||||
half4 lumaSw = h4tex2Dlod(tex, half4(fxaaConsolePosPos.xw, 0, 0));
|
||||
#if (FXAA_GREEN_AS_LUMA == 0)
|
||||
half lumaSwNegNe = lumaSw.w - lumaNe;
|
||||
#else
|
||||
half lumaSwNegNe = lumaSw.y - lumaNe;
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// (3)
// NW corner; start the min/max luma reduction with the NW/SW pair.
|
||||
half4 lumaNw = h4tex2Dlod(tex, half4(fxaaConsolePosPos.xy, 0, 0));
|
||||
#if (FXAA_GREEN_AS_LUMA == 0)
|
||||
half lumaMaxNwSw = max(lumaNw.w, lumaSw.w);
|
||||
half lumaMinNwSw = min(lumaNw.w, lumaSw.w);
|
||||
#else
|
||||
half lumaMaxNwSw = max(lumaNw.y, lumaSw.y);
|
||||
half lumaMinNwSw = min(lumaNw.y, lumaSw.y);
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// (4)
// SE corner is fetched here; the partial direction sums below use NW only.
|
||||
half4 lumaSe = h4tex2Dlod(tex, half4(fxaaConsolePosPos.zw, 0, 0));
|
||||
#if (FXAA_GREEN_AS_LUMA == 0)
|
||||
half dirZ = lumaNw.w + lumaSwNegNe;
|
||||
half dirX = -lumaNw.w + lumaSwNegNe;
|
||||
#else
|
||||
half dirZ = lumaNw.y + lumaSwNegNe;
|
||||
half dirX = -lumaNw.y + lumaSwNegNe;
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// (5)
// Final direction: dir.x = (SW + SE) - (NE + NW), dir.z = (SW + NW) - (NE + SE).
// dir.y is zeroed so the 3-component normalize() in (6) acts on (x, z) only.
|
||||
half3 dir;
|
||||
dir.y = 0.0;
|
||||
#if (FXAA_GREEN_AS_LUMA == 0)
|
||||
dir.x = lumaSe.w + dirX;
|
||||
dir.z = -lumaSe.w + dirZ;
|
||||
half lumaMinNeSe = min(lumaNe, lumaSe.w);
|
||||
#else
|
||||
dir.x = lumaSe.y + dirX;
|
||||
dir.z = -lumaSe.y + dirZ;
|
||||
half lumaMinNeSe = min(lumaNe, lumaSe.y);
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// (6)
// Unit direction in dir1_pos.xy. Despite its name, dirAbsMinTimes8 is scaled by
// FXAA_CONSOLE__PS3_EDGE_SHARPNESS, whatever that macro is set to.
|
||||
half4 dir1_pos;
|
||||
dir1_pos.xy = normalize(dir).xz;
|
||||
half dirAbsMinTimes8 = min(abs(dir1_pos.x), abs(dir1_pos.y)) * half(FXAA_CONSOLE__PS3_EDGE_SHARPNESS);
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// (7)
// Second, longer direction clamped to [-2, 2]; .zw of both vectors carries pos.
|
||||
half4 dir2_pos;
|
||||
dir2_pos.xy = clamp(dir1_pos.xy / dirAbsMinTimes8, half(-2.0), half(2.0));
|
||||
dir1_pos.zw = pos.xy;
|
||||
dir2_pos.zw = pos.xy;
|
||||
#if (FXAA_GREEN_AS_LUMA == 0)
|
||||
half lumaMaxNeSe = max(lumaNe, lumaSe.w);
|
||||
#else
|
||||
half lumaMaxNeSe = max(lumaNe, lumaSe.y);
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// (8)
// Negative tap along the first direction; finish the corner luma min/max.
|
||||
half4 temp1N;
|
||||
temp1N.xy = dir1_pos.zw - dir1_pos.xy * fxaaConsoleRcpFrameOpt.zw;
|
||||
temp1N = h4tex2Dlod(tex, half4(temp1N.xy, 0.0, 0.0));
|
||||
half lumaMax = max(lumaMaxNwSw, lumaMaxNeSe);
|
||||
half lumaMin = min(lumaMinNwSw, lumaMinNeSe);
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// (9)
// Positive tap along the first direction; rgby1 = two-tap average.
|
||||
half4 rgby1;
|
||||
rgby1.xy = dir1_pos.zw + dir1_pos.xy * fxaaConsoleRcpFrameOpt.zw;
|
||||
rgby1 = h4tex2Dlod(tex, half4(rgby1.xy, 0.0, 0.0));
|
||||
rgby1 = (temp1N + rgby1) * 0.5;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// (10)
// Centre texel; extend the luma range with it for the early-exit test.
|
||||
half4 rgbyM = h4tex2Dlod(tex, half4(pos.xy, 0.0, 0.0));
|
||||
#if (FXAA_GREEN_AS_LUMA == 0)
|
||||
half lumaMaxM = max(lumaMax, rgbyM.w);
|
||||
half lumaMinM = min(lumaMin, rgbyM.w);
|
||||
#else
|
||||
half lumaMaxM = max(lumaMax, rgbyM.y);
|
||||
half lumaMinM = min(lumaMin, rgbyM.y);
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// (11)
// Negative tap along the second direction. lumaRangeM is the local luma range
// divided by the compile-time threshold; it is compared with lumaMax in (14).
|
||||
half4 temp2N;
|
||||
temp2N.xy = dir2_pos.zw - dir2_pos.xy * fxaaConsoleRcpFrameOpt2.zw;
|
||||
temp2N = h4tex2Dlod(tex, half4(temp2N.xy, 0.0, 0.0));
|
||||
half4 rgby2;
|
||||
rgby2.xy = dir2_pos.zw + dir2_pos.xy * fxaaConsoleRcpFrameOpt2.zw;
|
||||
half lumaRangeM = (lumaMaxM - lumaMinM) / FXAA_CONSOLE__PS3_EDGE_THRESHOLD;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// (12)
// Positive tap along the second direction, averaged with the negative one.
|
||||
rgby2 = h4tex2Dlod(tex, half4(rgby2.xy, 0.0, 0.0));
|
||||
rgby2 = (temp2N + rgby2) * 0.5;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// (13)
// rgby2 becomes the average of all four taps.
|
||||
rgby2 = (rgby2 + rgby1) * 0.5;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// (14)
// twoTap: the four-tap luma left the luma range of the corners.
// earlyExit: local contrast is below lumaMax * FXAA_CONSOLE__PS3_EDGE_THRESHOLD.
|
||||
#if (FXAA_GREEN_AS_LUMA == 0)
|
||||
bool twoTapLt = rgby2.w < lumaMin;
|
||||
bool twoTapGt = rgby2.w > lumaMax;
|
||||
#else
|
||||
bool twoTapLt = rgby2.y < lumaMin;
|
||||
bool twoTapGt = rgby2.y > lumaMax;
|
||||
#endif
|
||||
bool earlyExit = lumaRangeM < lumaMax;
|
||||
bool twoTap = twoTapLt || twoTapGt;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
// (15)
// The early-exit assignment comes last, so it overrides the two-tap fallback.
|
||||
if(twoTap) rgby2 = rgby1;
|
||||
if(earlyExit) rgby2 = rgbyM;
|
||||
/*--------------------------------------------------------------------------*/
|
||||
return rgby2; }
|
||||
/*==========================================================================*/
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Interpolated input from the vertex stage. main() forwards pos.xy to
// FxaaPixelShader() as the pixel-center position; main() does not read pos.zw.
in vec4 pos;
|
||||
|
||||
// Source color texture. main() passes it as the FXAA input texture and also
// for both of the 360-only sampler arguments.
uniform sampler2D tex0;
|
||||
// Passed to FxaaPixelShader() as the FXAA Quality reciprocal frame size,
// i.e. {1.0/screenWidthInPixels, 1.0/screenHeightInPixels}.
uniform vec2 dt;
|
||||
|
||||
// Fragment entry point of the FXAA pass: forwards the interpolated pixel
// position, the source texture and the tuning constants to FxaaPixelShader()
// and stores the returned color in qgl_FragColor.
//
// Every input that the FXAA documentation marks as Console-, 360- or PS3-only
// is passed as zero.
// NOTE(review): presumably this shader is built for the FXAA Quality path
// only - confirm against the FXAA_* defines used when compiling it.
void main() {
    // Placeholders for the inputs documented as "Only used on FXAA Console".
    const vec4  zero4 = vec4(0);
    const float zero1 = 0.;

    // FXAA Quality only (formerly the FXAA_QUALITY__SUBPIX define).
    // Amount of sub-pixel aliasing removal; this can affect sharpness.
    //   1.00 - upper limit (softer)
    //   0.75 - default amount of filtering
    //   0.50 - lower limit (sharper, less sub-pixel aliasing removal)
    //   0.25 - almost off
    //   0.00 - completely off
    const float qualitySubpix = 0.75;

    // FXAA Quality only (formerly the FXAA_QUALITY__EDGE_THRESHOLD define).
    // Minimum amount of local contrast required to apply the algorithm.
    //   0.333 - too little (faster)
    //   0.250 - low quality
    //   0.166 - default
    //   0.125 - high quality
    //   0.063 - overkill (slower)
    const float qualityEdgeThreshold = 0.125;

    // FXAA Quality only (formerly the FXAA_QUALITY__EDGE_THRESHOLD_MIN define).
    // Trims the algorithm from processing darks.
    //   0.0833 - upper limit (default, the start of visible unfiltered edges)
    //   0.0625 - high quality (faster)
    //   0.0312 - visible limit (slower)
    // With FXAA_GREEN_AS_LUMA this likely wants to be zero, because colors
    // that are mostly not green appear very dark in the green channel. Tune
    // on mostly non-green content: start at zero and increase until aliasing
    // becomes a problem.
    const float qualityEdgeThresholdMin = 0.0625;

    qgl_FragColor = FxaaPixelShader(
        // pos: {xy} = center of pixel.
        // Use noperspective interpolation (perspective interpolation off).
        pos.xy,

        // fxaaConsolePosPos: FXAA Console only, not used on the 360 version.
        // Use noperspective interpolation.
        //   {xy__} = upper left of pixel
        //   {__zw} = lower right of pixel
        zero4,

        // tex: input color texture.
        //   {rgb_} = color in linear or perceptual color space
        //   {___a} = luma in perceptual (not linear) color space,
        //            if FXAA_GREEN_AS_LUMA == 0
        tex0,

        // fxaaConsole360TexExpBiasNegOne: optimized 360 Console version only.
        // Everywhere else pass the same input as "tex". On 360 it is the same
        // texture aliased by a 2nd sampler with an exponent bias of -1.
        tex0,

        // fxaaConsole360TexExpBiasNegTwo: optimized 360 Console version only.
        // Everywhere else pass the same input as "tex". On 360 it is the same
        // texture aliased by a 3rd sampler with an exponent bias of -2.
        tex0,

        // fxaaQualityRcpFrame: FXAA Quality only, must come from a
        // constant/uniform.
        //   {x_} = 1.0/screenWidthInPixels
        //   {_y} = 1.0/screenHeightInPixels
        dt,

        // fxaaConsoleRcpFrameOpt: FXAA Console only, must come from a
        // constant/uniform. Affects sub-pixel AA quality and, inversely,
        // sharpness. N = 0.50 (default) or 0.33 (sharper):
        //   {x___} = -N/screenWidthInPixels
        //   {_y__} = -N/screenHeightInPixels
        //   {__z_} =  N/screenWidthInPixels
        //   {___w} =  N/screenHeightInPixels
        zero4,

        // fxaaConsoleRcpFrameOpt2: FXAA Console only; not used on 360, but
        // used on PS3 and PC. Must come from a constant/uniform.
        //   {x___} = -2.0/screenWidthInPixels
        //   {_y__} = -2.0/screenHeightInPixels
        //   {__z_} =  2.0/screenWidthInPixels
        //   {___w} =  2.0/screenHeightInPixels
        zero4,

        // fxaaConsole360RcpFrameOpt2: FXAA Console only; used on 360 in place
        // of fxaaConsoleRcpFrameOpt2. Must come from a constant/uniform.
        //   {x___} =  8.0/screenWidthInPixels
        //   {_y__} =  8.0/screenHeightInPixels
        //   {__z_} = -4.0/screenWidthInPixels
        //   {___w} = -4.0/screenHeightInPixels
        zero4,

        // fxaaQualitySubpix, fxaaQualityEdgeThreshold,
        // fxaaQualityEdgeThresholdMin: see the constants above.
        qualitySubpix,
        qualityEdgeThreshold,
        qualityEdgeThresholdMin,

        // fxaaConsoleEdgeSharpness: FXAA Console only (formerly the
        // FXAA_CONSOLE__EDGE_SHARPNESS define). Does not affect PS3, where it
        // must be compiled in through FXAA_CONSOLE__PS3_EDGE_SHARPNESS: the
        // PS3 is ALU bound, so only 2, 4 and 8 are safe there (they use the
        // shader's ability to do a free multiply/divide by 2, 4 or 8). Other
        // platforms can use a non-power of two.
        //   8.0 - sharper (default!!!)
        //   4.0 - softer
        //   2.0 - really soft (good only for vector graphics inputs)
        zero1,

        // fxaaConsoleEdgeThreshold: FXAA Console only (formerly the
        // FXAA_CONSOLE__EDGE_THRESHOLD define). Does not affect PS3, where it
        // must be compiled in through FXAA_CONSOLE__PS3_EDGE_THRESHOLD: the
        // PS3 is ALU bound, so only 1/4 and 1/8 are safe there. The console
        // setting has a different mapping than the quality setting. Other
        // platforms can use other values.
        //   0.125 - leaves less aliasing, but is softer (default!!!)
        //   0.25  - leaves more aliasing, and is sharper
        zero1,

        // fxaaConsoleEdgeThresholdMin: FXAA Console only (formerly the
        // FXAA_CONSOLE__EDGE_THRESHOLD_MIN define). Trims the algorithm from
        // processing darks; different mapping than the quality setting.
        // Applies only when FXAA_EARLY_EXIT is 1 and does not apply to PS3,
        // which was simplified to avoid more shader instructions.
        //   0.06 - faster but more aliasing in darks
        //   0.05 - default
        //   0.04 - slower and less aliasing in darks
        // With FXAA_GREEN_AS_LUMA this likely wants to be zero (mostly
        // non-green colors appear very dark in the green channel); start at
        // zero and increase until aliasing becomes a problem.
        zero1,

        // fxaaConsole360ConstDir: extra constants for 360 FXAA Console only;
        // use zeros or anything else on other platforms. These must live in
        // physical constant registers and NOT be immediates, which would make
        // the compiler un-optimize.
        //   {xyzw} = float4(1.0, -1.0, 0.25, -0.25)
        zero4);
}
|
||||
11
qglengine/shaders/fxaa.vert
Normal file
11
qglengine/shaders/fxaa.vert
Normal file
@@ -0,0 +1,11 @@
|
||||
out vec4 pos;
|
||||
|
||||
const float FXAA_SUBPIX_SHIFT = 1. / 4.;
|
||||
uniform vec2 dt;
|
||||
|
||||
// FXAA vertex shader entry point.
// Forwards the texture coordinate to the fragment stage, fills the `pos`
// output and sets the clip-space position.
//   pos.xy = the unmodified texture coordinate (qgl_Texture.xy)
//   pos.zw = the same coordinate shifted back by dt * (0.5 + FXAA_SUBPIX_SHIFT)
// NOTE(review): dt is presumably the size of one texel (1 / resolution);
// confirm against the code that sets the uniform.
void main(void) {
|
||||
qgl_FragTexture = qgl_Texture;
|
||||
pos.xy = qgl_Texture.xy;
|
||||
// With FXAA_SUBPIX_SHIFT = 1/4 the shift below is 0.75 * dt.
pos.zw = qgl_Texture.xy - (dt * (0.5 + FXAA_SUBPIX_SHIFT));
|
||||
// qgl_ftransform() is defined outside this file.
gl_Position = qgl_ftransform();
|
||||
}
|
||||
2048
qglengine/shaders/fxaa_v3.h
Normal file
2048
qglengine/shaders/fxaa_v3.h
Normal file
@@ -0,0 +1,2048 @@
|
||||
/*============================================================================
|
||||
|
||||
|
||||
NVIDIA FXAA 3.11 by TIMOTHY LOTTES
|
||||
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
COPYRIGHT (C) 2010, 2011 NVIDIA CORPORATION. ALL RIGHTS RESERVED.
|
||||
------------------------------------------------------------------------------
|
||||
TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED
|
||||
*AS IS* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS
|
||||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA
|
||||
OR ITS SUPPLIERS BE LIABLE FOR ANY SPECIAL, INCIDENTAL, INDIRECT, OR
|
||||
CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR
|
||||
LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION,
|
||||
OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR INABILITY TO USE
|
||||
THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||
DAMAGES.
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
INTEGRATION CHECKLIST
|
||||
------------------------------------------------------------------------------
|
||||
(1.)
|
||||
In the shader source, setup defines for the desired configuration.
|
||||
When providing multiple shaders (for different presets),
|
||||
simply setup the defines differently in multiple files.
|
||||
Example,
|
||||
|
||||
#define FXAA_PC 1
|
||||
#define FXAA_HLSL_5 1
|
||||
#define FXAA_QUALITY__PRESET 12
|
||||
|
||||
Or,
|
||||
|
||||
#define FXAA_360 1
|
||||
|
||||
Or,
|
||||
|
||||
#define FXAA_PS3 1
|
||||
|
||||
Etc.
|
||||
|
||||
(2.)
|
||||
Then include this file,
|
||||
|
||||
#include "Fxaa3_11.h"
|
||||
|
||||
(3.)
|
||||
Then call the FXAA pixel shader from within your desired shader.
|
||||
Look at the FXAA Quality FxaaPixelShader() for docs on inputs.
|
||||
As for FXAA 3.11 all inputs for all shaders are the same
|
||||
to enable easy porting between platforms.
|
||||
|
||||
return FxaaPixelShader(...);
|
||||
|
||||
(4.)
|
||||
Ensure the pass prior to FXAA outputs RGBL (see next section).
|
||||
Or use,
|
||||
|
||||
#define FXAA_GREEN_AS_LUMA 1
|
||||
|
||||
(5.)
|
||||
Setup engine to provide the following constants
|
||||
which are used in the FxaaPixelShader() inputs,
|
||||
|
||||
FxaaFloat2 fxaaQualityRcpFrame,
|
||||
FxaaFloat4 fxaaConsoleRcpFrameOpt,
|
||||
FxaaFloat4 fxaaConsoleRcpFrameOpt2,
|
||||
FxaaFloat4 fxaaConsole360RcpFrameOpt2,
|
||||
FxaaFloat fxaaQualitySubpix,
|
||||
FxaaFloat fxaaQualityEdgeThreshold,
|
||||
FxaaFloat fxaaQualityEdgeThresholdMin,
|
||||
FxaaFloat fxaaConsoleEdgeSharpness,
|
||||
FxaaFloat fxaaConsoleEdgeThreshold,
|
||||
FxaaFloat fxaaConsoleEdgeThresholdMin,
|
||||
FxaaFloat4 fxaaConsole360ConstDir
|
||||
|
||||
Look at the FXAA Quality FxaaPixelShader() for docs on inputs.
|
||||
|
||||
(6.)
|
||||
Have FXAA vertex shader run as a full screen triangle,
|
||||
and output "pos" and "fxaaConsolePosPos"
|
||||
such that inputs in the pixel shader provide,
|
||||
|
||||
// {xy} = center of pixel
|
||||
FxaaFloat2 pos,
|
||||
|
||||
// {xy__} = upper left of pixel
|
||||
// {__zw} = lower right of pixel
|
||||
FxaaFloat4 fxaaConsolePosPos,
|
||||
|
||||
(7.)
|
||||
Ensure the texture sampler(s) used by FXAA are set to bilinear filtering.
|
||||
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
INTEGRATION - RGBL AND COLORSPACE
|
||||
------------------------------------------------------------------------------
|
||||
FXAA3 requires RGBL as input unless the following is set,
|
||||
|
||||
#define FXAA_GREEN_AS_LUMA 1
|
||||
|
||||
In which case the engine uses green in place of luma,
|
||||
and requires RGB input is in a non-linear colorspace.
|
||||
|
||||
RGB should be LDR (low dynamic range).
|
||||
Specifically do FXAA after tonemapping.
|
||||
|
||||
RGB data as returned by a texture fetch can be non-linear,
|
||||
or linear when FXAA_GREEN_AS_LUMA is not set.
|
||||
Note an "sRGB format" texture counts as linear,
|
||||
because the result of a texture fetch is linear data.
|
||||
Regular "RGBA8" textures in the sRGB colorspace are non-linear.
|
||||
|
||||
If FXAA_GREEN_AS_LUMA is not set,
|
||||
luma must be stored in the alpha channel prior to running FXAA.
|
||||
This luma should be in a perceptual space (could be gamma 2.0).
|
||||
Example pass before FXAA where output is gamma 2.0 encoded,
|
||||
|
||||
color.rgb = ToneMap(color.rgb); // linear color output
|
||||
color.rgb = sqrt(color.rgb); // gamma 2.0 color output
|
||||
return color;
|
||||
|
||||
To use FXAA,
|
||||
|
||||
color.rgb = ToneMap(color.rgb); // linear color output
|
||||
color.rgb = sqrt(color.rgb); // gamma 2.0 color output
|
||||
color.a = dot(color.rgb, FxaaFloat3(0.299, 0.587, 0.114)); // compute luma
|
||||
return color;
|
||||
|
||||
Another example where output is linear encoded,
|
||||
say for instance writing to an sRGB formatted render target,
|
||||
where the render target does the conversion back to sRGB after blending,
|
||||
|
||||
color.rgb = ToneMap(color.rgb); // linear color output
|
||||
return color;
|
||||
|
||||
To use FXAA,
|
||||
|
||||
color.rgb = ToneMap(color.rgb); // linear color output
|
||||
color.a = sqrt(dot(color.rgb, FxaaFloat3(0.299, 0.587, 0.114))); // compute luma
|
||||
return color;
|
||||
|
||||
Getting luma correct is required for the algorithm to work correctly.
|
||||
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
BEING LINEARLY CORRECT?
|
||||
------------------------------------------------------------------------------
|
||||
Applying FXAA to a framebuffer with linear RGB color will look worse.
|
||||
This is very counterintuitive, but happens to be true in this case.
|
||||
The reason is that dithering artifacts will be more visible
|
||||
in a linear colorspace.
|
||||
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
COMPLEX INTEGRATION
|
||||
------------------------------------------------------------------------------
|
||||
Q. What if the engine is blending into RGB before wanting to run FXAA?
|
||||
|
||||
A. In the last opaque pass prior to FXAA,
|
||||
have the pass write out luma into alpha.
|
||||
Then blend into RGB only.
|
||||
FXAA should be able to run ok
|
||||
assuming the blending pass did not add any aliasing.
|
||||
This should be the common case for particles and common blending passes.
|
||||
|
||||
A. Or use FXAA_GREEN_AS_LUMA.
|
||||
|
||||
============================================================================*/
|
||||
|
||||
/*============================================================================
|
||||
|
||||
INTEGRATION KNOBS
|
||||
|
||||
============================================================================*/
|
||||
//
|
||||
// FXAA_PS3 and FXAA_360 choose the console algorithm (FXAA3 CONSOLE).
|
||||
// FXAA_360_OPT is a prototype for the new optimized 360 version.
|
||||
//
|
||||
// 1 = Use API.
|
||||
// 0 = Don't use API.
|
||||
//
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_PS3
|
||||
#define FXAA_PS3 0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_360
|
||||
#define FXAA_360 0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_360_OPT
|
||||
#define FXAA_360_OPT 0
|
||||
#endif
|
||||
/*==========================================================================*/
|
||||
#ifndef FXAA_PC
|
||||
//
|
||||
// FXAA Quality
|
||||
// The high quality PC algorithm.
|
||||
//
|
||||
#define FXAA_PC 0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_PC_CONSOLE
|
||||
//
|
||||
// The console algorithm for PC is included
|
||||
// for developers targeting really low spec machines.
|
||||
// Likely better to just run FXAA_PC, and use a really low preset.
|
||||
//
|
||||
#define FXAA_PC_CONSOLE 0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_GLSL_120
|
||||
#define FXAA_GLSL_120 0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_GLSL_130
|
||||
#define FXAA_GLSL_130 0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_HLSL_3
|
||||
#define FXAA_HLSL_3 0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_HLSL_4
|
||||
#define FXAA_HLSL_4 0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_HLSL_5
|
||||
#define FXAA_HLSL_5 0
|
||||
#endif
|
||||
/*==========================================================================*/
|
||||
#ifndef FXAA_GREEN_AS_LUMA
|
||||
//
|
||||
// For those using non-linear color,
|
||||
// and either not able to get luma in alpha, or not wanting to,
|
||||
// this enables FXAA to run using green as a proxy for luma.
|
||||
// So with this enabled, no need to pack luma in alpha.
|
||||
//
|
||||
// This will turn off AA on anything which lacks some amount of green.
|
||||
// Pure red and blue or combination of only R and B, will get no AA.
|
||||
//
|
||||
// Might want to lower the settings for both,
|
||||
// fxaaConsoleEdgeThresholdMin
|
||||
// fxaaQualityEdgeThresholdMin
|
||||
// In order to ensure AA does not get turned off on colors
|
||||
// which contain a minor amount of green.
|
||||
//
|
||||
// 1 = On.
|
||||
// 0 = Off.
|
||||
//
|
||||
#define FXAA_GREEN_AS_LUMA 0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_EARLY_EXIT
|
||||
//
|
||||
// Controls algorithm's early exit path.
|
||||
// On PS3 turning this ON adds 2 cycles to the shader.
|
||||
// On 360 turning this OFF adds 10ths of a millisecond to the shader.
|
||||
// Turning this off on console will result in a more blurry image.
|
||||
// So this defaults to on.
|
||||
//
|
||||
// 1 = On.
|
||||
// 0 = Off.
|
||||
//
|
||||
#define FXAA_EARLY_EXIT 1
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_DISCARD
|
||||
//
|
||||
// Only valid for PC OpenGL currently.
|
||||
// Probably will not work when FXAA_GREEN_AS_LUMA = 1.
|
||||
//
|
||||
// 1 = Use discard on pixels which don't need AA.
|
||||
// For APIs which enable concurrent TEX+ROP from same surface.
|
||||
// 0 = Return unchanged color on pixels which don't need AA.
|
||||
//
|
||||
#define FXAA_DISCARD 0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_FAST_PIXEL_OFFSET
|
||||
//
|
||||
// Used for GLSL 120 only.
|
||||
//
|
||||
// 1 = GL API supports fast pixel offsets
|
||||
// 0 = do not use fast pixel offsets
|
||||
//
|
||||
#ifdef GL_EXT_gpu_shader4
|
||||
#define FXAA_FAST_PIXEL_OFFSET 1
|
||||
#endif
|
||||
#ifdef GL_NV_gpu_shader5
|
||||
#define FXAA_FAST_PIXEL_OFFSET 1
|
||||
#endif
|
||||
#ifdef GL_ARB_gpu_shader5
|
||||
#define FXAA_FAST_PIXEL_OFFSET 1
|
||||
#endif
|
||||
#ifndef FXAA_FAST_PIXEL_OFFSET
|
||||
#define FXAA_FAST_PIXEL_OFFSET 0
|
||||
#endif
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_GATHER4_ALPHA
|
||||
//
|
||||
// 1 = API supports gather4 on alpha channel.
|
||||
// 0 = API does not support gather4 on alpha channel.
|
||||
//
|
||||
#if (FXAA_HLSL_5 == 1)
|
||||
#define FXAA_GATHER4_ALPHA 1
|
||||
#endif
|
||||
#ifdef GL_ARB_gpu_shader5
|
||||
#define FXAA_GATHER4_ALPHA 1
|
||||
#endif
|
||||
#ifdef GL_NV_gpu_shader5
|
||||
#define FXAA_GATHER4_ALPHA 1
|
||||
#endif
|
||||
#ifndef FXAA_GATHER4_ALPHA
|
||||
#define FXAA_GATHER4_ALPHA 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*============================================================================
|
||||
FXAA CONSOLE PS3 - TUNING KNOBS
|
||||
============================================================================*/
|
||||
#ifndef FXAA_CONSOLE__PS3_EDGE_SHARPNESS
|
||||
//
|
||||
// Controls the sharpness of edges on PS3 only.
|
||||
// Non-PS3 tuning is done with shader input.
|
||||
//
|
||||
// Due to the PS3 being ALU bound,
|
||||
// there are only three safe values here: 2 and 4 and 8.
|
||||
// These options use the shaders ability to a free *|/ by 2|4|8.
|
||||
//
|
||||
// 8.0 is sharper
|
||||
// 4.0 is softer
|
||||
// 2.0 is really soft (good for vector graphics inputs)
|
||||
//
|
||||
#if 1
|
||||
#define FXAA_CONSOLE__PS3_EDGE_SHARPNESS 8.0
|
||||
#endif
|
||||
#if 0
|
||||
#define FXAA_CONSOLE__PS3_EDGE_SHARPNESS 4.0
|
||||
#endif
|
||||
#if 0
|
||||
#define FXAA_CONSOLE__PS3_EDGE_SHARPNESS 2.0
|
||||
#endif
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef FXAA_CONSOLE__PS3_EDGE_THRESHOLD
|
||||
//
|
||||
// Only affects PS3.
|
||||
// Non-PS3 tuning is done with shader input.
|
||||
//
|
||||
// The minimum amount of local contrast required to apply algorithm.
|
||||
// The console setting has a different mapping than the quality setting.
|
||||
//
|
||||
// This only applies when FXAA_EARLY_EXIT is 1.
|
||||
//
|
||||
// Due to the PS3 being ALU bound,
|
||||
// there are only two safe values here: 0.25 and 0.125.
|
||||
// These options use the shaders ability to a free *|/ by 2|4|8.
|
||||
//
|
||||
// 0.125 leaves less aliasing, but is softer
|
||||
// 0.25 leaves more aliasing, and is sharper
|
||||
//
|
||||
#if 1
|
||||
#define FXAA_CONSOLE__PS3_EDGE_THRESHOLD 0.125
|
||||
#else
|
||||
#define FXAA_CONSOLE__PS3_EDGE_THRESHOLD 0.25
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*============================================================================
|
||||
FXAA QUALITY - TUNING KNOBS
|
||||
------------------------------------------------------------------------------
|
||||
NOTE the other tuning knobs are now in the shader function inputs!
|
||||
============================================================================*/
|
||||
#ifndef FXAA_QUALITY__PRESET
|
||||
//
|
||||
// Choose the quality preset.
|
||||
// This needs to be compiled into the shader as it affects code.
|
||||
// Best option to include multiple presets is to
|
||||
// in each shader define the preset, then include this file.
|
||||
//
|
||||
// OPTIONS
|
||||
// -----------------------------------------------------------------------
|
||||
// 10 to 15 - default medium dither (10=fastest, 15=highest quality)
|
||||
// 20 to 29 - less dither, more expensive (20=fastest, 29=highest quality)
|
||||
// 39 - no dither, very expensive
|
||||
//
|
||||
// NOTES
|
||||
// -----------------------------------------------------------------------
|
||||
// 12 = slightly faster than FXAA 3.9 and higher edge quality (default)
|
||||
// 13 = about same speed as FXAA 3.9 and better than 12
|
||||
// 23 = closest to FXAA 3.9 visually and performance wise
|
||||
// _ = the lowest digit is directly related to performance
|
||||
// _ = the highest digit is directly related to style
|
||||
//
|
||||
#define FXAA_QUALITY__PRESET 12
|
||||
#endif
|
||||
|
||||
|
||||
/*============================================================================
|
||||
|
||||
FXAA QUALITY - PRESETS
|
||||
|
||||
============================================================================*/
|
||||
|
||||
/*============================================================================
|
||||
FXAA QUALITY - MEDIUM DITHER PRESETS
|
||||
============================================================================*/
|
||||
#if (FXAA_QUALITY__PRESET == 10)
|
||||
#define FXAA_QUALITY__PS 3
|
||||
#define FXAA_QUALITY__P0 1.5
|
||||
#define FXAA_QUALITY__P1 3.0
|
||||
#define FXAA_QUALITY__P2 12.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 11)
|
||||
#define FXAA_QUALITY__PS 4
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 3.0
|
||||
#define FXAA_QUALITY__P3 12.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 12)
|
||||
#define FXAA_QUALITY__PS 5
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 4.0
|
||||
#define FXAA_QUALITY__P4 12.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 13)
|
||||
#define FXAA_QUALITY__PS 6
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 4.0
|
||||
#define FXAA_QUALITY__P5 12.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 14)
|
||||
#define FXAA_QUALITY__PS 7
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 2.0
|
||||
#define FXAA_QUALITY__P5 4.0
|
||||
#define FXAA_QUALITY__P6 12.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 15)
|
||||
#define FXAA_QUALITY__PS 8
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 2.0
|
||||
#define FXAA_QUALITY__P5 2.0
|
||||
#define FXAA_QUALITY__P6 4.0
|
||||
#define FXAA_QUALITY__P7 12.0
|
||||
#endif
|
||||
|
||||
/*============================================================================
|
||||
FXAA QUALITY - LOW DITHER PRESETS
|
||||
============================================================================*/
|
||||
#if (FXAA_QUALITY__PRESET == 20)
|
||||
#define FXAA_QUALITY__PS 3
|
||||
#define FXAA_QUALITY__P0 1.5
|
||||
#define FXAA_QUALITY__P1 2.0
|
||||
#define FXAA_QUALITY__P2 8.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 21)
|
||||
#define FXAA_QUALITY__PS 4
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 8.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 22)
|
||||
#define FXAA_QUALITY__PS 5
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 8.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 23)
|
||||
#define FXAA_QUALITY__PS 6
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 2.0
|
||||
#define FXAA_QUALITY__P5 8.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 24)
|
||||
#define FXAA_QUALITY__PS 7
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 2.0
|
||||
#define FXAA_QUALITY__P5 3.0
|
||||
#define FXAA_QUALITY__P6 8.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 25)
|
||||
#define FXAA_QUALITY__PS 8
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 2.0
|
||||
#define FXAA_QUALITY__P5 2.0
|
||||
#define FXAA_QUALITY__P6 4.0
|
||||
#define FXAA_QUALITY__P7 8.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 26)
|
||||
#define FXAA_QUALITY__PS 9
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 2.0
|
||||
#define FXAA_QUALITY__P5 2.0
|
||||
#define FXAA_QUALITY__P6 2.0
|
||||
#define FXAA_QUALITY__P7 4.0
|
||||
#define FXAA_QUALITY__P8 8.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 27)
|
||||
#define FXAA_QUALITY__PS 10
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 2.0
|
||||
#define FXAA_QUALITY__P5 2.0
|
||||
#define FXAA_QUALITY__P6 2.0
|
||||
#define FXAA_QUALITY__P7 2.0
|
||||
#define FXAA_QUALITY__P8 4.0
|
||||
#define FXAA_QUALITY__P9 8.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 28)
|
||||
#define FXAA_QUALITY__PS 11
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 2.0
|
||||
#define FXAA_QUALITY__P5 2.0
|
||||
#define FXAA_QUALITY__P6 2.0
|
||||
#define FXAA_QUALITY__P7 2.0
|
||||
#define FXAA_QUALITY__P8 2.0
|
||||
#define FXAA_QUALITY__P9 4.0
|
||||
#define FXAA_QUALITY__P10 8.0
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_QUALITY__PRESET == 29)
|
||||
#define FXAA_QUALITY__PS 12
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.5
|
||||
#define FXAA_QUALITY__P2 2.0
|
||||
#define FXAA_QUALITY__P3 2.0
|
||||
#define FXAA_QUALITY__P4 2.0
|
||||
#define FXAA_QUALITY__P5 2.0
|
||||
#define FXAA_QUALITY__P6 2.0
|
||||
#define FXAA_QUALITY__P7 2.0
|
||||
#define FXAA_QUALITY__P8 2.0
|
||||
#define FXAA_QUALITY__P9 2.0
|
||||
#define FXAA_QUALITY__P10 4.0
|
||||
#define FXAA_QUALITY__P11 8.0
|
||||
#endif
|
||||
|
||||
/*============================================================================
|
||||
FXAA QUALITY - EXTREME QUALITY
|
||||
============================================================================*/
|
||||
#if (FXAA_QUALITY__PRESET == 39)
|
||||
#define FXAA_QUALITY__PS 12
|
||||
#define FXAA_QUALITY__P0 1.0
|
||||
#define FXAA_QUALITY__P1 1.0
|
||||
#define FXAA_QUALITY__P2 1.0
|
||||
#define FXAA_QUALITY__P3 1.0
|
||||
#define FXAA_QUALITY__P4 1.0
|
||||
#define FXAA_QUALITY__P5 1.5
|
||||
#define FXAA_QUALITY__P6 2.0
|
||||
#define FXAA_QUALITY__P7 2.0
|
||||
#define FXAA_QUALITY__P8 2.0
|
||||
#define FXAA_QUALITY__P9 2.0
|
||||
#define FXAA_QUALITY__P10 4.0
|
||||
#define FXAA_QUALITY__P11 8.0
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*============================================================================
|
||||
|
||||
API PORTING
|
||||
|
||||
============================================================================*/
|
||||
#if (FXAA_GLSL_120 == 1) || (FXAA_GLSL_130 == 1)
|
||||
#define FxaaBool bool
|
||||
#define FxaaDiscard discard
|
||||
#define FxaaFloat float
|
||||
#define FxaaFloat2 vec2
|
||||
#define FxaaFloat3 vec3
|
||||
#define FxaaFloat4 vec4
|
||||
#define FxaaHalf float
|
||||
#define FxaaHalf2 vec2
|
||||
#define FxaaHalf3 vec3
|
||||
#define FxaaHalf4 vec4
|
||||
#define FxaaInt2 ivec2
|
||||
#define FxaaSat(x) clamp(x, 0.0, 1.0)
|
||||
#define FxaaTex sampler2D
|
||||
#else
|
||||
#define FxaaBool bool
|
||||
#define FxaaDiscard clip(-1)
|
||||
#define FxaaFloat float
|
||||
#define FxaaFloat2 float2
|
||||
#define FxaaFloat3 float3
|
||||
#define FxaaFloat4 float4
|
||||
#define FxaaHalf half
|
||||
#define FxaaHalf2 half2
|
||||
#define FxaaHalf3 half3
|
||||
#define FxaaHalf4 half4
|
||||
#define FxaaSat(x) saturate(x)
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_GLSL_120 == 1)
|
||||
// Requires,
|
||||
// #version 120
|
||||
// And at least,
|
||||
// #extension GL_EXT_gpu_shader4 : enable
|
||||
// (or set FXAA_FAST_PIXEL_OFFSET 1 to work like DX9)
|
||||
#define FxaaTexTop(t, p) texture2DLod(t, p, 0.0)
|
||||
#if (FXAA_FAST_PIXEL_OFFSET == 1)
|
||||
#define FxaaTexOff(t, p, o, r) texture2DLodOffset(t, p, 0.0, o)
|
||||
#else
|
||||
#define FxaaTexOff(t, p, o, r) texture2DLod(t, p + (o * r), 0.0)
|
||||
#endif
|
||||
#if (FXAA_GATHER4_ALPHA == 1)
|
||||
// use #extension GL_ARB_gpu_shader5 : enable
|
||||
#define FxaaTexAlpha4(t, p) textureGather(t, p, 3)
|
||||
#define FxaaTexOffAlpha4(t, p, o) textureGatherOffset(t, p, o, 3)
|
||||
#define FxaaTexGreen4(t, p) textureGather(t, p, 1)
|
||||
#define FxaaTexOffGreen4(t, p, o) textureGatherOffset(t, p, o, 1)
|
||||
#endif
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_GLSL_130 == 1)
|
||||
// Requires "#version 130" or better
|
||||
#define FxaaTexTop(t, p) textureLod(t, p, 0.0)
|
||||
#define FxaaTexOff(t, p, o, r) textureLodOffset(t, p, 0.0, o)
|
||||
#if (FXAA_GATHER4_ALPHA == 1)
|
||||
// use #extension GL_ARB_gpu_shader5 : enable
|
||||
#define FxaaTexAlpha4(t, p) textureGather(t, p, 3)
|
||||
#define FxaaTexOffAlpha4(t, p, o) textureGatherOffset(t, p, o, 3)
|
||||
#define FxaaTexGreen4(t, p) textureGather(t, p, 1)
|
||||
#define FxaaTexOffGreen4(t, p, o) textureGatherOffset(t, p, o, 1)
|
||||
#endif
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_HLSL_3 == 1) || (FXAA_360 == 1) || (FXAA_PS3 == 1)
|
||||
#define FxaaInt2 float2
|
||||
#define FxaaTex sampler2D
|
||||
#define FxaaTexTop(t, p) tex2Dlod(t, float4(p, 0.0, 0.0))
|
||||
#define FxaaTexOff(t, p, o, r) tex2Dlod(t, float4(p + (o * r), 0, 0))
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_HLSL_4 == 1)
|
||||
#define FxaaInt2 int2
|
||||
struct FxaaTex { SamplerState smpl; Texture2D tex; };
|
||||
#define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0)
|
||||
#define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o)
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#if (FXAA_HLSL_5 == 1)
|
||||
#define FxaaInt2 int2
|
||||
struct FxaaTex { SamplerState smpl; Texture2D tex; };
|
||||
#define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0)
|
||||
#define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o)
|
||||
#define FxaaTexAlpha4(t, p) t.tex.GatherAlpha(t.smpl, p)
|
||||
#define FxaaTexOffAlpha4(t, p, o) t.tex.GatherAlpha(t.smpl, p, o)
|
||||
#define FxaaTexGreen4(t, p) t.tex.GatherGreen(t.smpl, p)
|
||||
#define FxaaTexOffGreen4(t, p, o) t.tex.GatherGreen(t.smpl, p, o)
|
||||
#endif
|
||||
|
||||
|
||||
/*============================================================================
|
||||
GREEN AS LUMA OPTION SUPPORT FUNCTION
|
||||
============================================================================*/
|
||||
// FxaaLuma: returns the scalar luma value for an RGBA sample.
//   FXAA_GREEN_AS_LUMA == 0: luma is computed from the RGB channels as a
//     weighted sum with weights (0.299, 0.587, 0.114). The variant that reads
//     luma from the alpha channel is kept below, commented out.
//   otherwise: the green channel (rgba.y) is returned as a proxy for luma.
// NOTE(review): the header comment of this file still describes luma being
// stored in alpha; any code path that samples alpha directly (e.g. through
// FxaaTexAlpha4) would not go through this function. Confirm before enabling
// such a path.
#if (FXAA_GREEN_AS_LUMA == 0)
|
||||
//FxaaFloat FxaaLuma(FxaaFloat4 rgba) { return rgba.w; }
|
||||
FxaaFloat FxaaLuma(FxaaFloat4 rgba) { return dot(rgba.rgb, FxaaFloat3(0.299, 0.587, 0.114)); }
|
||||
#else
|
||||
FxaaFloat FxaaLuma(FxaaFloat4 rgba) { return rgba.y; }
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
/*============================================================================
|
||||
|
||||
FXAA3 QUALITY - PC
|
||||
|
||||
============================================================================*/
|
||||
#if (FXAA_PC == 1)
|
||||
/*--------------------------------------------------------------------------*/
|
||||
FxaaFloat4 FxaaPixelShader(
    // pos
    //   Use noperspective interpolation (perspective interpolation off).
    //   {xy} = center of pixel
    FxaaFloat2 pos,
    // fxaaConsolePosPos
    //   Used only for FXAA Console, and not used on the 360 version.
    //   Use noperspective interpolation (perspective interpolation off).
    //   {xy__} = upper left of pixel
    //   {__zw} = lower right of pixel
    FxaaFloat4 fxaaConsolePosPos,
    // tex
    //   Input color texture.
    //   {rgb_} = color in linear or perceptual color space
    //   if (FXAA_GREEN_AS_LUMA == 0)
    //     {___a} = luma in perceptual color space (not linear)
    FxaaTex tex,
    // fxaaConsole360TexExpBiasNegOne
    //   Only used on the optimized 360 version of FXAA Console.
    //   For everything but 360, pass the same input as for "tex".
    //   For 360, the same texture aliased with a 2nd sampler.
    //   That sampler needs an exponent bias of -1.
    FxaaTex fxaaConsole360TexExpBiasNegOne,
    // fxaaConsole360TexExpBiasNegTwo
    //   Only used on the optimized 360 version of FXAA Console.
    //   For everything but 360, pass the same input as for "tex".
    //   For 360, the same texture aliased with a 3rd sampler.
    //   That sampler needs an exponent bias of -2.
    FxaaTex fxaaConsole360TexExpBiasNegTwo,
    // fxaaQualityRcpFrame
    //   Only used on FXAA Quality. Must come from a constant/uniform.
    //   {x_} = 1.0/screenWidthInPixels
    //   {_y} = 1.0/screenHeightInPixels
    FxaaFloat2 fxaaQualityRcpFrame,
    // fxaaConsoleRcpFrameOpt
    //   Only used on FXAA Console. Must come from a constant/uniform.
    //   Affects sub-pixel AA quality and, inversely, sharpness.
    //   N ranges between
    //     N = 0.50 (default)
    //     N = 0.33 (sharper)
    //   {x___} = -N/screenWidthInPixels
    //   {_y__} = -N/screenHeightInPixels
    //   {__z_} =  N/screenWidthInPixels
    //   {___w} =  N/screenHeightInPixels
    FxaaFloat4 fxaaConsoleRcpFrameOpt,
    // fxaaConsoleRcpFrameOpt2
    //   Only used on FXAA Console. Not used on 360, but used on PS3 and PC.
    //   Must come from a constant/uniform.
    //   {x___} = -2.0/screenWidthInPixels
    //   {_y__} = -2.0/screenHeightInPixels
    //   {__z_} =  2.0/screenWidthInPixels
    //   {___w} =  2.0/screenHeightInPixels
    FxaaFloat4 fxaaConsoleRcpFrameOpt2,
    // fxaaConsole360RcpFrameOpt2
    //   Only used on FXAA Console, and only on 360 in place of
    //   fxaaConsoleRcpFrameOpt2. Must come from a constant/uniform.
    //   {x___} =  8.0/screenWidthInPixels
    //   {_y__} =  8.0/screenHeightInPixels
    //   {__z_} = -4.0/screenWidthInPixels
    //   {___w} = -4.0/screenHeightInPixels
    FxaaFloat4 fxaaConsole360RcpFrameOpt2,
    // fxaaQualitySubpix
    //   Only used on FXAA Quality.
    //   Formerly the FXAA_QUALITY__SUBPIX define; a parameter now for easier tuning.
    //   Chooses the amount of sub-pixel aliasing removal; this can affect sharpness.
    //   1.00 - upper limit (softer)
    //   0.75 - default amount of filtering
    //   0.50 - lower limit (sharper, less sub-pixel aliasing removal)
    //   0.25 - almost off
    //   0.00 - completely off
    FxaaFloat fxaaQualitySubpix,
    // fxaaQualityEdgeThreshold
    //   Only used on FXAA Quality.
    //   Formerly the FXAA_QUALITY__EDGE_THRESHOLD define; a parameter now for easier tuning.
    //   The minimum amount of local contrast required to apply the algorithm.
    //   0.333 - too little (faster)
    //   0.250 - low quality
    //   0.166 - default
    //   0.125 - high quality
    //   0.063 - overkill (slower)
    FxaaFloat fxaaQualityEdgeThreshold,
    // fxaaQualityEdgeThresholdMin
    //   Only used on FXAA Quality.
    //   Formerly the FXAA_QUALITY__EDGE_THRESHOLD_MIN define; a parameter now for easier tuning.
    //   Trims the algorithm from processing darks.
    //   0.0833 - upper limit (default, the start of visible unfiltered edges)
    //   0.0625 - high quality (faster)
    //   0.0312 - visible limit (slower)
    //   Special notes when using FXAA_GREEN_AS_LUMA:
    //     Likely want to set this to zero, as colors that are mostly
    //     not-green will appear very dark in the green channel.
    //     Tune by looking at mostly non-green content, then start at
    //     zero and increase until aliasing is a problem.
    FxaaFloat fxaaQualityEdgeThresholdMin,
    // fxaaConsoleEdgeSharpness
    //   Only used on FXAA Console.
    //   Formerly the FXAA_CONSOLE__EDGE_SHARPNESS define; a parameter now for easier tuning.
    //   Does not affect PS3, where this needs to be compiled in:
    //   use FXAA_CONSOLE__PS3_EDGE_SHARPNESS for PS3.
    //   Because the PS3 is ALU bound, there are only three safe values
    //   there: 2, 4 and 8. These rely on the shader's ability to do a
    //   free multiply/divide by 2|4|8.
    //   All other platforms can use a non-power of two.
    //   8.0 is sharper (default!!!)
    //   4.0 is softer
    //   2.0 is really soft (good only for vector graphics inputs)
    FxaaFloat fxaaConsoleEdgeSharpness,
    // fxaaConsoleEdgeThreshold
    //   Only used on FXAA Console.
    //   Formerly the FXAA_CONSOLE__EDGE_THRESHOLD define; a parameter now for easier tuning.
    //   Does not affect PS3, where this needs to be compiled in:
    //   use FXAA_CONSOLE__PS3_EDGE_THRESHOLD for PS3.
    //   Because the PS3 is ALU bound, there are only two safe values
    //   there: 1/4 and 1/8. These rely on the shader's ability to do a
    //   free multiply/divide by 2|4|8.
    //   The console setting has a different mapping than the quality setting.
    //   Other platforms can use other values.
    //   0.125 leaves less aliasing, but is softer (default!!!)
    //   0.25 leaves more aliasing, and is sharper
    FxaaFloat fxaaConsoleEdgeThreshold,
    // fxaaConsoleEdgeThresholdMin
    //   Only used on FXAA Console.
    //   Formerly the FXAA_CONSOLE__EDGE_THRESHOLD_MIN define; a parameter now for easier tuning.
    //   Trims the algorithm from processing darks.
    //   The console setting has a different mapping than the quality setting.
    //   Only applies when FXAA_EARLY_EXIT is 1.
    //   Does not apply to PS3, which was simplified to avoid more shader instructions.
    //   0.06 - faster but more aliasing in darks
    //   0.05 - default
    //   0.04 - slower and less aliasing in darks
    //   Special notes when using FXAA_GREEN_AS_LUMA:
    //     Likely want to set this to zero, as colors that are mostly
    //     not-green will appear very dark in the green channel.
    //     Tune by looking at mostly non-green content, then start at
    //     zero and increase until aliasing is a problem.
    FxaaFloat fxaaConsoleEdgeThresholdMin,
    // fxaaConsole360ConstDir
    //   Extra constants for 360 FXAA Console only.
    //   Use zeros or anything else for other platforms.
    //   These must be in physical constant registers and NOT immediates;
    //   immediates will result in compiler un-optimizing.
    //   {xyzw} = float4(1.0, -1.0, 0.25, -0.25)
    FxaaFloat4 fxaaConsole360ConstDir
) {
    // Working copy of the pixel center; it is nudged just before the final fetch.
    FxaaFloat2 posM = pos;

    // ---- Center and axis-neighbour lumas --------------------------------
    // NOTE: the luma* names below are either macros aliasing gathered
    // components or real locals, depending on FXAA_GATHER4_ALPHA. The macros
    // are not #undef'd inside this block.
    #if (FXAA_GATHER4_ALPHA == 1)
        #if (FXAA_DISCARD == 0)
            FxaaFloat4 rgbyM = FxaaTexTop(tex, posM);
            #if (FXAA_GREEN_AS_LUMA == 0)
                #define lumaM rgbyM.w
            #else
                #define lumaM rgbyM.y
            #endif
        #endif
        #if (FXAA_GREEN_AS_LUMA == 0)
            FxaaFloat4 luma4A = FxaaTexAlpha4(tex, posM);
            FxaaFloat4 luma4B = FxaaTexOffAlpha4(tex, posM, FxaaInt2(-1, -1));
        #else
            FxaaFloat4 luma4A = FxaaTexGreen4(tex, posM);
            FxaaFloat4 luma4B = FxaaTexOffGreen4(tex, posM, FxaaInt2(-1, -1));
        #endif
        #if (FXAA_DISCARD == 1)
            #define lumaM luma4A.w
        #endif
        #define lumaE luma4A.z
        #define lumaS luma4A.x
        #define lumaSE luma4A.y
        #define lumaNW luma4B.w
        #define lumaN luma4B.z
        #define lumaW luma4B.x
    #else
        FxaaFloat4 rgbyM = FxaaTexTop(tex, posM);
        #if (FXAA_GREEN_AS_LUMA == 0)
            #define lumaM rgbyM.w
        #else
            #define lumaM rgbyM.y
        #endif
        FxaaFloat lumaS = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 0, 1), fxaaQualityRcpFrame.xy));
        FxaaFloat lumaE = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 1, 0), fxaaQualityRcpFrame.xy));
        FxaaFloat lumaN = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 0,-1), fxaaQualityRcpFrame.xy));
        FxaaFloat lumaW = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(-1, 0), fxaaQualityRcpFrame.xy));
    #endif

    // ---- Local contrast test --------------------------------------------
    // Luma range over the center and its four axis neighbours.
    FxaaFloat rangeMax = max(max(lumaN, lumaW), max(lumaE, max(lumaS, lumaM)));
    FxaaFloat rangeMin = min(min(lumaN, lumaW), min(lumaE, min(lumaS, lumaM)));
    FxaaFloat range = rangeMax - rangeMin;
    FxaaFloat rangeMaxClamped = max(fxaaQualityEdgeThresholdMin, rangeMax * fxaaQualityEdgeThreshold);

    // Not enough contrast: leave the pixel untouched (or discard it).
    if(range < rangeMaxClamped) {
        #if (FXAA_DISCARD == 1)
            FxaaDiscard;
        #else
            return rgbyM;
        #endif
    }

    // ---- Diagonal neighbour lumas ---------------------------------------
    // With gather4 enabled NW and SE already come from the gathered values,
    // so only NE and SW need explicit fetches.
    #if (FXAA_GATHER4_ALPHA == 0)
        FxaaFloat lumaNW = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(-1,-1), fxaaQualityRcpFrame.xy));
        FxaaFloat lumaSE = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 1, 1), fxaaQualityRcpFrame.xy));
    #endif
    FxaaFloat lumaNE = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 1,-1), fxaaQualityRcpFrame.xy));
    FxaaFloat lumaSW = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(-1, 1), fxaaQualityRcpFrame.xy));

    // ---- Edge orientation -----------------------------------------------
    // Pairwise sums reused by both the edge measures and the sub-pixel term.
    FxaaFloat lumaNS = lumaN + lumaS;
    FxaaFloat lumaWE = lumaW + lumaE;
    FxaaFloat lumaNESE = lumaNE + lumaSE;
    FxaaFloat lumaNWNE = lumaNW + lumaNE;
    FxaaFloat lumaNWSW = lumaNW + lumaSW;
    FxaaFloat lumaSWSE = lumaSW + lumaSE;

    // Second-difference style measures; the middle row/column is weighted x2.
    FxaaFloat edgeHorz1 = (-2.0 * lumaM) + lumaNS;
    FxaaFloat edgeHorz2 = (-2.0 * lumaE) + lumaNESE;
    FxaaFloat edgeHorz3 = (-2.0 * lumaW) + lumaNWSW;
    FxaaFloat edgeVert1 = (-2.0 * lumaM) + lumaWE;
    FxaaFloat edgeVert2 = (-2.0 * lumaN) + lumaNWNE;
    FxaaFloat edgeVert3 = (-2.0 * lumaS) + lumaSWSE;
    FxaaFloat edgeHorz = abs(edgeHorz3) + ((abs(edgeHorz1) * 2.0) + abs(edgeHorz2));
    FxaaFloat edgeVert = abs(edgeVert3) + ((abs(edgeVert1) * 2.0) + abs(edgeVert2));
    FxaaBool horzSpan = edgeHorz >= edgeVert;

    // ---- Sub-pixel blend amount -----------------------------------------
    // Weighted 3x3 neighbourhood average (axis x2, diagonals x1, /12) compared
    // with the center luma, normalised by the local range and shaped below.
    FxaaFloat subpixRcpRange = 1.0/range;
    FxaaFloat subpixNSWE = lumaNS + lumaWE;
    FxaaFloat subpixNWSWNESE = lumaNWSW + lumaNESE;
    FxaaFloat subpixA = subpixNSWE * 2.0 + subpixNWSWNESE;
    FxaaFloat subpixB = (subpixA * (1.0/12.0)) - lumaM;
    FxaaFloat subpixC = FxaaSat(abs(subpixB) * subpixRcpRange);
    FxaaFloat subpixD = ((-2.0)*subpixC) + 3.0;
    FxaaFloat subpixE = subpixC * subpixC;
    FxaaFloat subpixF = subpixD * subpixE;
    FxaaFloat subpixG = subpixF * subpixF;
    FxaaFloat subpixH = subpixG * fxaaQualitySubpix;

    // ---- Pick the side of the edge --------------------------------------
    // One-pixel step across the span: y for a horizontal span, x otherwise.
    FxaaFloat lengthSign = horzSpan ? fxaaQualityRcpFrame.y : fxaaQualityRcpFrame.x;

    // From here on lumaN/lumaS mean "the two neighbours across the span".
    // Must stay after every use of the original N/S values above.
    if(!horzSpan) {
        lumaN = lumaW;
        lumaS = lumaE;
    }

    FxaaFloat gradientN = lumaN - lumaM;
    FxaaFloat gradientS = lumaS - lumaM;
    FxaaBool pairN = abs(gradientN) >= abs(gradientS);
    FxaaFloat gradient = max(abs(gradientN), abs(gradientS));
    FxaaFloat gradientScaled = gradient * 1.0/4.0;
    if(pairN) lengthSign = -lengthSign;

    // Sum of the center luma and the steeper neighbour; half of it is the
    // reference level subtracted from every end-of-span sample.
    FxaaFloat lumaNN = pairN ? (lumaN + lumaM) : (lumaS + lumaM);
    FxaaBool lumaMLTZero = (lumaM - lumaNN * 0.5) < 0.0;

    // ---- Search setup ---------------------------------------------------
    // offNP is the per-step offset along the span (one axis is zero).
    FxaaFloat2 offNP;
    offNP.x = horzSpan ? fxaaQualityRcpFrame.x : 0.0;
    offNP.y = horzSpan ? 0.0 : fxaaQualityRcpFrame.y;

    // Start half a pixel across the span from the center.
    FxaaFloat2 posB = posM;
    if(horzSpan) posB.y += lengthSign * 0.5;
    else posB.x += lengthSign * 0.5;

    FxaaFloat2 posN = posB - offNP * FXAA_QUALITY__P0;
    FxaaFloat2 posP = posB + offNP * FXAA_QUALITY__P0;

    FxaaFloat lumaEndN = FxaaLuma(FxaaTexTop(tex, posN));
    FxaaFloat lumaEndP = FxaaLuma(FxaaTexTop(tex, posP));
    lumaEndN -= lumaNN * 0.5;
    lumaEndP -= lumaNN * 0.5;

    FxaaBool doneN = abs(lumaEndN) >= gradientScaled;
    FxaaBool doneP = abs(lumaEndP) >= gradientScaled;
    if(!doneN) posN -= offNP * FXAA_QUALITY__P1;
    if(!doneP) posP += offNP * FXAA_QUALITY__P1;
    // Despite its name, doneNP is true while at least one direction is NOT done.
    FxaaBool doneNP = !(doneN && doneP);

    // ---- Unrolled end-of-span search ------------------------------------
    // Each level re-samples whichever direction is still open, re-tests it
    // against gradientScaled and advances it by the next FXAA_QUALITY__Pn.
    // FXAA_QUALITY__PS selects how many levels are compiled in.
    if(doneNP) {
        if(!doneN) {
            lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
            lumaEndN = lumaEndN - lumaNN * 0.5;
        }
        if(!doneP) {
            lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
            lumaEndP = lumaEndP - lumaNN * 0.5;
        }
        doneN = abs(lumaEndN) >= gradientScaled;
        doneP = abs(lumaEndP) >= gradientScaled;
        if(!doneN) posN -= offNP * FXAA_QUALITY__P2;
        if(!doneP) posP += offNP * FXAA_QUALITY__P2;
        doneNP = !(doneN && doneP);
        #if (FXAA_QUALITY__PS > 3)
        if(doneNP) {
            if(!doneN) {
                lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
                lumaEndN = lumaEndN - lumaNN * 0.5;
            }
            if(!doneP) {
                lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
                lumaEndP = lumaEndP - lumaNN * 0.5;
            }
            doneN = abs(lumaEndN) >= gradientScaled;
            doneP = abs(lumaEndP) >= gradientScaled;
            if(!doneN) posN -= offNP * FXAA_QUALITY__P3;
            if(!doneP) posP += offNP * FXAA_QUALITY__P3;
            doneNP = !(doneN && doneP);
            #if (FXAA_QUALITY__PS > 4)
            if(doneNP) {
                if(!doneN) {
                    lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
                    lumaEndN = lumaEndN - lumaNN * 0.5;
                }
                if(!doneP) {
                    lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
                    lumaEndP = lumaEndP - lumaNN * 0.5;
                }
                doneN = abs(lumaEndN) >= gradientScaled;
                doneP = abs(lumaEndP) >= gradientScaled;
                if(!doneN) posN -= offNP * FXAA_QUALITY__P4;
                if(!doneP) posP += offNP * FXAA_QUALITY__P4;
                doneNP = !(doneN && doneP);
                #if (FXAA_QUALITY__PS > 5)
                if(doneNP) {
                    if(!doneN) {
                        lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
                        lumaEndN = lumaEndN - lumaNN * 0.5;
                    }
                    if(!doneP) {
                        lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
                        lumaEndP = lumaEndP - lumaNN * 0.5;
                    }
                    doneN = abs(lumaEndN) >= gradientScaled;
                    doneP = abs(lumaEndP) >= gradientScaled;
                    if(!doneN) posN -= offNP * FXAA_QUALITY__P5;
                    if(!doneP) posP += offNP * FXAA_QUALITY__P5;
                    doneNP = !(doneN && doneP);
                    #if (FXAA_QUALITY__PS > 6)
                    if(doneNP) {
                        if(!doneN) {
                            lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
                            lumaEndN = lumaEndN - lumaNN * 0.5;
                        }
                        if(!doneP) {
                            lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
                            lumaEndP = lumaEndP - lumaNN * 0.5;
                        }
                        doneN = abs(lumaEndN) >= gradientScaled;
                        doneP = abs(lumaEndP) >= gradientScaled;
                        if(!doneN) posN -= offNP * FXAA_QUALITY__P6;
                        if(!doneP) posP += offNP * FXAA_QUALITY__P6;
                        doneNP = !(doneN && doneP);
                        #if (FXAA_QUALITY__PS > 7)
                        if(doneNP) {
                            if(!doneN) {
                                lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
                                lumaEndN = lumaEndN - lumaNN * 0.5;
                            }
                            if(!doneP) {
                                lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
                                lumaEndP = lumaEndP - lumaNN * 0.5;
                            }
                            doneN = abs(lumaEndN) >= gradientScaled;
                            doneP = abs(lumaEndP) >= gradientScaled;
                            if(!doneN) posN -= offNP * FXAA_QUALITY__P7;
                            if(!doneP) posP += offNP * FXAA_QUALITY__P7;
                            doneNP = !(doneN && doneP);
                            #if (FXAA_QUALITY__PS > 8)
                            if(doneNP) {
                                if(!doneN) {
                                    lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
                                    lumaEndN = lumaEndN - lumaNN * 0.5;
                                }
                                if(!doneP) {
                                    lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
                                    lumaEndP = lumaEndP - lumaNN * 0.5;
                                }
                                doneN = abs(lumaEndN) >= gradientScaled;
                                doneP = abs(lumaEndP) >= gradientScaled;
                                if(!doneN) posN -= offNP * FXAA_QUALITY__P8;
                                if(!doneP) posP += offNP * FXAA_QUALITY__P8;
                                doneNP = !(doneN && doneP);
                                #if (FXAA_QUALITY__PS > 9)
                                if(doneNP) {
                                    if(!doneN) {
                                        lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
                                        lumaEndN = lumaEndN - lumaNN * 0.5;
                                    }
                                    if(!doneP) {
                                        lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
                                        lumaEndP = lumaEndP - lumaNN * 0.5;
                                    }
                                    doneN = abs(lumaEndN) >= gradientScaled;
                                    doneP = abs(lumaEndP) >= gradientScaled;
                                    if(!doneN) posN -= offNP * FXAA_QUALITY__P9;
                                    if(!doneP) posP += offNP * FXAA_QUALITY__P9;
                                    doneNP = !(doneN && doneP);
                                    #if (FXAA_QUALITY__PS > 10)
                                    if(doneNP) {
                                        if(!doneN) {
                                            lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
                                            lumaEndN = lumaEndN - lumaNN * 0.5;
                                        }
                                        if(!doneP) {
                                            lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
                                            lumaEndP = lumaEndP - lumaNN * 0.5;
                                        }
                                        doneN = abs(lumaEndN) >= gradientScaled;
                                        doneP = abs(lumaEndP) >= gradientScaled;
                                        if(!doneN) posN -= offNP * FXAA_QUALITY__P10;
                                        if(!doneP) posP += offNP * FXAA_QUALITY__P10;
                                        doneNP = !(doneN && doneP);
                                        #if (FXAA_QUALITY__PS > 11)
                                        if(doneNP) {
                                            if(!doneN) {
                                                lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
                                                lumaEndN = lumaEndN - lumaNN * 0.5;
                                            }
                                            if(!doneP) {
                                                lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
                                                lumaEndP = lumaEndP - lumaNN * 0.5;
                                            }
                                            doneN = abs(lumaEndN) >= gradientScaled;
                                            doneP = abs(lumaEndP) >= gradientScaled;
                                            if(!doneN) posN -= offNP * FXAA_QUALITY__P11;
                                            if(!doneP) posP += offNP * FXAA_QUALITY__P11;
                                            doneNP = !(doneN && doneP);
                                            #if (FXAA_QUALITY__PS > 12)
                                            if(doneNP) {
                                                if(!doneN) {
                                                    lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
                                                    lumaEndN = lumaEndN - lumaNN * 0.5;
                                                }
                                                if(!doneP) {
                                                    lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
                                                    lumaEndP = lumaEndP - lumaNN * 0.5;
                                                }
                                                doneN = abs(lumaEndN) >= gradientScaled;
                                                doneP = abs(lumaEndP) >= gradientScaled;
                                                if(!doneN) posN -= offNP * FXAA_QUALITY__P12;
                                                if(!doneP) posP += offNP * FXAA_QUALITY__P12;
                                                doneNP = !(doneN && doneP);
                                            }
                                            #endif
                                        }
                                        #endif
                                    }
                                    #endif
                                }
                                #endif
                            }
                            #endif
                        }
                        #endif
                    }
                    #endif
                }
                #endif
            }
            #endif
        }
        #endif
    }

    // ---- Resolve the pixel offset ---------------------------------------
    // Distance from the center to each end of the span, measured along it.
    FxaaFloat dstN = horzSpan ? (posM.x - posN.x) : (posM.y - posN.y);
    FxaaFloat dstP = horzSpan ? (posP.x - posM.x) : (posP.y - posM.y);

    // An end is "good" when its luma delta has the opposite sign to the center's.
    FxaaBool goodSpanN = (lumaEndN < 0.0) != lumaMLTZero;
    FxaaBool goodSpanP = (lumaEndP < 0.0) != lumaMLTZero;

    FxaaFloat spanLength = (dstP + dstN);
    FxaaFloat spanLengthRcp = 1.0/spanLength;

    // Use the nearer end to derive the offset.
    FxaaBool directionN = dstN < dstP;
    FxaaFloat dst = min(dstN, dstP);
    FxaaBool goodSpan = directionN ? goodSpanN : goodSpanP;
    FxaaFloat pixelOffset = (dst * (-spanLengthRcp)) + 0.5;
    FxaaFloat pixelOffsetGood = goodSpan ? pixelOffset : 0.0;

    // The larger of the edge offset and the sub-pixel amount wins.
    FxaaFloat pixelOffsetSubpix = max(pixelOffsetGood, subpixH);
    if(horzSpan) posM.y += pixelOffsetSubpix * lengthSign;
    else posM.x += pixelOffsetSubpix * lengthSign;

    #if (FXAA_DISCARD == 1)
        return FxaaTexTop(tex, posM);
    #else
        // Keep the original center luma in the fourth component.
        return FxaaFloat4(FxaaTexTop(tex, posM).xyz, lumaM);
    #endif
}
|
||||
/*==========================================================================*/
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
/*============================================================================
|
||||
|
||||
FXAA3 CONSOLE - PC VERSION
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
Instead of using this on PC, I'd suggest just using FXAA Quality with
|
||||
#define FXAA_QUALITY__PRESET 10
|
||||
Or
|
||||
#define FXAA_QUALITY__PRESET 20
|
||||
Either is higher quality and almost as fast as this on modern PC GPUs.
|
||||
============================================================================*/
|
||||
#if (FXAA_PC_CONSOLE == 1)
|
||||
/*--------------------------------------------------------------------------*/
|
||||
FxaaFloat4 FxaaPixelShader(
    // See FXAA Quality FxaaPixelShader() source for docs on Inputs!
    FxaaFloat2 pos,
    FxaaFloat4 fxaaConsolePosPos,
    FxaaTex tex,
    FxaaTex fxaaConsole360TexExpBiasNegOne,
    FxaaTex fxaaConsole360TexExpBiasNegTwo,
    FxaaFloat2 fxaaQualityRcpFrame,
    FxaaFloat4 fxaaConsoleRcpFrameOpt,
    FxaaFloat4 fxaaConsoleRcpFrameOpt2,
    FxaaFloat4 fxaaConsole360RcpFrameOpt2,
    FxaaFloat fxaaQualitySubpix,
    FxaaFloat fxaaQualityEdgeThreshold,
    FxaaFloat fxaaQualityEdgeThresholdMin,
    FxaaFloat fxaaConsoleEdgeSharpness,
    FxaaFloat fxaaConsoleEdgeThreshold,
    FxaaFloat fxaaConsoleEdgeThresholdMin,
    FxaaFloat4 fxaaConsole360ConstDir
) {
    // Lumas at the four corner positions packed into fxaaConsolePosPos.
    FxaaFloat cornerNw = FxaaLuma(FxaaTexTop(tex, fxaaConsolePosPos.xy));
    FxaaFloat cornerSw = FxaaLuma(FxaaTexTop(tex, fxaaConsolePosPos.xw));
    FxaaFloat cornerNe = FxaaLuma(FxaaTexTop(tex, fxaaConsolePosPos.zy));
    FxaaFloat cornerSe = FxaaLuma(FxaaTexTop(tex, fxaaConsolePosPos.zw));
    // Small constant bias on the NE corner, applied before any use of it.
    cornerNe += 1.0/384.0;

    // Center texel and its luma (alpha, or green when FXAA_GREEN_AS_LUMA is set).
    FxaaFloat4 rgbyM = FxaaTexTop(tex, pos.xy);
    #if (FXAA_GREEN_AS_LUMA == 0)
        FxaaFloat centerLuma = rgbyM.w;
    #else
        FxaaFloat centerLuma = rgbyM.y;
    #endif

    // Luma extremes over the four corners only.
    FxaaFloat lumaMax = max(max(cornerNe, cornerSe), max(cornerNw, cornerSw));
    FxaaFloat lumaMin = min(min(cornerNe, cornerSe), min(cornerNw, cornerSw));

    // Early out when the contrast (corners plus center) is below the threshold.
    FxaaFloat contrastThreshold = max(fxaaConsoleEdgeThresholdMin, lumaMax * fxaaConsoleEdgeThreshold);
    FxaaFloat contrast = max(lumaMax, centerLuma) - min(lumaMin, centerLuma);
    if(contrast < contrastThreshold) return rgbyM;

    // Direction built from the two diagonal luma differences.
    FxaaFloat diagSwNe = cornerSw - cornerNe;
    FxaaFloat diagSeNw = cornerSe - cornerNw;
    FxaaFloat2 dir;
    dir.x = diagSwNe + diagSeNw;
    dir.y = diagSwNe - diagSeNw;

    // First tap pair: unit direction scaled by fxaaConsoleRcpFrameOpt.zw.
    FxaaFloat2 dir1 = normalize(dir.xy);
    FxaaFloat4 rgbyN1 = FxaaTexTop(tex, pos.xy - dir1 * fxaaConsoleRcpFrameOpt.zw);
    FxaaFloat4 rgbyP1 = FxaaTexTop(tex, pos.xy + dir1 * fxaaConsoleRcpFrameOpt.zw);

    // Second tap pair: direction divided by its smaller component times the
    // sharpness, clamped to [-2, 2], scaled by fxaaConsoleRcpFrameOpt2.zw.
    FxaaFloat dirAbsMinTimesC = min(abs(dir1.x), abs(dir1.y)) * fxaaConsoleEdgeSharpness;
    FxaaFloat2 dir2 = clamp(dir1.xy / dirAbsMinTimesC, -2.0, 2.0);
    FxaaFloat4 rgbyN2 = FxaaTexTop(tex, pos.xy - dir2 * fxaaConsoleRcpFrameOpt2.zw);
    FxaaFloat4 rgbyP2 = FxaaTexTop(tex, pos.xy + dir2 * fxaaConsoleRcpFrameOpt2.zw);

    // rgbyA is the sum of the two near taps; rgbyB is the four-tap average.
    FxaaFloat4 rgbyA = rgbyN1 + rgbyP1;
    FxaaFloat4 rgbyB = ((rgbyN2 + rgbyP2) * 0.25) + (rgbyA * 0.25);

    // If the four-tap luma leaves the corner range, fall back to the two-tap
    // color. Only .xyz is replaced; .w keeps the four-tap value.
    #if (FXAA_GREEN_AS_LUMA == 0)
        FxaaFloat fourTapLuma = rgbyB.w;
    #else
        FxaaFloat fourTapLuma = rgbyB.y;
    #endif
    if((fourTapLuma < lumaMin) || (fourTapLuma > lumaMax)) rgbyB.xyz = rgbyA.xyz * 0.5;
    return rgbyB;
}
|
||||
/*==========================================================================*/
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*============================================================================
|
||||
|
||||
FXAA3 CONSOLE - 360 PIXEL SHADER
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
This optimized version thanks to suggestions from Andy Luedke.
|
||||
Should be fully tex bound in all cases.
|
||||
As of the FXAA 3.11 release, I have still not tested this code,
|
||||
however I fixed a bug which was in both FXAA 3.9 and FXAA 3.10.
|
||||
And note this is replacing the old unoptimized version.
|
||||
If it does not work, please let me know so I can fix it.
|
||||
============================================================================*/
|
||||
#if (FXAA_360 == 1)
|
||||
/*--------------------------------------------------------------------------*/
|
||||
[reduceTempRegUsage(4)]
float4 FxaaPixelShader(
    // See FXAA Quality FxaaPixelShader() source for docs on Inputs!
    FxaaFloat2 pos,
    FxaaFloat4 fxaaConsolePosPos,
    FxaaTex tex,
    FxaaTex fxaaConsole360TexExpBiasNegOne,
    FxaaTex fxaaConsole360TexExpBiasNegTwo,
    FxaaFloat2 fxaaQualityRcpFrame,
    FxaaFloat4 fxaaConsoleRcpFrameOpt,
    FxaaFloat4 fxaaConsoleRcpFrameOpt2,
    FxaaFloat4 fxaaConsole360RcpFrameOpt2,
    FxaaFloat fxaaQualitySubpix,
    FxaaFloat fxaaQualityEdgeThreshold,
    FxaaFloat fxaaQualityEdgeThresholdMin,
    FxaaFloat fxaaConsoleEdgeSharpness,
    FxaaFloat fxaaConsoleEdgeThreshold,
    FxaaFloat fxaaConsoleEdgeThresholdMin,
    FxaaFloat4 fxaaConsole360ConstDir
) {
    // Four half-texel-offset fetches around pos, one per component:
    // x = NW, y = NE, z = SW, w = SE. The source channel is alpha, or green
    // when FXAA_GREEN_AS_LUMA is set.
    float4 lumaNwNeSwSe;
    #if (FXAA_GREEN_AS_LUMA == 0)
        asm {
            tfetch2D lumaNwNeSwSe.w___, tex, pos.xy, OffsetX = -0.5, OffsetY = -0.5, UseComputedLOD=false
            tfetch2D lumaNwNeSwSe._w__, tex, pos.xy, OffsetX = 0.5, OffsetY = -0.5, UseComputedLOD=false
            tfetch2D lumaNwNeSwSe.__w_, tex, pos.xy, OffsetX = -0.5, OffsetY = 0.5, UseComputedLOD=false
            tfetch2D lumaNwNeSwSe.___w, tex, pos.xy, OffsetX = 0.5, OffsetY = 0.5, UseComputedLOD=false
        };
    #else
        asm {
            tfetch2D lumaNwNeSwSe.y___, tex, pos.xy, OffsetX = -0.5, OffsetY = -0.5, UseComputedLOD=false
            tfetch2D lumaNwNeSwSe._y__, tex, pos.xy, OffsetX = 0.5, OffsetY = -0.5, UseComputedLOD=false
            tfetch2D lumaNwNeSwSe.__y_, tex, pos.xy, OffsetX = -0.5, OffsetY = 0.5, UseComputedLOD=false
            tfetch2D lumaNwNeSwSe.___y, tex, pos.xy, OffsetX = 0.5, OffsetY = 0.5, UseComputedLOD=false
        };
    #endif

    // Small constant bias on the NE luma, applied before any use of it.
    lumaNwNeSwSe.y += 1.0/384.0;

    // Corner luma extremes: reduce (NW,NE) against (SW,SE), then across the pair.
    float2 pairMin = min(lumaNwNeSwSe.xy, lumaNwNeSwSe.zw);
    float2 pairMax = max(lumaNwNeSwSe.xy, lumaNwNeSwSe.zw);
    float lumaMin = min(pairMin.x, pairMin.y);
    float lumaMax = max(pairMax.x, pairMax.y);

    // Center texel; early out when the contrast (corners plus center) is
    // below the threshold.
    float4 rgbyM = tex2Dlod(tex, float4(pos.xy, 0.0, 0.0));
    #if (FXAA_GREEN_AS_LUMA == 0)
        float lumaMinM = min(lumaMin, rgbyM.w);
        float lumaMaxM = max(lumaMax, rgbyM.w);
    #else
        float lumaMinM = min(lumaMin, rgbyM.y);
        float lumaMaxM = max(lumaMax, rgbyM.y);
    #endif
    float contrastThreshold = max(fxaaConsoleEdgeThresholdMin, lumaMax * fxaaConsoleEdgeThreshold);
    if((lumaMaxM - lumaMinM) < contrastThreshold) return rgbyM;

    // Direction from signed sums of the corner lumas; the signs come from
    // fxaaConsole360ConstDir (documented as float4(1.0, -1.0, 0.25, -0.25)).
    float2 dir;
    dir.x = dot(lumaNwNeSwSe, fxaaConsole360ConstDir.yyxx);
    dir.y = dot(lumaNwNeSwSe, fxaaConsole360ConstDir.xyxy);
    dir = normalize(dir);

    // Offsets for the near tap pair (xy = one side, zw = the other).
    float4 dir1 = dir.xyxy * fxaaConsoleRcpFrameOpt.xyzw;

    // Offsets for the far tap pair: saturate a scaled direction into [0, 1],
    // then remap with the scale (xy) and bias (zw) in fxaaConsole360RcpFrameOpt2.
    float dirAbsMinTimesC = min(abs(dir.x), abs(dir.y)) * fxaaConsoleEdgeSharpness;
    float4 dir2 = saturate(fxaaConsole360ConstDir.zzww * dir.xyxy / dirAbsMinTimesC + 0.5);
    dir2 = dir2 * fxaaConsole360RcpFrameOpt2.xyxy + fxaaConsole360RcpFrameOpt2.zwzw;

    // Near taps use the ExpBiasNegOne sampler, far taps the ExpBiasNegTwo one.
    float4 rgbyN1 = tex2Dlod(fxaaConsole360TexExpBiasNegOne, float4(pos.xy + dir1.xy, 0.0, 0.0));
    float4 rgbyP1 = tex2Dlod(fxaaConsole360TexExpBiasNegOne, float4(pos.xy + dir1.zw, 0.0, 0.0));
    float4 rgbyN2 = tex2Dlod(fxaaConsole360TexExpBiasNegTwo, float4(pos.xy + dir2.xy, 0.0, 0.0));
    float4 rgbyP2 = tex2Dlod(fxaaConsole360TexExpBiasNegTwo, float4(pos.xy + dir2.zw, 0.0, 0.0));

    // NOTE(review): there are no explicit 1/2 and 1/4 weights here; presumably
    // the samplers' exponent bias supplies them. Confirm against sampler setup.
    float4 rgbyA = rgbyN1 + rgbyP1;
    float4 rgbyB = rgbyN2 + rgbyP2 + rgbyA * 0.5;

    // Take the four-tap result only while its luma stays above lumaMin and
    // does not exceed lumaMax; otherwise fall back to the two-tap result.
    float4 blended = ((FxaaLuma(rgbyB) - lumaMax) > 0.0) ? rgbyA : rgbyB;
    blended = ((FxaaLuma(rgbyB) - lumaMin) > 0.0) ? blended : rgbyA;
    return blended;
}
|
||||
/*==========================================================================*/
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*============================================================================
|
||||
|
||||
FXAA3 CONSOLE - OPTIMIZED PS3 PIXEL SHADER (NO EARLY EXIT)
|
||||
|
||||
==============================================================================
|
||||
The code below does not exactly match the assembly.
|
||||
I have a feeling that 12 cycles is possible, but was not able to get there.
|
||||
Might have to increase register count to get full performance.
|
||||
Note this shader does not use perspective interpolation.
|
||||
|
||||
Use the following cgc options,
|
||||
|
||||
--fenable-bx2 --fastmath --fastprecision --nofloatbindings
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
NVSHADERPERF OUTPUT
|
||||
------------------------------------------------------------------------------
|
||||
For reference and to aid in debug, output of NVShaderPerf should match this,
|
||||
|
||||
Shader to schedule:
|
||||
0: texpkb h0.w(TRUE), v5.zyxx, #0
|
||||
2: addh h2.z(TRUE), h0.w, constant(0.001953, 0.000000, 0.000000, 0.000000).x
|
||||
4: texpkb h0.w(TRUE), v5.xwxx, #0
|
||||
6: addh h0.z(TRUE), -h2, h0.w
|
||||
7: texpkb h1.w(TRUE), v5, #0
|
||||
9: addh h0.x(TRUE), h0.z, -h1.w
|
||||
10: addh h3.w(TRUE), h0.z, h1
|
||||
11: texpkb h2.w(TRUE), v5.zwzz, #0
|
||||
13: addh h0.z(TRUE), h3.w, -h2.w
|
||||
14: addh h0.x(TRUE), h2.w, h0
|
||||
15: nrmh h1.xz(TRUE), h0_n
|
||||
16: minh_m8 h0.x(TRUE), |h1|, |h1.z|
|
||||
17: maxh h4.w(TRUE), h0, h1
|
||||
18: divx h2.xy(TRUE), h1_n.xzzw, h0_n
|
||||
19: movr r1.zw(TRUE), v4.xxxy
|
||||
20: madr r2.xz(TRUE), -h1, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zzww, r1.zzww
|
||||
22: minh h5.w(TRUE), h0, h1
|
||||
23: texpkb h0(TRUE), r2.xzxx, #0
|
||||
25: madr r0.zw(TRUE), h1.xzxz, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w), r1
|
||||
27: maxh h4.x(TRUE), h2.z, h2.w
|
||||
28: texpkb h1(TRUE), r0.zwzz, #0
|
||||
30: addh_d2 h1(TRUE), h0, h1
|
||||
31: madr r0.xy(TRUE), -h2, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz
|
||||
33: texpkb h0(TRUE), r0, #0
|
||||
35: minh h4.z(TRUE), h2, h2.w
|
||||
36: fenct TRUE
|
||||
37: madr r1.xy(TRUE), h2, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz
|
||||
39: texpkb h2(TRUE), r1, #0
|
||||
41: addh_d2 h0(TRUE), h0, h2
|
||||
42: maxh h2.w(TRUE), h4, h4.x
|
||||
43: minh h2.x(TRUE), h5.w, h4.z
|
||||
44: addh_d2 h0(TRUE), h0, h1
|
||||
45: slth h2.x(TRUE), h0.w, h2
|
||||
46: sgth h2.w(TRUE), h0, h2
|
||||
47: movh h0(TRUE), h0
|
||||
48: addx.c0 rc(TRUE), h2, h2.w
|
||||
49: movh h0(c0.NE.x), h1
|
||||
|
||||
IPU0 ------ Simplified schedule: --------
|
||||
Pass | Unit | uOp | PC: Op
|
||||
-----+--------+------+-------------------------
|
||||
1 | SCT0/1 | mov | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;
|
||||
| TEX | txl | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;
|
||||
| SCB1 | add | 2: ADDh h2.z, h0.--w-, const.--x-;
|
||||
| | |
|
||||
2 | SCT0/1 | mov | 4: TXLr h0.w, g[TEX1].xwxx, const.xxxx, TEX0;
|
||||
| TEX | txl | 4: TXLr h0.w, g[TEX1].xwxx, const.xxxx, TEX0;
|
||||
| SCB1 | add | 6: ADDh h0.z,-h2, h0.--w-;
|
||||
| | |
|
||||
3 | SCT0/1 | mov | 7: TXLr h1.w, g[TEX1], const.xxxx, TEX0;
|
||||
| TEX | txl | 7: TXLr h1.w, g[TEX1], const.xxxx, TEX0;
|
||||
| SCB0 | add | 9: ADDh h0.x, h0.z---,-h1.w---;
|
||||
| SCB1 | add | 10: ADDh h3.w, h0.---z, h1;
|
||||
| | |
|
||||
4 | SCT0/1 | mov | 11: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;
|
||||
| TEX | txl | 11: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;
|
||||
| SCB0 | add | 14: ADDh h0.x, h2.w---, h0;
|
||||
| SCB1 | add | 13: ADDh h0.z, h3.--w-,-h2.--w-;
|
||||
| | |
|
||||
5 | SCT1 | mov | 15: NRMh h1.xz, h0;
|
||||
| SRB | nrm | 15: NRMh h1.xz, h0;
|
||||
| SCB0 | min | 16: MINh*8 h0.x, |h1|, |h1.z---|;
|
||||
| SCB1 | max | 17: MAXh h4.w, h0, h1;
|
||||
| | |
|
||||
6 | SCT0 | div | 18: DIVx h2.xy, h1.xz--, h0;
|
||||
| SCT1 | mov | 19: MOVr r1.zw, g[TEX0].--xy;
|
||||
| SCB0 | mad | 20: MADr r2.xz,-h1, const.z-w-, r1.z-w-;
|
||||
| SCB1 | min | 22: MINh h5.w, h0, h1;
|
||||
| | |
|
||||
7 | SCT0/1 | mov | 23: TXLr h0, r2.xzxx, const.xxxx, TEX0;
|
||||
| TEX | txl | 23: TXLr h0, r2.xzxx, const.xxxx, TEX0;
|
||||
| SCB0 | max | 27: MAXh h4.x, h2.z---, h2.w---;
|
||||
| SCB1 | mad | 25: MADr r0.zw, h1.--xz, const, r1;
|
||||
| | |
|
||||
8 | SCT0/1 | mov | 28: TXLr h1, r0.zwzz, const.xxxx, TEX0;
|
||||
| TEX | txl | 28: TXLr h1, r0.zwzz, const.xxxx, TEX0;
|
||||
| SCB0/1 | add | 30: ADDh/2 h1, h0, h1;
|
||||
| | |
|
||||
9 | SCT0 | mad | 31: MADr r0.xy,-h2, const.xy--, r1.zw--;
|
||||
| SCT1 | mov | 33: TXLr h0, r0, const.zzzz, TEX0;
|
||||
| TEX | txl | 33: TXLr h0, r0, const.zzzz, TEX0;
|
||||
| SCB1 | min | 35: MINh h4.z, h2, h2.--w-;
|
||||
| | |
|
||||
10 | SCT0 | mad | 37: MADr r1.xy, h2, const.xy--, r1.zw--;
|
||||
| SCT1 | mov | 39: TXLr h2, r1, const.zzzz, TEX0;
|
||||
| TEX | txl | 39: TXLr h2, r1, const.zzzz, TEX0;
|
||||
| SCB0/1 | add | 41: ADDh/2 h0, h0, h2;
|
||||
| | |
|
||||
11 | SCT0 | min | 43: MINh h2.x, h5.w---, h4.z---;
|
||||
| SCT1 | max | 42: MAXh h2.w, h4, h4.---x;
|
||||
| SCB0/1 | add | 44: ADDh/2 h0, h0, h1;
|
||||
| | |
|
||||
12 | SCT0 | set | 45: SLTh h2.x, h0.w---, h2;
|
||||
| SCT1 | set | 46: SGTh h2.w, h0, h2;
|
||||
| SCB0/1 | mul | 47: MOVh h0, h0;
|
||||
| | |
|
||||
13 | SCT0 | mad | 48: ADDxc0_s rc, h2, h2.w---;
|
||||
| SCB0/1 | mul | 49: MOVh h0(NE0.xxxx), h1;
|
||||
|
||||
Pass SCT TEX SCB
|
||||
1: 0% 100% 25%
|
||||
2: 0% 100% 25%
|
||||
3: 0% 100% 50%
|
||||
4: 0% 100% 50%
|
||||
5: 0% 0% 50%
|
||||
6: 100% 0% 75%
|
||||
7: 0% 100% 75%
|
||||
8: 0% 100% 100%
|
||||
9: 0% 100% 25%
|
||||
10: 0% 100% 100%
|
||||
11: 50% 0% 100%
|
||||
12: 50% 0% 100%
|
||||
13: 25% 0% 100%
|
||||
|
||||
MEAN: 17% 61% 67%
|
||||
|
||||
Pass SCT0 SCT1 TEX SCB0 SCB1
|
||||
1: 0% 0% 100% 0% 100%
|
||||
2: 0% 0% 100% 0% 100%
|
||||
3: 0% 0% 100% 100% 100%
|
||||
4: 0% 0% 100% 100% 100%
|
||||
5: 0% 0% 0% 100% 100%
|
||||
6: 100% 100% 0% 100% 100%
|
||||
7: 0% 0% 100% 100% 100%
|
||||
8: 0% 0% 100% 100% 100%
|
||||
9: 0% 0% 100% 0% 100%
|
||||
10: 0% 0% 100% 100% 100%
|
||||
11: 100% 100% 0% 100% 100%
|
||||
12: 100% 100% 0% 100% 100%
|
||||
13: 100% 0% 0% 100% 100%
|
||||
|
||||
MEAN: 30% 23% 61% 76% 100%
|
||||
Fragment Performance Setup: Driver RSX Compiler, GPU RSX, Flags 0x5
|
||||
Results 13 cycles, 3 r regs, 923,076,923 pixels/s
|
||||
============================================================================*/
|
||||
#if (FXAA_PS3 == 1) && (FXAA_EARLY_EXIT == 0)
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#pragma regcount 7
|
||||
#pragma disablepc all
|
||||
#pragma option O3
|
||||
#pragma option OutColorPrec=fp16
|
||||
#pragma texformat default RGBA8
|
||||
/*==========================================================================*/
|
||||
// FXAA3 console pixel shader, PS3 variant without early exit.
//
// Samples the four corner taps given by fxaaConsolePosPos, derives an edge
// direction from their luma, then blends two taps at +/- that direction
// (scaled by fxaaConsoleRcpFrameOpt.zw) with two taps at a clamped, sharpened
// direction (scaled by fxaaConsoleRcpFrameOpt2.zw). If the luma of the
// four-tap blend falls outside the [min, max] luma range of the corner taps,
// the two-tap blend is returned instead.
//
// Luma is read from the .w channel of each sample, or from .y when
// FXAA_GREEN_AS_LUMA is non-zero.
//
// Only pos, fxaaConsolePosPos, tex, fxaaConsoleRcpFrameOpt and
// fxaaConsoleRcpFrameOpt2 are read by this variant; the remaining arguments
// are part of the shared FxaaPixelShader() signature. Edge sharpness comes
// from the compile-time FXAA_CONSOLE__PS3_EDGE_SHARPNESS macro, not from the
// fxaaConsoleEdgeSharpness argument.
//
// NOTE(review): the "(n)" step markers presumably track the passes of the
// NVShaderPerf schedule quoted in the header comment -- keep statement order
// in mind when editing.
half4 FxaaPixelShader(
    // See FXAA Quality FxaaPixelShader() source for docs on Inputs!
    FxaaFloat2 pos,
    FxaaFloat4 fxaaConsolePosPos,
    FxaaTex tex,
    FxaaTex fxaaConsole360TexExpBiasNegOne,
    FxaaTex fxaaConsole360TexExpBiasNegTwo,
    FxaaFloat2 fxaaQualityRcpFrame,
    FxaaFloat4 fxaaConsoleRcpFrameOpt,
    FxaaFloat4 fxaaConsoleRcpFrameOpt2,
    FxaaFloat4 fxaaConsole360RcpFrameOpt2,
    FxaaFloat fxaaQualitySubpix,
    FxaaFloat fxaaQualityEdgeThreshold,
    FxaaFloat fxaaQualityEdgeThresholdMin,
    FxaaFloat fxaaConsoleEdgeSharpness,
    FxaaFloat fxaaConsoleEdgeThreshold,
    FxaaFloat fxaaConsoleEdgeThresholdMin,
    FxaaFloat4 fxaaConsole360ConstDir
) {
/*--------------------------------------------------------------------------*/
    // (1) North-east corner. Its luma gets a 1/512 bias before use
    //     (presumably so the direction vector is never exactly zero -- TODO confirm).
    half4 dir;
    half4 lumaNe = h4tex2Dlod(tex, half4(fxaaConsolePosPos.zy, 0, 0));
    #if (FXAA_GREEN_AS_LUMA == 0)
        lumaNe.w += half(1.0/512.0);
        dir.x = -lumaNe.w;
        dir.z = -lumaNe.w;
    #else
        lumaNe.y += half(1.0/512.0);
        dir.x = -lumaNe.y;
        dir.z = -lumaNe.y;
    #endif
/*--------------------------------------------------------------------------*/
    // (2) South-west corner.
    half4 lumaSw = h4tex2Dlod(tex, half4(fxaaConsolePosPos.xw, 0, 0));
    #if (FXAA_GREEN_AS_LUMA == 0)
        dir.x += lumaSw.w;
        dir.z += lumaSw.w;
    #else
        dir.x += lumaSw.y;
        dir.z += lumaSw.y;
    #endif
/*--------------------------------------------------------------------------*/
    // (3) North-west corner.
    half4 lumaNw = h4tex2Dlod(tex, half4(fxaaConsolePosPos.xy, 0, 0));
    #if (FXAA_GREEN_AS_LUMA == 0)
        dir.x -= lumaNw.w;
        dir.z += lumaNw.w;
    #else
        dir.x -= lumaNw.y;
        dir.z += lumaNw.y;
    #endif
/*--------------------------------------------------------------------------*/
    // (4) South-east corner. After this step:
    //     dir.x = (Sw + Se) - (Ne + Nw), dir.z = (Sw + Nw) - (Ne + Se).
    half4 lumaSe = h4tex2Dlod(tex, half4(fxaaConsolePosPos.zw, 0, 0));
    #if (FXAA_GREEN_AS_LUMA == 0)
        dir.x += lumaSe.w;
        dir.z -= lumaSe.w;
    #else
        dir.x += lumaSe.y;
        dir.z -= lumaSe.y;
    #endif
/*--------------------------------------------------------------------------*/
    // (5) Normalize the direction. Only dir.x and dir.z were written above,
    //     so dir.y must be zeroed first; otherwise normalize() would read an
    //     uninitialized component and the resulting direction is undefined.
    half4 dir1_pos;
    dir.y = 0.0;
    dir1_pos.xy = normalize(dir.xyz).xz;
    half dirAbsMinTimesC = min(abs(dir1_pos.x), abs(dir1_pos.y)) * half(FXAA_CONSOLE__PS3_EDGE_SHARPNESS);
/*--------------------------------------------------------------------------*/
    // (6) Second, sharpened direction, clamped to [-2, 2] per component.
    //     The .zw halves of both vectors carry the pixel position.
    half4 dir2_pos;
    dir2_pos.xy = clamp(dir1_pos.xy / dirAbsMinTimesC, half(-2.0), half(2.0));
    dir1_pos.zw = pos.xy;
    dir2_pos.zw = pos.xy;
    half4 temp1N;
    temp1N.xy = dir1_pos.zw - dir1_pos.xy * fxaaConsoleRcpFrameOpt.zw;
/*--------------------------------------------------------------------------*/
    // (7) Negative tap along dir1; temp1N is reused to hold the sample.
    temp1N = h4tex2Dlod(tex, half4(temp1N.xy, 0.0, 0.0));
    half4 rgby1;
    rgby1.xy = dir1_pos.zw + dir1_pos.xy * fxaaConsoleRcpFrameOpt.zw;
/*--------------------------------------------------------------------------*/
    // (8) Positive tap along dir1, then the two-tap average.
    rgby1 = h4tex2Dlod(tex, half4(rgby1.xy, 0.0, 0.0));
    rgby1 = (temp1N + rgby1) * 0.5;
/*--------------------------------------------------------------------------*/
    // (9) Negative tap along dir2.
    half4 temp2N;
    temp2N.xy = dir2_pos.zw - dir2_pos.xy * fxaaConsoleRcpFrameOpt2.zw;
    temp2N = h4tex2Dlod(tex, half4(temp2N.xy, 0.0, 0.0));
/*--------------------------------------------------------------------------*/
    // (10) Positive tap along dir2, then the average of the dir2 pair.
    half4 rgby2;
    rgby2.xy = dir2_pos.zw + dir2_pos.xy * fxaaConsoleRcpFrameOpt2.zw;
    rgby2 = h4tex2Dlod(tex, half4(rgby2.xy, 0.0, 0.0));
    rgby2 = (temp2N + rgby2) * 0.5;
/*--------------------------------------------------------------------------*/
    // (11) Luma range of the four corner taps (NE includes its bias), and the
    //      four-tap blend.
    // compiler moves these scalar ops up to other cycles
    #if (FXAA_GREEN_AS_LUMA == 0)
        half lumaMin = min(min(lumaNw.w, lumaSw.w), min(lumaNe.w, lumaSe.w));
        half lumaMax = max(max(lumaNw.w, lumaSw.w), max(lumaNe.w, lumaSe.w));
    #else
        half lumaMin = min(min(lumaNw.y, lumaSw.y), min(lumaNe.y, lumaSe.y));
        half lumaMax = max(max(lumaNw.y, lumaSw.y), max(lumaNe.y, lumaSe.y));
    #endif
    rgby2 = (rgby2 + rgby1) * 0.5;
/*--------------------------------------------------------------------------*/
    // (12) Does the four-tap blend's luma leave the corner luma range?
    #if (FXAA_GREEN_AS_LUMA == 0)
        bool twoTapLt = rgby2.w < lumaMin;
        bool twoTapGt = rgby2.w > lumaMax;
    #else
        bool twoTapLt = rgby2.y < lumaMin;
        bool twoTapGt = rgby2.y > lumaMax;
    #endif
/*--------------------------------------------------------------------------*/
    // (13) If so, fall back to the two-tap blend.
    if(twoTapLt || twoTapGt) rgby2 = rgby1;
/*--------------------------------------------------------------------------*/
    return rgby2; }
|
||||
/*==========================================================================*/
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*============================================================================
|
||||
|
||||
FXAA3 CONSOLE - OPTIMIZED PS3 PIXEL SHADER (WITH EARLY EXIT)
|
||||
|
||||
==============================================================================
|
||||
The code mostly matches the assembly.
|
||||
I have a feeling that 14 cycles is possible, but was not able to get there.
|
||||
Might have to increase register count to get full performance.
|
||||
Note this shader does not use perspective interpolation.
|
||||
|
||||
Use the following cgc options,
|
||||
|
||||
--fenable-bx2 --fastmath --fastprecision --nofloatbindings
|
||||
|
||||
Use of FXAA_GREEN_AS_LUMA currently adds a cycle (16 clks).
|
||||
Will look at fixing this for FXAA 3.12.
|
||||
------------------------------------------------------------------------------
|
||||
NVSHADERPERF OUTPUT
|
||||
------------------------------------------------------------------------------
|
||||
For reference and to aid in debug, output of NVShaderPerf should match this,
|
||||
|
||||
Shader to schedule:
|
||||
0: texpkb h0.w(TRUE), v5.zyxx, #0
|
||||
2: addh h2.y(TRUE), h0.w, constant(0.001953, 0.000000, 0.000000, 0.000000).x
|
||||
4: texpkb h1.w(TRUE), v5.xwxx, #0
|
||||
6: addh h0.x(TRUE), h1.w, -h2.y
|
||||
7: texpkb h2.w(TRUE), v5.zwzz, #0
|
||||
9: minh h4.w(TRUE), h2.y, h2
|
||||
10: maxh h5.x(TRUE), h2.y, h2.w
|
||||
11: texpkb h0.w(TRUE), v5, #0
|
||||
13: addh h3.w(TRUE), -h0, h0.x
|
||||
14: addh h0.x(TRUE), h0.w, h0
|
||||
15: addh h0.z(TRUE), -h2.w, h0.x
|
||||
16: addh h0.x(TRUE), h2.w, h3.w
|
||||
17: minh h5.y(TRUE), h0.w, h1.w
|
||||
18: nrmh h2.xz(TRUE), h0_n
|
||||
19: minh_m8 h2.w(TRUE), |h2.x|, |h2.z|
|
||||
20: divx h4.xy(TRUE), h2_n.xzzw, h2_n.w
|
||||
21: movr r1.zw(TRUE), v4.xxxy
|
||||
22: maxh h2.w(TRUE), h0, h1
|
||||
23: fenct TRUE
|
||||
24: madr r0.xy(TRUE), -h2.xzzw, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zwzz, r1.zwzz
|
||||
26: texpkb h0(TRUE), r0, #0
|
||||
28: maxh h5.x(TRUE), h2.w, h5
|
||||
29: minh h5.w(TRUE), h5.y, h4
|
||||
30: madr r1.xy(TRUE), h2.xzzw, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zwzz, r1.zwzz
|
||||
32: texpkb h2(TRUE), r1, #0
|
||||
34: addh_d2 h2(TRUE), h0, h2
|
||||
35: texpkb h1(TRUE), v4, #0
|
||||
37: maxh h5.y(TRUE), h5.x, h1.w
|
||||
38: minh h4.w(TRUE), h1, h5
|
||||
39: madr r0.xy(TRUE), -h4, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz
|
||||
41: texpkb h0(TRUE), r0, #0
|
||||
43: addh_m8 h5.z(TRUE), h5.y, -h4.w
|
||||
44: madr r2.xy(TRUE), h4, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz
|
||||
46: texpkb h3(TRUE), r2, #0
|
||||
48: addh_d2 h0(TRUE), h0, h3
|
||||
49: addh_d2 h3(TRUE), h0, h2
|
||||
50: movh h0(TRUE), h3
|
||||
51: slth h3.x(TRUE), h3.w, h5.w
|
||||
52: sgth h3.w(TRUE), h3, h5.x
|
||||
53: addx.c0 rc(TRUE), h3.x, h3
|
||||
54: slth.c0 rc(TRUE), h5.z, h5
|
||||
55: movh h0(c0.NE.w), h2
|
||||
56: movh h0(c0.NE.x), h1
|
||||
|
||||
IPU0 ------ Simplified schedule: --------
|
||||
Pass | Unit | uOp | PC: Op
|
||||
-----+--------+------+-------------------------
|
||||
1 | SCT0/1 | mov | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;
|
||||
| TEX | txl | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;
|
||||
| SCB0 | add | 2: ADDh h2.y, h0.-w--, const.-x--;
|
||||
| | |
|
||||
2 | SCT0/1 | mov | 4: TXLr h1.w, g[TEX1].xwxx, const.xxxx, TEX0;
|
||||
| TEX | txl | 4: TXLr h1.w, g[TEX1].xwxx, const.xxxx, TEX0;
|
||||
| SCB0 | add | 6: ADDh h0.x, h1.w---,-h2.y---;
|
||||
| | |
|
||||
3 | SCT0/1 | mov | 7: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;
|
||||
| TEX | txl | 7: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;
|
||||
| SCB0 | max | 10: MAXh h5.x, h2.y---, h2.w---;
|
||||
| SCB1 | min | 9: MINh h4.w, h2.---y, h2;
|
||||
| | |
|
||||
4 | SCT0/1 | mov | 11: TXLr h0.w, g[TEX1], const.xxxx, TEX0;
|
||||
| TEX | txl | 11: TXLr h0.w, g[TEX1], const.xxxx, TEX0;
|
||||
| SCB0 | add | 14: ADDh h0.x, h0.w---, h0;
|
||||
| SCB1 | add | 13: ADDh h3.w,-h0, h0.---x;
|
||||
| | |
|
||||
5 | SCT0 | mad | 16: ADDh h0.x, h2.w---, h3.w---;
|
||||
| SCT1 | mad | 15: ADDh h0.z,-h2.--w-, h0.--x-;
|
||||
| SCB0 | min | 17: MINh h5.y, h0.-w--, h1.-w--;
|
||||
| | |
|
||||
6 | SCT1 | mov | 18: NRMh h2.xz, h0;
|
||||
| SRB | nrm | 18: NRMh h2.xz, h0;
|
||||
| SCB1 | min | 19: MINh*8 h2.w, |h2.---x|, |h2.---z|;
|
||||
| | |
|
||||
7 | SCT0 | div | 20: DIVx h4.xy, h2.xz--, h2.ww--;
|
||||
| SCT1 | mov | 21: MOVr r1.zw, g[TEX0].--xy;
|
||||
| SCB1 | max | 22: MAXh h2.w, h0, h1;
|
||||
| | |
|
||||
8 | SCT0 | mad | 24: MADr r0.xy,-h2.xz--, const.zw--, r1.zw--;
|
||||
| SCT1 | mov | 26: TXLr h0, r0, const.xxxx, TEX0;
|
||||
| TEX | txl | 26: TXLr h0, r0, const.xxxx, TEX0;
|
||||
| SCB0 | max | 28: MAXh h5.x, h2.w---, h5;
|
||||
| SCB1 | min | 29: MINh h5.w, h5.---y, h4;
|
||||
| | |
|
||||
9 | SCT0 | mad | 30: MADr r1.xy, h2.xz--, const.zw--, r1.zw--;
|
||||
| SCT1 | mov | 32: TXLr h2, r1, const.xxxx, TEX0;
|
||||
| TEX | txl | 32: TXLr h2, r1, const.xxxx, TEX0;
|
||||
| SCB0/1 | add | 34: ADDh/2 h2, h0, h2;
|
||||
| | |
|
||||
10 | SCT0/1 | mov | 35: TXLr h1, g[TEX0], const.xxxx, TEX0;
|
||||
| TEX | txl | 35: TXLr h1, g[TEX0], const.xxxx, TEX0;
|
||||
| SCB0 | max | 37: MAXh h5.y, h5.-x--, h1.-w--;
|
||||
| SCB1 | min | 38: MINh h4.w, h1, h5;
|
||||
| | |
|
||||
11 | SCT0 | mad | 39: MADr r0.xy,-h4, const.xy--, r1.zw--;
|
||||
| SCT1 | mov | 41: TXLr h0, r0, const.zzzz, TEX0;
|
||||
| TEX | txl | 41: TXLr h0, r0, const.zzzz, TEX0;
|
||||
| SCB0 | mad | 44: MADr r2.xy, h4, const.xy--, r1.zw--;
|
||||
| SCB1 | add | 43: ADDh*8 h5.z, h5.--y-,-h4.--w-;
|
||||
| | |
|
||||
12 | SCT0/1 | mov | 46: TXLr h3, r2, const.xxxx, TEX0;
|
||||
| TEX | txl | 46: TXLr h3, r2, const.xxxx, TEX0;
|
||||
| SCB0/1 | add | 48: ADDh/2 h0, h0, h3;
|
||||
| | |
|
||||
13 | SCT0/1 | mad | 49: ADDh/2 h3, h0, h2;
|
||||
| SCB0/1 | mul | 50: MOVh h0, h3;
|
||||
| | |
|
||||
14 | SCT0 | set | 51: SLTh h3.x, h3.w---, h5.w---;
|
||||
| SCT1 | set | 52: SGTh h3.w, h3, h5.---x;
|
||||
| SCB0 | set | 54: SLThc0 rc, h5.z---, h5;
|
||||
| SCB1 | add | 53: ADDxc0_s rc, h3.---x, h3;
|
||||
| | |
|
||||
15 | SCT0/1 | mul | 55: MOVh h0(NE0.wwww), h2;
|
||||
| SCB0/1 | mul | 56: MOVh h0(NE0.xxxx), h1;
|
||||
|
||||
Pass SCT TEX SCB
|
||||
1: 0% 100% 25%
|
||||
2: 0% 100% 25%
|
||||
3: 0% 100% 50%
|
||||
4: 0% 100% 50%
|
||||
5: 50% 0% 25%
|
||||
6: 0% 0% 25%
|
||||
7: 100% 0% 25%
|
||||
8: 0% 100% 50%
|
||||
9: 0% 100% 100%
|
||||
10: 0% 100% 50%
|
||||
11: 0% 100% 75%
|
||||
12: 0% 100% 100%
|
||||
13: 100% 0% 100%
|
||||
14: 50% 0% 50%
|
||||
15: 100% 0% 100%
|
||||
|
||||
MEAN: 26% 60% 56%
|
||||
|
||||
Pass SCT0 SCT1 TEX SCB0 SCB1
|
||||
1: 0% 0% 100% 100% 0%
|
||||
2: 0% 0% 100% 100% 0%
|
||||
3: 0% 0% 100% 100% 100%
|
||||
4: 0% 0% 100% 100% 100%
|
||||
5: 100% 100% 0% 100% 0%
|
||||
6: 0% 0% 0% 0% 100%
|
||||
7: 100% 100% 0% 0% 100%
|
||||
8: 0% 0% 100% 100% 100%
|
||||
9: 0% 0% 100% 100% 100%
|
||||
10: 0% 0% 100% 100% 100%
|
||||
11: 0% 0% 100% 100% 100%
|
||||
12: 0% 0% 100% 100% 100%
|
||||
13: 100% 100% 0% 100% 100%
|
||||
14: 100% 100% 0% 100% 100%
|
||||
15: 100% 100% 0% 100% 100%
|
||||
|
||||
MEAN: 33% 33% 60% 86% 80%
|
||||
Fragment Performance Setup: Driver RSX Compiler, GPU RSX, Flags 0x5
|
||||
Results 15 cycles, 3 r regs, 800,000,000 pixels/s
|
||||
============================================================================*/
|
||||
#if (FXAA_PS3 == 1) && (FXAA_EARLY_EXIT == 1)
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#pragma regcount 7
|
||||
#pragma disablepc all
|
||||
#pragma option O2
|
||||
#pragma option OutColorPrec=fp16
|
||||
#pragma texformat default RGBA8
|
||||
/*==========================================================================*/
|
||||
// FXAA3 console pixel shader, PS3 variant with early exit.
//
// Reads the four corner taps given by fxaaConsolePosPos plus the centre tap
// at pos, builds an edge direction from the corner lumas, and blends two taps
// along that direction (offset scale fxaaConsoleRcpFrameOpt.zw) with two taps
// along a sharpened, clamped direction (offset scale
// fxaaConsoleRcpFrameOpt2.zw).
//
// Result selection, in order:
//   - the four-tap blend by default;
//   - the two-tap blend when the four-tap luma is outside the corner luma
//     range [cornerMin, cornerMax];
//   - the unfiltered centre sample when
//     (maxM - minM) / FXAA_CONSOLE__PS3_EDGE_THRESHOLD < cornerMax,
//     where maxM/minM also include the centre luma (the "early exit").
//
// Luma comes from the .w channel of a sample, or from .y when
// FXAA_GREEN_AS_LUMA is non-zero. Sharpness and threshold are taken from the
// FXAA_CONSOLE__PS3_EDGE_SHARPNESS / FXAA_CONSOLE__PS3_EDGE_THRESHOLD macros;
// the corresponding function arguments are not read here.
//
// NOTE(review): the "(n)" markers presumably follow the passes of the
// NVShaderPerf schedule in the header comment, so statement order is kept.
half4 FxaaPixelShader(
    // Inputs are documented with the FXAA Quality FxaaPixelShader() source.
    FxaaFloat2 pos,
    FxaaFloat4 fxaaConsolePosPos,
    FxaaTex tex,
    FxaaTex fxaaConsole360TexExpBiasNegOne,
    FxaaTex fxaaConsole360TexExpBiasNegTwo,
    FxaaFloat2 fxaaQualityRcpFrame,
    FxaaFloat4 fxaaConsoleRcpFrameOpt,
    FxaaFloat4 fxaaConsoleRcpFrameOpt2,
    FxaaFloat4 fxaaConsole360RcpFrameOpt2,
    FxaaFloat fxaaQualitySubpix,
    FxaaFloat fxaaQualityEdgeThreshold,
    FxaaFloat fxaaQualityEdgeThresholdMin,
    FxaaFloat fxaaConsoleEdgeSharpness,
    FxaaFloat fxaaConsoleEdgeThreshold,
    FxaaFloat fxaaConsoleEdgeThresholdMin,
    FxaaFloat4 fxaaConsole360ConstDir
) {
    // Block-local channel selector: expands to the same .w / .y accesses the
    // per-step #if pairs would, and is removed again before returning.
    #if (FXAA_GREEN_AS_LUMA == 0)
        #define FXAA_PS3_EE_LUMA(s) ((s).w)
    #else
        #define FXAA_PS3_EE_LUMA(s) ((s).y)
    #endif
/*--------------------------------------------------------------------------*/
    // (1) North-east corner, luma biased by 1/512.
    half4 texNe = h4tex2Dlod(tex, half4(fxaaConsolePosPos.zy, 0, 0));
    half biasedNe = FXAA_PS3_EE_LUMA(texNe) + half(1.0/512.0);
/*--------------------------------------------------------------------------*/
    // (2) South-west corner.
    half4 texSw = h4tex2Dlod(tex, half4(fxaaConsolePosPos.xw, 0, 0));
    half swMinusNe = FXAA_PS3_EE_LUMA(texSw) - biasedNe;
/*--------------------------------------------------------------------------*/
    // (3) North-west corner; start the running min/max with the west pair.
    half4 texNw = h4tex2Dlod(tex, half4(fxaaConsolePosPos.xy, 0, 0));
    half westMax = max(FXAA_PS3_EE_LUMA(texNw), FXAA_PS3_EE_LUMA(texSw));
    half westMin = min(FXAA_PS3_EE_LUMA(texNw), FXAA_PS3_EE_LUMA(texSw));
/*--------------------------------------------------------------------------*/
    // (4) South-east corner; partial direction terms without SE yet.
    half4 texSe = h4tex2Dlod(tex, half4(fxaaConsolePosPos.zw, 0, 0));
    half partialZ = FXAA_PS3_EE_LUMA(texNw) + swMinusNe;
    half partialX = -FXAA_PS3_EE_LUMA(texNw) + swMinusNe;
/*--------------------------------------------------------------------------*/
    // (5) Complete the direction; y stays zero so only x/z shape the normal.
    half3 edgeDir;
    edgeDir.y = 0.0;
    edgeDir.x = FXAA_PS3_EE_LUMA(texSe) + partialX;
    edgeDir.z = -FXAA_PS3_EE_LUMA(texSe) + partialZ;
    half eastMin = min(biasedNe, FXAA_PS3_EE_LUMA(texSe));
/*--------------------------------------------------------------------------*/
    // (6) Unit direction in .xy, plus the sharpness-scaled smaller component.
    half4 nearTap;
    nearTap.xy = normalize(edgeDir).xz;
    half sharpDivisor = min(abs(nearTap.x), abs(nearTap.y)) * half(FXAA_CONSOLE__PS3_EDGE_SHARPNESS);
/*--------------------------------------------------------------------------*/
    // (7) Sharpened direction clamped to [-2, 2]; .zw of both hold the pixel position.
    half4 farTap;
    farTap.xy = clamp(nearTap.xy / sharpDivisor, half(-2.0), half(2.0));
    nearTap.zw = pos.xy;
    farTap.zw = pos.xy;
    half eastMax = max(biasedNe, FXAA_PS3_EE_LUMA(texSe));
/*--------------------------------------------------------------------------*/
    // (8) Negative near tap (variable first holds the coordinate, then the sample),
    //     and the corner luma range.
    half4 nearNeg;
    nearNeg.xy = nearTap.zw - nearTap.xy * fxaaConsoleRcpFrameOpt.zw;
    nearNeg = h4tex2Dlod(tex, half4(nearNeg.xy, 0.0, 0.0));
    half cornerMax = max(westMax, eastMax);
    half cornerMin = min(westMin, eastMin);
/*--------------------------------------------------------------------------*/
    // (9) Positive near tap and the two-tap average.
    half4 twoTapBlend;
    twoTapBlend.xy = nearTap.zw + nearTap.xy * fxaaConsoleRcpFrameOpt.zw;
    twoTapBlend = h4tex2Dlod(tex, half4(twoTapBlend.xy, 0.0, 0.0));
    twoTapBlend = (nearNeg + twoTapBlend) * 0.5;
/*--------------------------------------------------------------------------*/
    // (10) Centre sample; extend the luma range with it.
    half4 centre = h4tex2Dlod(tex, half4(pos.xy, 0.0, 0.0));
    half rangeMaxM = max(cornerMax, FXAA_PS3_EE_LUMA(centre));
    half rangeMinM = min(cornerMin, FXAA_PS3_EE_LUMA(centre));
/*--------------------------------------------------------------------------*/
    // (11) Negative far tap, coordinate of the positive far tap, scaled contrast.
    half4 farNeg;
    farNeg.xy = farTap.zw - farTap.xy * fxaaConsoleRcpFrameOpt2.zw;
    farNeg = h4tex2Dlod(tex, half4(farNeg.xy, 0.0, 0.0));
    half4 fourTapBlend;
    fourTapBlend.xy = farTap.zw + farTap.xy * fxaaConsoleRcpFrameOpt2.zw;
    half scaledRangeM = (rangeMaxM - rangeMinM) / FXAA_CONSOLE__PS3_EDGE_THRESHOLD;
/*--------------------------------------------------------------------------*/
    // (12) Positive far tap and the far-pair average.
    fourTapBlend = h4tex2Dlod(tex, half4(fourTapBlend.xy, 0.0, 0.0));
    fourTapBlend = (farNeg + fourTapBlend) * 0.5;
/*--------------------------------------------------------------------------*/
    // (13) Combine far pair with the two-tap average.
    fourTapBlend = (fourTapBlend + twoTapBlend) * 0.5;
/*--------------------------------------------------------------------------*/
    // (14) Decide which result to keep.
    bool belowRange = FXAA_PS3_EE_LUMA(fourTapBlend) < cornerMin;
    bool aboveRange = FXAA_PS3_EE_LUMA(fourTapBlend) > cornerMax;
    bool skipFilter = scaledRangeM < cornerMax;
    bool useTwoTap = belowRange || aboveRange;
/*--------------------------------------------------------------------------*/
    // (15) Two-tap fallback first, then the early exit overrides everything.
    half4 outColor = useTwoTap ? twoTapBlend : fourTapBlend;
    outColor = skipFilter ? centre : outColor;
/*--------------------------------------------------------------------------*/
    #undef FXAA_PS3_EE_LUMA
    return outColor; }
|
||||
/*==========================================================================*/
|
||||
#endif
|
||||
@@ -83,7 +83,7 @@ void TonemappingProc::timerEvent(QTimerEvent * e) {
|
||||
if (!fbo_1x1.isInit()) return;
|
||||
if (timer_delim == 0)
|
||||
need_render_sum = true;
|
||||
timer_delim = (++timer_delim) % 10;
|
||||
timer_delim = (timer_delim + 1) % 10;
|
||||
mutex.lock();
|
||||
float fmax = frame_max;
|
||||
mutex.unlock();
|
||||
|
||||
Reference in New Issue
Block a user