diff --git a/editor/import/scene_importer_mesh.cpp b/editor/import/scene_importer_mesh.cpp
index bc7e8a1626..fa1a027a8d 100644
--- a/editor/import/scene_importer_mesh.cpp
+++ b/editor/import/scene_importer_mesh.cpp
@@ -30,6 +30,7 @@
 
 #include "scene_importer_mesh.h"
 
+#include "core/math/math_defs.h"
 #include "scene/resources/surface_tool.h"
 
 void EditorSceneImporterMesh::add_blend_shape(const String &p_name) {
@@ -141,6 +142,18 @@ void EditorSceneImporterMesh::set_surface_material(int p_surface, const Ref<Mate
 	surfaces.write[p_surface].material = p_material;
 }
 
+Basis EditorSceneImporterMesh::compute_rotation_matrix_from_ortho_6d(Vector3 p_x_raw, Vector3 p_y_raw) {
+	Vector3 x = p_x_raw.normalized();
+	Vector3 z = x.cross(p_y_raw);
+	z = z.normalized();
+	Vector3 y = z.cross(x);
+	Basis basis;
+	basis.set_axis(Vector3::AXIS_X, x);
+	basis.set_axis(Vector3::AXIS_Y, y);
+	basis.set_axis(Vector3::AXIS_Z, z);
+	return basis;
+}
+
 void EditorSceneImporterMesh::generate_lods() {
 	if (!SurfaceTool::simplify_func) {
 		return;
@@ -151,6 +164,9 @@ void EditorSceneImporterMesh::generate_lods() {
 	if (!SurfaceTool::simplify_sloppy_func) {
 		return;
 	}
+	if (!SurfaceTool::simplify_with_attrib_func) {
+		return;
+	}
 
 	for (int i = 0; i < surfaces.size(); i++) {
 		if (surfaces[i].primitive != Mesh::PRIMITIVE_TRIANGLES) {
@@ -163,59 +179,62 @@ void EditorSceneImporterMesh::generate_lods() {
 		if (indices.size() == 0) {
 			continue; //no lods if no indices
 		}
+		Vector<Vector3> normals = surfaces[i].arrays[RS::ARRAY_NORMAL];
 		uint32_t vertex_count = vertices.size();
 		const Vector3 *vertices_ptr = vertices.ptr();
-
-		int min_indices = 10;
-		int index_target = indices.size() / 2;
-		print_line("Total indices: " + itos(indices.size()));
-		float mesh_scale = SurfaceTool::simplify_scale_func((const float *)vertices_ptr, vertex_count, sizeof(Vector3));
-		const float target_error = 1e-3f;
-		float abs_target_error = target_error / mesh_scale;
+		Vector<float> attributes;
+		Vector<float> normal_weights;
+		int32_t attribute_count = 6;
+		if (normals.size()) {
+			attributes.resize(normals.size() * attribute_count);
+			for (int32_t normal_i = 0; normal_i < normals.size(); normal_i++) {
+				Basis basis;
+				basis.set_euler(normals[normal_i]);
+				Vector3 basis_x = basis.get_axis(0);
+				Vector3 basis_y = basis.get_axis(1);
+				basis = compute_rotation_matrix_from_ortho_6d(basis_x, basis_y);
+				basis_x = basis.get_axis(0);
+				basis_y = basis.get_axis(1);
+				attributes.write[normal_i * attribute_count + 0] = basis_x.x;
+				attributes.write[normal_i * attribute_count + 1] = basis_x.y;
+				attributes.write[normal_i * attribute_count + 2] = basis_x.z;
+				attributes.write[normal_i * attribute_count + 3] = basis_y.x;
+				attributes.write[normal_i * attribute_count + 4] = basis_y.y;
+				attributes.write[normal_i * attribute_count + 5] = basis_y.z;
+			}
+			normal_weights.resize(vertex_count);
+			for (int32_t weight_i = 0; weight_i < normal_weights.size(); weight_i++) {
+				normal_weights.write[weight_i] = 1.0;
+			}
+		} else {
+			attribute_count = 0;
+		}
+		const int min_indices = 10;
+		const float error_tolerance = 1.44224'95703; // Cube root of 3
+		const float threshold = 1.0 / error_tolerance;
+		int index_target = indices.size() * threshold;
+		float max_mesh_error_percentage = 1e0f;
+		float mesh_error = 0.0f;
 		while (index_target > min_indices) {
-			float error;
 			Vector<int> new_indices;
 			new_indices.resize(indices.size());
-			size_t new_len = SurfaceTool::simplify_func((unsigned int *)new_indices.ptrw(), (const unsigned int *)indices.ptr(), indices.size(), (const float *)vertices_ptr, vertex_count, sizeof(Vector3), index_target, abs_target_error, &error);
-			if ((int)new_len > (index_target * 120 / 100)) {
-				// Attribute discontinuities break normals.
-				bool is_sloppy = false;
-				if (is_sloppy) {
-					abs_target_error = target_error / mesh_scale;
-					index_target = new_len;
-					while (index_target > min_indices) {
-						Vector<int> sloppy_new_indices;
-						sloppy_new_indices.resize(indices.size());
-						new_len = SurfaceTool::simplify_sloppy_func((unsigned int *)sloppy_new_indices.ptrw(), (const unsigned int *)indices.ptr(), indices.size(), (const float *)vertices_ptr, vertex_count, sizeof(Vector3), index_target, abs_target_error, &error);
-						if ((int)new_len > (index_target * 120 / 100)) {
-							break; // 20 percent tolerance
-						}
-						sloppy_new_indices.resize(new_len);
-						Surface::LOD lod;
-						lod.distance = error * mesh_scale;
-						abs_target_error = lod.distance;
-						if (Math::is_equal_approx(abs_target_error, 0.0f)) {
-							return;
-						}
-						lod.indices = sloppy_new_indices;
-						print_line("Lod " + itos(surfaces.write[i].lods.size()) + " shoot for " + itos(index_target / 3) + " triangles, got " + itos(new_len / 3) + " triangles. Distance " + rtos(lod.distance) + ". Use simplify sloppy.");
-						surfaces.write[i].lods.push_back(lod);
-						index_target /= 2;
-					}
-				}
-				break; // 20 percent tolerance
+			size_t new_len = SurfaceTool::simplify_with_attrib_func((unsigned int *)new_indices.ptrw(), (const unsigned int *)indices.ptr(), indices.size(), (const float *)vertices_ptr, vertex_count, sizeof(Vector3), index_target, max_mesh_error_percentage, &mesh_error, (float *)attributes.ptrw(), normal_weights.ptrw(), attribute_count);
+			if ((int)new_len > (index_target * error_tolerance)) {
+				break;
+			}
+			Surface::LOD lod;
+			lod.distance = mesh_error;
+			if (Math::is_equal_approx(mesh_error, 0.0f)) {
+				break;
+			}
+			if (new_len <= 0) {
+				break;
 			}
 			new_indices.resize(new_len);
-			Surface::LOD lod;
-			lod.distance = error * mesh_scale;
-			abs_target_error = lod.distance;
-			if (Math::is_equal_approx(abs_target_error, 0.0f)) {
-				return;
-			}
 			lod.indices = new_indices;
-			print_line("Lod " + itos(surfaces.write[i].lods.size()) + " shoot for " + itos(index_target / 3) + " triangles, got " + itos(new_len / 3) + " triangles. Distance " + rtos(lod.distance));
+			print_line("Lod " + itos(surfaces.write[i].lods.size()) + " begin with " + itos(indices.size() / 3) + " triangles and shoot for " + itos(index_target / 3) + " triangles. Got " + itos(new_len / 3) + " triangles. Lod screen ratio " + rtos(lod.distance));
 			surfaces.write[i].lods.push_back(lod);
-			index_target /= 2;
+			index_target *= threshold;
 		}
 	}
 }
diff --git a/editor/import/scene_importer_mesh.h b/editor/import/scene_importer_mesh.h
index b3e8137e0a..c00339a620 100644
--- a/editor/import/scene_importer_mesh.h
+++ b/editor/import/scene_importer_mesh.h
@@ -67,6 +67,7 @@ class EditorSceneImporterMesh : public Resource {
 	Ref<EditorSceneImporterMesh> shadow_mesh;
 
 	Size2i lightmap_size_hint;
+	Basis compute_rotation_matrix_from_ortho_6d(Vector3 p_x_raw, Vector3 y_raw);
 
 protected:
 	void _set_data(const Dictionary &p_data);
diff --git a/modules/meshoptimizer/register_types.cpp b/modules/meshoptimizer/register_types.cpp
index a03310f518..77cc82a4e2 100644
--- a/modules/meshoptimizer/register_types.cpp
+++ b/modules/meshoptimizer/register_types.cpp
@@ -35,6 +35,7 @@
 void register_meshoptimizer_types() {
 	SurfaceTool::optimize_vertex_cache_func = meshopt_optimizeVertexCache;
 	SurfaceTool::simplify_func = meshopt_simplify;
+	SurfaceTool::simplify_with_attrib_func = meshopt_simplifyWithAttributes;
 	SurfaceTool::simplify_scale_func = meshopt_simplifyScale;
 	SurfaceTool::simplify_sloppy_func = meshopt_simplifySloppy;
 }
diff --git a/scene/resources/surface_tool.cpp b/scene/resources/surface_tool.cpp
index ff682a40f4..f2143e683d 100644
--- a/scene/resources/surface_tool.cpp
+++ b/scene/resources/surface_tool.cpp
@@ -35,6 +35,7 @@
 
 SurfaceTool::OptimizeVertexCacheFunc SurfaceTool::optimize_vertex_cache_func = nullptr;
 SurfaceTool::SimplifyFunc SurfaceTool::simplify_func = nullptr;
+SurfaceTool::SimplifyWithAttribFunc SurfaceTool::simplify_with_attrib_func = nullptr;
 SurfaceTool::SimplifyScaleFunc SurfaceTool::simplify_scale_func = nullptr;
 SurfaceTool::SimplifySloppyFunc SurfaceTool::simplify_sloppy_func = nullptr;
 
diff --git a/scene/resources/surface_tool.h b/scene/resources/surface_tool.h
index 28addf2245..f5f3a95b14 100644
--- a/scene/resources/surface_tool.h
+++ b/scene/resources/surface_tool.h
@@ -78,6 +78,8 @@ public:
 	static OptimizeVertexCacheFunc optimize_vertex_cache_func;
 	typedef size_t (*SimplifyFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float *r_error);
 	static SimplifyFunc simplify_func;
+	typedef size_t (*SimplifyWithAttribFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float *result_error, const float *attributes, const float *attribute_weights, size_t attribute_count);
+	static SimplifyWithAttribFunc simplify_with_attrib_func;
 	typedef float (*SimplifyScaleFunc)(const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
 	static SimplifyScaleFunc simplify_scale_func;
 	typedef size_t (*SimplifySloppyFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float *out_result_error);
diff --git a/thirdparty/README.md b/thirdparty/README.md
index 7cedd1a2cb..d3599fd195 100644
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -375,6 +375,9 @@ Files extracted from upstream repository:
 - All files in `src/`.
 - `LICENSE.md`.
 
+An [experimental upstream feature](https://github.com/zeux/meshoptimizer/tree/simplify-attr),
+has been backported, see patch in `patches` directory.
+
 
 ## miniupnpc
 
diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h
index fe8d349731..e44b99ce52 100644
--- a/thirdparty/meshoptimizer/meshoptimizer.h
+++ b/thirdparty/meshoptimizer/meshoptimizer.h
@@ -298,6 +298,11 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t ver
  */
 MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error);
 
+/**
+ * Experimental: Mesh simplifier with attribute metric; attributes follow xyz position data atm (vertex data must contain 3 + attribute_count floats per vertex)
+ */
+MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float* result_error, const float* attributes, const float* attribute_weights, size_t attribute_count);
+
 /**
  * Experimental: Mesh simplifier (sloppy)
  * Reduces the number of triangles in the mesh, sacrificing mesh apperance for simplification performance
diff --git a/thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch b/thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch
new file mode 100644
index 0000000000..cf648b0da3
--- /dev/null
+++ b/thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch
@@ -0,0 +1,262 @@
+diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h
+index fe8d349731..e44b99ce52 100644
+--- a/thirdparty/meshoptimizer/meshoptimizer.h
++++ b/thirdparty/meshoptimizer/meshoptimizer.h
+@@ -298,6 +298,11 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t ver
+  */
+ MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error);
+ 
++/**
++ * Experimental: Mesh simplifier with attribute metric; attributes follow xyz position data atm (vertex data must contain 3 + attribute_count floats per vertex)
++ */
++MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float* result_error, const float* attributes, const float* attribute_weights, size_t attribute_count);
++
+ /**
+  * Experimental: Mesh simplifier (sloppy)
+  * Reduces the number of triangles in the mesh, sacrificing mesh apperance for simplification performance
+diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp
+index b2cb589462..059cabb055 100644
+--- a/thirdparty/meshoptimizer/simplifier.cpp
++++ b/thirdparty/meshoptimizer/simplifier.cpp
+@@ -20,6 +20,8 @@
+ #define TRACESTATS(i) (void)0
+ #endif
+ 
++#define ATTRIBUTES 8
++
+ // This work is based on:
+ // Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997
+ // Michael Garland. Quadric-based polygonal surface simplification. 1999
+@@ -358,6 +360,10 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned
+ struct Vector3
+ {
+ 	float x, y, z;
++
++#if ATTRIBUTES
++	float a[ATTRIBUTES];
++#endif
+ };
+ 
+ static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride)
+@@ -414,6 +420,13 @@ struct Quadric
+ 	float a10, a20, a21;
+ 	float b0, b1, b2, c;
+ 	float w;
++
++#if ATTRIBUTES
++	float gx[ATTRIBUTES];
++	float gy[ATTRIBUTES];
++	float gz[ATTRIBUTES];
++	float gw[ATTRIBUTES];
++#endif
+ };
+ 
+ struct Collapse
+@@ -456,6 +469,16 @@ static void quadricAdd(Quadric& Q, const Quadric& R)
+ 	Q.b2 += R.b2;
+ 	Q.c += R.c;
+ 	Q.w += R.w;
++
++#if ATTRIBUTES
++	for (int k = 0; k < ATTRIBUTES; ++k)
++	{
++		Q.gx[k] += R.gx[k];
++		Q.gy[k] += R.gy[k];
++		Q.gz[k] += R.gz[k];
++		Q.gw[k] += R.gw[k];
++	}
++#endif
+ }
+ 
+ static float quadricError(const Quadric& Q, const Vector3& v)
+@@ -481,6 +504,17 @@ static float quadricError(const Quadric& Q, const Vector3& v)
+ 	r += ry * v.y;
+ 	r += rz * v.z;
+ 
++#if ATTRIBUTES
++	// see quadricUpdateAttributes for general derivation; here we need to add the parts of (eval(pos) - attr)^2 that depend on attr
++	for (int k = 0; k < ATTRIBUTES; ++k)
++	{
++		float a = v.a[k];
++
++		r += a * a * Q.w;
++		r -= 2 * a * (v.x * Q.gx[k] + v.y * Q.gy[k] + v.z * Q.gz[k] + Q.gw[k]);
++	}
++#endif
++
+ 	float s = Q.w == 0.f ? 0.f : 1.f / Q.w;
+ 
+ 	return fabsf(r) * s;
+@@ -504,6 +538,13 @@ static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, flo
+ 	Q.b2 = c * dw;
+ 	Q.c = d * dw;
+ 	Q.w = w;
++
++#if ATTRIBUTES
++	memset(Q.gx, 0, sizeof(Q.gx));
++	memset(Q.gy, 0, sizeof(Q.gy));
++	memset(Q.gz, 0, sizeof(Q.gz));
++	memset(Q.gw, 0, sizeof(Q.gw));
++#endif
+ }
+ 
+ static void quadricFromPoint(Quadric& Q, float x, float y, float z, float w)
+@@ -556,6 +597,84 @@ static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3
+ 	quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, length * weight);
+ }
+ 
++#if ATTRIBUTES
++static void quadricUpdateAttributes(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float w)
++{
++	// for each attribute we want to encode the following function into the quadric:
++	// (eval(pos) - attr)^2
++	// where eval(pos) interpolates attribute across the triangle like so:
++	// eval(pos) = pos.x * gx + pos.y * gy + pos.z * gz + gw
++	// where gx/gy/gz/gw are gradients
++	Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z};
++	Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z};
++
++	// we compute gradients using barycentric coordinates; barycentric coordinates can be computed as follows:
++	// v = (d11 * d20 - d01 * d21) / denom
++	// w = (d00 * d21 - d01 * d20) / denom
++	// u = 1 - v - w
++	// here v0, v1 are triangle edge vectors, v2 is a vector from point to triangle corner, and dij = dot(vi, vj)
++	const Vector3& v0 = p10;
++	const Vector3& v1 = p20;
++	float d00 = v0.x * v0.x + v0.y * v0.y + v0.z * v0.z;
++	float d01 = v0.x * v1.x + v0.y * v1.y + v0.z * v1.z;
++	float d11 = v1.x * v1.x + v1.y * v1.y + v1.z * v1.z;
++	float denom = d00 * d11 - d01 * d01;
++	float denomr = denom == 0 ? 0.f : 1.f / denom;
++
++	// precompute gradient factors
++	// these are derived by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w and factoring out common factors that are shared between attributes
++	float gx1 = (d11 * v0.x - d01 * v1.x) * denomr;
++	float gx2 = (d00 * v1.x - d01 * v0.x) * denomr;
++	float gy1 = (d11 * v0.y - d01 * v1.y) * denomr;
++	float gy2 = (d00 * v1.y - d01 * v0.y) * denomr;
++	float gz1 = (d11 * v0.z - d01 * v1.z) * denomr;
++	float gz2 = (d00 * v1.z - d01 * v0.z) * denomr;
++
++	for (int k = 0; k < ATTRIBUTES; ++k)
++	{
++		float a0 = p0.a[k], a1 = p1.a[k], a2 = p2.a[k];
++
++		// compute gradient of eval(pos) for x/y/z/w
++		// the formulas below are obtained by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w
++		float gx = gx1 * (a1 - a0) + gx2 * (a2 - a0);
++		float gy = gy1 * (a1 - a0) + gy2 * (a2 - a0);
++		float gz = gz1 * (a1 - a0) + gz2 * (a2 - a0);
++		float gw = a0 - p0.x * gx - p0.y * gy - p0.z * gz;
++
++		// quadric encodes (eval(pos)-attr)^2; this means that the resulting expansion needs to compute, for example, pos.x * pos.y * K
++		// since quadrics already encode factors for pos.x * pos.y, we can accumulate almost everything in basic quadric fields
++		Q.a00 += w * (gx * gx);
++		Q.a11 += w * (gy * gy);
++		Q.a22 += w * (gz * gz);
++
++		Q.a10 += w * (gy * gx);
++		Q.a20 += w * (gz * gx);
++		Q.a21 += w * (gz * gy);
++
++		Q.b0 += w * (gx * gw);
++		Q.b1 += w * (gy * gw);
++		Q.b2 += w * (gz * gw);
++
++		Q.c += w * (gw * gw);
++
++		// the only remaining sum components are ones that depend on attr; these will be addded during error evaluation, see quadricError
++		Q.gx[k] = w * gx;
++		Q.gy[k] = w * gy;
++		Q.gz[k] = w * gz;
++		Q.gw[k] = w * gw;
++
++#if TRACE > 2
++		printf("attr%d: %e %e %e\n",
++			k,
++			(gx * p0.x + gy * p0.y + gz * p0.z + gw - a0),
++			(gx * p1.x + gy * p1.y + gz * p1.z + gw - a1),
++			(gx * p2.x + gy * p2.y + gz * p2.z + gw - a2)
++			);
++#endif
++	}
++}
++#endif
++
+ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap)
+ {
+ 	for (size_t i = 0; i < index_count; i += 3)
+@@ -567,6 +686,9 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
+ 		Quadric Q;
+ 		quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], 1.f);
+ 
++#if ATTRIBUTES
++		quadricUpdateAttributes(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], Q.w);
++#endif
+ 		quadricAdd(vertex_quadrics[remap[i0]], Q);
+ 		quadricAdd(vertex_quadrics[remap[i1]], Q);
+ 		quadricAdd(vertex_quadrics[remap[i2]], Q);
+@@ -1259,13 +1381,19 @@ unsigned int* meshopt_simplifyDebugLoopBack = 0;
+ #endif
+ 
+ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* out_result_error)
++{
++	return meshopt_simplifyWithAttributes(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, target_index_count, target_error, out_result_error, 0, 0, 0);
++}
++
++size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float* out_result_error, const float* attributes, const float* attribute_weights, size_t attribute_count)
+ {
+ 	using namespace meshopt;
+ 
+ 	assert(index_count % 3 == 0);
+-	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+-	assert(vertex_positions_stride % sizeof(float) == 0);
++	assert(vertex_stride > 0 && vertex_stride <= 256);
++	assert(vertex_stride % sizeof(float) == 0);
+ 	assert(target_index_count <= index_count);
++	assert(attribute_count <= ATTRIBUTES);
+ 
+ 	meshopt_Allocator allocator;
+ 
+@@ -1279,7 +1407,7 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
+ 	// build position remap that maps each vertex to the one with identical position
+ 	unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
+ 	unsigned int* wedge = allocator.allocate<unsigned int>(vertex_count);
+-	buildPositionRemap(remap, wedge, vertex_positions_data, vertex_count, vertex_positions_stride, allocator);
++	buildPositionRemap(remap, wedge, vertex_data, vertex_count, vertex_stride, allocator);
+ 
+ 	// classify vertices; vertex kind determines collapse rules, see kCanCollapse
+ 	unsigned char* vertex_kind = allocator.allocate<unsigned char>(vertex_count);
+@@ -1303,7 +1431,21 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
+ #endif
+ 
+ 	Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count);
+-	rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride);
++	rescalePositions(vertex_positions, vertex_data, vertex_count, vertex_stride);
++
++#if ATTRIBUTES
++	for (size_t i = 0; i < vertex_count; ++i)
++	{
++		memset(vertex_positions[i].a, 0, sizeof(vertex_positions[i].a));
++
++		for (size_t k = 0; k < attribute_count; ++k)
++		{
++			float a = attributes[i * attribute_count + k];
++
++			vertex_positions[i].a[k] = a * attribute_weights[k];
++		}
++	}
++#endif
+ 
+ 	Quadric* vertex_quadrics = allocator.allocate<Quadric>(vertex_count);
+ 	memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric));
+@@ -1395,7 +1537,9 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
+ 
+ 	// result_error is quadratic; we need to remap it back to linear
+ 	if (out_result_error)
++	{
+ 		*out_result_error = sqrtf(result_error);
++	}
+ 
+ 	return result_count;
+ }
diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp
index b2cb589462..059cabb055 100644
--- a/thirdparty/meshoptimizer/simplifier.cpp
+++ b/thirdparty/meshoptimizer/simplifier.cpp
@@ -20,6 +20,8 @@
 #define TRACESTATS(i) (void)0
 #endif
 
+#define ATTRIBUTES 8
+
 // This work is based on:
 // Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997
 // Michael Garland. Quadric-based polygonal surface simplification. 1999
@@ -358,6 +360,10 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned
 struct Vector3
 {
 	float x, y, z;
+
+#if ATTRIBUTES
+	float a[ATTRIBUTES];
+#endif
 };
 
 static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride)
@@ -414,6 +420,13 @@ struct Quadric
 	float a10, a20, a21;
 	float b0, b1, b2, c;
 	float w;
+
+#if ATTRIBUTES
+	float gx[ATTRIBUTES];
+	float gy[ATTRIBUTES];
+	float gz[ATTRIBUTES];
+	float gw[ATTRIBUTES];
+#endif
 };
 
 struct Collapse
@@ -456,6 +469,16 @@ static void quadricAdd(Quadric& Q, const Quadric& R)
 	Q.b2 += R.b2;
 	Q.c += R.c;
 	Q.w += R.w;
+
+#if ATTRIBUTES
+	for (int k = 0; k < ATTRIBUTES; ++k)
+	{
+		Q.gx[k] += R.gx[k];
+		Q.gy[k] += R.gy[k];
+		Q.gz[k] += R.gz[k];
+		Q.gw[k] += R.gw[k];
+	}
+#endif
 }
 
 static float quadricError(const Quadric& Q, const Vector3& v)
@@ -481,6 +504,17 @@ static float quadricError(const Quadric& Q, const Vector3& v)
 	r += ry * v.y;
 	r += rz * v.z;
 
+#if ATTRIBUTES
+	// see quadricUpdateAttributes for general derivation; here we need to add the parts of (eval(pos) - attr)^2 that depend on attr
+	for (int k = 0; k < ATTRIBUTES; ++k)
+	{
+		float a = v.a[k];
+
+		r += a * a * Q.w;
+		r -= 2 * a * (v.x * Q.gx[k] + v.y * Q.gy[k] + v.z * Q.gz[k] + Q.gw[k]);
+	}
+#endif
+
 	float s = Q.w == 0.f ? 0.f : 1.f / Q.w;
 
 	return fabsf(r) * s;
@@ -504,6 +538,13 @@ static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, flo
 	Q.b2 = c * dw;
 	Q.c = d * dw;
 	Q.w = w;
+
+#if ATTRIBUTES
+	memset(Q.gx, 0, sizeof(Q.gx));
+	memset(Q.gy, 0, sizeof(Q.gy));
+	memset(Q.gz, 0, sizeof(Q.gz));
+	memset(Q.gw, 0, sizeof(Q.gw));
+#endif
 }
 
 static void quadricFromPoint(Quadric& Q, float x, float y, float z, float w)
@@ -556,6 +597,84 @@ static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3
 	quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, length * weight);
 }
 
+#if ATTRIBUTES
+static void quadricUpdateAttributes(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float w)
+{
+	// for each attribute we want to encode the following function into the quadric:
+	// (eval(pos) - attr)^2
+	// where eval(pos) interpolates attribute across the triangle like so:
+	// eval(pos) = pos.x * gx + pos.y * gy + pos.z * gz + gw
+	// where gx/gy/gz/gw are gradients
+	Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z};
+	Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z};
+
+	// we compute gradients using barycentric coordinates; barycentric coordinates can be computed as follows:
+	// v = (d11 * d20 - d01 * d21) / denom
+	// w = (d00 * d21 - d01 * d20) / denom
+	// u = 1 - v - w
+	// here v0, v1 are triangle edge vectors, v2 is a vector from point to triangle corner, and dij = dot(vi, vj)
+	const Vector3& v0 = p10;
+	const Vector3& v1 = p20;
+	float d00 = v0.x * v0.x + v0.y * v0.y + v0.z * v0.z;
+	float d01 = v0.x * v1.x + v0.y * v1.y + v0.z * v1.z;
+	float d11 = v1.x * v1.x + v1.y * v1.y + v1.z * v1.z;
+	float denom = d00 * d11 - d01 * d01;
+	float denomr = denom == 0 ? 0.f : 1.f / denom;
+
+	// precompute gradient factors
+	// these are derived by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w and factoring out common factors that are shared between attributes
+	float gx1 = (d11 * v0.x - d01 * v1.x) * denomr;
+	float gx2 = (d00 * v1.x - d01 * v0.x) * denomr;
+	float gy1 = (d11 * v0.y - d01 * v1.y) * denomr;
+	float gy2 = (d00 * v1.y - d01 * v0.y) * denomr;
+	float gz1 = (d11 * v0.z - d01 * v1.z) * denomr;
+	float gz2 = (d00 * v1.z - d01 * v0.z) * denomr;
+
+	for (int k = 0; k < ATTRIBUTES; ++k)
+	{
+		float a0 = p0.a[k], a1 = p1.a[k], a2 = p2.a[k];
+
+		// compute gradient of eval(pos) for x/y/z/w
+		// the formulas below are obtained by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w
+		float gx = gx1 * (a1 - a0) + gx2 * (a2 - a0);
+		float gy = gy1 * (a1 - a0) + gy2 * (a2 - a0);
+		float gz = gz1 * (a1 - a0) + gz2 * (a2 - a0);
+		float gw = a0 - p0.x * gx - p0.y * gy - p0.z * gz;
+
+		// quadric encodes (eval(pos)-attr)^2; this means that the resulting expansion needs to compute, for example, pos.x * pos.y * K
+		// since quadrics already encode factors for pos.x * pos.y, we can accumulate almost everything in basic quadric fields
+		Q.a00 += w * (gx * gx);
+		Q.a11 += w * (gy * gy);
+		Q.a22 += w * (gz * gz);
+
+		Q.a10 += w * (gy * gx);
+		Q.a20 += w * (gz * gx);
+		Q.a21 += w * (gz * gy);
+
+		Q.b0 += w * (gx * gw);
+		Q.b1 += w * (gy * gw);
+		Q.b2 += w * (gz * gw);
+
+		Q.c += w * (gw * gw);
+
+		// the only remaining sum components are ones that depend on attr; these will be addded during error evaluation, see quadricError
+		Q.gx[k] = w * gx;
+		Q.gy[k] = w * gy;
+		Q.gz[k] = w * gz;
+		Q.gw[k] = w * gw;
+
+#if TRACE > 2
+		printf("attr%d: %e %e %e\n",
+			k,
+			(gx * p0.x + gy * p0.y + gz * p0.z + gw - a0),
+			(gx * p1.x + gy * p1.y + gz * p1.z + gw - a1),
+			(gx * p2.x + gy * p2.y + gz * p2.z + gw - a2)
+			);
+#endif
+	}
+}
+#endif
+
 static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap)
 {
 	for (size_t i = 0; i < index_count; i += 3)
@@ -567,6 +686,9 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
 		Quadric Q;
 		quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], 1.f);
 
+#if ATTRIBUTES
+		quadricUpdateAttributes(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], Q.w);
+#endif
 		quadricAdd(vertex_quadrics[remap[i0]], Q);
 		quadricAdd(vertex_quadrics[remap[i1]], Q);
 		quadricAdd(vertex_quadrics[remap[i2]], Q);
@@ -1259,13 +1381,19 @@ unsigned int* meshopt_simplifyDebugLoopBack = 0;
 #endif
 
 size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* out_result_error)
+{
+	return meshopt_simplifyWithAttributes(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, target_index_count, target_error, out_result_error, 0, 0, 0);
+}
+
+size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float* out_result_error, const float* attributes, const float* attribute_weights, size_t attribute_count)
 {
 	using namespace meshopt;
 
 	assert(index_count % 3 == 0);
-	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
-	assert(vertex_positions_stride % sizeof(float) == 0);
+	assert(vertex_stride > 0 && vertex_stride <= 256);
+	assert(vertex_stride % sizeof(float) == 0);
 	assert(target_index_count <= index_count);
+	assert(attribute_count <= ATTRIBUTES);
 
 	meshopt_Allocator allocator;
 
@@ -1279,7 +1407,7 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
 	// build position remap that maps each vertex to the one with identical position
 	unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
 	unsigned int* wedge = allocator.allocate<unsigned int>(vertex_count);
-	buildPositionRemap(remap, wedge, vertex_positions_data, vertex_count, vertex_positions_stride, allocator);
+	buildPositionRemap(remap, wedge, vertex_data, vertex_count, vertex_stride, allocator);
 
 	// classify vertices; vertex kind determines collapse rules, see kCanCollapse
 	unsigned char* vertex_kind = allocator.allocate<unsigned char>(vertex_count);
@@ -1303,7 +1431,21 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
 #endif
 
 	Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count);
-	rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride);
+	rescalePositions(vertex_positions, vertex_data, vertex_count, vertex_stride);
+
+#if ATTRIBUTES
+	for (size_t i = 0; i < vertex_count; ++i)
+	{
+		memset(vertex_positions[i].a, 0, sizeof(vertex_positions[i].a));
+
+		for (size_t k = 0; k < attribute_count; ++k)
+		{
+			float a = attributes[i * attribute_count + k];
+
+			vertex_positions[i].a[k] = a * attribute_weights[k];
+		}
+	}
+#endif
 
 	Quadric* vertex_quadrics = allocator.allocate<Quadric>(vertex_count);
 	memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric));
@@ -1395,7 +1537,9 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
 
 	// result_error is quadratic; we need to remap it back to linear
 	if (out_result_error)
+	{
 		*out_result_error = sqrtf(result_error);
+	}
 
 	return result_count;
 }