@tool
extends "base_modifier.gd"

# Relax Position modifier.
#
# Repeatedly pushes every transform away from its closest neighbour. The
# nearest-neighbour search runs either on the CPU (nested loops over all
# transforms) or on the GPU through a compute shader.


# Compute shader resource, loaded lazily by get_shader_file() and shared by all
# instances of this script.
static var shader_file: RDShaderFile

# Number of relax passes to run.
@export var iterations : int = 3
# Distance each transform is moved during the first pass.
@export var offset_step : float = 0.01
# Factor applied to the move distance after every pass.
@export var consecutive_step_multiplier : float = 0.5
# Run the nearest-neighbour search on the GPU. Switched off automatically by
# _process_transforms() when no local RenderingDevice can be created.
@export var use_computeshader : bool = true


# Sets the modifier metadata and registers the documentation of each parameter.
func _init() -> void:
	display_name = "Relax Position"
	category = "Edit"
	global_reference_frame_available = false
	local_reference_frame_available = false
	individual_instances_reference_frame_available = false
	can_restrict_height = true
	restrict_height = true

	documentation.add_warning(
		"This modifier is has an O(n²) complexity and will be slow with large amounts of points, unless your device supports compute shaders.",
		1)

	var p := documentation.add_parameter("iterations")
	p.set_type("int")
	p.set_cost(2)
	p.set_description(
		"How many times the relax algorithm will run. Increasing this value will generally improves the result, at the cost of execution speed."
		)

	p = documentation.add_parameter("Offset step")
	p.set_type("float")
	p.set_cost(0)
	p.set_description("How far the transform will be pushed away each iteration.")

	p = documentation.add_parameter("Consecutive step multiplier")
	p.set_type("float")
	p.set_cost(0)
	p.set_description(
		"On each iteration, multiply the offset step by this value. This value is usually set between 0 and 1, to make the effect less pronounced on successive iterations.")

	p = documentation.add_parameter("Use compute shader")
	p.set_cost(0)
	p.set_type("bool")
	# NOTE(review): the line break inside the description string below is kept
	# exactly as it appears in the file.
	p.set_description(
		"Run the calculations on the GPU instead of the CPU. 
This provides a significant speed boost and should be enabled when possible.")
	p.add_warning(
		"This parameter can't be enabled when using the OpenGL backend or running in headless mode.",
		2)


# Moves every transform away from its closest neighbour, `iterations` times.
#
# transforms: container exposing size() and a `list` of items with an `origin`.
# _domain, _seed: unused by this modifier.
#
# Does nothing when there are fewer than two transforms, and stops early
# whenever `interrupt_update` is set.
func _process_transforms(transforms, _domain, _seed) -> void:
	var offset := offset_step
	if transforms.size() < 2:
		return

	# Disable the use of compute shader, if we cannot create a RenderingDevice
	if use_computeshader:
		var rd := RenderingServer.create_local_rendering_device()
		if rd == null:
			use_computeshader = false
		else:
			rd.free()
			rd = null

	if use_computeshader:
		for _iteration in iterations:
			if interrupt_update:
				return

			# All directions are computed from the current positions first,
			# then every transform is moved.
			var movedir: PackedVector3Array = compute_closest(transforms)
			for i in transforms.size():
				var dir: Vector3 = movedir[i]
				if restrict_height:
					dir.y = 0.0
				# move away from closest point
				transforms.list[i].origin += dir.normalized() * offset

			offset *= consecutive_step_multiplier
	else:
		# calculate the relax transforms on the cpu
		for _iteration in iterations:
			for i in transforms.size():
				if interrupt_update:
					return

				# Start from "no neighbour found": a zero vector normalizes to
				# zero (no movement), and INF accepts any finite squared
				# distance, however far apart the points are.
				var min_vector := Vector3.ZERO
				var threshold := INF
				var distance := 0.0
				var diff: Vector3

				# Find the closest point
				for j in transforms.size():
					if i == j:
						continue
					diff = transforms.list[i].origin - transforms.list[j].origin
					distance = diff.length_squared()
					if distance < threshold:
						min_vector = diff
						threshold = distance

				if restrict_height:
					min_vector.y = 0.0
				# move away from closest point
				transforms.list[i].origin += min_vector.normalized() * offset

			offset *= consecutive_step_multiplier


# compute the closest points to each other using a compute shader
# return a vector for each point that points away from the closest neighbour
#
# The returned array always has transforms.size() entries. If the rendering
# device or the shader cannot be created, every entry is Vector3.ZERO so the
# caller moves nothing instead of crashing.
func compute_closest(transforms) -> PackedVector3Array:
	# each workgroup computes 64 vectors
	const WORKGROUP_SIZE := 64
	# We use vec4 floats in the shader, so every point takes four floats.
	const FLOATS_PER_VEC := 4

	var point_count: int = transforms.size()

	# Pre-filled fallback result, overwritten with the GPU output on success.
	var retval := PackedVector3Array()
	retval.resize(point_count)
	retval.fill(Vector3.ZERO)

	var file := get_shader_file()
	if file == null:
		push_warning("Relax Position: could not load the compute shader file.")
		return retval

	var rd := RenderingServer.create_local_rendering_device()
	if rd == null:
		push_warning("Relax Position: could not create a local RenderingDevice.")
		return retval

	var shader_spirv: RDShaderSPIRV = file.get_spirv()
	var shader := rd.shader_create_from_spirv(shader_spirv)
	if not shader.is_valid():
		push_warning("Relax Position: could not create the compute shader.")
		rd.free()
		return retval

	# indexing in the compute shader requires padding: round the number of
	# vectors up to a whole number of workgroups, using integer arithmetic.
	var group_count: int = ceili(point_count / float(WORKGROUP_SIZE))
	var padded_float_count: int = group_count * WORKGROUP_SIZE * FLOATS_PER_VEC

	# Prepare our data. We use vec4 floats in the shader, so we need 32 bit.
	# The whole buffer is zeroed explicitly so the padding has a defined value.
	var input := PackedFloat32Array()
	input.resize(padded_float_count)
	input.fill(0.0)
	for i in point_count:
		var origin: Vector3 = transforms.list[i].origin
		var base: int = i * FLOATS_PER_VEC
		input[base] = origin.x
		input[base + 1] = origin.y
		input[base + 2] = origin.z
		# input[base + 3] stays 0: needed to use vec4, necessary for byte
		# alignment in the shader code

	var input_bytes := input.to_byte_array()
	var output_bytes := input_bytes.duplicate()

	# Create a storage buffer that can hold our float values.
	var buffer_in := rd.storage_buffer_create(input_bytes.size(), input_bytes)
	var buffer_out := rd.storage_buffer_create(output_bytes.size(), output_bytes)

	# Create a uniform to assign the buffer to the rendering device
	var uniform_in := RDUniform.new()
	uniform_in.uniform_type = RenderingDevice.UNIFORM_TYPE_STORAGE_BUFFER
	uniform_in.binding = 0 # this needs to match the "binding" in our shader file
	uniform_in.add_id(buffer_in)

	# Create a uniform to assign the buffer to the rendering device
	var uniform_out := RDUniform.new()
	uniform_out.uniform_type = RenderingDevice.UNIFORM_TYPE_STORAGE_BUFFER
	uniform_out.binding = 1 # this needs to match the "binding" in our shader file
	uniform_out.add_id(buffer_out)

	# the last parameter (the 0) needs to match the "set" in our shader file
	var uniform_set := rd.uniform_set_create([uniform_in, uniform_out], shader, 0)

	# Create a compute pipeline
	var pipeline := rd.compute_pipeline_create(shader)
	var compute_list := rd.compute_list_begin()
	rd.compute_list_bind_compute_pipeline(compute_list, pipeline)
	rd.compute_list_bind_uniform_set(compute_list, uniform_set, 0)
	rd.compute_list_dispatch(compute_list, group_count, 1, 1)
	rd.compute_list_end()

	# Submit to GPU and wait for sync
	rd.submit()
	rd.sync()

	# Read back the data from the buffer
	var result := rd.buffer_get_data(buffer_out).to_float32_array()
	for i in point_count:
		var base: int = i * FLOATS_PER_VEC
		retval[i] = Vector3(result[base], result[base + 1], result[base + 2])

	# Free the allocated objects.
	# All resources must be freed after use to avoid memory leaks.
	rd.free_rid(pipeline)
	rd.free_rid(uniform_set)
	rd.free_rid(shader)
	rd.free_rid(buffer_in)
	rd.free_rid(buffer_out)
	rd.free()
	rd = null

	return retval


# Returns the compute shader resource, loading it on first use from the
# "compute_shaders" folder located next to this script.
func get_shader_file() -> RDShaderFile:
	if shader_file == null:
		shader_file = load(get_script().resource_path.get_base_dir() + "/compute_shaders/compute_relax.glsl")

	return shader_file