From 1b568754aea24af3ff100bf2819ded7cd9eeb158 Mon Sep 17 00:00:00 2001 From: Tibo De Peuter Date: Tue, 1 Oct 2024 16:55:24 +0200 Subject: [PATCH 1/2] chore: Add JetBrains to gitignore --- .gitignore | 116 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/.gitignore b/.gitignore index 1914940..763c84b 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,119 @@ CMakeSettings.json build/* bin/* *.DS_Store + +# ---> JetBrains +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/ + +# CMake +cmake-build-*/ + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# ---> C +# Prerequisites +*.d + +# Object files +*.o +*.ko +*.obj +*.elf + +# Linker output +*.ilk +*.map +*.exp + +# Precompiled Headers +*.gch +*.pch + +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. 
Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +# Debug files +*.dSYM/ +*.su +*.idb +*.pdb + +# Kernel Module Compile Results +*.mod* +*.cmd +.tmp_versions/ +modules.order +Module.symvers +Mkfile.old +dkms.conf + +# ---> C++ +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app From f1cf36352fabe4d29e863ceaa2e3f41a18ad0bf3 Mon Sep 17 00:00:00 2001 From: Tibo De Peuter Date: Fri, 22 Nov 2024 16:31:55 +0100 Subject: [PATCH 2/2] Assignment 1 --- src/GpuPerlinNoise.cpp | 37 ++++++++++++++++++ src/PerlinNoise.cl | 87 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 109 insertions(+), 15 deletions(-) diff --git a/src/GpuPerlinNoise.cpp b/src/GpuPerlinNoise.cpp index 692b813..76a5999 100644 --- a/src/GpuPerlinNoise.cpp +++ b/src/GpuPerlinNoise.cpp @@ -91,6 +91,43 @@ void GpuPerlinNoise::Run() { // Use OpenCL to calculate the perlin noise value for each pixel of outputImage // -------------- + /* Initialize the host and device memory, that will be necessary to store the inputs and outputs. */ + // Host memory is already set in the code above. 
+ // buffer type = CL_MEM_READ_WRITE or CL_MEM_READ_ONLY or CL_MEM_WRITE_ONLY + cl::Buffer outputNoise_device(context, CL_MEM_WRITE_ONLY, sizeof(float) * nrPixels); + cl::Buffer workgroupMin_device(context, CL_MEM_WRITE_ONLY, sizeof(float) * nrWorkgroups); + cl::Buffer workgroupMax_device(context, CL_MEM_WRITE_ONLY, sizeof(float) * nrWorkgroups); + + /* Initialize the buffers using host data, so "Write Buffer" */ + // None + + /* Setup before launching the kernel on the device */ + cl::KernelFunctor kernelFunctor(program, "perlin_noise_texture"); + cl::NDRange rangeLocal(workgroupWidth, workgroupHeight); + cl::NDRange rangeGlobal(width, height); + cl::EnqueueArgs enqueArgs(queue, rangeGlobal, rangeLocal); + + /* Launch the kernel on the device and wait until it's done */ + cl::Event event = kernelFunctor(enqueArgs, width, height, outputNoise_device, workgroupMin_device, workgroupMax_device); + + /* Read the data from the device buffer into the host vector */ + queue.enqueueReadBuffer(outputNoise_device, CL_TRUE, 0, sizeof(float) * nrPixels, outputNoise.data()); + queue.enqueueReadBuffer(workgroupMin_device, CL_TRUE, 0, sizeof(float) * nrWorkgroups, workgroupMin.data()); + queue.enqueueReadBuffer(workgroupMax_device, CL_TRUE, 0, sizeof(float) * nrWorkgroups, workgroupMax.data()); + + /* Global min and max */ + float globalMin = *std::min_element(workgroupMin.begin(), workgroupMin.end()); + float globalMax = *std::max_element(workgroupMax.begin(), workgroupMax.end()); + /* Rescale outputNoise to [0, 255] */ + for (int i = 0; i < nrPixels; i++) { + outputImage[i] = static_cast(255 * (outputNoise[i] - globalMin) / (globalMax - globalMin)); + } + + // -------------- + printf("Debugging: min: %f, max: %f\n", workgroupMin[0], workgroupMax[0]); + printf("Debugging: gloabl_min: %f, global_max: %f\n", globalMin, globalMax); + // -------------- + // write to PNG std::string pngPath = "perlin.png"; bool ret = stbi_write_png(pngPath.c_str(), width, height, 1, 
outputImage.data(), width);
diff --git a/src/PerlinNoise.cl b/src/PerlinNoise.cl
index 5c80161..0c43a5d 100644
--- a/src/PerlinNoise.cl
+++ b/src/PerlinNoise.cl
@@ -4,37 +4,82 @@
 // REVERT ALL FUNCTION DECLARATIONS to how they were in the beginning BEFORE HANDING IN
 // -------------------------------------
 
+// Capacity of the local-memory scratch array used by perlin_noise_texture.
+// The host must launch workgroups of at most this many threads (e.g. 16x16).
+#define MAX_WORKGROUP_SIZE 256
+
+// cubic smoothing 3t^2 - 2t^3, applied to each component of t
 float2 smooth_step(float2 t){
-    return t;
+    return 3.0f * t * t - 2.0f * t * t * t;
 }
 
 // generates a pseudo-random float in [0,1[ from 2 floats
 float pseudo_rand(float2 seed){
-    return 0;
+    // float literals (f suffix) keep the whole hash in single precision
+    float temp = sin(dot(seed, (float2)(12.9898f, 78.233f))) * 43758.5453f;
+    return temp - floor(temp); // keep only the fractional part
 }
 
 // generates a pseudo-random 2D unit vector from 2 floats
 // uses: pseudo_rand()
 float2 random_direction(float2 xy){
-    return (float2)(0,0);
+    float angle = pseudo_rand(xy) * 2.0f * M_PI_F; // M_PI_F is the float constant, M_PI is a double
+    return (float2)(cos(angle), sin(angle));
+}
+
+// linear blend from a (t = 0) to b (t = 1)
+float linear_interpolate(float a, float b, float t){
+    return a * (1.0f - t) + b * t;
 }
 
 void kernel perlin_noise_texture(const int width, const int height, global float* output_noise, global float* workgroup_min, global float* workgroup_max){
+    const int col = get_global_id(0); // column of current pixel
+    const int row = get_global_id(1); // row of current pixel
+    const int pixelId = row * width + col; // index of output_noise to assign
+    const int threadId = get_local_id(1) * get_local_size(0) + get_local_id(0); // index of current thread within its workgroup
+    const int workgroupId = get_group_id(1) * get_num_groups(0) + get_group_id(0); // index of current workgroup
+    const int workgroupSize = get_local_size(0) * get_local_size(1); // number of threads in this workgroup
+    const float scale = 0.05f; // feel free to play with this
+    float2 xy = (float2)(col * scale, row * scale); // (x,y) input for perlin noise algorithm
 
-    const int col = 0; // column of current pixel
-    const int row = 0; // row of current pixel
-    const int pixelId = row * width + col; // index of output_noise to assign
-    const int threadId = 0; // index of current thread within its workgroup
-    const int workgroupId = 0; // index of current workgroup
-    const float scale = 0.05f; // feel free to play with this
-    float2 xy = (float2)(col * scale, row * scale); // (x,y) input for perlin noise algorithm
+    // noise value of every thread in this workgroup, shared through local memory
+    local float workgroup_p[MAX_WORKGROUP_SIZE];
+
+    // the four corners of the grid cell that contains xy
+    const float2 c00 = floor(xy);
+    const float2 c10 = c00 + (float2)(1.0f, 0.0f);
+    const float2 c01 = c00 + (float2)(0.0f, 1.0f);
+    const float2 c11 = c00 + (float2)(1.0f, 1.0f);
+
+    // dot product of each corner's random gradient with the offset from that corner to xy
+    const float p00 = dot(random_direction(c00), xy - c00);
+    const float p10 = dot(random_direction(c10), xy - c10);
+    const float p01 = dot(random_direction(c01), xy - c01);
+    const float p11 = dot(random_direction(c11), xy - c11);
+
+    // smoothed position of xy inside the cell: weight.x blends along x, weight.y along y
+    const float2 weight = smooth_step(xy - c00);
+    const float i0 = linear_interpolate(p00, p10, weight.x);
+    const float i1 = linear_interpolate(p01, p11, weight.x);
 
     // calculate the perlin noise for pixelId
-    output_noise[pixelId] = 0;
+    const float noise = linear_interpolate(i0, i1, weight.y);
+    output_noise[pixelId] = noise;
+    workgroup_p[threadId] = noise;
+
+    // wait until every thread of the workgroup has stored its value in local memory
+    barrier(CLK_LOCAL_MEM_FENCE);
 
     // calculate the min and max within the current workgroup
-    if(threadId == 0){
-        workgroup_min[workgroupId] = -1.0f;
-        workgroup_max[workgroupId] = 1.0f;
+    if(threadId == 0){
+        float lowest = workgroup_p[0];
+        float highest = workgroup_p[0];
+        // only the first workgroupSize slots were written by this workgroup's threads
+        for(int i = 1; i < workgroupSize; i++){
+            lowest = fmin(lowest, workgroup_p[i]);
+            highest = fmax(highest, workgroup_p[i]);
+        }
+        workgroup_min[workgroupId] = lowest;
+        workgroup_max[workgroupId] = highest;
     }
 }
\ No newline at end of file