Compare commits
2 commits
1abc2611c7
...
f1cf36352f
Author | SHA1 | Date | |
---|---|---|---|
f1cf36352f | |||
1b568754ae |
3 changed files with 225 additions and 15 deletions
116
.gitignore
vendored
116
.gitignore
vendored
|
@ -38,3 +38,119 @@ CMakeSettings.json
|
||||||
build/*
|
build/*
|
||||||
bin/*
|
bin/*
|
||||||
*.DS_Store
|
*.DS_Store
|
||||||
|
|
||||||
|
# ---> JetBrains
|
||||||
|
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
|
||||||
|
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
||||||
|
|
||||||
|
# User-specific stuff
|
||||||
|
.idea/
|
||||||
|
|
||||||
|
# CMake
|
||||||
|
cmake-build-*/
|
||||||
|
|
||||||
|
# File-based project format
|
||||||
|
*.iws
|
||||||
|
|
||||||
|
# IntelliJ
|
||||||
|
out/
|
||||||
|
|
||||||
|
# mpeltonen/sbt-idea plugin
|
||||||
|
.idea_modules/
|
||||||
|
|
||||||
|
# JIRA plugin
|
||||||
|
atlassian-ide-plugin.xml
|
||||||
|
|
||||||
|
# Crashlytics plugin (for Android Studio and IntelliJ)
|
||||||
|
com_crashlytics_export_strings.xml
|
||||||
|
crashlytics.properties
|
||||||
|
crashlytics-build.properties
|
||||||
|
fabric.properties
|
||||||
|
|
||||||
|
# ---> C
|
||||||
|
# Prerequisites
|
||||||
|
*.d
|
||||||
|
|
||||||
|
# Object files
|
||||||
|
*.o
|
||||||
|
*.ko
|
||||||
|
*.obj
|
||||||
|
*.elf
|
||||||
|
|
||||||
|
# Linker output
|
||||||
|
*.ilk
|
||||||
|
*.map
|
||||||
|
*.exp
|
||||||
|
|
||||||
|
# Precompiled Headers
|
||||||
|
*.gch
|
||||||
|
*.pch
|
||||||
|
|
||||||
|
# Libraries
|
||||||
|
*.lib
|
||||||
|
*.a
|
||||||
|
*.la
|
||||||
|
*.lo
|
||||||
|
|
||||||
|
# Shared objects (inc. Windows DLLs)
|
||||||
|
*.dll
|
||||||
|
*.so
|
||||||
|
*.so.*
|
||||||
|
*.dylib
|
||||||
|
|
||||||
|
# Executables
|
||||||
|
*.exe
|
||||||
|
*.out
|
||||||
|
*.app
|
||||||
|
*.i*86
|
||||||
|
*.x86_64
|
||||||
|
*.hex
|
||||||
|
|
||||||
|
# Debug files
|
||||||
|
*.dSYM/
|
||||||
|
*.su
|
||||||
|
*.idb
|
||||||
|
*.pdb
|
||||||
|
|
||||||
|
# Kernel Module Compile Results
|
||||||
|
*.mod*
|
||||||
|
*.cmd
|
||||||
|
.tmp_versions/
|
||||||
|
modules.order
|
||||||
|
Module.symvers
|
||||||
|
Mkfile.old
|
||||||
|
dkms.conf
|
||||||
|
|
||||||
|
# ---> C++
|
||||||
|
# Prerequisites
|
||||||
|
*.d
|
||||||
|
|
||||||
|
# Compiled Object files
|
||||||
|
*.slo
|
||||||
|
*.lo
|
||||||
|
*.o
|
||||||
|
*.obj
|
||||||
|
|
||||||
|
# Precompiled Headers
|
||||||
|
*.gch
|
||||||
|
*.pch
|
||||||
|
|
||||||
|
# Compiled Dynamic libraries
|
||||||
|
*.so
|
||||||
|
*.dylib
|
||||||
|
*.dll
|
||||||
|
|
||||||
|
# Fortran module files
|
||||||
|
*.mod
|
||||||
|
*.smod
|
||||||
|
|
||||||
|
# Compiled Static libraries
|
||||||
|
*.lai
|
||||||
|
*.la
|
||||||
|
*.a
|
||||||
|
*.lib
|
||||||
|
|
||||||
|
# Executables
|
||||||
|
*.exe
|
||||||
|
*.out
|
||||||
|
*.app
|
||||||
|
|
|
@ -91,6 +91,43 @@ void GpuPerlinNoise::Run() {
|
||||||
// Use OpenCL to calculate the perlin noise value for each pixel of outputImage
|
// Use OpenCL to calculate the perlin noise value for each pixel of outputImage
|
||||||
// --------------
|
// --------------
|
||||||
|
|
||||||
|
/* Initialize the host and device memory, that will be necessary to store the inputs and outputs. */
|
||||||
|
// Host memory is already set in the code above.
|
||||||
|
// buffer type = CL_MEM_READ_WRITE or CL_MEM_READ_ONLY or CL_MEM_WRITE_ONLY
|
||||||
|
cl::Buffer outputNoise_device(context, CL_MEM_WRITE_ONLY, sizeof(float) * nrPixels);
|
||||||
|
cl::Buffer workgroupMin_device(context, CL_MEM_WRITE_ONLY, sizeof(float) * nrWorkgroups);
|
||||||
|
cl::Buffer workgroupMax_device(context, CL_MEM_WRITE_ONLY, sizeof(float) * nrWorkgroups);
|
||||||
|
|
||||||
|
/* Initialize the buffers using host data, so "Write Buffer" */
|
||||||
|
// None
|
||||||
|
|
||||||
|
/* Setup before launching the kernel on the device */
|
||||||
|
cl::KernelFunctor<int, int, cl::Buffer, cl::Buffer, cl::Buffer> kernelFunctor(program, "perlin_noise_texture");
|
||||||
|
cl::NDRange rangeLocal(workgroupWidth, workgroupHeight);
|
||||||
|
cl::NDRange rangeGlobal(width, height);
|
||||||
|
cl::EnqueueArgs enqueArgs(queue, rangeGlobal, rangeLocal);
|
||||||
|
|
||||||
|
/* Launch the kernel on the device and wait until it's done */
|
||||||
|
cl::Event event = kernelFunctor(enqueArgs, width, height, outputNoise_device, workgroupMin_device, workgroupMax_device);
|
||||||
|
|
||||||
|
/* Read the data from the device buffer into the host vector */
|
||||||
|
queue.enqueueReadBuffer(outputNoise_device, CL_TRUE, 0, sizeof(float) * nrPixels, outputNoise.data());
|
||||||
|
queue.enqueueReadBuffer(workgroupMin_device, CL_TRUE, 0, sizeof(float) * nrWorkgroups, workgroupMin.data());
|
||||||
|
queue.enqueueReadBuffer(workgroupMax_device, CL_TRUE, 0, sizeof(float) * nrWorkgroups, workgroupMax.data());
|
||||||
|
|
||||||
|
/* Global min and max */
|
||||||
|
float globalMin = *std::min_element(workgroupMin.begin(), workgroupMin.end());
|
||||||
|
float globalMax = *std::max_element(workgroupMax.begin(), workgroupMax.end());
|
||||||
|
/* Rescale outputNoise to [0, 255] */
|
||||||
|
for (int i = 0; i < nrPixels; i++) {
|
||||||
|
outputImage[i] = static_cast<unsigned char>(255 * (outputNoise[i] - globalMin) / (globalMax - globalMin));
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------
|
||||||
|
printf("Debugging: min: %f, max: %f\n", workgroupMin[0], workgroupMax[0]);
|
||||||
|
printf("Debugging: gloabl_min: %f, global_max: %f\n", globalMin, globalMax);
|
||||||
|
// --------------
|
||||||
|
|
||||||
// write to PNG
|
// write to PNG
|
||||||
std::string pngPath = "perlin.png";
|
std::string pngPath = "perlin.png";
|
||||||
bool ret = stbi_write_png(pngPath.c_str(), width, height, 1, outputImage.data(), width);
|
bool ret = stbi_write_png(pngPath.c_str(), width, height, 1, outputImage.data(), width);
|
||||||
|
|
|
@ -4,37 +4,94 @@
|
||||||
// REVERT ALL FUNCTION DECLARATIONS to how they were in the beginning BEFORE HANDING IN
|
// REVERT ALL FUNCTION DECLARATIONS to how they were in the beginning BEFORE HANDING IN
|
||||||
// -------------------------------------
|
// -------------------------------------
|
||||||
|
|
||||||
// Cubic smoothing curve 3t^2 - 2t^3.
// Maps 0 -> 0, 0.5 -> 0.5 and 1 -> 1; the input is not clamped.
// NOTE(review): the comment at the top of this file asks for all function
// declarations to be reverted before hand-in, and the previous revision
// declared this as `float2 smooth_step(float2 t)` - confirm the scalar
// signature is acceptable.
float smooth_step(float t){
    const float quadratic_term = 3.0f * t * t;
    const float cubic_term = 2.0f * t * t * t;
    return quadratic_term - cubic_term;
}
|
||||||
|
|
||||||
// generates a pseudo-random float in [0,1[ from 2 floats
|
// generates a pseudo-random float in [0,1[ from 2 floats
|
||||||
float pseudo_rand(float2 seed){
    // All literals carry the `f` suffix so the hash is evaluated entirely in
    // single precision; unsuffixed literals are double-precision constants.
    const float hashed = sin(dot(seed, (float2)(12.9898f, 78.233f))) * 43758.5453f;

    // fract() returns fmin(x - floor(x), 0x1.fffffep-1f), so the result stays
    // strictly below 1 even when `hashed` is a tiny negative number (where
    // x - floor(x) would otherwise round up to exactly 1.0f).
    float whole_part; // floor(hashed); required by fract(), not used here
    return fract(hashed, &whole_part);
}
|
||||||
|
|
||||||
// generates a pseudo-random 2D unit vector from 2 floats
|
// generates a pseudo-random 2D unit vector from 2 floats
|
||||||
// uses: pseudo_rand()
|
// uses: pseudo_rand()
|
||||||
float2 random_direction(float2 xy){
    // Map the pseudo-random value from pseudo_rand() to an angle and return
    // the point on the unit circle at that angle.
    // M_PI_F is the single-precision constant; M_PI is the double-precision
    // one and is only available when the device supports doubles.
    const float angle = pseudo_rand(xy) * 2.0f * M_PI_F;
    return (float2)(cos(angle), sin(angle));
}

// Linear blend between a and b: returns a for t == 0 and b for t == 1.
float linear_interpolate(float a, float b, float t){
    return a * (1.0f - t) + b * t;
}

// Blend between a and b after passing the weight t through smooth_step().
// uses: smooth_step(), linear_interpolate()
float interpolate(float a, float b, float t){
    return linear_interpolate(a, b, smooth_step(t));
}
|
||||||
|
|
||||||
// Number of floats in the per-work-group scratch buffer used for the
// min/max reduction. Work-groups with more work-items than this are only
// reduced over their first PERLIN_MAX_WORKGROUP_SIZE items.
#define PERLIN_MAX_WORKGROUP_SIZE (16 * 16)

// Computes one Perlin noise value per work-item and the min/max of those
// values per work-group.
//   width, height  : image size in pixels; pixelId = row * width + col
//   output_noise   : receives the noise value at index pixelId
//   workgroup_min  : receives the smallest value of each work-group
//   workgroup_max  : receives the largest value of each work-group
// There is no bounds check on col/row, so the global range must not exceed
// (width, height).
// uses: random_direction(), interpolate()
void kernel perlin_noise_texture(const int width, const int height, global float* output_noise, global float* workgroup_min, global float* workgroup_max){
    // Noise values of this work-group, indexed by threadId.
    local float workgroup_p[PERLIN_MAX_WORKGROUP_SIZE];

    const int col = get_global_id(0);  // column of current pixel
    const int row = get_global_id(1);  // row of current pixel
    const int pixelId = row * width + col;  // index of output_noise to assign
    const int threadId = get_local_id(1) * get_local_size(0) + get_local_id(0);  // index of current thread within its workgroup
    const int workgroupId = get_group_id(1) * get_num_groups(0) + get_group_id(0);  // index of current workgroup
    const int workgroupSize = get_local_size(0) * get_local_size(1);  // work-items in this workgroup
    const float scale = 0.05f;  // feel free to play with this
    const float2 xy = (float2)(col * scale, row * scale);  // (x,y) input for perlin noise algorithm

#ifdef PERLIN_DEBUG_PRINT
    // Per-work-item trace; only compiled in when PERLIN_DEBUG_PRINT is defined
    // (e.g. via the program build options).
    printf("col: %d, row: %d, pixeldId: %d, threadId: %d, workgroupId: %d\n", col, row, pixelId, threadId, workgroupId);
#endif

    // Integer lattice cell that contains xy.
    const float x0 = floor(xy.x);
    const float y0 = floor(xy.y);
    const float x1 = x0 + 1;
    const float y1 = y0 + 1;

    // The four corners of that cell.
    const float2 c00 = (float2)(x0, y0);
    const float2 c10 = (float2)(x1, y0);
    const float2 c01 = (float2)(x0, y1);
    const float2 c11 = (float2)(x1, y1);

    // Dot product of each corner's pseudo-random gradient with the offset
    // from that corner to xy.
    const float p00 = dot(random_direction(c00), xy - c00);
    const float p10 = dot(random_direction(c10), xy - c10);
    const float p01 = dot(random_direction(c01), xy - c01);
    const float p11 = dot(random_direction(c11), xy - c11);

    // Position of xy inside the cell.
    const float u = xy.x - x0;
    const float v = xy.y - y0;

    // Blend along x on both rows of the cell, then along y.
    const float i0 = interpolate(p00, p10, u);
    const float i1 = interpolate(p01, p11, u);

    // calculate the perlin noise for pixelId
    const float perlin_P = interpolate(i0, i1, v);
    output_noise[pixelId] = perlin_P;

    // Guarded so a work-group larger than the scratch buffer cannot write
    // past its end.
    if(threadId < PERLIN_MAX_WORKGROUP_SIZE){
        workgroup_p[threadId] = perlin_P;
    }

    // Every work-item of the group must have stored its value before
    // thread 0 scans the buffer.
    barrier(CLK_LOCAL_MEM_FENCE);

    // calculate the min and max within the current workgroup
    if(threadId == 0){
        // Scan only the entries that were actually written by this group.
        const int count = min(workgroupSize, PERLIN_MAX_WORKGROUP_SIZE);
        float lowest = workgroup_p[0];
        float highest = workgroup_p[0];
        for(int i = 1; i < count; i++){
            lowest = fmin(lowest, workgroup_p[i]);
            highest = fmax(highest, workgroup_p[i]);
        }
        workgroup_min[workgroupId] = lowest;
        workgroup_max[workgroupId] = highest;
    }
}
|
Reference in a new issue