do some benchmarking
This commit is contained in:
parent
e153201ff2
commit
cb5ab7349d
123
README.md
123
README.md
@ -1,3 +1,126 @@
|
|||||||
# Ray Tracing in One Weekend (CUDA edition)
|
# Ray Tracing in One Weekend (CUDA edition)
|
||||||
|
|
||||||
Tutorial: https://github.com/rogerallen/raytracinginoneweekendincuda
|
Tutorial: https://github.com/rogerallen/raytracinginoneweekendincuda
|
||||||
|
|
||||||
|
GPU Used: NVIDIA GeForce GTX 1660 Super
|
||||||
|
CUDA Cores: 1408
|
||||||
|
|
||||||
|
## Parameter sets
|
||||||
|
|
||||||
|
1:
|
||||||
|
* Resolution: 300x300
|
||||||
|
* Thread block size: 8x8
|
||||||
|
* Samples per pixel: 8
|
||||||
|
* Number of additional objects: 0
|
||||||
|
* Maximum number of bounces: 50
|
||||||
|
* Time to render: 0.0085s
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
2:
|
||||||
|
* Resolution: 600x600
|
||||||
|
* Thread block size: 4x4
|
||||||
|
* Samples per pixel: 50
|
||||||
|
* Number of additional objects: 200
|
||||||
|
* Maximum number of bounces: 50
|
||||||
|
* Time to render: 2.12857s
|
||||||
|
|
||||||
|
3:
|
||||||
|
* Resolution: 600x600
|
||||||
|
* Thread block size: 16x16
|
||||||
|
* Samples per pixel: 50
|
||||||
|
* Number of additional objects: 200
|
||||||
|
* Maximum number of bounces: 50
|
||||||
|
* Time to render: 2.4912s
|
||||||
|
|
||||||
|
4:
|
||||||
|
* Resolution: 600x600
|
||||||
|
* Thread block size: 24x24
|
||||||
|
* Samples per pixel: 50
|
||||||
|
* Number of additional objects: 200
|
||||||
|
* Maximum number of bounces: 50
|
||||||
|
* Time to render: 1.81483s
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
5:
|
||||||
|
* Resolution: 1000x1000
|
||||||
|
* Thread block size: 24x24
|
||||||
|
* Samples per pixel: 100
|
||||||
|
* Number of additional objects: 300
|
||||||
|
* Maximum number of bounces: 50
|
||||||
|
* Time to render: 14.7335s
|
||||||
|
|
||||||
|
6:
|
||||||
|
* Resolution: 1000x1000
|
||||||
|
* Thread block size: 26x26
|
||||||
|
* Samples per pixel: 100
|
||||||
|
* Number of additional objects: 300
|
||||||
|
* Maximum number of bounces: 50
|
||||||
|
* Time to render: 17.4273s
|
||||||
|
|
||||||
|
7:
|
||||||
|
* Resolution: 1000x1000
|
||||||
|
* Thread block size: 4x4
|
||||||
|
* Samples per pixel: 100
|
||||||
|
* Number of additional objects: 300
|
||||||
|
* Maximum number of bounces: 50
|
||||||
|
* Time to render: 19.0303s
|
||||||
|
|
||||||
|
8:
|
||||||
|
* Resolution: 1000x1000
|
||||||
|
* Thread block size: 16x16
|
||||||
|
* Samples per pixel: 100
|
||||||
|
* Number of additional objects: 300
|
||||||
|
* Maximum number of bounces: 50
|
||||||
|
* Time to render: 23.2295s
|
||||||
|
|
||||||
|
9:
|
||||||
|
* Resolution: 1000x1000
|
||||||
|
* Thread block size: 20x20
|
||||||
|
* Samples per pixel: 100
|
||||||
|
* Number of additional objects: 300
|
||||||
|
* Maximum number of bounces: 50
|
||||||
|
* Time to render: 23.6136s
|
||||||
|
|
||||||
|
10:
|
||||||
|
* Resolution: 1000x1000
|
||||||
|
* Thread block size: 12x12
|
||||||
|
* Samples per pixel: 100
|
||||||
|
* Number of additional objects: 300
|
||||||
|
* Maximum number of bounces: 50
|
||||||
|
* Time to render: 27.5289s
|
||||||
|
|
||||||
|
11:
|
||||||
|
* Resolution: 1000x1000
|
||||||
|
* Thread block size: 29x29
|
||||||
|
* Samples per pixel: 100
|
||||||
|
* Number of additional objects: 300
|
||||||
|
* Maximum number of bounces: 50
|
||||||
|
* Time to render: 28.6761s
|
||||||
|
|
||||||
|
12:
|
||||||
|
* Resolution: 1000x1000
|
||||||
|
* Thread block size: 8x8
|
||||||
|
* Samples per pixel: 100
|
||||||
|
* Number of additional objects: 300
|
||||||
|
* Maximum number of bounces: 50
|
||||||
|
* Time to render: 33.2001s
|
||||||
|
|
||||||
|
13:
|
||||||
|
* Resolution: 1000x1000
|
||||||
|
* Thread block size: 2x2
|
||||||
|
* Samples per pixel: 100
|
||||||
|
* Number of additional objects: 300
|
||||||
|
* Maximum number of bounces: 50
|
||||||
|
* Time to render: 55.2398s
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
14:
|
||||||
|
* Resolution: 2400x1200
|
||||||
|
* Thread block size: 16x16
|
||||||
|
* Samples per pixel: 100
|
||||||
|
* Number of additional objects: 500
|
||||||
|
* Maximum number of bounces: 50
|
||||||
|
* Time to render: 124.928s
|
||||||
|
55
src/main.cu
55
src/main.cu
@ -12,6 +12,8 @@
|
|||||||
#include "camera.cpp"
|
#include "camera.cpp"
|
||||||
#include "hitable_list.cpp"
|
#include "hitable_list.cpp"
|
||||||
|
|
||||||
|
#define MAX_BOUNCES 50
|
||||||
|
|
||||||
#define checkCudaErrors(val) check_cuda((val), #val, __FILE__, __LINE__)
|
#define checkCudaErrors(val) check_cuda((val), #val, __FILE__, __LINE__)
|
||||||
|
|
||||||
void check_cuda(cudaError_t result, const char *func, const char *file, int line) {
|
void check_cuda(cudaError_t result, const char *func, const char *file, int line) {
|
||||||
@ -35,7 +37,7 @@ __device__ bool hit_sphere(const vec3& center, float radius, const ray& r) {
|
|||||||
__device__ vec3 color(const ray& r, hitable **world, curandState *local_rand_state) {
|
__device__ vec3 color(const ray& r, hitable **world, curandState *local_rand_state) {
|
||||||
ray cur_ray = r;
|
ray cur_ray = r;
|
||||||
vec3 cur_attenuation = vec3(1,1,1);
|
vec3 cur_attenuation = vec3(1,1,1);
|
||||||
for (int i = 0; i < 50; i++) {
|
for (int i = 0; i < MAX_BOUNCES; i++) {
|
||||||
hit_record rec;
|
hit_record rec;
|
||||||
if ((*world)->hit(cur_ray, 0.001f, FLT_MAX, rec)) {
|
if ((*world)->hit(cur_ray, 0.001f, FLT_MAX, rec)) {
|
||||||
ray scattered;
|
ray scattered;
|
||||||
@ -90,7 +92,7 @@ __global__ void render(vec3 *fb,
|
|||||||
ray r = (*cam)->get_ray(u,v, &local_rand_state);
|
ray r = (*cam)->get_ray(u,v, &local_rand_state);
|
||||||
col += color(r, world, &local_rand_state);
|
col += color(r, world, &local_rand_state);
|
||||||
}
|
}
|
||||||
rand_state[pixel_idx] = local_rand_state;
|
// rand_state[pixel_idx] = local_rand_state;
|
||||||
|
|
||||||
col /= float(ns);
|
col /= float(ns);
|
||||||
col[0] = sqrt(col[0]);
|
col[0] = sqrt(col[0]);
|
||||||
@ -99,7 +101,7 @@ __global__ void render(vec3 *fb,
|
|||||||
fb[pixel_idx] = col;
|
fb[pixel_idx] = col;
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void create_world(hitable **d_list, int d_list_size, hitable **d_world, camera **d_camera, int nx, int ny, curandState *rand_state) {
|
__global__ void create_world(hitable **d_list, int d_list_size, hitable **d_world, camera **d_camera, int nx, int ny, curandState *rand_state, int ball_count) {
|
||||||
if (!(threadIdx.x == 0 && blockIdx.y == 0)) return;
|
if (!(threadIdx.x == 0 && blockIdx.y == 0)) return;
|
||||||
|
|
||||||
curandState local_rand_state = *rand_state;
|
curandState local_rand_state = *rand_state;
|
||||||
@ -108,10 +110,24 @@ __global__ void create_world(hitable **d_list, int d_list_size, hitable **d_worl
|
|||||||
#define RND() curand_uniform(&local_rand_state)
|
#define RND() curand_uniform(&local_rand_state)
|
||||||
|
|
||||||
int idx = 1;
|
int idx = 1;
|
||||||
for (int a = -11; a < 11; a++) {
|
// for (int a = -11; a < 11; a++) {
|
||||||
for (int b = -11; b < 11; b++) {
|
// for (int b = -11; b < 11; b++) {
|
||||||
|
// float choose_mat = RND();
|
||||||
|
// vec3 center(a + RND(), 0.2, b + RND());
|
||||||
|
// material *mat;
|
||||||
|
// if (choose_mat < 0.8f) {
|
||||||
|
// mat = new lambertian(vec3(RND()*RND(), RND()*RND(), RND()*RND()));
|
||||||
|
// } else if (choose_mat < 0.95f) {
|
||||||
|
// mat = new metal(vec3(0.5f*(1.0f+RND()), 0.5f*(1.0f+RND()), 0.5f*(1.0f+RND())), 0.5f*RND());
|
||||||
|
// } else {
|
||||||
|
// mat = new dielectric(1.5);
|
||||||
|
// }
|
||||||
|
// d_list[idx++] = new sphere(center, 0.2, mat);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
for (int i = 0; i < ball_count; i++) {
|
||||||
float choose_mat = RND();
|
float choose_mat = RND();
|
||||||
vec3 center(a + RND(), 0.2, b + RND());
|
vec3 center(RND()*22.0f-11.0f, 0.2f, RND()*22.0f-11.0f);
|
||||||
material *mat;
|
material *mat;
|
||||||
if (choose_mat < 0.8f) {
|
if (choose_mat < 0.8f) {
|
||||||
mat = new lambertian(vec3(RND()*RND(), RND()*RND(), RND()*RND()));
|
mat = new lambertian(vec3(RND()*RND(), RND()*RND(), RND()*RND()));
|
||||||
@ -122,7 +138,6 @@ __global__ void create_world(hitable **d_list, int d_list_size, hitable **d_worl
|
|||||||
}
|
}
|
||||||
d_list[idx++] = new sphere(center, 0.2, mat);
|
d_list[idx++] = new sphere(center, 0.2, mat);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
#undef RND
|
#undef RND
|
||||||
|
|
||||||
@ -160,11 +175,14 @@ __global__ void free_world(hitable **d_list, int d_list_size, hitable **d_world,
|
|||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
const char *image_filename = "out.ppm";
|
const char *image_filename = "out.ppm";
|
||||||
int nx = 1200;
|
int nx = 1000;
|
||||||
int ny = 600;
|
int ny = 1000;
|
||||||
int ns = 20;
|
int ns = 100;
|
||||||
int tx = 16;
|
int tx = 24;
|
||||||
int ty = 16;
|
int ty = 24;
|
||||||
|
int number_of_balls = 300;
|
||||||
|
|
||||||
|
int runs = 5;
|
||||||
|
|
||||||
std::cout << "Rendering a " << nx << "x" << ny << " image ";
|
std::cout << "Rendering a " << nx << "x" << ny << " image ";
|
||||||
std::cout << "in " << tx << "x" << ty << " blocks.\n";
|
std::cout << "in " << tx << "x" << ty << " blocks.\n";
|
||||||
@ -183,23 +201,28 @@ int main() {
|
|||||||
checkCudaErrors(cudaMalloc((void **)&d_rand_world_state, num_pixels*sizeof(curandState)));
|
checkCudaErrors(cudaMalloc((void **)&d_rand_world_state, num_pixels*sizeof(curandState)));
|
||||||
|
|
||||||
// Initialize world rand state
|
// Initialize world rand state
|
||||||
|
std::cout << "Initializing world random\n";
|
||||||
rand_init<<<1,1>>>(d_rand_world_state);
|
rand_init<<<1,1>>>(d_rand_world_state);
|
||||||
checkCudaErrors(cudaGetLastError());
|
checkCudaErrors(cudaGetLastError());
|
||||||
checkCudaErrors(cudaDeviceSynchronize());
|
checkCudaErrors(cudaDeviceSynchronize());
|
||||||
|
|
||||||
// populate world
|
// populate world
|
||||||
|
std::cout << "Populating world\n";
|
||||||
hitable **d_list;
|
hitable **d_list;
|
||||||
int d_list_size = 22*22 + 1 + 3;
|
int d_list_size = number_of_balls + 1 + 3;
|
||||||
checkCudaErrors(cudaMalloc((void **)&d_list, d_list_size*sizeof(hitable *)));
|
checkCudaErrors(cudaMalloc((void **)&d_list, d_list_size*sizeof(hitable *)));
|
||||||
hitable **d_world;
|
hitable **d_world;
|
||||||
checkCudaErrors(cudaMalloc((void **)&d_world, sizeof(hitable *)));
|
checkCudaErrors(cudaMalloc((void **)&d_world, sizeof(hitable *)));
|
||||||
camera **d_camera;
|
camera **d_camera;
|
||||||
checkCudaErrors(cudaMalloc((void **)&d_camera, sizeof(camera *)));
|
checkCudaErrors(cudaMalloc((void **)&d_camera, sizeof(camera *)));
|
||||||
create_world<<<1,1>>>(d_list, d_list_size, d_world, d_camera, nx, ny, d_rand_world_state);
|
create_world<<<1,1>>>(d_list, d_list_size, d_world, d_camera, nx, ny, d_rand_world_state, number_of_balls);
|
||||||
checkCudaErrors(cudaGetLastError());
|
checkCudaErrors(cudaGetLastError());
|
||||||
checkCudaErrors(cudaDeviceSynchronize());
|
checkCudaErrors(cudaDeviceSynchronize());
|
||||||
|
|
||||||
// Render frame buffer
|
// Render frame buffer
|
||||||
|
std::cout << "Started rendering\n";
|
||||||
|
double average_time = 0;
|
||||||
|
for (int i = 0; i < runs; i++) {
|
||||||
clock_t start = clock();
|
clock_t start = clock();
|
||||||
{
|
{
|
||||||
dim3 blocks(nx/tx+1, ny/ty+1);
|
dim3 blocks(nx/tx+1, ny/ty+1);
|
||||||
@ -214,6 +237,10 @@ int main() {
|
|||||||
clock_t stop = clock();
|
clock_t stop = clock();
|
||||||
double timer_seconds = ((double)(stop - start)) / CLOCKS_PER_SEC;
|
double timer_seconds = ((double)(stop - start)) / CLOCKS_PER_SEC;
|
||||||
std::cout << "took " << timer_seconds << " seconds.\n";
|
std::cout << "took " << timer_seconds << " seconds.\n";
|
||||||
|
average_time += timer_seconds;
|
||||||
|
}
|
||||||
|
average_time /= runs;
|
||||||
|
std::cout << "average time: " << average_time << " seconds.\n";
|
||||||
|
|
||||||
// Saving frame buffer
|
// Saving frame buffer
|
||||||
FILE *f = fopen(image_filename, "w");
|
FILE *f = fopen(image_filename, "w");
|
||||||
|
Loading…
Reference in New Issue
Block a user