Add materials (lambertian, metal), a camera class and per-pixel curand
sampling to the CUDA ray tracer.

Review notes for this revision of the patch:
  * color(): the background gradient now uses cur_ray instead of the primary
    ray r, so bounced rays pick up the sky in the direction they escaped.
  * free_world(): deletes the objects (mat, d_list[i], *d_world, *d_camera)
    instead of the addresses of the pointer slots, which live in cudaMalloc'd
    memory or in kernel parameters and must never be passed to delete.
  * material: virtual destructor added (objects are deleted via material*).
  * create_world(): guard tests blockIdx.x instead of blockIdx.y.
  * render_init launch is error-checked like every other launch.
  * Launch configurations <<<blocks, threads>>> restored for render_init and
    render.
  * NOTE(review): this text was rebuilt from a whitespace-collapsed copy.
    Indentation, blank context lines and the Makefile tab are reconstructed,
    hunk counts were recomputed for the added lines, and the `index` hashes
    are stale. The header names of the pre-existing `#include` context lines
    were lost and are left bare; the added includes are assumed to be
    <curand_kernel.h> / <cuda_runtime.h>. Confirm against the tree before
    applying.

diff --git a/Makefile b/Makefile
index 2d5508a..6517056 100644
--- a/Makefile
+++ b/Makefile
@@ -3,15 +3,15 @@ HOST_COMPILER = g++
 NVCC = $(CUDA_PATH)/bin/nvcc
 
 # select one of these for Debug vs. Release
-NVCC_DBG = -g -G
-#NVCC_DBG =
+#NVCC_DBG = -g -G
+NVCC_DBG =
 
 NVCCFLAGS = $(NVCC_DBG) -m64
 GENCODE_FLAGS = --gpu-architecture=compute_50 --gpu-code=compute_50,sm_50,sm_52
 
 BUILD_DIR = build
 SRCS = src/main.cu
-INCS = src/vec3.cpp src/ray.cpp
+INCS = src/*.cpp
 
 $(BUILD_DIR)/cudart: $(BUILD_DIR)/cudart.o
 	$(NVCC) $(NVCCFLAGS) $(GENCODE_FLAGS) -o $(BUILD_DIR)/cudart $(BUILD_DIR)/cudart.o
diff --git a/src/camera.cpp b/src/camera.cpp
new file mode 100644
index 0000000..49e3bf5
--- /dev/null
+++ b/src/camera.cpp
@@ -0,0 +1,25 @@
+#pragma once
+
+#include "ray.cpp"
+
+// Fixed camera at the origin looking at a 4x2 viewport on the z = -1 plane.
+// Constructed on the device (see create_world in main.cu).
+class camera {
+public:
+    __device__ camera() {
+        lower_left_corner = vec3(-2.0, -1.0, -1.0);
+        horizontal = vec3( 4.0, 0.0, 0.0);
+        vertical = vec3( 0.0, 2.0, 0.0);
+        origin = vec3( 0.0, 0.0, 0.0);
+    }
+    // Returns the ray from `origin` towards the viewport point (u, v), where
+    // (0, 0) is the lower-left corner and (1, 1) the upper-right corner.
+    __device__ ray get_ray(float u, float v) {
+        return ray(origin, lower_left_corner + u*horizontal + v*vertical - origin);
+    }
+
+    vec3 origin;
+    vec3 lower_left_corner;
+    vec3 horizontal;
+    vec3 vertical;
+};
diff --git a/src/hitable.cpp b/src/hitable.cpp
index 880b7a4..ee45128 100644
--- a/src/hitable.cpp
+++ b/src/hitable.cpp
@@ -2,10 +2,13 @@
 
 #include "ray.cpp"
 
+class material;
+
 struct hit_record {
     float t;
     vec3 p;
     vec3 normal;
+    material *mat;  // material of the object that was hit; not owned
 };
 
 class hitable {
diff --git a/src/main.cu b/src/main.cu
index 40b4eaf..e034139 100644
--- a/src/main.cu
+++ b/src/main.cu
@@ -1,4 +1,5 @@
 #include
+#include <curand_kernel.h>
 #include
 #include
 #include
@@ -7,6 +8,8 @@
 #include "vec3.cpp"
 #include "ray.cpp"
 #include "sphere.cpp"
+#include "material.cpp"
+#include "camera.cpp"
 #include "hitable_list.cpp"
 
 #define checkCudaErrors(val) check_cuda((val), #val, __FILE__, __LINE__)
@@ -29,54 +32,109 @@ __device__ bool hit_sphere(const vec3& center, float radius, const ray& r) {
     return (discriminant > 0.0);
 }
 
-__device__ vec3 color(const ray& r, hitable **world) {
-    hit_record rec;
-    if ((*world)->hit(r, 0.0, FLT_MAX, rec)) {
-        return 0.5f*vec3(rec.normal.x()+1.0f, rec.normal.y()+1.0f, rec.normal.z()+1.0f);
+// Follows ray `r` through the scene for at most 50 bounces. Each scatter
+// multiplies its attenuation into the running product, which finally tints
+// the sky color; absorbed rays and rays that never escape return black.
+__device__ vec3 color(const ray& r, hitable **world, curandState *local_rand_state) {
+    ray cur_ray = r;
+    vec3 cur_attenuation = vec3(1,1,1);
+    for (int i = 0; i < 50; i++) {
+        hit_record rec;
+        if ((*world)->hit(cur_ray, 0.001f, FLT_MAX, rec)) {
+            ray scattered;
+            vec3 attenuation;
+            if (rec.mat->scatter(cur_ray, rec, attenuation, scattered, local_rand_state)) {
+                cur_attenuation *= attenuation;
+                cur_ray = scattered;
+            } else {
+                return vec3(0,0,0);
+            }
+        } else {
+            // The sky gradient depends on the ray that escaped (cur_ray);
+            // using the primary ray `r` here tinted every bounce wrongly.
+            vec3 unit_direction = unit_vector(cur_ray.direction());
+            float t = 0.5f * (unit_direction.y() + 1.0f);
+            vec3 c = (1.0f - t)*vec3(1.0,1.0,1.0) + t*vec3(0.5, 0.7, 1.0);
+            return cur_attenuation * c;
+        }
     }
+    return vec3(0, 0, 0);
+}
 
-    vec3 unit_direction = unit_vector(r.direction());
-    float t = 0.5f * (unit_direction.y() + 1.0f);
-    return (1.0f - t)*vec3(1.0,1.0,1.0) + t*vec3(0.5, 0.7, 1.0);
+// Initializes one curand state per pixel: shared seed 2002, with the pixel
+// index as the subsequence. Expects the same 2D launch grid as render().
+__global__ void render_init(int max_x, int max_y, curandState *rand_state) {
+    int x = threadIdx.x + blockIdx.x * blockDim.x;
+    int y = threadIdx.y + blockIdx.y * blockDim.y;
+    if ((x >= max_x) || (y >= max_y)) return;
+    int pixel_idx = y*max_x + x;
+
+    curand_init(2002, pixel_idx, 0, &rand_state[pixel_idx]);
 }
 
+// One thread per pixel: averages `ns` jittered samples through the camera,
+// applies a square-root gamma and writes the color to fb[y*max_x + x]. The
+// advanced RNG state is stored back to rand_state.
 __global__ void render(vec3 *fb, int max_x, int max_y,
-    vec3 lower_left_corner, vec3 horizontal, vec3 vertical,
-    vec3 origin,
-    hitable **world) {
+    int ns,
+    camera **cam,
+    hitable **world,
+    curandState *rand_state) {
     int x = threadIdx.x + blockIdx.x * blockDim.x;
     int y = threadIdx.y + blockIdx.y * blockDim.y;
     if ((x >= max_x) || (y >= max_y)) return;
     int pixel_idx = y*max_x + x;
 
-    float u = float(x) / max_x;
-    float v = float(y) / max_y;
-    ray r(origin, lower_left_corner + u*horizontal + v*vertical);
-    fb[pixel_idx] = color(r, world);
+    curandState local_rand_state = rand_state[pixel_idx];
+    vec3 col(0,0,0);
+    for (int s = 0; s < ns; s++) {
+        float u = float(x + curand_uniform(&local_rand_state)) / max_x;
+        float v = float(y + curand_uniform(&local_rand_state)) / max_y;
+        ray r = (*cam)->get_ray(u,v);
+        col += color(r, world, &local_rand_state);
+    }
+    rand_state[pixel_idx] = local_rand_state;
+
+    col /= float(ns);
+    col[0] = sqrt(col[0]);
+    col[1] = sqrt(col[1]);
+    col[2] = sqrt(col[2]);
+    fb[pixel_idx] = col;
 }
 
-__global__ void create_world(hitable **d_list, int d_list_size, hitable **d_world) {
-    if (threadIdx.x == 0 && blockIdx.y == 0) {
-        d_list[0] = new sphere(vec3(0,0, -1), 0.5);
-        d_list[1] = new sphere(vec3(0,-100.5, -1), 100);
+// Builds the scene (four spheres, so d_list_size must be 4), the hitable_list
+// and the camera with device-side new. Only thread 0 of block 0 does the work.
+__global__ void create_world(hitable **d_list, int d_list_size, hitable **d_world, camera **d_camera) {
+    if (threadIdx.x == 0 && blockIdx.x == 0) {
+        d_list[0] = new sphere(vec3( 0, 0 , -1), 0.5, new lambertian(vec3(0.8, 0.3, 0.3)));
+        d_list[1] = new sphere(vec3( 0, -100.5, -1), 100, new lambertian(vec3(0.8, 0.8, 0.0)));
+        d_list[2] = new sphere(vec3( 1, 0 , -1), 0.5, new metal(vec3(0.8, 0.6, 0.2), 1.0));
+        d_list[3] = new sphere(vec3(-1, 0 , -1), 0.5, new metal(vec3(0.8, 0.8, 0.8), 0.3));
         *d_world = new hitable_list(d_list, d_list_size);
+        *d_camera = new camera();
     }
 }
 
-__global__ void free_world(hitable **d_list, int d_list_size, hitable **d_world) {
+// Frees what create_world() allocated with device-side new. Launch <<<1,1>>>.
+// NOTE(review): deleting through hitable* assumes a virtual ~hitable() - confirm.
+__global__ void free_world(hitable **d_list, int d_list_size, hitable **d_world, camera **d_camera) {
     for (int i = 0; i < d_list_size; i++) {
+        // Every entry is a sphere (see create_world); free its material first.
+        delete ((sphere*)d_list[i])->mat;
         delete d_list[i];
     }
     delete *d_world;
+    delete *d_camera;
 }
 
 int main() {
     const char *image_filename = "out.ppm";
 
     int nx = 1200;
     int ny = 600;
-    int tx = 8;
-    int ty = 8;
+    int ns = 100;
+    int tx = 16;
+    int ty = 16;
 
     std::cout << "Rendering a " << nx << "x" << ny << " image ";
     std::cout << "in " << tx << "x" << ty << " blocks.\n";
@@ -88,13 +146,21 @@ int main() {
     vec3 *fb;
     checkCudaErrors(cudaMallocManaged(&fb, fb_size));
 
+    // allocate random state
+    curandState *d_rand_state;
+    checkCudaErrors(cudaMalloc((void **)&d_rand_state, num_pixels*sizeof(curandState)));
+
     // populate world
     hitable **d_list;
-    int d_list_size = 2;
+    int d_list_size = 4;
     checkCudaErrors(cudaMalloc((void **)&d_list, d_list_size*sizeof(hitable *)));
     hitable **d_world;
     checkCudaErrors(cudaMalloc((void **)&d_world, sizeof(hitable *)));
-    create_world<<<1,1>>>(d_list, d_list_size, d_world);
+    camera **d_camera;
+    checkCudaErrors(cudaMalloc((void **)&d_camera, sizeof(camera *)));
+    create_world<<<1,1>>>(d_list, d_list_size, d_world, d_camera);
+    checkCudaErrors(cudaGetLastError());
+    checkCudaErrors(cudaDeviceSynchronize());
 
     // Render frame buffer
     clock_t start = clock();
@@ -102,12 +168,11 @@ int main() {
         dim3 blocks(nx/tx+1, ny/ty+1);
         dim3 threads(tx, ty);
 
-        render<<<blocks, threads>>>(fb, nx, ny,
-            vec3(-2.0, -1.0, -1.0),
-            vec3(4.0, 0.0, 0.0),
-            vec3(0.0, 2.0, 0.0),
-            vec3(0.0, 0.0, 0.0),
-            d_world);
+        render_init<<<blocks, threads>>>(nx, ny, d_rand_state);
+        checkCudaErrors(cudaGetLastError());
+        checkCudaErrors(cudaDeviceSynchronize());
+
+        render<<<blocks, threads>>>(fb, nx, ny, ns, d_camera, d_world, d_rand_state);
         checkCudaErrors(cudaGetLastError());
         checkCudaErrors(cudaDeviceSynchronize());
     }
@@ -140,11 +205,13 @@ int main() {
     fclose(f);
 
     // Cleanup
-    free_world<<<1,1>>>(d_list, d_list_size, d_world);
+    free_world<<<1,1>>>(d_list, d_list_size, d_world, d_camera);
     checkCudaErrors(cudaGetLastError());
     checkCudaErrors(cudaDeviceSynchronize());
     checkCudaErrors(cudaFree(d_list));
     checkCudaErrors(cudaFree(d_world));
+    checkCudaErrors(cudaFree(d_camera));
+    checkCudaErrors(cudaFree(d_rand_state));
     checkCudaErrors(cudaFree(fb));
 
     cudaDeviceReset();
diff --git a/src/material.cpp b/src/material.cpp
new file mode 100644
index 0000000..0cf9c3f
--- /dev/null
+++ b/src/material.cpp
@@ -0,0 +1,72 @@
+#pragma once
+
+struct hit_record;
+
+#include "ray.cpp"
+#include "hitable.cpp"
+
+// Returns a vector whose three components are separate curand_uniform draws.
+__device__ inline vec3 rand_vec3(curandState *state) {
+    return vec3(curand_uniform(state), curand_uniform(state), curand_uniform(state));
+}
+
+// Rejection-samples a point with squared length < 1 around the origin.
+__device__ vec3 random_in_unit_sphere(curandState *local_rand_state) {
+    vec3 p;
+    do {
+        p = 2*rand_vec3(local_rand_state) - vec3(1,1,1);
+    } while (p.squared_length() >= 1.0f);
+    return p;
+}
+
+// Mirrors v about the plane whose normal is n (n is expected to be unit length).
+__device__ vec3 reflect(const vec3& v, const vec3& n) {
+    return v - 2.0f*dot(v,n)*n;
+}
+
+// Interface for surface materials. scatter() returns false when the incoming
+// ray is absorbed; otherwise it fills `attenuation` and `scattered`.
+class material {
+public:
+    // Materials are deleted through material* in free_world(), so the
+    // destructor has to be virtual.
+    __device__ virtual ~material() {}
+    __device__ virtual bool scatter(const ray& r_in, const hit_record& rec, vec3& attenuation, ray& scattered, curandState *local_rand_state) const = 0;
+};
+
+// Diffuse surface: always scatters, towards normal + random point in unit sphere.
+class lambertian : public material {
+public:
+    __device__ lambertian(const vec3& a) : albedo(a) {}
+    __device__ virtual bool scatter(const ray& r_in, const hit_record& rec, vec3& attenuation, ray& scattered, curandState *local_rand_state) const {
+        vec3 target = rec.p + rec.normal + random_in_unit_sphere(local_rand_state);
+        scattered = ray(rec.p, target - rec.p);
+        attenuation = albedo;
+        return true;
+    }
+
+    vec3 albedo;
+};
+
+// Reflective surface. `fuzz` (capped at 1) scales a random offset that is
+// added to the mirror direction.
+class metal : public material {
+public:
+    __device__ metal(const vec3& a, float f) : albedo(a) {
+        if (f < 1) {
+            fuzz = f;
+        } else {
+            fuzz = 1;
+        }
+    }
+    __device__ virtual bool scatter(const ray& r_in, const hit_record& rec, vec3& attenuation, ray& scattered, curandState *local_rand_state) const {
+        vec3 reflected = reflect(unit_vector(r_in.direction()), rec.normal);
+        scattered = ray(rec.p, reflected + fuzz*random_in_unit_sphere(local_rand_state));
+        attenuation = albedo;
+        // A direction pointing into the surface counts as absorbed.
+        return (dot(scattered.direction(), rec.normal) > 0.0f);
+    }
+
+    vec3 albedo;
+    float fuzz;
+};
diff --git a/src/sphere.cpp b/src/sphere.cpp
index 8a7875f..680391a 100644
--- a/src/sphere.cpp
+++ b/src/sphere.cpp
@@ -1,14 +1,16 @@
 #pragma once
 
 #include "hitable.cpp"
+#include "material.cpp"
 
 class sphere: public hitable {
 public:
     __device__ sphere() {}
-    __device__ sphere(vec3 cen, float r) : center(cen), radius(r) {};
+    __device__ sphere(vec3 cen, float r, material *m) : center(cen), radius(r), mat(m) {};
     __device__ virtual bool hit(const ray& r, float t_min, float t_max, hit_record& rec) const;
     vec3 center;
     float radius;
+    material *mat;
 };
 
 __device__ bool sphere::hit(const ray& r, float t_min, float t_max, hit_record& rec) const {
@@ -23,6 +25,7 @@ __device__ bool sphere::hit(const ray& r, float t_min, float t_max, hit_record&
             rec.t = temp;
             rec.p = r.point_at_parameter(rec.t);
             rec.normal = (rec.p - center) / radius;
+            rec.mat = mat;
             return true;
         }
 
@@ -31,6 +34,7 @@ __device__ bool sphere::hit(const ray& r, float t_min, float t_max, hit_record&
             rec.t = temp;
             rec.p = r.point_at_parameter(rec.t);
             rec.normal = (rec.p - center) / radius;
+            rec.mat = mat;
             return true;
         }
     }
diff --git a/src/vec3.cpp b/src/vec3.cpp
index 840af54..dc08e71 100644
--- a/src/vec3.cpp
+++ b/src/vec3.cpp
@@ -1,6 +1,7 @@
 #pragma once
 
-#include "cuda_runtime.h"
+#include <cuda_runtime.h>
+#include <curand_kernel.h>
 #include
 #include
 #include
@@ -37,8 +38,6 @@ public:
     float e[3];
 };
 
-
-
 inline std::istream& operator>>(std::istream &is, vec3 &t) {
     is >> t.e[0] >> t.e[1] >> t.e[2];
     return is;