diff --git a/CMakeLists.txt b/CMakeLists.txt deleted file mode 100644 index 5229a64..0000000 --- a/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -cmake_minimum_required(VERSION 3.12) -project(raytracing-in-a-weekend) - -set(CMAKE_CXX_STANDARD 14) -enable_language(CUDA) - -add_executable(main src/main.cu) -set_target_properties(main PROPERTIES CUDA_ARCHITECTURES all-major) diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..2d5508a --- /dev/null +++ b/Makefile @@ -0,0 +1,41 @@ +CUDA_PATH ?= /usr/local/cuda +HOST_COMPILER = g++ +NVCC = $(CUDA_PATH)/bin/nvcc + +# select one of these for Debug vs. Release +NVCC_DBG = -g -G +#NVCC_DBG = + +NVCCFLAGS = $(NVCC_DBG) -m64 +GENCODE_FLAGS = --gpu-architecture=compute_50 --gpu-code=compute_50,sm_50,sm_52 + +BUILD_DIR = build +SRCS = src/main.cu +INCS = src/vec3.cpp src/ray.cpp + +$(BUILD_DIR)/cudart: $(BUILD_DIR)/cudart.o + $(NVCC) $(NVCCFLAGS) $(GENCODE_FLAGS) -o $(BUILD_DIR)/cudart $(BUILD_DIR)/cudart.o + +$(BUILD_DIR)/cudart.o: $(SRCS) $(INCS) + mkdir -p $(BUILD_DIR) + $(NVCC) $(NVCCFLAGS) $(GENCODE_FLAGS) -o $(BUILD_DIR)/cudart.o -c src/main.cu + +out.ppm: $(BUILD_DIR)/cudart + rm -f out.ppm + ./$(BUILD_DIR)/cudart + +out.jpg: out.ppm + rm -f out.jpg + ppmtojpeg out.ppm > out.jpg + +profile_basic: $(BUILD_DIR)/cudart + nvprof ./$(BUILD_DIR)/cudart + + +# use nvprof --query-metrics +profile_metrics: $(BUILD_DIR)/cudart + nvprof --metrics achieved_occupancy,inst_executed,inst_fp_32,inst_fp_64,inst_integer ./cudart + +clean: + rm -rf $(BUILD_DIR) + rm -f out.ppm out.jpg diff --git a/run.sh b/run.sh index c200fdb..d38753d 100755 --- a/run.sh +++ b/run.sh @@ -1,6 +1,2 @@ #!/bin/sh -mkdir -p build -cd build -cmake .. -make -./main +make out.jpg diff --git a/src/main.cu b/src/main.cu index c232729..ca8162e 100644 --- a/src/main.cu +++ b/src/main.cu @@ -3,80 +3,95 @@ #include #include +#include "vec3.cpp" +#include "ray.cpp" + #define checkCudaErrors(val) check_cuda((val), #val, __FILE__, __LINE__) void check_cuda(cudaError_t result, const char *func, const char *file, int line) { - if (result) { - std::cout << "CUDA error: " << cudaGetErrorString(result) << " (error code " << static_cast(result) << ")"; - std::cout << " at " << file << ":" << line << " '" << func << "' \n"; - cudaDeviceReset(); - exit(-1); - } + if (result) { + std::cout << "CUDA error: " << cudaGetErrorString(result) << " (error code " << static_cast(result) << ")"; + std::cout << " at " << file << ":" << line << " '" << func << "' \n"; + cudaDeviceReset(); + exit(-1); + } } -__global__ void render(float *fb, int max_x, int max_y) { +__device__ vec3 color(const ray& r) { + vec3 unit_direction = unit_vector(r.direction()); + float t = 0.5f * (unit_direction.y() + 1.0f); + return (1.0f - t)*vec3(1.0,1.0,1.0) + t*vec3(1.0, 0.0, 0.0); +} + +__global__ void render(vec3 *fb, int max_x, int max_y, vec3 lower_left_corner, vec3 horizontal, vec3 vertical, vec3 origin) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if ((x >= max_x) || (y >= max_y)) return; - int pixel_idx = 3*(y*max_x + x); + int pixel_idx = y*max_x + x; + float u = float(x) / max_x; + float v = float(y) / max_y; - fb[pixel_idx + 0] = float(x) / max_x; - fb[pixel_idx + 1] = float(y) / max_y; - fb[pixel_idx + 2] = 0.2; + ray r(origin, lower_left_corner + u*horizontal + v*vertical); + fb[pixel_idx] = color(r); } int main() { - const char *image_filename = "image.ppm"; - int nx = 1200; - int ny = 600; - int tx = 8; - int ty = 8; + const char *image_filename = "out.ppm"; + int nx = 1200; + int ny = 600; + int tx = 8; + int ty = 8; - int num_pixels = nx*ny; - size_t fb_size = 3*num_pixels*sizeof(float); + std::cout << "Rendering a " << nx << "x" << ny << " image "; + std::cout << "in " << tx << "x" << ty << " blocks.\n"; - float *fb; - checkCudaErrors(cudaMallocManaged(&fb, fb_size)); + int num_pixels = nx*ny; + size_t fb_size = num_pixels*sizeof(vec3); - clock_t start = clock(); - { - dim3 blocks(nx/tx+1, ny/ty+1); - dim3 threads(tx, ty); + vec3 *fb; + checkCudaErrors(cudaMallocManaged(&fb, fb_size)); - render<<>>(fb, nx, ny); - checkCudaErrors(cudaGetLastError()); - checkCudaErrors(cudaDeviceSynchronize()); - } - clock_t stop = clock(); - double timer_seconds = ((double)(stop - start)) / CLOCKS_PER_SEC; - std::cout << "took " << timer_seconds << " seconds.\n"; + clock_t start = clock(); + { + dim3 blocks(nx/tx+1, ny/ty+1); + dim3 threads(tx, ty); - FILE *f = fopen(image_filename, "w"); - assert(f); + render<<>>(fb, nx, ny, + vec3(-2.0, -1.0, -1.0), + vec3(4.0, 0.0, 0.0), + vec3(0.0, 2.0, 0.0), + vec3(0.0, 0.0, 0.0)); + checkCudaErrors(cudaGetLastError()); + checkCudaErrors(cudaDeviceSynchronize()); + } + clock_t stop = clock(); + double timer_seconds = ((double)(stop - start)) / CLOCKS_PER_SEC; + std::cout << "took " << timer_seconds << " seconds.\n"; - char header[128] = { 0 }; - int header_size = snprintf(header, sizeof(header), "P3\n%d %d\n255\n", nx, ny); - fwrite(header, header_size, 1, f); + FILE *f = fopen(image_filename, "w"); + assert(f); - for (int y=ny-1; y >= 0; y--) { - for (int x=0; x < nx; x++) { - size_t pixel_idx = 3*(y*nx + x); - float r = fb[pixel_idx + 0]; - float g = fb[pixel_idx + 1]; - float b = fb[pixel_idx + 2]; - int ir = int(255.99*r); - int ig = int(255.99*g); - int ib = int(255.99*b); + char header[128] = { 0 }; + int header_size = snprintf(header, sizeof(header), "P3\n%d %d\n255\n", nx, ny); + fwrite(header, header_size, 1, f); - char pixel_str[128] = { 0 }; - int str_size = snprintf(pixel_str, sizeof(pixel_str), "%d %d %d\n", ir, ig, ib); - fwrite(pixel_str, str_size, 1, f); - } - } + for (int y=ny-1; y >= 0; y--) { + for (int x=0; x < nx; x++) { + size_t pixel_idx = (y*nx + x); + vec3 pixel = fb[pixel_idx]; + int ir = int(255.99*pixel.r()); + int ig = int(255.99*pixel.g()); + int ib = int(255.99*pixel.b()); - fclose(f); + char pixel_str[128] = { 0 }; + int str_size = snprintf(pixel_str, sizeof(pixel_str), "%d %d %d\n", ir, ig, ib); + fwrite(pixel_str, str_size, 1, f); + } + } - checkCudaErrors(cudaFree(fb)); + fclose(f); - return 0; + checkCudaErrors(cudaFree(fb)); + + return 0; } diff --git a/src/ray.cpp b/src/ray.cpp new file mode 100644 index 0000000..d3464a0 --- /dev/null +++ b/src/ray.cpp @@ -0,0 +1,14 @@ +#include "vec3.cpp" + +class ray +{ + public: + __device__ ray() {} + __device__ ray(const vec3& a, const vec3& b) { A = a; B = b; } + __device__ vec3 origin() const { return A; } + __device__ vec3 direction() const { return B; } + __device__ vec3 point_at_parameter(float t) const { return A + t*B; } + + vec3 A; + vec3 B; +}; diff --git a/src/vec3.cpp b/src/vec3.cpp new file mode 100644 index 0000000..840af54 --- /dev/null +++ b/src/vec3.cpp @@ -0,0 +1,142 @@ +#pragma once + +#include "cuda_runtime.h" +#include +#include +#include + +class vec3 { + +public: + __host__ __device__ vec3() {} + __host__ __device__ vec3(float e0, float e1, float e2) { e[0] = e0; e[1] = e1; e[2] = e2; } + __host__ __device__ inline float x() const { return e[0]; } + __host__ __device__ inline float y() const { return e[1]; } + __host__ __device__ inline float z() const { return e[2]; } + __host__ __device__ inline float r() const { return e[0]; } + __host__ __device__ inline float g() const { return e[1]; } + __host__ __device__ inline float b() const { return e[2]; } + + __host__ __device__ inline const vec3& operator+() const { return *this; } + __host__ __device__ inline vec3 operator-() const { return vec3(-e[0], -e[1], -e[2]); } + __host__ __device__ inline float operator[](int i) const { return e[i]; } + __host__ __device__ inline float& operator[](int i) { return e[i]; }; + + __host__ __device__ inline vec3& operator+=(const vec3 &v2); + __host__ __device__ inline vec3& operator-=(const vec3 &v2); + __host__ __device__ inline vec3& operator*=(const vec3 &v2); + __host__ __device__ inline vec3& operator/=(const vec3 &v2); + __host__ __device__ inline vec3& operator*=(const float t); + __host__ __device__ inline vec3& operator/=(const float t); + + __host__ __device__ inline float length() const { return sqrt(e[0]*e[0] + e[1]*e[1] + e[2]*e[2]); } + __host__ __device__ inline float squared_length() const { return e[0]*e[0] + e[1]*e[1] + e[2]*e[2]; } + __host__ __device__ inline void make_unit_vector(); + + + float e[3]; +}; + + + +inline std::istream& operator>>(std::istream &is, vec3 &t) { + is >> t.e[0] >> t.e[1] >> t.e[2]; + return is; +} + +inline std::ostream& operator<<(std::ostream &os, const vec3 &t) { + os << t.e[0] << " " << t.e[1] << " " << t.e[2]; + return os; +} + +__host__ __device__ inline void vec3::make_unit_vector() { + float k = 1.0 / sqrt(e[0]*e[0] + e[1]*e[1] + e[2]*e[2]); + e[0] *= k; e[1] *= k; e[2] *= k; +} + +__host__ __device__ inline vec3 operator+(const vec3 &v1, const vec3 &v2) { + return vec3(v1.e[0] + v2.e[0], v1.e[1] + v2.e[1], v1.e[2] + v2.e[2]); +} + +__host__ __device__ inline vec3 operator-(const vec3 &v1, const vec3 &v2) { + return vec3(v1.e[0] - v2.e[0], v1.e[1] - v2.e[1], v1.e[2] - v2.e[2]); +} + +__host__ __device__ inline vec3 operator*(const vec3 &v1, const vec3 &v2) { + return vec3(v1.e[0] * v2.e[0], v1.e[1] * v2.e[1], v1.e[2] * v2.e[2]); +} + +__host__ __device__ inline vec3 operator/(const vec3 &v1, const vec3 &v2) { + return vec3(v1.e[0] / v2.e[0], v1.e[1] / v2.e[1], v1.e[2] / v2.e[2]); +} + +__host__ __device__ inline vec3 operator*(float t, const vec3 &v) { + return vec3(t*v.e[0], t*v.e[1], t*v.e[2]); +} + +__host__ __device__ inline vec3 operator/(vec3 v, float t) { + return vec3(v.e[0]/t, v.e[1]/t, v.e[2]/t); +} + +__host__ __device__ inline vec3 operator*(const vec3 &v, float t) { + return vec3(t*v.e[0], t*v.e[1], t*v.e[2]); +} + +__host__ __device__ inline float dot(const vec3 &v1, const vec3 &v2) { + return v1.e[0] *v2.e[0] + v1.e[1] *v2.e[1] + v1.e[2] *v2.e[2]; +} + +__host__ __device__ inline vec3 cross(const vec3 &v1, const vec3 &v2) { + return vec3( (v1.e[1]*v2.e[2] - v1.e[2]*v2.e[1]), + (-(v1.e[0]*v2.e[2] - v1.e[2]*v2.e[0])), + (v1.e[0]*v2.e[1] - v1.e[1]*v2.e[0])); +} + + +__host__ __device__ inline vec3& vec3::operator+=(const vec3 &v){ + e[0] += v.e[0]; + e[1] += v.e[1]; + e[2] += v.e[2]; + return *this; +} + +__host__ __device__ inline vec3& vec3::operator*=(const vec3 &v){ + e[0] *= v.e[0]; + e[1] *= v.e[1]; + e[2] *= v.e[2]; + return *this; +} + +__host__ __device__ inline vec3& vec3::operator/=(const vec3 &v){ + e[0] /= v.e[0]; + e[1] /= v.e[1]; + e[2] /= v.e[2]; + return *this; +} + +__host__ __device__ inline vec3& vec3::operator-=(const vec3& v) { + e[0] -= v.e[0]; + e[1] -= v.e[1]; + e[2] -= v.e[2]; + return *this; +} + +__host__ __device__ inline vec3& vec3::operator*=(const float t) { + e[0] *= t; + e[1] *= t; + e[2] *= t; + return *this; +} + +__host__ __device__ inline vec3& vec3::operator/=(const float t) { + float k = 1.0/t; + + e[0] *= k; + e[1] *= k; + e[2] *= k; + return *this; +} + +__host__ __device__ inline vec3 unit_vector(vec3 v) { + return v / v.length(); +}