test SIMD raytracing
This commit is contained in:
parent
2c98040c42
commit
583cd9b417
|
@ -3,15 +3,19 @@
|
|||
#include <stdlib.h>
|
||||
#include <defocus/defocus.h>
|
||||
|
||||
#include <defocus/camera.h>
|
||||
|
||||
int pinhole_fn(int argc, char **argv);
|
||||
int thin_lense_fn(int argc, char **argv);
|
||||
int test_fn(int argc, char **argv);
|
||||
|
||||
static const char *_model_names[] = {"pinhole", "thin_lense"};
|
||||
static const char *_model_names[] = {"pinhole", "thin_lense", "test"};
|
||||
typedef int (*model_fn)(int argc, char **argv);
|
||||
|
||||
static model_fn _model_fns[] = {
|
||||
pinhole_fn,
|
||||
thin_lense_fn,
|
||||
test_fn,
|
||||
};
|
||||
|
||||
void usage(const char *pname)
|
||||
|
@ -25,7 +29,6 @@ void usage(const char *pname)
|
|||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
|
||||
if (argc < 2) {
|
||||
fprintf(stderr, "Missing model name!\n");
|
||||
usage(argv[0]);
|
||||
|
@ -218,4 +221,22 @@ int thin_lense_fn(int argc, char **argv)
|
|||
df_release_image(out_image);
|
||||
|
||||
return error_code;
|
||||
}
|
||||
|
||||
int test_fn( int argc, char **argv ) {
|
||||
|
||||
df_v3 v = {1.f, 2.f, 3.f};
|
||||
df_m4 t = df_translate(-1.f, 2.f, 1.5f);
|
||||
t = df_inverse_transform(t);
|
||||
|
||||
df_v3 tv = df_transform_v3(t, v);
|
||||
|
||||
df_camera_i cam = df_create_perspective_camera(1024.0, 1024.0, 1024.0, 1024.0, 2.e-4f, 1000.f, 350.0, 21.0);
|
||||
df_ray_packet packet = cam.build_ray_packet(cam.o);
|
||||
df_evaluate_ray_packet(&packet);
|
||||
|
||||
df_release_ray_packet(&packet);
|
||||
|
||||
cam.release(cam.o);
|
||||
return 0;
|
||||
}
|
|
@ -38,6 +38,20 @@ typedef enum
|
|||
/** @brief Zero an array */
|
||||
#define DF_ZERO_ARRAY(a, n) DF_ZERO_MEMORY(a, sizeof((a)[0]) * (n))
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifdef __cplusplus__
|
||||
#define DF_API extern "C" __declspec(dllexport)
|
||||
#else
|
||||
#define DF_API __declspec(dllexport)
|
||||
#endif /* _MSC_VER && __cplusplus__ */
|
||||
#else
|
||||
#ifdef __cplusplus__
|
||||
#define DF_API extern "C"
|
||||
#else
|
||||
#define DF_API
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Simple logging function */
|
||||
|
||||
#ifndef DF_ENABLE_LOGGING
|
||||
|
@ -69,7 +83,7 @@ enum
|
|||
* @param line line number of the log location
|
||||
* @param fmt format string of the message.
|
||||
*/
|
||||
void df_log_impl(int level, const char *file, int line, const char *fmt, ...);
|
||||
DF_API void df_log_impl(int level, const char *file, int line, const char *fmt, ...);
|
||||
|
||||
#define df_log_verbose(...) df_log_impl(df_log_level_verbose, __FILE__, __LINE__, __VA_ARGS__)
|
||||
#define df_log_info(...) df_log_impl(df_log_level_info, __FILE__, __LINE__, __VA_ARGS__)
|
||||
|
@ -112,7 +126,7 @@ typedef union {
|
|||
* @param t interpolation factor
|
||||
* @return a + (b - a) * t
|
||||
*/
|
||||
df_color df_lerp_color(df_color a, df_color b, double t);
|
||||
DF_API df_color df_lerp_color(df_color a, df_color b, double t);
|
||||
|
||||
/** @brief 2d vector */
|
||||
typedef union {
|
||||
|
@ -130,22 +144,22 @@ typedef union {
|
|||
} df_v2;
|
||||
|
||||
/** @brief Add two 2d vectors */
|
||||
df_v2 df_add_v2(df_v2 a, df_v2 b);
|
||||
DF_API df_v2 df_add_v2(df_v2 a, df_v2 b);
|
||||
|
||||
/** @brief Subtract two 2d vectors */
|
||||
df_v2 df_sub_v2(df_v2 a, df_v2 b);
|
||||
DF_API df_v2 df_sub_v2(df_v2 a, df_v2 b);
|
||||
|
||||
/** @brief Calculate the dot product of 2d vectors a and b.
|
||||
*/
|
||||
float df_dot_v2(df_v2 a, df_v2 b);
|
||||
DF_API float df_dot_v2(df_v2 a, df_v2 b);
|
||||
|
||||
/** @brief Multiply a 2d vector with a scalar.
|
||||
*/
|
||||
df_v2 df_mul_v2(float t, df_v2 v);
|
||||
DF_API df_v2 df_mul_v2(float t, df_v2 v);
|
||||
|
||||
/** @brief Returns the normalized version of a 2d vector v
|
||||
*/
|
||||
df_v2 df_normalize_v2(df_v2 v);
|
||||
DF_API df_v2 df_normalize_v2(df_v2 v);
|
||||
|
||||
/** @brief 3d vector */
|
||||
typedef union {
|
||||
|
@ -159,22 +173,22 @@ typedef union {
|
|||
} df_v3;
|
||||
|
||||
/** @brief Add two 3d vectors */
|
||||
df_v3 df_add_v3(df_v3 a, df_v3 b);
|
||||
DF_API df_v3 df_add_v3(df_v3 a, df_v3 b);
|
||||
|
||||
/** @brief Subtract two 3d vectors */
|
||||
df_v3 df_sub_v3(df_v3 a, df_v3 b);
|
||||
DF_API df_v3 df_sub_v3(df_v3 a, df_v3 b);
|
||||
|
||||
/** @brief Calculate the dot product of 3d vectors a and b.
|
||||
*/
|
||||
float df_dot_v3(df_v3 a, df_v3 b);
|
||||
DF_API float df_dot_v3(df_v3 a, df_v3 b);
|
||||
|
||||
/** @brief Multiply a 3d vector with a scalar.
|
||||
*/
|
||||
df_v3 df_mul_v3(float t, df_v3 v);
|
||||
DF_API df_v3 df_mul_v3(float t, df_v3 v);
|
||||
|
||||
/** @brief Returns the normalized version of a 3d vector v
|
||||
*/
|
||||
df_v3 df_normalize_v3(df_v3 v);
|
||||
DF_API df_v3 df_normalize_v3(df_v3 v);
|
||||
|
||||
/** @brief A plane in 3d space.
|
||||
*
|
||||
|
@ -221,7 +235,7 @@ typedef struct
|
|||
} df_line_plane_intersection;
|
||||
|
||||
/** @brief Calculate the intersection between a line and a plane in 3d space */
|
||||
df_line_plane_intersection df_calc_line_plane_intersection(df_line line, df_plane plane);
|
||||
DF_API df_line_plane_intersection df_calc_line_plane_intersection(df_line line, df_plane plane);
|
||||
|
||||
/** @brief A 4x4 matrix.
|
||||
*
|
||||
|
@ -239,22 +253,26 @@ typedef struct df_m4
|
|||
#define DF_M4_AT(m, row, col) ((m).e[(row)*4 + (col)])
|
||||
|
||||
/** @brief Matrix multiply 4x4 matrix a and b */
|
||||
df_m4 df_mul_m4(df_m4 a, df_m4 b);
|
||||
DF_API df_m4 df_mul_m4(df_m4 a, df_m4 b);
|
||||
|
||||
/** @brief Get a scale matrix */
|
||||
df_m4 df_scale(float x, float y, float z);
|
||||
DF_API df_m4 df_scale(float x, float y, float z);
|
||||
|
||||
df_m4 df_translate(float x, float y, float z);
|
||||
DF_API df_m4 df_translate(float x, float y, float z);
|
||||
|
||||
/** @brief Transform (i.e. multiply) a 3d vector v by the transformation matrix T */
|
||||
df_v3 df_transform_v3(df_m4 T, df_v3 v);
|
||||
DF_API df_v3 df_transform_v3(df_m4 T, df_v3 v);
|
||||
|
||||
/** @brief Calculate the inverse of a non-scaling transform matrix.
|
||||
*
|
||||
* Special fast case.
|
||||
*/
|
||||
df_m4 df_inverse_transform_no_scale(df_m4 M);
|
||||
DF_API df_m4 df_inverse_transform_no_scale(df_m4 M);
|
||||
|
||||
/** @brief Calculate the inverse of a transform matrix */
|
||||
df_m4 df_inverse_transform(df_m4 M);
|
||||
DF_API df_m4 df_inverse_transform(df_m4 M);
|
||||
|
||||
/** @brief Calculate the inverse of a general (invertible) 4x4 matrix */
|
||||
DF_API df_m4 df_inverse(df_m4 M);
|
||||
|
||||
#endif
|
|
@ -2,34 +2,12 @@
|
|||
#define DEFOCUS_CAMERA_H
|
||||
|
||||
#include "base.h"
|
||||
#include "raytracing.h"
|
||||
|
||||
/** @file camera.h
|
||||
* @brief basic camera functions
|
||||
*/
|
||||
|
||||
/** @brief Stores information for rays in a SIMD friendly way */
|
||||
typedef struct
|
||||
{
|
||||
/** Packs all SIMD data into a single buffer */
|
||||
float *simd_mem;
|
||||
|
||||
/** Source uvs for rays */
|
||||
df_v2 *ray_uvs;
|
||||
|
||||
/* Points into simd_mem */
|
||||
float *base_x;
|
||||
float *base_y;
|
||||
float *base_z;
|
||||
float *dir_x;
|
||||
float *dir_y;
|
||||
float *dir_z;
|
||||
|
||||
size_t ray_count;
|
||||
} df_ray_packet;
|
||||
|
||||
/** @brief Free ray packet memory */
|
||||
void df_release_ray_packet(df_ray_packet *rays);
|
||||
|
||||
/** @brief Interface for cameras. */
|
||||
typedef struct
|
||||
{
|
||||
|
@ -44,7 +22,7 @@ typedef struct
|
|||
void *o;
|
||||
} df_camera_i;
|
||||
|
||||
df_camera_i
|
||||
df_create_perspective_camera(float image_width, float image_height, float raster_width, float raster_height);
|
||||
DF_API df_camera_i df_create_perspective_camera(
|
||||
float image_width, float image_height, float raster_width, float raster_height, float far_dist, float near_dist, float focal_dist, float lens_radius);
|
||||
|
||||
#endif
|
|
@ -18,7 +18,7 @@ typedef struct df_image df_image;
|
|||
* @param out_image receives the image object
|
||||
* @return error code
|
||||
*/
|
||||
df_result df_create_image(int w, int h, df_image **out_image);
|
||||
DF_API df_result df_create_image(int w, int h, df_image **out_image);
|
||||
|
||||
/** @brief load an image file
|
||||
*
|
||||
|
@ -29,34 +29,34 @@ df_result df_create_image(int w, int h, df_image **out_image);
|
|||
* @param out_image receives the image object
|
||||
* @return error code
|
||||
*/
|
||||
df_result df_load_image(const char *path, int *out_w, int *out_h, df_image **out_image);
|
||||
DF_API df_result df_load_image(const char *path, int *out_w, int *out_h, df_image **out_image);
|
||||
|
||||
/** @brief Write an image to a PNG file
|
||||
* @param img the image
|
||||
* @param path the path
|
||||
*/
|
||||
df_result df_write_image(df_image *img, const char *path);
|
||||
DF_API df_result df_write_image(df_image *img, const char *path);
|
||||
|
||||
/** @brief Free an image.
|
||||
*
|
||||
* Any pointer to the image will be invalid after this.
|
||||
* @param img the image
|
||||
*/
|
||||
void df_release_image(df_image *img);
|
||||
DF_API void df_release_image(df_image *img);
|
||||
|
||||
/** @brief Returns the dimensions of the image */
|
||||
void df_get_image_size(const df_image *image, int *w, int *h);
|
||||
DF_API void df_get_image_size(const df_image *image, int *w, int *h);
|
||||
|
||||
/** @brief Returns the color value at pixel coordinates x, y
|
||||
*
|
||||
* Returns black for coordinates outside the image.
|
||||
*/
|
||||
df_color df_get_image_pixel(const df_image *image, int x, int y);
|
||||
DF_API df_color df_get_image_pixel(const df_image *image, int x, int y);
|
||||
|
||||
/** @brief Set the color value at pixel coordinates x, y.
|
||||
*
|
||||
* Does nothing for coordinates outside the image.
|
||||
*/
|
||||
void df_set_image_pixel(df_image *image, int x, int y, df_color c);
|
||||
DF_API void df_set_image_pixel(df_image *image, int x, int y, df_color c);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -44,7 +44,7 @@ typedef struct df_pinhole_params
|
|||
* @param in_image input image.
|
||||
* @param out_image the output image.
|
||||
*/
|
||||
void df_pinhole(df_pinhole_params params, const df_image *in_image, df_image *out_image);
|
||||
DF_API void df_pinhole(df_pinhole_params params, const df_image *in_image, df_image *out_image);
|
||||
|
||||
/** Parameters for the thin lense model.
|
||||
*/
|
||||
|
@ -72,6 +72,6 @@ typedef struct df_thin_lense_params
|
|||
* @param in_image input image
|
||||
* @param out_image the output image.
|
||||
*/
|
||||
void df_thin_lense(df_thin_lense_params params, const df_image *in_image, df_image *out_image);
|
||||
DF_API void df_thin_lense(df_thin_lense_params params, const df_image *in_image, df_image *out_image);
|
||||
|
||||
#endif
|
||||
|
|
29
include/defocus/raytracing.h
Normal file
29
include/defocus/raytracing.h
Normal file
|
@ -0,0 +1,29 @@
|
|||
#ifndef DF_RAYTRACING_H
|
||||
#define DF_RAYTRACING_H
|
||||
|
||||
/** @brief Stores information for rays in a SIMD friendly way */
|
||||
typedef struct
|
||||
{
|
||||
/** Packs all SIMD data into a single buffer */
|
||||
float *simd_mem;
|
||||
|
||||
/** Source uvs for rays */
|
||||
df_v2 *ray_uvs;
|
||||
|
||||
/* Points into simd_mem */
|
||||
float *base_x;
|
||||
float *base_y;
|
||||
float *base_z;
|
||||
float *dir_x;
|
||||
float *dir_y;
|
||||
float *dir_z;
|
||||
|
||||
size_t ray_count;
|
||||
} df_ray_packet;
|
||||
|
||||
/** @brief Free ray packet memory */
|
||||
DF_API void df_release_ray_packet(df_ray_packet *rays);
|
||||
|
||||
DF_API void df_evaluate_ray_packet(const df_ray_packet *rays);
|
||||
|
||||
#endif
|
109
lib/camera.c
109
lib/camera.c
|
@ -1,6 +1,12 @@
|
|||
#include <defocus/camera.h>
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
/* ********************************************************
|
||||
*
|
||||
|
@ -8,7 +14,7 @@
|
|||
*
|
||||
* ********************************************************/
|
||||
|
||||
void df_release_ray_packet(df_ray_packet *rays)
|
||||
DF_API void df_release_ray_packet(df_ray_packet *rays)
|
||||
{
|
||||
free(rays->simd_mem);
|
||||
free(rays->ray_uvs);
|
||||
|
@ -26,18 +32,95 @@ typedef struct
|
|||
{
|
||||
float focal_dist;
|
||||
float lens_radius;
|
||||
|
||||
/* Raster space, i.e. the space for which we generate rays */
|
||||
float raster_width;
|
||||
float raster_height;
|
||||
|
||||
/* Image(=screen) space, i.e. the space in which we store pixels */
|
||||
float image_width;
|
||||
float image_height;
|
||||
|
||||
df_m4 screen_to_raster;
|
||||
df_m4 raster_to_screen;
|
||||
df_m4 raster_to_camera;
|
||||
|
||||
} df_perspective_camera;
|
||||
|
||||
static void pc_release(void *o) { df_perspective_camera *camera = o; }
|
||||
|
||||
static df_ray_packet pc_build_ray_packet(void *o) { df_perspective_camera *camera = o; }
|
||||
static df_ray_packet pc_build_ray_packet(void *o)
|
||||
{
|
||||
df_perspective_camera *camera = o;
|
||||
df_ray_packet packet;
|
||||
memset(&packet, 0, sizeof(packet));
|
||||
|
||||
df_camera_i df_create_perspective_camera(float image_width, float image_height, float raster_width, float raster_height)
|
||||
/* Generate 1 ray per pixel (ignore lens for now) */
|
||||
size_t count = (size_t)camera->raster_width * (size_t)camera->raster_height;
|
||||
|
||||
size_t alloc_count = count;
|
||||
/* Round up to nearest multiple of 4, for SSE.
|
||||
* Also satisfies 16 byte alignment (assuming simd_mem is 16 byte aligned)
|
||||
* because 4 * sizeof(float) = 16.
|
||||
*/
|
||||
if ((alloc_count % 4) != 0) {
|
||||
alloc_count = ((alloc_count + 3) / 4) * 4;
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
packet.simd_mem = _aligned_malloc(sizeof(float) * alloc_count * 6, 16);
|
||||
#elif defined(_ISOC11_SOURCE) /* Feature test macro for GCC */
|
||||
packet.simd_mem = aligned_alloc(16, sizeof(float) * alloc_count * 6);
|
||||
#else
|
||||
/* Fall back to regular malloc and hope for the best */
|
||||
packet.simd_mem = malloc(sizeof(float) * alloc_count * 6);
|
||||
#endif
|
||||
|
||||
packet.base_x = packet.simd_mem;
|
||||
packet.base_y = packet.base_x + alloc_count;
|
||||
packet.base_z = packet.base_y + alloc_count;
|
||||
packet.dir_x = packet.base_z + alloc_count;
|
||||
packet.dir_y = packet.dir_x + alloc_count;
|
||||
packet.dir_z = packet.dir_y + alloc_count;
|
||||
|
||||
packet.ray_uvs = malloc(sizeof(df_v2) * count);
|
||||
|
||||
packet.ray_count = count;
|
||||
|
||||
size_t i = 0;
|
||||
for (float y = 0; y < camera->raster_height; y += 1.f) {
|
||||
for (float x = 0; x < camera->raster_width; x += 1.f) {
|
||||
packet.base_x[i] = 0.f;
|
||||
packet.base_y[i] = 0.f;
|
||||
packet.base_z[i] = 0.f;
|
||||
|
||||
df_v3 raster_p = {x, y, 0.f};
|
||||
df_v3 camera_p = df_transform_v3(camera->raster_to_camera, raster_p);
|
||||
df_v3 dir = df_normalize_v3(camera_p);
|
||||
|
||||
packet.dir_x[i] = dir.x;
|
||||
packet.dir_y[i] = dir.y;
|
||||
packet.dir_z[i] = dir.z;
|
||||
|
||||
df_v3 img_p = df_transform_v3(camera->raster_to_screen, raster_p);
|
||||
packet.ray_uvs[i].u = img_p.x / camera->image_width;
|
||||
packet.ray_uvs[i].v = img_p.y / camera->image_height;
|
||||
|
||||
++i;
|
||||
assert(i <= count);
|
||||
}
|
||||
}
|
||||
|
||||
return packet;
|
||||
}
|
||||
|
||||
DF_API df_camera_i df_create_perspective_camera(float image_width,
|
||||
float image_height,
|
||||
float raster_width,
|
||||
float raster_height,
|
||||
float far_dist,
|
||||
float near_dist,
|
||||
float focal_dist,
|
||||
float lens_radius)
|
||||
{
|
||||
df_perspective_camera *camera = malloc(sizeof(*camera));
|
||||
|
||||
|
@ -45,9 +128,25 @@ df_camera_i df_create_perspective_camera(float image_width, float image_height,
|
|||
camera->screen_to_raster =
|
||||
df_mul_m4(camera->screen_to_raster, df_scale(1.f / image_width, -1.f / image_height, 1.f));
|
||||
camera->screen_to_raster = df_mul_m4(camera->screen_to_raster, df_translate(0.f, -image_height, 0.f));
|
||||
|
||||
camera->raster_to_screen = df_inverse_transform(camera->screen_to_raster);
|
||||
|
||||
/* Perspective projection matrix */
|
||||
df_m4 persp = {0.f};
|
||||
DF_M4_AT(persp, 0, 0) = 1.f;
|
||||
DF_M4_AT(persp, 1, 1) = 1.f;
|
||||
DF_M4_AT(persp, 2, 2) = far_dist / (far_dist - near_dist);
|
||||
DF_M4_AT(persp, 2, 3) = -1.f * far_dist * near_dist / (far_dist - near_dist);
|
||||
DF_M4_AT(persp, 3, 2) = 1.f;
|
||||
|
||||
camera->raster_to_camera = df_mul_m4(df_inverse(persp), camera->raster_to_screen);
|
||||
|
||||
camera->focal_dist = focal_dist;
|
||||
camera->lens_radius = lens_radius;
|
||||
camera->raster_width = raster_width;
|
||||
camera->raster_height = raster_height;
|
||||
camera->image_width = image_width;
|
||||
camera->image_height = image_height;
|
||||
|
||||
df_camera_i iface = {.release = pc_release, .build_ray_packet = pc_build_ray_packet, .o = camera};
|
||||
return iface;
|
||||
}
|
||||
|
|
14
lib/image.c
14
lib/image.c
|
@ -17,7 +17,7 @@ struct df_image
|
|||
uint8_t *pixels;
|
||||
};
|
||||
|
||||
df_result df_create_image(int w, int h, df_image **out_image)
|
||||
DF_API df_result df_create_image(int w, int h, df_image **out_image)
|
||||
{
|
||||
df_image *img = malloc(sizeof(df_image));
|
||||
if (!img)
|
||||
|
@ -37,7 +37,7 @@ df_result df_create_image(int w, int h, df_image **out_image)
|
|||
return df_result_success;
|
||||
}
|
||||
|
||||
df_result df_load_image(const char *path, int *out_w, int *out_h, df_image **out_image)
|
||||
DF_API df_result df_load_image(const char *path, int *out_w, int *out_h, df_image **out_image)
|
||||
{
|
||||
int w, h, c;
|
||||
stbi_uc *pixels = stbi_load(path, &w, &h, &c, 4);
|
||||
|
@ -69,7 +69,7 @@ out:
|
|||
return res;
|
||||
}
|
||||
|
||||
void df_release_image(df_image *img)
|
||||
DF_API void df_release_image(df_image *img)
|
||||
{
|
||||
if (img) {
|
||||
free(img->pixels);
|
||||
|
@ -77,7 +77,7 @@ void df_release_image(df_image *img)
|
|||
}
|
||||
}
|
||||
|
||||
df_result df_write_image(df_image *image, const char *path)
|
||||
DF_API df_result df_write_image(df_image *image, const char *path)
|
||||
{
|
||||
df_result res = stbi_write_png(path, image->width, image->height, 4, image->pixels, image->width * 4)
|
||||
? df_result_success
|
||||
|
@ -85,7 +85,7 @@ df_result df_write_image(df_image *image, const char *path)
|
|||
return res;
|
||||
}
|
||||
|
||||
void df_get_image_size(const df_image *image, int *w, int *h)
|
||||
DF_API void df_get_image_size(const df_image *image, int *w, int *h)
|
||||
{
|
||||
if (w)
|
||||
*w = image->width;
|
||||
|
@ -93,7 +93,7 @@ void df_get_image_size(const df_image *image, int *w, int *h)
|
|||
*h = image->height;
|
||||
}
|
||||
|
||||
df_color df_get_image_pixel(const df_image *image, int x, int y)
|
||||
DF_API df_color df_get_image_pixel(const df_image *image, int x, int y)
|
||||
{
|
||||
df_color c = {0, 0, 0, 255};
|
||||
if (x >= 0 && x < image->width && y >= 0 && y < image->height) {
|
||||
|
@ -102,7 +102,7 @@ df_color df_get_image_pixel(const df_image *image, int x, int y)
|
|||
return c;
|
||||
}
|
||||
|
||||
void df_set_image_pixel(df_image *image, int x, int y, df_color c)
|
||||
DF_API void df_set_image_pixel(df_image *image, int x, int y, df_color c)
|
||||
{
|
||||
if (x >= 0 && x < image->width && y >= 0 && y < image->height) {
|
||||
memcpy(&image->pixels[4 * (y * image->width + x)], &c.e[0], 4);
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
static const char *log_level_names[] = {"VERBOSE", "INFO", "WARN", "ERROR"};
|
||||
|
||||
void df_log_impl(int level, const char *file, int line, const char *fmt, ...)
|
||||
DF_API void df_log_impl(int level, const char *file, int line, const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
va_start(ap, fmt);
|
||||
|
|
173
lib/math.c
173
lib/math.c
|
@ -5,27 +5,27 @@
|
|||
#include <immintrin.h>
|
||||
#include <pmmintrin.h>
|
||||
|
||||
df_v2 df_add_v2(df_v2 a, df_v2 b)
|
||||
DF_API df_v2 df_add_v2(df_v2 a, df_v2 b)
|
||||
{
|
||||
df_v2 v = {a.x + b.x, a.y + b.y};
|
||||
return v;
|
||||
}
|
||||
|
||||
df_v2 df_sub_v2(df_v2 a, df_v2 b)
|
||||
DF_API df_v2 df_sub_v2(df_v2 a, df_v2 b)
|
||||
{
|
||||
df_v2 v = {a.x - b.x, a.y - b.y};
|
||||
return v;
|
||||
}
|
||||
|
||||
float df_dot_v2(df_v2 a, df_v2 b) { return a.x * b.x + a.y * b.y; }
|
||||
DF_API float df_dot_v2(df_v2 a, df_v2 b) { return a.x * b.x + a.y * b.y; }
|
||||
|
||||
df_v2 df_mul_v2(float t, df_v2 v)
|
||||
DF_API df_v2 df_mul_v2(float t, df_v2 v)
|
||||
{
|
||||
df_v2 r = {t * v.x, t * v.y};
|
||||
return r;
|
||||
}
|
||||
|
||||
df_v2 df_normalize_v2(df_v2 v)
|
||||
DF_API df_v2 df_normalize_v2(df_v2 v)
|
||||
{
|
||||
float len_square = df_dot_v2(v, v);
|
||||
float len = sqrtf(len_square);
|
||||
|
@ -33,27 +33,27 @@ df_v2 df_normalize_v2(df_v2 v)
|
|||
return n;
|
||||
}
|
||||
|
||||
df_v3 df_add_v3(df_v3 a, df_v3 b)
|
||||
DF_API df_v3 df_add_v3(df_v3 a, df_v3 b)
|
||||
{
|
||||
df_v3 v = {a.x + b.x, a.y + b.y, a.z + b.z};
|
||||
return v;
|
||||
}
|
||||
|
||||
df_v3 df_sub_v3(df_v3 a, df_v3 b)
|
||||
DF_API df_v3 df_sub_v3(df_v3 a, df_v3 b)
|
||||
{
|
||||
df_v3 v = {a.x - b.x, a.y - b.y, a.z - b.z};
|
||||
return v;
|
||||
}
|
||||
|
||||
float df_dot_v3(df_v3 a, df_v3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
|
||||
DF_API float df_dot_v3(df_v3 a, df_v3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
|
||||
|
||||
df_v3 df_mul_v3(float t, df_v3 v)
|
||||
DF_API df_v3 df_mul_v3(float t, df_v3 v)
|
||||
{
|
||||
df_v3 r = {t * v.x, t * v.y, t * v.z};
|
||||
return r;
|
||||
}
|
||||
|
||||
df_v3 df_normalize_v3(df_v3 v)
|
||||
DF_API df_v3 df_normalize_v3(df_v3 v)
|
||||
{
|
||||
float len_square = df_dot_v3(v, v);
|
||||
float len = sqrtf(len_square);
|
||||
|
@ -61,7 +61,7 @@ df_v3 df_normalize_v3(df_v3 v)
|
|||
return n;
|
||||
}
|
||||
|
||||
df_line_plane_intersection df_calc_line_plane_intersection(df_line line, df_plane plane)
|
||||
DF_API df_line_plane_intersection df_calc_line_plane_intersection(df_line line, df_plane plane)
|
||||
{
|
||||
/* check case */
|
||||
float dot = df_dot_v3(line.direction, plane.normal);
|
||||
|
@ -90,7 +90,7 @@ df_line_plane_intersection df_calc_line_plane_intersection(df_line line, df_plan
|
|||
}
|
||||
}
|
||||
|
||||
df_m4 df_mul_m4(df_m4 a, df_m4 b)
|
||||
DF_API df_m4 df_mul_m4(df_m4 a, df_m4 b)
|
||||
{
|
||||
/* Super simple, we could probably do it a lot better via SIMD. */
|
||||
df_m4 p;
|
||||
|
@ -105,7 +105,7 @@ df_m4 df_mul_m4(df_m4 a, df_m4 b)
|
|||
return p;
|
||||
}
|
||||
|
||||
df_m4 df_scale(float x, float y, float z)
|
||||
DF_API df_m4 df_scale(float x, float y, float z)
|
||||
{
|
||||
/* clang-format off */
|
||||
df_m4 s = {{
|
||||
|
@ -118,7 +118,7 @@ df_m4 df_scale(float x, float y, float z)
|
|||
return s;
|
||||
}
|
||||
|
||||
df_m4 df_translate(float x, float y, float z)
|
||||
DF_API df_m4 df_translate(float x, float y, float z)
|
||||
{
|
||||
/* clang-format off */
|
||||
df_m4 t = {{
|
||||
|
@ -153,7 +153,7 @@ static float hsum(__m128 v)
|
|||
return _mm_cvtss_f32(sum);
|
||||
}
|
||||
|
||||
df_v3 df_transform_v3(df_m4 T, df_v3 v)
|
||||
DF_API df_v3 df_transform_v3(df_m4 T, df_v3 v)
|
||||
{
|
||||
df_v3 transf;
|
||||
_Alignas(16) float tmp_v[4] = {v.x, v.y, v.z, 1.f};
|
||||
|
@ -182,56 +182,145 @@ df_v3 df_transform_v3(df_m4 T, df_v3 v)
|
|||
* | R T |
|
||||
* | 0 1 |
|
||||
*/
|
||||
df_m4 df_inverse_transform_no_scale(df_m4 M)
|
||||
DF_API df_m4 df_inverse_transform_no_scale(df_m4 M)
|
||||
{
|
||||
df_m4 I = {0.f};
|
||||
|
||||
/* transpose 3x3, we know that m03 = m13 = m23 = 0 */
|
||||
/* transpose 3x3 */
|
||||
__m128 t0 = DF_VEC_SHUFFLE_0101(M.vec[0], M.vec[1]); /* 00, 01, 10, 11 */
|
||||
__m128 t1 = DF_VEC_SHUFFLE_2323(M.vec[0], M.vec[1]); /* 02, 03, 12, 13 */
|
||||
I.vec[0] = DF_VEC_SHUFFLE(t0, M.vec[2], 0, 2, 0, 3); /* 00, 01, 20, 23(=0) */
|
||||
I.vec[1] = DF_VEC_SHUFFLE(t0, M.vec[2], 1, 3, 1, 3); /* 01, 11, 21, 23(=0) */
|
||||
I.vec[2] = DF_VEC_SHUFFLE(t1, M.vec[2], 0, 2, 2, 3); /* 02, 12, 22, 23(=0) */
|
||||
|
||||
/* last */
|
||||
I.vec[3] = _mm_mul_ps(I.vec[0], DF_VEC_SWIZZLE1(M.vec[3], 0));
|
||||
I.vec[3] = _mm_add_ps(I.vec[3], _mm_mul_ps(I.vec[1], DF_VEC_SWIZZLE1(M.vec[3], 1)));
|
||||
I.vec[3] = _mm_add_ps(I.vec[3], _mm_mul_ps(I.vec[2], DF_VEC_SWIZZLE1(M.vec[3], 2)));
|
||||
I.vec[3] = _mm_sub_ps(_mm_setr_ps(0.f, 0.f, 0.f, 1.f), I.vec[3]);
|
||||
__m128 t = _mm_setr_ps(DF_M4_AT(M, 0, 3), DF_M4_AT(M, 1, 3), DF_M4_AT(M, 2, 3), 0.f);
|
||||
DF_M4_AT(I, 0, 3) = -1.f * hsum(_mm_mul_ps(I.vec[0], t));
|
||||
|
||||
DF_M4_AT(I, 1, 3) = -1.f * hsum(_mm_mul_ps(I.vec[1], t));
|
||||
DF_M4_AT(I, 2, 3) = -1.f * hsum(_mm_mul_ps(I.vec[2], t));
|
||||
I.vec[3] = _mm_setr_ps(0.f, 0.f, 0.f, 1.f);
|
||||
|
||||
return I;
|
||||
}
|
||||
|
||||
df_m4 df_inverse_transform(df_m4 M)
|
||||
DF_API df_m4 df_inverse_transform(df_m4 M)
|
||||
{
|
||||
#define SMALL_NUMBER (1.e-8f)
|
||||
|
||||
df_m4 I = {0.f};
|
||||
|
||||
/* transpose 3x3, we know that m03 = m13 = m23 = 0 */
|
||||
/* transpose 3x3 */
|
||||
__m128 t0 = DF_VEC_SHUFFLE_0101(M.vec[0], M.vec[1]); /* 00, 01, 10, 11 */
|
||||
__m128 t1 = DF_VEC_SHUFFLE_2323(M.vec[0], M.vec[1]); /* 02, 03, 12, 13 */
|
||||
I.vec[0] = DF_VEC_SHUFFLE(t0, M.vec[2], 0, 2, 0, 3); /* 00, 01, 20, 23(=0) */
|
||||
I.vec[1] = DF_VEC_SHUFFLE(t0, M.vec[2], 1, 3, 1, 3); /* 01, 11, 21, 23(=0) */
|
||||
I.vec[2] = DF_VEC_SHUFFLE(t1, M.vec[2], 0, 2, 2, 3); /* 02, 12, 22, 23(=0) */
|
||||
|
||||
/* divide by the squared scale */
|
||||
__m128 size_sqr = _mm_mul_ps(I.vec[0], I.vec[0]);
|
||||
size_sqr = _mm_add_ps(size_sqr, _mm_mul_ps(I.vec[1], I.vec[1]));
|
||||
size_sqr = _mm_add_ps(size_sqr, _mm_mul_ps(I.vec[2], I.vec[2]));
|
||||
__m128 t = _mm_setr_ps(DF_M4_AT(M, 0, 3), DF_M4_AT(M, 1, 3), DF_M4_AT(M, 2, 3), 0.f);
|
||||
DF_M4_AT(I, 0, 3) = -1.f * hsum(_mm_mul_ps(I.vec[0], t));
|
||||
|
||||
__m128 r_size_sqr = _mm_div_ps(_mm_set1_ps(1.f), size_sqr);
|
||||
|
||||
I.vec[0] = _mm_mul_ps(I.vec[0], r_size_sqr);
|
||||
I.vec[1] = _mm_mul_ps(I.vec[1], r_size_sqr);
|
||||
I.vec[2] = _mm_mul_ps(I.vec[2], r_size_sqr);
|
||||
|
||||
/* last */
|
||||
I.vec[3] = _mm_mul_ps(I.vec[0], DF_VEC_SWIZZLE1(M.vec[3], 0));
|
||||
I.vec[3] = _mm_add_ps(I.vec[3], _mm_mul_ps(I.vec[1], DF_VEC_SWIZZLE1(M.vec[3], 1)));
|
||||
I.vec[3] = _mm_add_ps(I.vec[3], _mm_mul_ps(I.vec[2], DF_VEC_SWIZZLE1(M.vec[3], 2)));
|
||||
I.vec[3] = _mm_sub_ps(_mm_setr_ps(0.f, 0.f, 0.f, 1.f), I.vec[3]);
|
||||
DF_M4_AT(I, 1, 3) = -1.f * hsum(_mm_mul_ps(I.vec[1], t));
|
||||
DF_M4_AT(I, 2, 3) = -1.f * hsum(_mm_mul_ps(I.vec[2], t));
|
||||
I.vec[3] = _mm_setr_ps(0.f, 0.f, 0.f, 1.f);
|
||||
|
||||
return I;
|
||||
#undef SMALL_NUMBER
|
||||
}
|
||||
|
||||
|
||||
|
||||
// for row major matrix
|
||||
// we use __m128 to represent 2x2 matrix as A = | A0 A1 |
|
||||
// | A2 A3 |
|
||||
// 2x2 row major Matrix multiply A*B
|
||||
static __forceinline __m128 mat2_mul(__m128 vec1, __m128 vec2)
|
||||
{
|
||||
return _mm_add_ps(_mm_mul_ps(vec1, DF_VEC_SWIZZLE(vec2, 0, 3, 0, 3)),
|
||||
_mm_mul_ps(DF_VEC_SWIZZLE(vec1, 1, 0, 3, 2), DF_VEC_SWIZZLE(vec2, 2, 1, 2, 1)));
|
||||
}
|
||||
// 2x2 row major Matrix adjugate multiply (A#)*B
|
||||
static __forceinline __m128 mat2_adj_mul(__m128 vec1, __m128 vec2)
|
||||
{
|
||||
return _mm_sub_ps(_mm_mul_ps(DF_VEC_SWIZZLE(vec1, 3, 3, 0, 0), vec2),
|
||||
_mm_mul_ps(DF_VEC_SWIZZLE(vec1, 1, 1, 2, 2), DF_VEC_SWIZZLE(vec2, 2, 3, 0, 1)));
|
||||
}
|
||||
// 2x2 row major Matrix multiply adjugate A*(B#)
|
||||
static __forceinline __m128 mat2_mul_adj(__m128 vec1, __m128 vec2)
|
||||
{
|
||||
return _mm_sub_ps(_mm_mul_ps(vec1, DF_VEC_SWIZZLE(vec2, 3, 0, 3, 0)),
|
||||
_mm_mul_ps(DF_VEC_SWIZZLE(vec1, 1, 0, 3, 2), DF_VEC_SWIZZLE(vec2, 2, 1, 2, 1)));
|
||||
}
|
||||
|
||||
DF_API df_m4 df_inverse( df_m4 M ) {
|
||||
// use block matrix method
|
||||
// A is a matrix, then i(A) or iA means inverse of A, A# (or A_ in code) means adjugate of A, |A| (or detA in code)
|
||||
// is determinant, tr(A) is trace
|
||||
|
||||
// sub matrices
|
||||
__m128 A = DF_VEC_SHUFFLE_0101(M.vec[0], M.vec[1]);
|
||||
__m128 B = DF_VEC_SHUFFLE_2323(M.vec[0], M.vec[1]);
|
||||
__m128 C = DF_VEC_SHUFFLE_0101(M.vec[2], M.vec[3]);
|
||||
__m128 D = DF_VEC_SHUFFLE_2323(M.vec[2], M.vec[3]);
|
||||
|
||||
#if 0
|
||||
__m128 detA = _mm_set1_ps(M.m[0][0] * M.m[1][1] - M.m[0][1] * M.m[1][0]);
|
||||
__m128 detB = _mm_set1_ps(M.m[0][2] * M.m[1][3] - M.m[0][3] * M.m[1][2]);
|
||||
__m128 detC = _mm_set1_ps(M.m[2][0] * M.m[3][1] - M.m[2][1] * M.m[3][0]);
|
||||
__m128 detD = _mm_set1_ps(M.m[2][2] * M.m[3][3] - M.m[2][3] * M.m[3][2]);
|
||||
#else
|
||||
// determinant as (|A| |B| |C| |D|)
|
||||
__m128 detSub = _mm_sub_ps(
|
||||
_mm_mul_ps(DF_VEC_SHUFFLE(M.vec[0], M.vec[2], 0, 2, 0, 2), DF_VEC_SHUFFLE(M.vec[1], M.vec[3], 1, 3, 1, 3)),
|
||||
_mm_mul_ps(DF_VEC_SHUFFLE(M.vec[0], M.vec[2], 1, 3, 1, 3), DF_VEC_SHUFFLE(M.vec[1], M.vec[3], 0, 2, 0, 2)));
|
||||
__m128 detA = DF_VEC_SWIZZLE1(detSub, 0);
|
||||
__m128 detB = DF_VEC_SWIZZLE1(detSub, 1);
|
||||
__m128 detC = DF_VEC_SWIZZLE1(detSub, 2);
|
||||
__m128 detD = DF_VEC_SWIZZLE1(detSub, 3);
|
||||
#endif
|
||||
|
||||
// let iM = 1/|M| * | X Y |
|
||||
// | Z W |
|
||||
|
||||
// D#C
|
||||
__m128 D_C = mat2_adj_mul(D, C);
|
||||
// A#B
|
||||
__m128 A_B = mat2_adj_mul(A, B);
|
||||
// X# = |D|A - B(D#C)
|
||||
__m128 X_ = _mm_sub_ps(_mm_mul_ps(detD, A), mat2_mul(B, D_C));
|
||||
// W# = |A|D - C(A#B)
|
||||
__m128 W_ = _mm_sub_ps(_mm_mul_ps(detA, D), mat2_mul(C, A_B));
|
||||
|
||||
// |M| = |A|*|D| + ... (continue later)
|
||||
__m128 detM = _mm_mul_ps(detA, detD);
|
||||
|
||||
// Y# = |B|C - D(A#B)#
|
||||
__m128 Y_ = _mm_sub_ps(_mm_mul_ps(detB, C), mat2_mul_adj(D, A_B));
|
||||
// Z# = |C|B - A(D#C)#
|
||||
__m128 Z_ = _mm_sub_ps(_mm_mul_ps(detC, B), mat2_mul_adj(A, D_C));
|
||||
|
||||
// |M| = |A|*|D| + |B|*|C| ... (continue later)
|
||||
detM = _mm_add_ps(detM, _mm_mul_ps(detB, detC));
|
||||
|
||||
// tr((A#B)(D#C))
|
||||
__m128 tr = _mm_mul_ps(A_B, DF_VEC_SWIZZLE(D_C, 0, 2, 1, 3));
|
||||
tr = _mm_hadd_ps(tr, tr);
|
||||
tr = _mm_hadd_ps(tr, tr);
|
||||
// |M| = |A|*|D| + |B|*|C| - tr((A#B)(D#C)
|
||||
detM = _mm_sub_ps(detM, tr);
|
||||
|
||||
const __m128 adjSignMask = _mm_setr_ps(1.f, -1.f, -1.f, 1.f);
|
||||
// (1/|M|, -1/|M|, -1/|M|, 1/|M|)
|
||||
__m128 rDetM = _mm_div_ps(adjSignMask, detM);
|
||||
|
||||
X_ = _mm_mul_ps(X_, rDetM);
|
||||
Y_ = _mm_mul_ps(Y_, rDetM);
|
||||
Z_ = _mm_mul_ps(Z_, rDetM);
|
||||
W_ = _mm_mul_ps(W_, rDetM);
|
||||
|
||||
df_m4 r;
|
||||
|
||||
// apply adjugate and store, here we combine adjugate shuffle and store shuffle
|
||||
r.vec[0] = DF_VEC_SHUFFLE(X_, Y_, 3, 1, 3, 1);
|
||||
r.vec[1] = DF_VEC_SHUFFLE(X_, Y_, 2, 0, 2, 0);
|
||||
r.vec[2] = DF_VEC_SHUFFLE(Z_, W_, 3, 1, 3, 1);
|
||||
r.vec[3] = DF_VEC_SHUFFLE(Z_, W_, 2, 0, 2, 0);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
|
51
lib/raytracing.c
Normal file
51
lib/raytracing.c
Normal file
|
@ -0,0 +1,51 @@
|
|||
#include <defocus/camera.h>
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
DF_API void df_evaluate_ray_packet(const df_ray_packet *rays) {
|
||||
const __m128 *base_x = (const __m128 *)rays->base_x;
|
||||
const __m128 *base_y = (const __m128 *)rays->base_y;
|
||||
const __m128 *base_z = (const __m128 *)rays->base_z;
|
||||
|
||||
const __m128 *dir_x = (const __m128 *)rays->dir_x;
|
||||
const __m128 *dir_y = (const __m128 *)rays->dir_y;
|
||||
const __m128 *dir_z = (const __m128 *)rays->dir_z;
|
||||
|
||||
/* Simple test: Let rays intersect with plane at z = 350.0 */
|
||||
|
||||
float PLANE_Z = 350.0f;
|
||||
|
||||
__m128 plane_z = _mm_set1_ps(PLANE_Z);
|
||||
|
||||
size_t ray_count = rays->ray_count;
|
||||
|
||||
/* TODO(kevin): divide to multiple threads */
|
||||
for (size_t i = 0; i < ray_count; i += 4) {
|
||||
__m128 rays_base_x = base_x[i];
|
||||
__m128 rays_base_y = base_y[i];
|
||||
__m128 rays_base_z = base_z[i];
|
||||
__m128 rays_dir_x = dir_x[i];
|
||||
__m128 rays_dir_y = dir_y[i];
|
||||
__m128 rays_dir_z = dir_z[i];
|
||||
|
||||
/* Solve for t: base.z + t * dir.z = plane_z
|
||||
* t = (plane_z - base.z) / dir.z
|
||||
*/
|
||||
__m128 delta = _mm_sub_ps(plane_z, rays_base_z);
|
||||
__m128 t = _mm_div_ps(delta, rays_dir_z);
|
||||
|
||||
/* Sample p = base.z + t * dir */
|
||||
__m128 sample_p_x = _mm_mul_ps(t, rays_dir_x);
|
||||
__m128 sample_p_y = _mm_mul_ps(t, rays_dir_y);
|
||||
__m128 sample_p_z = _mm_mul_ps(t, rays_dir_z);
|
||||
sample_p_x = _mm_add_ps(sample_p_x, rays_base_x);
|
||||
sample_p_y = _mm_add_ps(sample_p_y, rays_base_y);
|
||||
sample_p_z = _mm_add_ps(sample_p_z, rays_base_z);
|
||||
|
||||
}
|
||||
|
||||
/* Handle remaining (< 4) rays */
|
||||
if ((ray_count % 4) != 0) {
|
||||
|
||||
}
|
||||
}
|
21
meson.build
21
meson.build
|
@ -5,7 +5,9 @@ incdir = include_directories('include', '3p')
|
|||
cc = meson.get_compiler('c')
|
||||
m_dep = cc.find_library('m', required: false)
|
||||
|
||||
add_project_arguments([ '-msse3', '-Wno-missing-braces' ], language: 'c')
|
||||
if cc.get_id() == 'gcc' or cc.get_id() == 'clang'
|
||||
add_project_arguments([ '-msse3', '-msse4.1', '-Wno-missing-braces' ], language: 'c')
|
||||
endif
|
||||
|
||||
lib = library('df',
|
||||
'lib/log.c',
|
||||
|
@ -15,14 +17,21 @@ lib = library('df',
|
|||
'lib/image.c',
|
||||
'lib/color.c',
|
||||
'lib/thin_lense.c',
|
||||
'lib/raytracing.c',
|
||||
'include/defocus/base.h',
|
||||
'include/defocus/camera.h',
|
||||
'include/defocus/defocus.h',
|
||||
'include/defocus/image.h',
|
||||
'include/defocus/intrinsic_helper.h',
|
||||
'include/defocus/models.h',
|
||||
'include/defocus/scene.h',
|
||||
'include/defocus/raytracing.h',
|
||||
include_directories: incdir,
|
||||
dependencies: m_dep,
|
||||
version: '0.1.0',
|
||||
soversion: '0')
|
||||
dependencies: m_dep)
|
||||
|
||||
# Command Line Executable
|
||||
executable('defocus', 'bin/defocus.c', include_directories: incdir, link_with: lib)
|
||||
|
||||
# Test driver
|
||||
munit_dep = dependency('munit', fallback: ['munit', 'munit_dep'])
|
||||
executable('tests', 'tests/tests.c', include_directories: incdir, link_with: lib, dependencies: munit_dep)
|
||||
#munit_dep = dependency('munit', fallback: ['munit', 'munit_dep'])
|
||||
#executable('tests', 'tests/tests.c', include_directories: incdir, link_with: lib, dependencies: munit_dep)
|
Loading…
Reference in New Issue
Block a user