actually trace the dumb test image

does not take plane rotation into account!
dump state
2023-06-19 17:27:23 +02:00 · 2023-05-23 13:20:59 +02:00
22 changed files with 509 additions and 1674 deletions
--- a/bin/defocus.c
+++ b/bin/defocus.c
@ -1,242 +1,63 @@
-#include <stdio.h>
+/* Main command line executable. */
-#include <string.h>
+
 #include <stdlib.h>
 #include <defocus/defocus.h>
-#include <defocus/camera.h>
+#define STB_IMAGE_WRITE_IMPLEMENTATION
 #include <stb_image_write.h>
-int pinhole_fn(int argc, char **argv);
+int main(int argc, char **argv) {
-int thin_lense_fn(int argc, char **argv);
+    df_sphere spheres[2];
-int test_fn(int argc, char **argv);
+    spheres[0].center_x = 0.f;
    spheres[0].center_y = 0.f;
    spheres[0].center_z = -1.f;
    spheres[0].radius = .5f;
    spheres[1].center_x = 0.25f;
    spheres[1].center_y = 0.25f;
    spheres[1].center_z = -1.5f;
    spheres[1].radius = .75f;
-static const char *_model_names[] = {"pinhole", "thin_lense", "test"};
+    int image_width = 1024;
-typedef int (*model_fn)(int argc, char **argv);
+    int image_height = 512;
-static model_fn _model_fns[] = {
+    df_image_handle input_image = df_load_image("../test_image.png", &image_width, &image_height);
-    pinhole_fn,
+    float aspect = (float)image_width / (float)image_height;
    thin_lense_fn,
    test_fn,
 };
-void usage(const char *pname)
+    df_plane plane;
-{
+    plane.base_x = 0.f;
-    printf("Usage: %s <model-name> [model parameters]\n", pname);
+    plane.base_y = 0.f;
-    printf("Available models:\n");
+    plane.base_z = -1.f;
-    for (int i = 0; i < DF_ARRAY_COUNT(_model_names); ++i) {
+    plane.normal_x = 0.f;
-        printf(" %s\n", _model_names[i]);
+    plane.normal_y = 0.f;
-    }
+    plane.normal_z = 1.f;
-}
+    plane.img_p0_x = -aspect;
    plane.img_p0_y = -1.f;
    plane.img_p0_z = plane.base_z;
    plane.img_p1_x = aspect;
    plane.img_p1_y = 1.f;
    plane.img_p1_z = plane.base_z;
    plane.img_ax0_x = 1.f;
    plane.img_ax0_y = 0.f;
    plane.img_ax0_z = 0.f;
    plane.img_ax1_x = 0.f;
    plane.img_ax1_y = 1.f;
    plane.img_ax1_z = 0.f;
    plane.image = input_image;
-int main(int argc, char **argv)
+    uint8_t *image = NULL;
 {
    if (argc < 2) {
        fprintf(stderr, "Missing model name!\n");
        usage(argv[0]);
        return 1;
    }
    model_fn fn = NULL;
    const char *model = argv[1];
    for (int i = 0; i < DF_ARRAY_COUNT(_model_names); ++i) {
        if (strcmp(model, _model_names[i]) == 0) {
            fn = _model_fns[i];
            break;
        }
    }
    if (!fn) {
        fprintf(stderr, "Invalid model name!\n");
        usage(argv[0]);
        return 1;
    }
-    return fn(argc - 2, &argv[2]);
+    df_trace_rays((df_trace_rays_settings){
-}
+            .focal_length = 1.f,
            .image_width = image_width,
            .image_height = image_height
-int pinhole_fn(int argc, char **argv)
+        },
-{
+        spheres, 0,
-    const char *in_path = NULL;
+        &plane, 1,
-    const char *out_path = "out.png";
+        &image);
    const char *focal_str = NULL;
    const char *in_z_str = NULL;
    const char *out_z_str = NULL;
-    for (int i = 0; i < argc; ++i) {
+    stbi_write_png("raytracer.png", image_width, image_height, 3, image, image_width * 3);
        if ((strcmp(argv[i], "-i") == 0 || strcmp(argv[i], "--input") == 0) && i < argc - 1) {
            ++i;
            in_path = argv[i];
        } else if ((strcmp(argv[i], "-o") == 0 || strcmp(argv[i], "--output") == 0) && i < argc - 1) {
            ++i;
            out_path = argv[i];
        } else if ((strcmp(argv[i], "-f") == 0 || strcmp(argv[i], "--focal-length") == 0) && i < argc - 1) {
            ++i;
            focal_str = argv[i];
        } else if ((strcmp(argv[i], "-iz") == 0 || strcmp(argv[i], "--input-z") == 0) && i < argc - 1) {
            ++i;
            in_z_str = argv[i];
        } else if ((strcmp(argv[i], "-oz") == 0 || strcmp(argv[i], "--output-z") == 0) && i < argc - 1) {
            ++i;
            out_z_str = argv[i];
        } else {
            in_path = argv[i];
        }
    }
    if (!in_path || !focal_str || !in_z_str || !out_z_str) {
        fprintf(stderr, "Missing model parameters!\n");
        printf("Pinhole model parameters:\n");
        printf("  -i  | --input <path>\t\tPath to the input image.\n");
        printf("  -o  | --output <path>\t\tPath to the output image (Default: out.png).\n");
        printf("  -iz | --input-z <number>\t\tDistance to the input image plane.\n");
        printf("  -oz | --output-z <number>\t\tDistance to the output image plane.\n");
        printf("  -f  | --focal-length <number>\t\tThe focal length of the camera lens.\n");
        return 1;
    }
    float focal, in_z, out_z;
    focal = atof(focal_str);
    in_z = atof(in_z_str);
    out_z = atof(out_z_str);
    df_image *in_image, *out_image;
    int w, h;
    df_result res = df_load_image(in_path, &w, &h, &in_image);
    if (res != df_result_success) {
        fprintf(stderr, "Failed to load %s! (%d)\n", in_path, (int)res);
        return 1;
    }
    res = df_create_image(w, h, &out_image);
    if (res != df_result_success) {
        fprintf(stderr, "Failed to create output image (%d)\n", (int)res);
        df_release_image(in_image);
        return 1;
    }
    df_pinhole((df_pinhole_params){.orig_z = in_z, .new_z = out_z, .focal_length = focal}, in_image, out_image);
    int error_code = 0;
    res = df_write_image(out_image, out_path);
    if (res != df_result_success) {
        fprintf(stderr, "Failed to write to output image %s (%d)\n", out_path, (int)res);
        error_code = 1;
    }
    df_release_image(in_image);
    df_release_image(out_image);
    return error_code;
 }
 int thin_lense_fn(int argc, char **argv)
 {
    const char *in_path = NULL;
    const char *out_path = "out.png";
    const char *focal_str = NULL;
    const char *aperture_str = NULL;
    const char *in_z_str = NULL;
    const char *out_z_str = NULL;
    const char *samples_str = "64";
    for (int i = 0; i < argc; ++i) {
        if ((strcmp(argv[i], "-i") == 0 || strcmp(argv[i], "--input") == 0) && i < argc - 1) {
            ++i;
            in_path = argv[i];
        } else if ((strcmp(argv[i], "-o") == 0 || strcmp(argv[i], "--output") == 0) && i < argc - 1) {
            ++i;
            out_path = argv[i];
        } else if ((strcmp(argv[i], "-f") == 0 || strcmp(argv[i], "--focal-length") == 0) && i < argc - 1) {
            ++i;
            focal_str = argv[i];
        } else if ((strcmp(argv[i], "-iz") == 0 || strcmp(argv[i], "--input-z") == 0) && i < argc - 1) {
            ++i;
            in_z_str = argv[i];
        } else if ((strcmp(argv[i], "-oz") == 0 || strcmp(argv[i], "--output-z") == 0) && i < argc - 1) {
            ++i;
            out_z_str = argv[i];
        } else if ((strcmp(argv[i], "-a") == 0 || strcmp(argv[i], "--aperture") == 0) && i < argc - 1) {
            ++i;
            aperture_str = argv[i];
        } else if ((strcmp(argv[i], "-s") == 0 || strcmp(argv[i], "--samples") == 0) && i < argc - 1) {
            ++i;
            samples_str = argv[i];
        } else {
            in_path = argv[i];
        }
    }
    if (!in_path || !focal_str || !in_z_str || !out_z_str) {
        fprintf(stderr, "Missing model parameters!\n");
        printf("Pinhole model parameters:\n");
        printf("  -i  | --input <path>\t\tPath to the input image.\n");
        printf("  -o  | --output <path>\t\tPath to the output image (Default: out.png).\n");
        printf("  -iz | --input-z <number>\t\tDistance to the input image plane.\n");
        printf("  -oz | --output-z <number>\t\tDistance to the output image plane.\n");
        printf("  -f  | --focal-length <number>\t\tThe focal length of the camera lens.\n");
        printf("  -a  | --aperture <number>\t\tThe size of the camera lens.\n");
        printf("  -s  | --samples <number>\t\tThe number of samples per pixel (Default: 64).\n");
        return 1;
    }
    float focal, in_z, out_z, aperture;
    int samples;
    focal = atof(focal_str);
    in_z = atof(in_z_str);
    out_z = atof(out_z_str);
    aperture = atof(aperture_str);
    samples = atoi(samples_str);
    df_image *in_image, *out_image;
    int w, h;
    df_result res = df_load_image(in_path, &w, &h, &in_image);
    if (res != df_result_success) {
        fprintf(stderr, "Failed to load %s! (%d)\n", in_path, (int)res);
        return 1;
    }
    res = df_create_image(w, h, &out_image);
    if (res != df_result_success) {
        fprintf(stderr, "Failed to create output image (%d)\n", (int)res);
        df_release_image(in_image);
        return 1;
    }
    df_thin_lense((df_thin_lense_params){.focal_distance = in_z,
                                         .out_distance = out_z,
                                         .focal_length = focal,
                                         .aperture = aperture,
                                         .sample_count = samples},
                  in_image,
                  out_image);
    int error_code = 0;
    res = df_write_image(out_image, out_path);
    if (res != df_result_success) {
        fprintf(stderr, "Failed to write to output image %s (%d)\n", out_path, (int)res);
        error_code = 1;
    }
    df_release_image(in_image);
    df_release_image(out_image);
    return error_code;
 }
 int test_fn( int argc, char **argv ) {
    df_v3 v = {1.f, 2.f, 3.f};
    df_m4 t = df_translate(-1.f, 2.f, 1.5f);
    t = df_inverse_transform(t);
    df_v3 tv = df_transform_v3(t, v);
    df_camera_i cam = df_create_perspective_camera(1024.0, 1024.0, 1024.0, 1024.0, 2.e-4f, 1000.f, 350.0, 21.0);
    df_ray_packet packet = cam.build_ray_packet(cam.o);
    df_evaluate_ray_packet(&packet);
    df_release_ray_packet(&packet);
    cam.release(cam.o);
    return 0;
 }
--- a/include/defocus/base.h
+++ b/include/defocus/base.h
@ -1,278 +0,0 @@
 #ifndef DEFOCUS_BASE_H
 #define DEFOCUS_BASE_H
 #include <inttypes.h>
 #include <string.h>
 #include <immintrin.h>
 /** @file base.h
 * @brief basic utilities
 */
 /** @brief Result codes used throughout the codebase */
 typedef enum
 {
    /** @brief operation successfull */
    df_result_success = 0,
    /** @brief memory allocation failed */
    df_result_out_of_memory = 1,
    /** @brief file i/o error */
    df_result_io_error = 2,
 } df_result;
 /** @brief Used to silence warnings about unused variables */
 #define DF_UNUSED(x) ((void)sizeof((x)))
 /** @brief Evaluates to the number of elements in a static array. */
 #define DF_ARRAY_COUNT(a) (sizeof((a)) / sizeof((a)[0]))
 /** @brief Zero a memory range */
 #define DF_ZERO_MEMORY(ptr, n) memset((ptr), 0, (n))
 /** @brief Zero a struct */
 #define DF_ZERO_STRUCT(ptr) DF_ZERO_MEMORY(ptr, sizeof(*(ptr)))
 /** @brief Zero an array */
 #define DF_ZERO_ARRAY(a, n) DF_ZERO_MEMORY(a, sizeof((a)[0]) * (n))
 #ifdef _MSC_VER
 #ifdef __cplusplus__
 #define DF_API extern "C" __declspec(dllexport)
 #else
 #define DF_API __declspec(dllexport)
 #endif /* _MSC_VER && __cplusplus__ */
 #else
 #ifdef __cplusplus__
 #define DF_API extern "C"
 #else
 #define DF_API
 #endif
 #endif
 /* Simple logging function */
 #ifndef DF_ENABLE_LOGGING
 #define DF_ENABLE_LOGGING 1
 #endif
 enum
 {
    df_log_level_verbose = 0,
    df_log_level_info = 1,
    df_log_level_warn = 2,
    df_log_level_error = 3,
 };
 #if DF_ENABLE_LOGGING
 /** @brief The actual log implementation.
 *
 * There should be no need to call this directly.
 *
 * Instead use one of:
 * - df_log_verbose()
 * - df_log_info()
 * - df_log_warn()
 * - df_log_error()
 * @param level the log level. Used to filter the messages.
 * @param file file name of the log location
 * @param line line number of the log location
 * @param fmt format string of the message.
 */
 DF_API void df_log_impl(int level, const char *file, int line, const char *fmt, ...);
 #define df_log_verbose(...) df_log_impl(df_log_level_verbose, __FILE__, __LINE__, __VA_ARGS__)
 #define df_log_info(...) df_log_impl(df_log_level_info, __FILE__, __LINE__, __VA_ARGS__)
 #define df_log_warn(...) df_log_impl(df_log_level_warn, __FILE__, __LINE__, __VA_ARGS__)
 #define df_log_error(...) df_log_impl(df_log_level_error, __FILE__, __LINE__, __VA_ARGS__)
 #else /* DF_ENABLE_LOGGING = 0 */
 #define df_log_verbose(fmt, ...)
 #define df_log_info(fmt, ...)
 #define df_log_warn(fmt, ...)
 #define df_log_error(fmt, ...)
 #endif
 /* Math constants.
 */
 #define DF_PI 3.1415926f
 #define DF_PI_OVER_2 1.570796f
 #define DF_PI_OVER_4 0.785398f
 /* Basic types */
 /** @brief RGBA color */
 typedef union {
    struct
    {
        uint8_t r;
        uint8_t g;
        uint8_t b;
        uint8_t a;
    };
    uint8_t e[4];
 } df_color;
 /** @brief Linear interpolation between two colors.
 *
 * @param a first color
 * @param b second color
 * @param t interpolation factor
 * @return a + (b - a) * t
 */
 DF_API df_color df_lerp_color(df_color a, df_color b, double t);
 /** @brief 2d vector */
 typedef union {
    struct
    {
        float x;
        float y;
    };
    struct
    {
        float u;
        float v;
    };
    float e[2];
 } df_v2;
 /** @brief Add two 2d vectors */
 DF_API df_v2 df_add_v2(df_v2 a, df_v2 b);
 /** @brief Subtract two 2d vectors */
 DF_API df_v2 df_sub_v2(df_v2 a, df_v2 b);
 /** @brief Calculate the dot product of 2d vectors a and b.
 */
 DF_API float df_dot_v2(df_v2 a, df_v2 b);
 /** @brief Multiply a 2d vector with a scalar.
 */
 DF_API df_v2 df_mul_v2(float t, df_v2 v);
 /** @brief Returns the normalized version of a 2d vector v
 */
 DF_API df_v2 df_normalize_v2(df_v2 v);
 /** @brief 3d vector */
 typedef union {
    struct
    {
        float x;
        float y;
        float z;
    };
    float e[3];
 } df_v3;
 /** @brief Add two 3d vectors */
 DF_API df_v3 df_add_v3(df_v3 a, df_v3 b);
 /** @brief Subtract two 3d vectors */
 DF_API df_v3 df_sub_v3(df_v3 a, df_v3 b);
 /** @brief Calculate the dot product of 3d vectors a and b.
 */
 DF_API float df_dot_v3(df_v3 a, df_v3 b);
 /** @brief Multiply a 3d vector with a scalar.
 */
 DF_API df_v3 df_mul_v3(float t, df_v3 v);
 /** @brief Returns the normalized version of a 3d vector v
 */
 DF_API df_v3 df_normalize_v3(df_v3 v);
 /** @brief A plane in 3d space.
 *
 * Expressed as the set of all points p for which
 * dot((p - base), normal) = 0
 */
 typedef struct
 {
    df_v3 base;
    df_v3 normal;
 } df_plane;
 /** @brief A line in 3d space.
 *
 * Expressed as the vector equation p = base + t * direction.
 */
 typedef struct
 {
    df_v3 base;
    df_v3 direction;
 } df_line;
 /** @brief The three possible result types of a line-plane intersection test.
 */
 typedef enum
 {
    /** The line and plane intersect in one point */
    df_line_plane_intersection_vector,
    /** The line is contained in the plane */
    df_line_plane_intersection_contained,
    /** The line and plane don't intersect (-> they are parallel) */
    df_line_plane_intersection_none,
 } df_line_plane_intersection_type;
 /** @brief Result of a line-plane intersection test.
 * The contained vector is only valid if @c type = @c df_line_plane_intersection_vector.
 */
 typedef struct
 {
    df_line_plane_intersection_type type;
    df_v3 vec;
 } df_line_plane_intersection;
 /** @brief Calculate the intersection between a line and a plane in 3d space */
 DF_API df_line_plane_intersection df_calc_line_plane_intersection(df_line line, df_plane plane);
 /** @brief A 4x4 matrix.
 *
 * Stored in row major order.
 */
 typedef struct df_m4
 {
    _Alignas(16) union {
        float e[16];
        __m128 vec[4];
    };
 } df_m4;
 /** Accesses the element at the given row and column of a 4x4 matrix */
 #define DF_M4_AT(m, row, col) ((m).e[(row)*4 + (col)])
 /** @brief Matrix multiply 4x4 matrix a and b */
 DF_API df_m4 df_mul_m4(df_m4 a, df_m4 b);
 /** @brief Get a scale matrix */
 DF_API df_m4 df_scale(float x, float y, float z);
 DF_API df_m4 df_translate(float x, float y, float z);
 /** @brief Transform (i.e. multiply) a 3d vector v by the transformation matrix T */
 DF_API df_v3 df_transform_v3(df_m4 T, df_v3 v);
 /** @brief Calculate the inverse of a non-scaling transform matrix.
 *
 * Special fast case.
 */
 DF_API df_m4 df_inverse_transform_no_scale(df_m4 M);
 /** @brief Calculate the inverse of a transform matrix */
 DF_API df_m4 df_inverse_transform(df_m4 M);
 /** @brief Calculate the inverse of a general (invertible) 4x4 matrix */
 DF_API df_m4 df_inverse(df_m4 M);
 #endif
--- a/include/defocus/camera.h
+++ b/include/defocus/camera.h
@ -1,28 +0,0 @@
 #ifndef DEFOCUS_CAMERA_H
 #define DEFOCUS_CAMERA_H
 #include "base.h"
 #include "raytracing.h"
 /** @file camera.h
 * @brief basic camera functions
 */
 /** @brief Interface for cameras. */
 typedef struct
 {
    /** Release the camera object */
    void (*release)(void *o);
    /** Builds a ray packet of all rays that need to be evaluated to generate the image.
     */
    df_ray_packet (*build_ray_packet)(void *o);
    /** Opaque pointer to the camera object */
    void *o;
 } df_camera_i;
 DF_API df_camera_i df_create_perspective_camera(
    float image_width, float image_height, float raster_width, float raster_height, float far_dist, float near_dist, float focal_dist, float lens_radius);
 #endif
--- a/include/defocus/defocus.h
+++ b/include/defocus/defocus.h
@ -1,11 +1,94 @@
-#ifndef DEFOCUS_DEFOCUS_H
+#ifndef DEFOCUS_H
-#define DEFOCUS_DEFOCUS_H
+#define DEFOCUS_H
-/** @file defocus.h
+#include <stdint.h>
- * @brief meta header
+
 #ifdef __cplusplus
 #ifdef _MSC_VER
 #define DF_API extern "C" __declspec(dllexport)
 #else
 #define DF_API extern "C"
 #endif
 #else
 #ifdef _MSC_VER
 #define DF_API __declspec(dllexport)
 #else
 #define DF_API
 #endif
 #endif /* __cplusplus */
 #define DF_EPSF32 0.00001f
 #define DF_ARRAY_COUNT(A) (sizeof((A)) / sizeof((A)[0]))
 /* Images */
 typedef unsigned int df_image_handle;
 DF_API df_image_handle df_load_image(const char *path, int *w, int *h);
 /* Settings for the basic ray tracing function. */
 typedef struct
 {
    /* Width needs to be divisible by 4! */
    int image_width;
    int image_height;
    /* Distance between lense and image plane */
    float focal_length;
    float lens_radius;
 } df_trace_rays_settings;
 /* Sphere shape */
 typedef struct
 {
    float center_x;
    float center_y;
    float center_z;
    float radius;
 } df_sphere;
 typedef struct
 {
    float base_x;
    float base_y;
    float base_z;
    float normal_x;
    float normal_y;
    float normal_z;
    /* Coordinates on the plane */
    float img_p0_x;
    float img_p0_y;
    float img_p0_z;
    float img_p1_x;
    float img_p1_y;
    float img_p1_z; 
    /* TODO(Kevin): These could be calculated from p0 and p1... */
    float img_ax0_x;
    float img_ax0_y;
    float img_ax0_z;
    float img_ax1_x;
    float img_ax1_y;
    float img_ax1_z;
    df_image_handle image;
 } df_plane;
 DF_API float df_max_f32(const float *list, unsigned int count);
 /* Core function, implements raytracing.
 *  
 * The result buffer receives RGB pixels, with 8 bits per channel stored in RGB order.
 * It is allocated via malloc().
 * 
 * Returns 1 on success, 0 if allocating the result image failed.
 */
-#include "base.h"
+DF_API int df_trace_rays(df_trace_rays_settings settings, const df_sphere *spheres, unsigned int sphere_count,
-#include "image.h"
+    const df_plane *planes, unsigned int plane_count, uint8_t **result);
 #include "models.h"
 #endif
--- a/include/defocus/image.h
+++ b/include/defocus/image.h
@ -1,62 +0,0 @@
 #ifndef DEFOCUS_IMAGE_H
 #define DEFOCUS_IMAGE_H
 /** @file image.h
 * @brief image type and functions
 */
 #include "base.h"
 /** @brief Opaque rgba8 image object */
 typedef struct df_image df_image;
 /** @brief create an image
 *
 * The contents of the image will be undefined.
 * @param w the image width
 * @param h the image height
 * @param out_image receives the image object
 * @return error code
 */
 DF_API df_result df_create_image(int w, int h, df_image **out_image);
 /** @brief load an image file
 *
 * The image data will be converted to rgba8
 *
 * @param w the image width
 * @param h the image height
 * @param out_image receives the image object
 * @return error code
 */
 DF_API df_result df_load_image(const char *path, int *out_w, int *out_h, df_image **out_image);
 /** @brief Write an image to a PNG file
 * @param img the image
 * @param path the path
 */
 DF_API df_result df_write_image(df_image *img, const char *path);
 /** @brief Free an image.
 *
 * Any pointer to the image will be invalid after this.
 * @param img the image
 */
 DF_API void df_release_image(df_image *img);
 /** @brief Returns the dimensions of the image */
 DF_API void df_get_image_size(const df_image *image, int *w, int *h);
 /** @brief Returns the color value at pixel coordinates x, y
 *
 * Returns black for coordinates outside the image.
 */
 DF_API df_color df_get_image_pixel(const df_image *image, int x, int y);
 /** @brief Set the color value at pixel coordinates x, y.
 *
 * Does nothing for coordinates outside the image.
 */
 DF_API void df_set_image_pixel(df_image *image, int x, int y, df_color c);
 #endif
--- a/include/defocus/intrinsic_helper.h
+++ b/include/defocus/intrinsic_helper.h
@ -1,38 +0,0 @@
 #ifndef DEFOCUS_INTRINSIC_HELPER_H
 #define DEFOCUS_INTRINSIC_HELPER_H
 /** @file intrinsic_helper.h
 * @brief Helper macros for intrinsics
 *
 * Assumes that at least SS3 is available.
 */
 #include <immintrin.h>
 #include <pmmintrin.h>
 /* Shuffle mask for _mm_shuffle_epi32 */
 #define DF_MAKE_SHUFFLE_MASK(x, y, z, w) (x | (y << 2) | (z << 4) | (w << 6))
 #define DF_VEC_SWIZZLE_MASK(vec, mask) _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vec), mask))
 #define DF_VEC_SWIZZLE(vec, x, y, z, w) DF_VEC_SWIZZLE_MASK(vec, DF_MAKE_SHUFFLE_MASK(x, y, z, w))
 // "broadcasts" element x [0, 1, 2, 3] to all elements of the result */
 #define DF_VEC_SWIZZLE1(vec, x) DF_VEC_SWIZZLE_MASK(vec, DF_MAKE_SHUFFLE_MASK(x, x, x, x))
 /* returns [vec0, vec0, vec2, vec2] */
 #define DF_VEC_SWIZZLE_0022(vec) _mm_moveldup_ps(vec)
 /* returns [vec1, vec1, vec3, vec3] */
 #define DF_VEC_SWIZZLE_1133(vec) _mm_movehdup_ps(vec)
 /* returns [vec1[x], vec1[y], vec2[z], vec2[w]] */
 #define DF_VEC_SHUFFLE(vec1, vec2, x, y, z, w) _mm_shuffle_ps(vec1, vec2, DF_MAKE_SHUFFLE_MASK(x, y, z, w))
 /* returns [vec1[0], vec1[1], vec2[0], vec2[1]] */
 #define DF_VEC_SHUFFLE_0101(vec1, vec2) _mm_movelh_ps(vec1, vec2)
 /* returns [vec1[2], vec1[3], vec2[2], vec2[3]] */
 #define DF_VEC_SHUFFLE_2323(vec1, vec2) _mm_movehl_ps(vec2, vec1)
 #endif
--- a/include/defocus/models.h
+++ b/include/defocus/models.h
@ -1,77 +0,0 @@
 #ifndef DF_MODELS_H
 #define DF_MODELS_H
 /** @file models.h
 * @brief Camera models for defocus.
 *
 * Camera models are usually implemented as a function taking an input image and parameters required for the model
 * and produce an output image.
 *
 * Parameters are contained in a parameter struct, because this enables us to something similar to named parameters.
 * We can also do default parameters, by leaving some values set to zero.
 *
 * @code{c}
 * df_thin_lense((df_thin_lense){
 *     .focal_length = 42.f,
 *     .aperture = 21.f,
 *     .focal_distance = 150.f,
 *     .out_distance = 200.f},
 *     in_image,
 *     out_image);
 * @endcode
 *
 * All camera models use the same coordinate space: The camera looks along the positive z-axis in a right-handed
 * coordinate system.  (-> positive y is down, positive x is right.)
 */
 #include "image.h"
 /** Parameters for the pinhole model. */
 typedef struct df_pinhole_params
 {
    float focal_length; /**< the cameras focal length in millimeters. */
    float orig_z;       /**< the distance of the input image from the camera. */
    float new_z;        /**< the distance of the generated output image. */
 } df_pinhole_params;
 /** @brief Simple pinhole camera model.
 *
 * The simplest possible(?) camera model.
 * This function takes an input image at a known distance from the camera and moves it to a new distance.
 *
 * @param params model parameters.
 * @param in_image input image.
 * @param out_image the output image.
 */
 DF_API void df_pinhole(df_pinhole_params params, const df_image *in_image, df_image *out_image);
 /** Parameters for the thin lense model.
 */
 typedef struct df_thin_lense_params
 {
    /** The cameras focal length in millimeters. */
    float focal_length;
    /** The aperture size in millimeters */
    float aperture;
    /** The distance between the plane of focus and the camera lens, in millimeters */
    float focal_distance;
    /** The distance between the output image and the camera lens, in millimeters */
    float out_distance;
    /** The number of samples per pixel. Leave at 0 for the default value (64). */
    int sample_count;
 } df_thin_lense_params;
 /** @brief Thin-lense model.
 *
 * @param params model parameters
 * @param in_image input image
 * @param out_image the output image.
 */
 DF_API void df_thin_lense(df_thin_lense_params params, const df_image *in_image, df_image *out_image);
 #endif
--- a/include/defocus/raytracing.h
+++ b/include/defocus/raytracing.h
@ -1,29 +0,0 @@
 #ifndef DF_RAYTRACING_H
 #define DF_RAYTRACING_H
 /** @brief Stores information for rays in a SIMD friendly way */
 typedef struct
 {
    /** Packs all SIMD data into a single buffer */
    float *simd_mem;
    /** Source uvs for rays */
    df_v2 *ray_uvs;
    /* Points into simd_mem */
    float *base_x;
    float *base_y;
    float *base_z;
    float *dir_x;
    float *dir_y;
    float *dir_z;
    size_t ray_count;
 } df_ray_packet;
 /** @brief Free ray packet memory */
 DF_API void df_release_ray_packet(df_ray_packet *rays);
 DF_API void df_evaluate_ray_packet(const df_ray_packet *rays);
 #endif
--- a/include/defocus/scene.h
+++ b/include/defocus/scene.h
@ -1,37 +0,0 @@
 #ifndef DEFOCUS_SCENE_H
 #define DEFOCUS_SCENE_H
 #include "base.h"
 #include "image.h"
 #include "camera.h"
 /** @file scene.h
 * @brief scene structure */
 /** Opaque scene structure */
 typedef struct df_scene df_scene;
 df_scene *df_create_scene();
 void df_release_scene(df_scene *scene);
 void df_scene_add_plane(df_scene *scene, df_plane plane, df_image *texture);
 df_image *df_scene_generate_image(df_scene *scene, df_camera_i camera);
 #if 0
 /* Usage example */
 df_camera_i camera = df_create_perspective_camera(...);
 df_scene* scene = df_create_scene();
 df_scene_add_plane(..., in_image);
 df_image *out_image = df_scene_generate_image(scene, camera);
 df_release_scene(scene);
 camera->release(camera->o);
 #endif
 #endif
--- a/lib/camera.c
+++ b/lib/camera.c
@ -1,152 +0,0 @@
 #include <defocus/camera.h>
 #include <string.h>
 #include <stdlib.h>
 #include <assert.h>
 #ifdef _MSC_VER
 #include <malloc.h>
 #endif
 /* ********************************************************
 *
 * Ray Packet Functions
 *
 * ********************************************************/
 DF_API void df_release_ray_packet(df_ray_packet *rays)
 {
    free(rays->simd_mem);
    free(rays->ray_uvs);
    memset(rays, 0, sizeof(*rays));
 }
 /* ********************************************************
 *
 * Perspective Camrea
 *
 * ********************************************************/
 typedef struct
 {
    float focal_dist;
    float lens_radius;
    /* Raster space, i.e. the space for which we generate rays */
    float raster_width;
    float raster_height;
    /* Image(=screen) space, i.e. the space in which we store pixels */
    float image_width;
    float image_height;
    df_m4 screen_to_raster;
    df_m4 raster_to_screen;
    df_m4 raster_to_camera;
 } df_perspective_camera;
 static void pc_release(void *o) { df_perspective_camera *camera = o; }
 static df_ray_packet pc_build_ray_packet(void *o)
 {
    df_perspective_camera *camera = o;
    df_ray_packet packet;
    memset(&packet, 0, sizeof(packet));
    /* Generate 1 ray per pixel (ignore lens for now) */
    size_t count = (size_t)camera->raster_width * (size_t)camera->raster_height;
    size_t alloc_count = count;
    /* Round up to nearest multiple of 4, for SSE.
     * Also satisfies 16 byte alignment (assuming simd_mem is 16 byte aligned)
     * because 4 * sizeof(float) = 16.
     */
    if ((alloc_count % 4) != 0) {
        alloc_count = ((alloc_count + 3) / 4) * 4;
    }
 #ifdef _MSC_VER
    packet.simd_mem = _aligned_malloc(sizeof(float) * alloc_count * 6, 16);
 #elif defined(_ISOC11_SOURCE) /* Feature test macro for GCC */
    packet.simd_mem = aligned_alloc(16, sizeof(float) * alloc_count * 6);
 #else
    /* Fall back to regular malloc and hope for the best */
    packet.simd_mem = malloc(sizeof(float) * alloc_count * 6);
 #endif
    packet.base_x = packet.simd_mem;
    packet.base_y = packet.base_x + alloc_count;
    packet.base_z = packet.base_y + alloc_count;
    packet.dir_x = packet.base_z + alloc_count;
    packet.dir_y = packet.dir_x + alloc_count;
    packet.dir_z = packet.dir_y + alloc_count;
    packet.ray_uvs = malloc(sizeof(df_v2) * count);
    packet.ray_count = count;
    size_t i = 0;
    for (float y = 0; y < camera->raster_height; y += 1.f) {
        for (float x = 0; x < camera->raster_width; x += 1.f) {
            packet.base_x[i] = 0.f;
            packet.base_y[i] = 0.f;
            packet.base_z[i] = 0.f;
            df_v3 raster_p = {x, y, 0.f};
            df_v3 camera_p = df_transform_v3(camera->raster_to_camera, raster_p);
            df_v3 dir = df_normalize_v3(camera_p);
            packet.dir_x[i] = dir.x;
            packet.dir_y[i] = dir.y;
            packet.dir_z[i] = dir.z;
            df_v3 img_p = df_transform_v3(camera->raster_to_screen, raster_p);
            packet.ray_uvs[i].u = img_p.x / camera->image_width;
            packet.ray_uvs[i].v = img_p.y / camera->image_height;
            ++i;
            assert(i <= count);
        }
    }
    return packet;
 }
 DF_API df_camera_i df_create_perspective_camera(float image_width,
                                                float image_height,
                                                float raster_width,
                                                float raster_height,
                                                float far_dist,
                                                float near_dist,
                                                float focal_dist,
                                                float lens_radius)
 {
    df_perspective_camera *camera = malloc(sizeof(*camera));
    camera->screen_to_raster = df_scale(raster_width, raster_height, 1.f);
    camera->screen_to_raster =
        df_mul_m4(camera->screen_to_raster, df_scale(1.f / image_width, -1.f / image_height, 1.f));
    camera->screen_to_raster = df_mul_m4(camera->screen_to_raster, df_translate(0.f, -image_height, 0.f));
    camera->raster_to_screen = df_inverse_transform(camera->screen_to_raster);
    /* Perspective projection matrix */
    df_m4 persp = {0.f};
    DF_M4_AT(persp, 0, 0) = 1.f;
    DF_M4_AT(persp, 1, 1) = 1.f;
    DF_M4_AT(persp, 2, 2) = far_dist / (far_dist - near_dist);
    DF_M4_AT(persp, 2, 3) = -1.f * far_dist * near_dist / (far_dist - near_dist);
    DF_M4_AT(persp, 3, 2) = 1.f;
    camera->raster_to_camera = df_mul_m4(df_inverse(persp), camera->raster_to_screen);
    camera->focal_dist = focal_dist;
    camera->lens_radius = lens_radius;
    camera->raster_width = raster_width;
    camera->raster_height = raster_height;
    camera->image_width = image_width;
    camera->image_height = image_height;
    df_camera_i iface = {.release = pc_release, .build_ray_packet = pc_build_ray_packet, .o = camera};
    return iface;
 }
--- a/lib/color.c
+++ b/lib/color.c
@ -1,14 +0,0 @@
 #include <defocus/base.h>
 #include <math.h>
 df_color df_lerp_color(df_color a, df_color b, double t)
 {
    df_color r;
    for (int i = 0; i < 4; ++i) {
        double af = (double)a.e[i];
        double bf = (double)b.e[i];
        r.e[i] = (uint8_t)floor(af + (bf - af) * t);
    }
    return r;
 }
--- a/lib/image.c
+++ b/lib/image.c
@ -1,110 +0,0 @@
 #include <defocus/image.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #define STB_IMAGE_IMPLEMENTATION
 #include <stb_image.h>
 #define STB_IMAGE_WRITE_IMPLEMENTATION
 #include <stb_image_write.h>
 struct df_image
 {
    int width;
    int height;
    uint8_t *pixels;
 };
 DF_API df_result df_create_image(int w, int h, df_image **out_image)
 {
    df_image *img = malloc(sizeof(df_image));
    if (!img)
        return df_result_out_of_memory;
    DF_ZERO_STRUCT(img);
    img->width = w;
    img->height = h;
    img->pixels = malloc(4 * sizeof(uint8_t) * w * h);
    if (!img->pixels) {
        free(img);
        return df_result_out_of_memory;
    }
    memset(img->pixels, 0, 4 * w * h);
    *out_image = img;
    return df_result_success;
 }
 DF_API df_result df_load_image(const char *path, int *out_w, int *out_h, df_image **out_image)
 {
    int w, h, c;
    stbi_uc *pixels = stbi_load(path, &w, &h, &c, 4);
    if (!pixels) {
        fprintf(stderr, "ERROR: failed to load %s: %s\n", path, stbi_failure_reason());
        return df_result_io_error;
    }
    df_result res = df_result_success;
    df_image *img = malloc(sizeof(df_image));
    if (!img) {
        res = df_result_out_of_memory;
        goto err;
    }
    DF_ZERO_STRUCT(img);
    img->width = w;
    img->height = h;
    img->pixels = (uint8_t *)pixels;
    *out_image = img;
    *out_w = w;
    *out_h = h;
    goto out;
 err:
    free(img);
 out:
    return res;
 }
 DF_API void df_release_image(df_image *img)
 {
    if (img) {
        free(img->pixels);
        free(img);
    }
 }
 DF_API df_result df_write_image(df_image *image, const char *path)
 {
    df_result res = stbi_write_png(path, image->width, image->height, 4, image->pixels, image->width * 4)
                        ? df_result_success
                        : df_result_io_error;
    return res;
 }
 DF_API void df_get_image_size(const df_image *image, int *w, int *h)
 {
    if (w)
        *w = image->width;
    if (h)
        *h = image->height;
 }
 DF_API df_color df_get_image_pixel(const df_image *image, int x, int y)
 {
    df_color c = {0, 0, 0, 255};
    if (x >= 0 && x < image->width && y >= 0 && y < image->height) {
        memcpy(&c.e[0], &image->pixels[4 * (y * image->width + x)], 4);
    }
    return c;
 }
 DF_API void df_set_image_pixel(df_image *image, int x, int y, df_color c)
 {
    if (x >= 0 && x < image->width && y >= 0 && y < image->height) {
        memcpy(&image->pixels[4 * (y * image->width + x)], &c.e[0], 4);
    }
 }
--- a/lib/log.c
+++ b/lib/log.c
@ -1,15 +0,0 @@
 #include <defocus/base.h>
 #include <stdio.h>
 #include <stdarg.h>
 static const char *log_level_names[] = {"VERBOSE", "INFO", "WARN", "ERROR"};
 DF_API void df_log_impl(int level, const char *file, int line, const char *fmt, ...)
 {
    va_list ap;
    va_start(ap, fmt);
    fprintf(stderr, "[%s] %s:%d: ", log_level_names[level], file, line);
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    va_end(ap);
 }
--- a/lib/math.c
+++ b/lib/math.c
@ -1,326 +0,0 @@
 #include <defocus/base.h>
 #include <defocus/intrinsic_helper.h>
 #include <math.h>
 #include <immintrin.h>
 #include <pmmintrin.h>
 DF_API df_v2 df_add_v2(df_v2 a, df_v2 b)
 {
    df_v2 v = {a.x + b.x, a.y + b.y};
    return v;
 }
 DF_API df_v2 df_sub_v2(df_v2 a, df_v2 b)
 {
    df_v2 v = {a.x - b.x, a.y - b.y};
    return v;
 }
 DF_API float df_dot_v2(df_v2 a, df_v2 b) { return a.x * b.x + a.y * b.y; }
 DF_API df_v2 df_mul_v2(float t, df_v2 v)
 {
    df_v2 r = {t * v.x, t * v.y};
    return r;
 }
 DF_API df_v2 df_normalize_v2(df_v2 v)
 {
    float len_square = df_dot_v2(v, v);
    float len = sqrtf(len_square);
    df_v2 n = {v.x / len, v.y / len};
    return n;
 }
 DF_API df_v3 df_add_v3(df_v3 a, df_v3 b)
 {
    df_v3 v = {a.x + b.x, a.y + b.y, a.z + b.z};
    return v;
 }
 DF_API df_v3 df_sub_v3(df_v3 a, df_v3 b)
 {
    df_v3 v = {a.x - b.x, a.y - b.y, a.z - b.z};
    return v;
 }
 DF_API float df_dot_v3(df_v3 a, df_v3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
 DF_API df_v3 df_mul_v3(float t, df_v3 v)
 {
    df_v3 r = {t * v.x, t * v.y, t * v.z};
    return r;
 }
 DF_API df_v3 df_normalize_v3(df_v3 v)
 {
    float len_square = df_dot_v3(v, v);
    float len = sqrtf(len_square);
    df_v3 n = {v.x / len, v.y / len, v.z / len};
    return n;
 }
 DF_API df_line_plane_intersection df_calc_line_plane_intersection(df_line line, df_plane plane)
 {
    /* check case */
    float dot = df_dot_v3(line.direction, plane.normal);
    df_v3 base_diff = df_sub_v3(plane.base, line.base);
    float dot2 = df_dot_v3(base_diff, plane.normal);
    if (dot != 0) {
        float t = dot2 / dot;
        df_v3 point = df_add_v3(line.base, df_mul_v3(t, line.direction));
        df_line_plane_intersection intersection = {
            .type = df_line_plane_intersection_vector,
            .vec = point,
        };
        return intersection;
    } else {
        if (dot2 == 0) {
            df_line_plane_intersection intersection = {
                .type = df_line_plane_intersection_contained,
            };
            return intersection;
        } else {
            df_line_plane_intersection intersection = {
                .type = df_line_plane_intersection_none,
            };
            return intersection;
        }
    }
 }
 DF_API df_m4 df_mul_m4(df_m4 a, df_m4 b)
 {
    /* Super simple, we could probably do it a lot better via SIMD. */
    df_m4 p;
    for (int row = 0; row < 4; ++row) {
        for (int col = 0; col < 4; ++col) {
            DF_M4_AT(p, row, col) = 0.f;
            for (int i = 0; i < 4; ++i) {
                DF_M4_AT(p, row, col) += DF_M4_AT(a, row, i) * DF_M4_AT(b, i, col);
            }
        }
    }
    return p;
 }
 DF_API df_m4 df_scale(float x, float y, float z)
 {
    /* clang-format off */
    df_m4 s = {{
          x, 0.f, 0.f, 0.f,
        0.f,   y, 0.f, 0.f,
        0.f, 0.f,   z, 0.f,
        0.f, 0.f, 0.f, 1.f,
    }};
    /* clang-format on */
    return s;
 }
 DF_API df_m4 df_translate(float x, float y, float z)
 {
    /* clang-format off */
    df_m4 t = {{
        1.f, 0.f, 0.f, x,
        0.f, 1.f, 0.f, y,
        0.f, 0.f, 1.f, z,
        0.f, 0.f, 0.f, 1.f,
    }};
    /* clang-format on */
    return t;
 }
 /** SSE3 version of a horizontal sum:
 * computes the sum of 4 32bit floats inside v
 */
 static float hsum(__m128 v)
 {
    /* v = [v0, v1, v2, v3] */
    /* shuf = [v1, v1, v3, v3] */
    __m128 shuf = _mm_movehdup_ps(v);
    /* sum = [v0 + v1, 2*v1, v2 + v3, 2*v3] */
    __m128 sum = _mm_add_ps(v, shuf);
    /* shuf = [v2 + v3, 2*v3, v3, v3] */
    shuf = _mm_movehl_ps(shuf, sum);
    /* sum = [v0 + v1 + v2 + v3, ...] */
    sum = _mm_add_ss(sum, shuf);
    return _mm_cvtss_f32(sum);
 }
 DF_API df_v3 df_transform_v3(df_m4 T, df_v3 v)
 {
    df_v3 transf;
    _Alignas(16) float tmp_v[4] = {v.x, v.y, v.z, 1.f};
    __m128 homov = _mm_load_ps(tmp_v);
    __m128 row0 = _mm_load_ps(&T.e[0]);
    __m128 row1 = _mm_load_ps(&T.e[4]);
    __m128 row2 = _mm_load_ps(&T.e[8]);
    __m128 prod = _mm_mul_ps(row0, homov);
    transf.x = hsum(prod);
    prod = _mm_mul_ps(row1, homov);
    transf.y = hsum(prod);
    prod = _mm_mul_ps(row2, homov);
    transf.z = hsum(prod);
    return transf;
 }
 /* Fast 4x4 matrix inverse via SIMD adapted from:
 * https://lxjk.github.io/2017/09/03/Fast-4x4-Matrix-Inverse-with-SSE-SIMD-Explained.html
 */
 /* Only works if M is a transform matrix without scale:
 *
 * | R T |
 * | 0 1 |
 */
 DF_API df_m4 df_inverse_transform_no_scale(df_m4 M)
 {
    df_m4 I = {0.f};
    /* transpose 3x3 */
    __m128 t0 = DF_VEC_SHUFFLE_0101(M.vec[0], M.vec[1]); /* 00, 01, 10, 11 */
    __m128 t1 = DF_VEC_SHUFFLE_2323(M.vec[0], M.vec[1]); /* 02, 03, 12, 13 */
    I.vec[0] = DF_VEC_SHUFFLE(t0, M.vec[2], 0, 2, 0, 3); /* 00, 01, 20, 23(=0) */
    I.vec[1] = DF_VEC_SHUFFLE(t0, M.vec[2], 1, 3, 1, 3); /* 01, 11, 21, 23(=0) */
    I.vec[2] = DF_VEC_SHUFFLE(t1, M.vec[2], 0, 2, 2, 3); /* 02, 12, 22, 23(=0) */
    __m128 t = _mm_setr_ps(DF_M4_AT(M, 0, 3), DF_M4_AT(M, 1, 3), DF_M4_AT(M, 2, 3), 0.f);
    DF_M4_AT(I, 0, 3) = -1.f * hsum(_mm_mul_ps(I.vec[0], t));
    DF_M4_AT(I, 1, 3) = -1.f * hsum(_mm_mul_ps(I.vec[1], t));
    DF_M4_AT(I, 2, 3) = -1.f * hsum(_mm_mul_ps(I.vec[2], t));
    I.vec[3] = _mm_setr_ps(0.f, 0.f, 0.f, 1.f);
    return I;
 }
 DF_API df_m4 df_inverse_transform(df_m4 M)
 {
    df_m4 I = {0.f};
    /* transpose 3x3 */
    __m128 t0 = DF_VEC_SHUFFLE_0101(M.vec[0], M.vec[1]); /* 00, 01, 10, 11 */
    __m128 t1 = DF_VEC_SHUFFLE_2323(M.vec[0], M.vec[1]); /* 02, 03, 12, 13 */
    I.vec[0] = DF_VEC_SHUFFLE(t0, M.vec[2], 0, 2, 0, 3); /* 00, 01, 20, 23(=0) */
    I.vec[1] = DF_VEC_SHUFFLE(t0, M.vec[2], 1, 3, 1, 3); /* 01, 11, 21, 23(=0) */
    I.vec[2] = DF_VEC_SHUFFLE(t1, M.vec[2], 0, 2, 2, 3); /* 02, 12, 22, 23(=0) */
    __m128 t = _mm_setr_ps(DF_M4_AT(M, 0, 3), DF_M4_AT(M, 1, 3), DF_M4_AT(M, 2, 3), 0.f);
    DF_M4_AT(I, 0, 3) = -1.f * hsum(_mm_mul_ps(I.vec[0], t));
    DF_M4_AT(I, 1, 3) = -1.f * hsum(_mm_mul_ps(I.vec[1], t));
    DF_M4_AT(I, 2, 3) = -1.f * hsum(_mm_mul_ps(I.vec[2], t));
    I.vec[3] = _mm_setr_ps(0.f, 0.f, 0.f, 1.f);
    return I;
 }
 // for row major matrix
 // we use __m128 to represent 2x2 matrix as A = | A0  A1 |
 //                                              | A2  A3 |
 // 2x2 row major Matrix multiply A*B
 static __forceinline __m128 mat2_mul(__m128 vec1, __m128 vec2)
 {
    return _mm_add_ps(_mm_mul_ps(vec1, DF_VEC_SWIZZLE(vec2, 0, 3, 0, 3)),
                      _mm_mul_ps(DF_VEC_SWIZZLE(vec1, 1, 0, 3, 2), DF_VEC_SWIZZLE(vec2, 2, 1, 2, 1)));
 }
 // 2x2 row major Matrix adjugate multiply (A#)*B
 static __forceinline __m128 mat2_adj_mul(__m128 vec1, __m128 vec2)
 {
    return _mm_sub_ps(_mm_mul_ps(DF_VEC_SWIZZLE(vec1, 3, 3, 0, 0), vec2),
                      _mm_mul_ps(DF_VEC_SWIZZLE(vec1, 1, 1, 2, 2), DF_VEC_SWIZZLE(vec2, 2, 3, 0, 1)));
 }
 // 2x2 row major Matrix multiply adjugate A*(B#)
 static __forceinline __m128 mat2_mul_adj(__m128 vec1, __m128 vec2)
 {
    return _mm_sub_ps(_mm_mul_ps(vec1, DF_VEC_SWIZZLE(vec2, 3, 0, 3, 0)),
                      _mm_mul_ps(DF_VEC_SWIZZLE(vec1, 1, 0, 3, 2), DF_VEC_SWIZZLE(vec2, 2, 1, 2, 1)));
 }
 DF_API df_m4 df_inverse( df_m4 M ) {
    // use block matrix method
    // A is a matrix, then i(A) or iA means inverse of A, A# (or A_ in code) means adjugate of A, |A| (or detA in code)
    // is determinant, tr(A) is trace
    // sub matrices
    __m128 A = DF_VEC_SHUFFLE_0101(M.vec[0], M.vec[1]);
    __m128 B = DF_VEC_SHUFFLE_2323(M.vec[0], M.vec[1]);
    __m128 C = DF_VEC_SHUFFLE_0101(M.vec[2], M.vec[3]);
    __m128 D = DF_VEC_SHUFFLE_2323(M.vec[2], M.vec[3]);
 #if 0
 	__m128 detA = _mm_set1_ps(M.m[0][0] * M.m[1][1] - M.m[0][1] * M.m[1][0]);
 	__m128 detB = _mm_set1_ps(M.m[0][2] * M.m[1][3] - M.m[0][3] * M.m[1][2]);
 	__m128 detC = _mm_set1_ps(M.m[2][0] * M.m[3][1] - M.m[2][1] * M.m[3][0]);
 	__m128 detD = _mm_set1_ps(M.m[2][2] * M.m[3][3] - M.m[2][3] * M.m[3][2]);
 #else
    // determinant as (|A| |B| |C| |D|)
    __m128 detSub = _mm_sub_ps(
        _mm_mul_ps(DF_VEC_SHUFFLE(M.vec[0], M.vec[2], 0, 2, 0, 2), DF_VEC_SHUFFLE(M.vec[1], M.vec[3], 1, 3, 1, 3)),
        _mm_mul_ps(DF_VEC_SHUFFLE(M.vec[0], M.vec[2], 1, 3, 1, 3), DF_VEC_SHUFFLE(M.vec[1], M.vec[3], 0, 2, 0, 2)));
    __m128 detA = DF_VEC_SWIZZLE1(detSub, 0);
    __m128 detB = DF_VEC_SWIZZLE1(detSub, 1);
    __m128 detC = DF_VEC_SWIZZLE1(detSub, 2);
    __m128 detD = DF_VEC_SWIZZLE1(detSub, 3);
 #endif
    // let iM = 1/|M| * | X  Y |
    //                  | Z  W |
    // D#C
    __m128 D_C = mat2_adj_mul(D, C);
    // A#B
    __m128 A_B = mat2_adj_mul(A, B);
    // X# = |D|A - B(D#C)
    __m128 X_ = _mm_sub_ps(_mm_mul_ps(detD, A), mat2_mul(B, D_C));
    // W# = |A|D - C(A#B)
    __m128 W_ = _mm_sub_ps(_mm_mul_ps(detA, D), mat2_mul(C, A_B));
    // |M| = |A|*|D| + ... (continue later)
    __m128 detM = _mm_mul_ps(detA, detD);
    // Y# = |B|C - D(A#B)#
    __m128 Y_ = _mm_sub_ps(_mm_mul_ps(detB, C), mat2_mul_adj(D, A_B));
    // Z# = |C|B - A(D#C)#
    __m128 Z_ = _mm_sub_ps(_mm_mul_ps(detC, B), mat2_mul_adj(A, D_C));
    // |M| = |A|*|D| + |B|*|C| ... (continue later)
    detM = _mm_add_ps(detM, _mm_mul_ps(detB, detC));
    // tr((A#B)(D#C))
    __m128 tr = _mm_mul_ps(A_B, DF_VEC_SWIZZLE(D_C, 0, 2, 1, 3));
    tr = _mm_hadd_ps(tr, tr);
    tr = _mm_hadd_ps(tr, tr);
    // |M| = |A|*|D| + |B|*|C| - tr((A#B)(D#C)
    detM = _mm_sub_ps(detM, tr);
    const __m128 adjSignMask = _mm_setr_ps(1.f, -1.f, -1.f, 1.f);
    // (1/|M|, -1/|M|, -1/|M|, 1/|M|)
    __m128 rDetM = _mm_div_ps(adjSignMask, detM);
    X_ = _mm_mul_ps(X_, rDetM);
    Y_ = _mm_mul_ps(Y_, rDetM);
    Z_ = _mm_mul_ps(Z_, rDetM);
    W_ = _mm_mul_ps(W_, rDetM);
    df_m4 r;
    // apply adjugate and store, here we combine adjugate shuffle and store shuffle
    r.vec[0] = DF_VEC_SHUFFLE(X_, Y_, 3, 1, 3, 1);
    r.vec[1] = DF_VEC_SHUFFLE(X_, Y_, 2, 0, 2, 0);
    r.vec[2] = DF_VEC_SHUFFLE(Z_, W_, 3, 1, 3, 1);
    r.vec[3] = DF_VEC_SHUFFLE(Z_, W_, 2, 0, 2, 0);
    return r;
 }
--- a/lib/pinhole.c
+++ b/lib/pinhole.c
@ -1,76 +0,0 @@
 #include <defocus/models.h>
 #include <defocus/base.h>
 #include <defocus/image.h>
 #include <math.h>
 void df_pinhole(df_pinhole_params params, const df_image *in_image, df_image *out_image)
 {
    /* orig_z is the z-coordinate of the original image plane (=> in_image is located there)
     * new_z  is the z-coordinate of the output image plane (=> out_image is located there)
     *
     * We can map from image coordinates to world coordinates, because we know the z coordinate of
     * the input image plane and the focal length of the camera.
     *
     * Let x,y,z be the world coordinates and u, v the image coordintes.
     * The pinhole camera model gives us:
     * u = f/z * x;  v = f/z * y
     * => x = u * z / f
     * => y = v * z / f
     */
    int w, h;
    df_get_image_size(in_image, &w, &h);
    int out_w, out_h;
    df_get_image_size(out_image, &out_w, &out_h);
    double in_z_over_f = params.orig_z / params.focal_length;
    double out_f_over_z = params.focal_length / params.new_z;
    int prev_out_iy = -1;
    for (int iy = 0; iy < h; ++iy) {
        double v = (double)iy / (double)h;
        double y = in_z_over_f * v;
        double out_v = out_f_over_z * y;
        int out_iy = (int)floor(out_v * (double)out_h);
        int prev_out_ix = -1;
        for (int ix = 0; ix < w; ++ix) {
            double u = (double)ix / (double)w;
            double x = in_z_over_f * u;
            double out_u = out_f_over_z * x;
            int out_ix = (int)floor(out_u * (double)out_w);
            df_color px = df_get_image_pixel(in_image, ix, iy);
            df_set_image_pixel(out_image, out_ix, out_iy, px);
            /* Go back and interpolate between this pixel and the previous one. */
            if ((out_ix - prev_out_ix) > 1) {
                df_color prev_px = df_get_image_pixel(out_image, prev_out_ix, out_iy);
                for (int d = prev_out_ix + 1; d < out_ix; ++d) {
                    double t = (double)(d - prev_out_ix) / (double)(out_ix - prev_out_ix);
                    df_color color = df_lerp_color(prev_px, px, t);
                    df_set_image_pixel(out_image, d, out_iy, color);
                }
            }
            prev_out_ix = out_ix;
        }
        /* Go back and interpolate between this row and the previous one */
        if ((out_iy - prev_out_iy) > 1) {
            for (int y = prev_out_iy + 1; y < out_iy; ++y) {
                double t = (double)(y - prev_out_iy) / (double)(out_iy - prev_out_iy);
                for (int x = 0; x < out_w; ++x) {
                    df_color a = df_get_image_pixel(out_image, x, prev_out_iy);
                    df_color b = df_get_image_pixel(out_image, x, out_iy);
                    df_color color = df_lerp_color(a, b, t);
                    df_set_image_pixel(out_image, x, y, color);
                }
            }
        }
        prev_out_iy = out_iy;
    }
 }
--- a/lib/raytracer.c
+++ b/lib/raytracer.c
@ -0,0 +1,347 @@
 #include <defocus/defocus.h>
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #define STB_IMAGE_IMPLEMENTATION
 #include "stb_image.h"
 /* *********** *
 *             *
 *    Vec 3    *
 *             *
 * *********** */
 /* We can later use this to store 4 vecs for simd */
 typedef struct
 {
    float x;
    float y;
    float z;
 } df_v3;
 static df_v3 normalize(df_v3 v)
 {
    float l = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z);
    return (df_v3){v.x / l, v.y / l, v.z / l};
 }
 static float dot(df_v3 a, df_v3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
 /* ****************** *
 *                    *
 *    List of hits    *
 *                    *
 * ****************** */
 typedef struct
 {
    df_v3 at;
    df_v3 normal;
    float ray_t;
    int front_face; /* 1 if we hit the front face */
 } df_hit_record;
 static void set_face_normal(df_hit_record *record, df_v3 ray_dir, df_v3 outward_normal)
 {
    record->front_face = dot(ray_dir, outward_normal) < 0;
    record->normal =
        record->front_face ? outward_normal : (df_v3){-outward_normal.x, -outward_normal.y, -outward_normal.z};
 }
 typedef struct
 {
    df_hit_record *hits;
    size_t count;
    size_t capacity;
 } df_hit_list;
 static void emit_hit(df_hit_list *list, df_hit_record record)
 {
    if (list->count == list->capacity) {
        size_t cap2 = (list->capacity > 0) ? 2 * list->capacity : 128;
        df_hit_record *tmp = realloc(list->hits, sizeof(df_hit_record) * cap2);
        if (!tmp)
            return;
        list->hits = tmp;
        list->capacity = cap2;
    }
    list->hits[list->count++] = record;
 }
 static df_hit_list merge_hit_lists(const df_hit_list **lists, unsigned int count)
 {
    size_t total_size = 0;
    for (unsigned int i = 0; i < count; ++i)
        total_size += lists[i]->count;
    df_hit_list out;
    out.count = total_size;
    out.hits = malloc(sizeof(df_hit_record) * total_size);
    if (!out.hits)
        return out;
    df_hit_record *dst = out.hits;
    for (unsigned int i = 0; i < count; ++i) {
        memcpy(dst, lists[i]->hits, sizeof(df_hit_record) * lists[i]->count);
        dst += lists[i]->count;
    }
    return out;
 }
 /* ********************* *
 *                       *
 *    Image functions    *
 *                       *
 * ********************* */
 #define MAX_IMAGES 1024
 struct
 {
    struct
    {
        stbi_uc *pixels;
        int w;
        int h;
    } images[MAX_IMAGES];
    unsigned int image_count;
 } _image_table;
 DF_API df_image_handle df_load_image(const char *path, int *w, int *h)
 {
    if (_image_table.image_count == MAX_IMAGES)
        return 0;
    int width, height, c;
    stbi_uc *pixels = stbi_load(path, &width, &height, &c, 4);
    if (!pixels) {
        return 0;
    }
    df_image_handle handle = (_image_table.image_count + 1);
    _image_table.image_count++;
    _image_table.images[handle].pixels = pixels;
    _image_table.images[handle].w = width;
    _image_table.images[handle].h = height;
    if (w) *w = width;
    if (h) *h = height;
    return handle;
 }
 /* ********************************* *
 *                                   *
 *    Intersection test functions    *
 *
 * ********************************* */
 typedef struct
 {
    float ray_t;
    float img_u;
    float img_v;
    df_image_handle image;
 } df_hit; 
 /* -1 => does not hit, >= 0 => hit (sphere index) */
 static float sphere_test(float ray_origin_x,
                         float ray_origin_y,
                         float ray_origin_z,
                         float ray_dx,
                         float ray_dy,
                         float ray_dz,
                         const df_sphere *spheres,
                         unsigned int sphere_count)
 {
    float result = -1.f;
    for (unsigned int i = 0; i < sphere_count; ++i) {
        float delta_x = ray_origin_x - spheres[i].center_x;
        float delta_y = ray_origin_y - spheres[i].center_y;
        float delta_z = ray_origin_z - spheres[i].center_z;
        float a = ray_dx * ray_dx + ray_dy * ray_dy + ray_dz * ray_dz;
        float b = (delta_x * ray_dx + delta_y * ray_dy + delta_z * ray_dz);
        float c = (delta_x * delta_x + delta_y * delta_y + delta_z * delta_z) - (spheres[i].radius * spheres[i].radius);
        float discriminant = b * b - a * c;
        /* we can get the hit location t as: (-b - sqrtf(disciminant)) / (2.f * a) */
        if (discriminant > 0) {
            float t = (-b - sqrtf(discriminant)) / (2.f * a);
 #if 0
            df_hit_record hit;
            hit.at.x = ray_origin_x + t * ray_dx;
            hit.at.y = ray_origin_y + t * ray_dy;
            hit.at.z = ray_origin_z + t * ray_dz;
            hit.ray_t = t;
            df_v3 outward_normal = {
                hit.at.x - spheres[i].center_x,
                hit.at.y - spheres[i].center_y,
                hit.at.z - spheres[i].center_z
            };
            set_face_normal(&hit, (df_v3){ray_dx, ray_dy, ray_dz}, outward_normal);
 #endif
            if (t > result)
                result = t;
        }
    }
    return result;
 }
 static df_hit plane_test(float ray_origin_x,
                        float ray_origin_y,
                        float ray_origin_z,
                        float ray_dx,
                        float ray_dy,
                        float ray_dz,
                        const df_plane *planes,
                        unsigned int plane_count)
 {
    df_hit result = {
        .ray_t = -1.f,
    };
    for (unsigned int i = 0; i < plane_count; ++i) {
        float dot = (ray_dx * planes[i].normal_x + ray_dy * planes[i].normal_y + ray_dz * planes[i].normal_z);
        if (dot > DF_EPSF32 || dot < -DF_EPSF32) {
            float delta_x = planes[i].base_x - ray_origin_x;
            float delta_y = planes[i].base_y - ray_origin_y;
            float delta_z = planes[i].base_z - ray_origin_z;
            float num = delta_x * planes[i].normal_x + delta_y * planes[i].normal_y + delta_z * planes[i].normal_z;
            float t = num / dot;
            if (t > result.ray_t) {
                result.ray_t = t;
                /* Project point on plane to image coordinate system */
                float px = ray_origin_x + t * ray_dx;
                float py = ray_origin_y + t * ray_dy;
                float pz = ray_origin_z + t * ray_dz;
                float img_p3_x = px - planes[i].img_p0_x;
                float img_p3_y = py - planes[i].img_p0_y;
                float img_p3_z = pz - planes[i].img_p0_z;
                /* FIXME(Kevin): We would need to take plane rotation into account.
                 * Alternatively, just pass w & h into the plane and let the user
                 * (i.e. higher level code) worry about that */
                float w = planes[i].img_p1_x - planes[i].img_p0_x;
                float h = planes[i].img_p1_y - planes[i].img_p0_y;
                result.img_u =
                    img_p3_x * planes[i].img_ax0_x + img_p3_y * planes[i].img_ax0_y + img_p3_z * planes[i].img_ax0_z;
                result.img_v =
                    img_p3_x * planes[i].img_ax1_x + img_p3_y * planes[i].img_ax1_y + img_p3_z * planes[i].img_ax1_z;
                result.img_u /= w;
                result.img_v /= h;
                result.image = planes[i].image;
            }
        }
        /* Line is parallel to the plane, either contained or not. Do we care? */
    }
    return result;
 }
 DF_API int df_trace_rays(df_trace_rays_settings settings,
                         const df_sphere *spheres,
                         unsigned int sphere_count,
                         const df_plane *planes,
                         unsigned int plane_count,
                         uint8_t **result)
 {
    int image_width = settings.image_width;
    int image_height = settings.image_height;
    float aspect_ratio = (float)image_width / (float)image_height;
    float viewport_height = 2.f;
    float viewport_width = aspect_ratio * viewport_height;
    float focal_length = settings.focal_length;
    /* Simple perspective projection.
     * The lens is placed at (0, 0, 0)
     */
    float lower_left_x = -viewport_width / 2.f;
    float lower_left_y = -viewport_height / 2.f;
    float lower_left_z = -focal_length;
    uint8_t *pixels = malloc(image_width * image_height * 3);
    if (!pixels)
        return 0;
    float max_img_u = 0.f;
    float max_img_v = 0.f;
    for (int y = 0; y < image_height; ++y) {
        /* TODO(Kevin): SIMD */
        uint8_t *row = pixels + y * image_width * 3;
        for (int x = 0; x < image_width; ++x) {
            float u = (float)x / (float)(image_width - 1);
            float v = (float)y / (float)(image_height - 1);
            /* Target = Delta because origin is (0, 0, 0) */
            float target_x = lower_left_x + u * viewport_width;
            float target_y = lower_left_y + v * viewport_height;
            float target_z = lower_left_z;
            /* Raycast against all spheres */
            float sphere_hit_t = sphere_test(0, 0, 0, target_x, target_y, target_z, spheres, sphere_count);
            df_hit plane_hit = plane_test(0, 0, 0, target_x, target_y, target_z, planes, plane_count);
            #if 0
            float hits[] = {sphere_hit_t, plane_hit.ray_t};
            float hit_t = df_max_f32(hits, DF_ARRAY_COUNT(hits));
            if (hit_t >= 0) {
                df_v3 hit_p = {target_x * hit_t, target_y * hit_t, target_z * hit_t};
                hit_p.z -= -1.f;
                df_v3 normal = normalize(hit_p);
                row[x * 3 + 0] = (uint8_t)((.5f * (normal.x + 1.f)) * 255);
                row[x * 3 + 1] = (uint8_t)((.5f * (normal.y + 1.f)) * 255);
                row[x * 3 + 2] = (uint8_t)((.5f * (normal.z + 1.f)) * 255);
            }
            #endif
            if (plane_hit.ray_t >= 0) {
                float img_u = plane_hit.img_u;
                float img_v = plane_hit.img_v;
                // Temporary, handle arbitrary scale (esp. resulting from different orientation)
                float view_aspect = viewport_width / viewport_height;
                if (img_u >= 0 && img_v >= 0 && img_u <= 1.f && img_v <= 1.f) {
                    int pixelx = (int)floorf(img_u * (_image_table.images[plane_hit.image].w - 1));
                    int pixely = (int)floorf(img_v * (_image_table.images[plane_hit.image].h - 1));
                    stbi_uc *pixel = _image_table.images[plane_hit.image].pixels +
                                     4 * (pixely * _image_table.images[plane_hit.image].w + pixelx);
                    row[x * 3 + 0] = pixel[0];
                    row[x * 3 + 1] = pixel[1];
                    row[x * 3 + 2] = pixel[2];
                }
            }
            else {
                /* Gradient background color */
                float len = sqrtf(target_x * target_x + target_y * target_y + target_z * target_z);
                float t = .5f * (target_y / len + 1.f);
                float r = (1.f - t) + t * 0.5f;
                float g = (1.f - t) + t * 0.7f;
                float b = (1.f - t) + t * 1.0f;
                row[x * 3 + 0] = (uint8_t)(r * 255);
                row[x * 3 + 1] = (uint8_t)(g * 255);
                row[x * 3 + 2] = (uint8_t)(b * 255);
            }
        }
    }
    *result = pixels;
    return 1;
 }
--- a/lib/raytracing.c
+++ b/lib/raytracing.c
@ -1,51 +0,0 @@
 #include <defocus/camera.h>
 #include <immintrin.h>
 DF_API void df_evaluate_ray_packet(const df_ray_packet *rays) {
    const __m128 *base_x = (const __m128 *)rays->base_x; 
    const __m128 *base_y = (const __m128 *)rays->base_y;
    const __m128 *base_z = (const __m128 *)rays->base_z;
    const __m128 *dir_x = (const __m128 *)rays->dir_x;
    const __m128 *dir_y = (const __m128 *)rays->dir_y;
    const __m128 *dir_z = (const __m128 *)rays->dir_z;
    /* Simple test: Let rays intersect with plane at z = 350.0 */
    float PLANE_Z = 350.0f;
    __m128 plane_z = _mm_set1_ps(PLANE_Z);
    size_t ray_count = rays->ray_count;
    /* TODO(kevin): divide to multiple threads */
    for (size_t i = 0; i < ray_count; i += 4) {
        __m128 rays_base_x = base_x[i];
        __m128 rays_base_y = base_y[i];
        __m128 rays_base_z = base_z[i];
        __m128 rays_dir_x = dir_x[i];
        __m128 rays_dir_y = dir_y[i];
        __m128 rays_dir_z = dir_z[i];
        /* Solve for t: base.z + t * dir.z = plane_z 
         *              t = (plane_z - base.z) / dir.z 
         */
        __m128 delta = _mm_sub_ps(plane_z, rays_base_z);
        __m128 t = _mm_div_ps(delta, rays_dir_z);
        /* Sample p = base.z + t * dir */
        __m128 sample_p_x = _mm_mul_ps(t, rays_dir_x);
        __m128 sample_p_y = _mm_mul_ps(t, rays_dir_y);
        __m128 sample_p_z = _mm_mul_ps(t, rays_dir_z);
        sample_p_x = _mm_add_ps(sample_p_x, rays_base_x);
        sample_p_y = _mm_add_ps(sample_p_y, rays_base_y);
        sample_p_z = _mm_add_ps(sample_p_z, rays_base_z);
    }
    /* Handle remaining (< 4) rays */
    if ((ray_count % 4) != 0) {
    }
 }
--- a/lib/thin_lense.c
+++ b/lib/thin_lense.c
@ -1,129 +0,0 @@
 #include <defocus/base.h>
 #include <defocus/models.h>
 #include <defocus/image.h>
 #include <math.h>
 #include <stdlib.h>
 /* The thin lense model, implemented as a quasi-raytracer. */
 /** Map uv coordinates in [0, 1) to the unit circle centered at (0, 0). */
 static df_v2 concentric_map_disk(df_v2 in_p)
 {
    df_v2 offset = df_sub_v2(df_mul_v2(2.f, in_p), (df_v2){{1.f, 1.f}});
    if (offset.u == 0.f && offset.v == 0.f)
        return offset;
    float theta, r;
    if (fabsf(offset.u) > fabsf(offset.v)) {
        r = offset.u;
        theta = DF_PI_OVER_4 * (offset.v / offset.u);
    } else {
        r = offset.v;
        theta = DF_PI_OVER_2 - DF_PI_OVER_4 * (offset.u / offset.v);
    }
    return df_mul_v2(r, (df_v2){{cosf(theta), sinf(theta)}});
 }
 void df_thin_lense(df_thin_lense_params params, const df_image *in_image, df_image *out_image)
 {
    /* We do the following:
     * - Orthographic projection (i.e. don't do anything really except flip the y axis)
     * - For each pixel:
     *   - Transform from raster space into camera space
     *   - Generate ray from pixel to plane of focus through lense center
     *   - For each sample:
     *     - Choose random point on lense
     *     - Trace ray from lense point through the above intersection point to the image.
     *   - Sum all samples
     */
    int sample_count = (params.sample_count > 0) ? params.sample_count : 64;
    int w, h;
    df_get_image_size(out_image, &w, &h);
    /* TODO(Kevin): It would be pretty trivial to
     * a) parallelize this, and
     * b) use SIMD for multiple rays
     */
    for (int y = 0; y < h; ++y) {
        float cam_y = (float)(y - h) / 2.f;
        for (int x = 0; x < w; ++x) {
            float cam_x = (float)(x - w) / 2.f;
            df_v3 ray_dir = {{cam_x, cam_y, 1.f}};
            ray_dir = df_normalize_v3(ray_dir);
            ray_dir.z = 1.f;
            /* Calculate the intersection with the plane of focus */
            df_v3 focus_p = df_mul_v3(params.focal_distance, ray_dir);
            uint32_t color[4] = {0, 0, 0, 0};
            for (int sample_idx = 0; sample_idx < sample_count; ++sample_idx) {
                df_v2 lens_uv = {(float)(rand() % 1024) / 1024.f, (float)(rand() % 1024) / 1024.f};
                df_v2 lens_p = df_mul_v2(params.aperture, concentric_map_disk(lens_uv));
                ray_dir.x = focus_p.x - lens_p.x;
                ray_dir.y = focus_p.y - lens_p.y;
                ray_dir.z = focus_p.z;
                ray_dir = df_normalize_v3(ray_dir);
                df_v3 sample_p = df_mul_v3(params.focal_distance, ray_dir);
                int img_x = (int)sample_p.x + w / 2;
                int img_y = (int)sample_p.y + h / 2;
                df_color sample_color = df_get_image_pixel(in_image, img_x, img_y);
                for (int i = 0; i < 4; ++i)
                    color[i] += (uint32_t)sample_color.e[i];
            }
            df_color pixel_color = {
                {color[0] / sample_count, color[1] / sample_count, color[2] / sample_count, color[3] / sample_count}};
            df_set_image_pixel(out_image, x, y, pixel_color);
        }
    }
 #if 0
    /* The guassian lens equation 1/z' - 1/z = 1/f
     * with z' := distance between film and lens,
     *      z  := focal distance, i.e. the distance between lens and the plane of focus
     *      f  := focal length of the lens
     *
     * gives us: z' = fz / (f + z) */
    float film_dist = (params.focal_length * params.focal_distance) / (params.focal_length + params.focal_distance);
    int sample_count = (params.sample_count > 0) ? params.sample_count : 64;
    int w, h;
    df_get_image_size(out_image, &w, &h);
    for (int y = 0; y < h; ++y) {
        float v = (float)y / (float)h;
        for (int x = 0; x < w; ++x) {
            float u = (float)x / (float)w;
            df_v2 img_p = {u, v};
            float color[4] = {0, 0, 0, 0};
            df_v3 focus_ray = {};
            focus_ray.z = 1.f;
            focus_ray.x = 0.f;
            focus_ray.y = 0.f;
            for (int sample_idx = 0; sample_idx < sample_count; ++sample_idx) {
                /* Generate a random sample on the lense */
                df_v2 sample_p = {(float)(rand() % 1024) / 1024.f, (float)(rand() % 1024) / 1024.f};
                df_v2 lens_p = df_mul_v2(params.aperture, concentric_map_disk(sample_p));
                /* Compute the intersection point on the plane of focus. */
                /* TODO: Use a ray from img_p to the center of the lens,
                 * trace to focal_distance.
                 */
            }
        }
    }
 #endif
 }
--- a/lib/utils.c
+++ b/lib/utils.c
@ -0,0 +1,10 @@
 #include <defocus/defocus.h>
 #include <math.h>
 DF_API float df_max_f32(const float *list, unsigned int count)
 {
    float max = -INFINITY;
    for (unsigned int i = 0; i < count; ++i)
        max = (max < list[i]) ? list[i] : max;
    return max;
 }
--- a/meson.build
+++ b/meson.build
@ -9,28 +9,20 @@ if cc.get_id() == 'gcc' or cc.get_id() == 'clang'
  add_project_arguments([ '-msse3', '-msse4.1', '-Wno-missing-braces' ], language: 'c')
 endif
 # Main library
 lib = library('df',
-              'lib/log.c',
+              'lib/raytracer.c',
-              'lib/math.c',
+              'lib/utils.c',
-              'lib/camera.c',
+	          'include/defocus/defocus.h',
-              'lib/pinhole.c',
+              '3p/stb_image.h',
              'lib/image.c',
              'lib/color.c',
              'lib/thin_lense.c',
              'lib/raytracing.c',
              'include/defocus/base.h',
              'include/defocus/camera.h',
              'include/defocus/defocus.h',
              'include/defocus/image.h',
              'include/defocus/intrinsic_helper.h',
              'include/defocus/models.h',
              'include/defocus/scene.h',
              'include/defocus/raytracing.h',
              include_directories: incdir,
              dependencies: m_dep)
 # Command Line Executable
-executable('defocus', 'bin/defocus.c', include_directories: incdir, link_with: lib)
+executable('defocus',
           'bin/defocus.c',
           '3p/stb_image_write.h',
           include_directories: incdir, link_with: lib)
 # Test driver
 #munit_dep = dependency('munit', fallback: ['munit', 'munit_dep'])
--- a/subprojects/.gitignore
+++ b/subprojects/.gitignore
@ -0,0 +1 @@
 munit
--- a/test_image.png
+++ b/test_image.png
Author	SHA1	Message	Date
Kevin Trogant	4b26cec390	actually trace the dumb test image does not take plane rotation into account!	2023-06-19 17:27:23 +02:00
Kevin Trogant	6b8ff8926e	dump state	2023-05-23 13:20:59 +02:00