diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..0e923f7
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+*.png filter=lfs diff=lfs merge=lfs -text
+*.JPG filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
index 1d28e52..1299a31 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 *.o
 builddir
 .cache
+env
diff --git a/P1000914.JPG b/P1000914.JPG
new file mode 100644
index 0000000..5695ca0
--- /dev/null
+++ b/P1000914.JPG
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0fc646b4286cee2f9a9cb4fb5d89e15db7ed161135ca3a6130c74685616e5e13
+size 5181440
diff --git a/bin/defocus.c b/bin/defocus.c
index 9bebb00..23cbe67 100644
--- a/bin/defocus.c
+++ b/bin/defocus.c
@@ -25,6 +25,7 @@ void usage(const char *pname)
 
 int main(int argc, char **argv)
 {
+
     if (argc < 2) {
         fprintf(stderr, "Missing model name!\n");
         usage(argv[0]);
@@ -109,7 +110,7 @@ int pinhole_fn(int argc, char **argv)
         return 1;
     }
 
-    df_pinhole(in_image, focal, in_z, out_z, out_image);
+    df_pinhole((df_pinhole_params){.orig_z = in_z, .new_z = out_z, .focal_length = focal}, in_image, out_image);
 
     int error_code = 0;
     res = df_write_image(out_image, out_path);
@@ -126,6 +127,95 @@ int pinhole_fn(int argc, char **argv)
 
 int thin_lense_fn(int argc, char **argv)
 {
-    fprintf(stderr, "Not implemented yet");
-    return 1;
+    const char *in_path = NULL;
+    const char *out_path = "out.png";
+    const char *focal_str = NULL;
+    const char *aperture_str = NULL;
+    const char *in_z_str = NULL;
+    const char *out_z_str = NULL;
+    const char *samples_str = "64";
+
+    for (int i = 0; i < argc; ++i) {
+        if ((strcmp(argv[i], "-i") == 0 || strcmp(argv[i], "--input") == 0) && i < argc - 1) {
+            ++i;
+            in_path = argv[i];
+        } else if ((strcmp(argv[i], "-o") == 0 || strcmp(argv[i], "--output") == 0) && i < argc - 1) {
+            ++i;
+            out_path = argv[i];
+        } else if ((strcmp(argv[i], "-f") == 0 || strcmp(argv[i], "--focal-length") == 0) && i < argc - 1) {
+            ++i;
+            focal_str = argv[i];
+        } else if ((strcmp(argv[i], "-iz") == 0 || strcmp(argv[i], "--input-z") == 0) && i < argc - 1) {
+            ++i;
+            in_z_str = argv[i];
+        } else if ((strcmp(argv[i], "-oz") == 0 || strcmp(argv[i], "--output-z") == 0) && i < argc - 1) {
+            ++i;
+            out_z_str = argv[i];
+        } else if ((strcmp(argv[i], "-a") == 0 || strcmp(argv[i], "--aperture") == 0) && i < argc - 1) {
+            ++i;
+            aperture_str = argv[i];
+        } else if ((strcmp(argv[i], "-s") == 0 || strcmp(argv[i], "--samples") == 0) && i < argc - 1) {
+            ++i;
+            samples_str = argv[i];
+        } else {
+            in_path = argv[i];
+        }
+    }
+
+    if (!in_path || !focal_str || !in_z_str || !out_z_str) {
+        fprintf(stderr, "Missing model parameters!\n");
+        printf("Pinhole model parameters:\n");
+        printf("  -i  | --input <path>\t\tPath to the input image.\n");
+        printf("  -o  | --output <path>\t\tPath to the output image (Default: out.png).\n");
+        printf("  -iz | --input-z <number>\t\tDistance to the input image plane.\n");
+        printf("  -oz | --output-z <number>\t\tDistance to the output image plane.\n");
+        printf("  -f  | --focal-length <number>\t\tThe focal length of the camera lens.\n");
+        printf("  -a  | --aperture <number>\t\tThe size of the camera lens.\n");
+        printf("  -s  | --samples <number>\t\tThe number of samples per pixel (Default: 64).\n");
+        return 1;
+    }
+
+    float focal, in_z, out_z, aperture;
+    int samples;
+    focal = atof(focal_str);
+    in_z = atof(in_z_str);
+    out_z = atof(out_z_str);
+    aperture = atof(aperture_str);
+    samples = atoi(samples_str);
+
+    df_image *in_image, *out_image;
+
+    int w, h;
+    df_result res = df_load_image(in_path, &w, &h, &in_image);
+    if (res != df_result_success) {
+        fprintf(stderr, "Failed to load %s! (%d)\n", in_path, (int)res);
+        return 1;
+    }
+
+    res = df_create_image(w, h, &out_image);
+    if (res != df_result_success) {
+        fprintf(stderr, "Failed to create output image (%d)\n", (int)res);
+        df_release_image(in_image);
+        return 1;
+    }
+
+    df_thin_lense((df_thin_lense_params){.focal_distance = in_z,
+                                         .out_distance = out_z,
+                                         .focal_length = focal,
+                                         .aperture = aperture,
+                                         .sample_count = samples},
+                  in_image,
+                  out_image);
+
+    int error_code = 0;
+    res = df_write_image(out_image, out_path);
+    if (res != df_result_success) {
+        fprintf(stderr, "Failed to write to output image %s (%d)\n", out_path, (int)res);
+        error_code = 1;
+    }
+
+    df_release_image(in_image);
+    df_release_image(out_image);
+
+    return error_code;
 }
\ No newline at end of file
diff --git a/cam000046.png b/cam000046.png
new file mode 100644
index 0000000..e22cddb
--- /dev/null
+++ b/cam000046.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95f37ebdfb18062a0377d1afac513e048176a7c45f3d431adfad10151899be10
+size 2667544
diff --git a/include/defocus/base.h b/include/defocus/base.h
index a020f49..e8fa3fb 100644
--- a/include/defocus/base.h
+++ b/include/defocus/base.h
@@ -4,6 +4,8 @@
 #include <inttypes.h>
 #include <string.h>
 
+#include <immintrin.h>
+
 /** @file base.h
  * @brief basic utilities
  */
@@ -14,14 +16,11 @@ typedef enum
     /** @brief operation successfull */
     df_result_success = 0,
 
-    /** @brief an opengl error occured */
-    df_result_gl_error = 1,
-
     /** @brief memory allocation failed */
-    df_result_out_of_memory = 2,
+    df_result_out_of_memory = 1,
 
     /** @brief file i/o error */
-    df_result_io_error = 3,
+    df_result_io_error = 2,
 } df_result;
 
 /** @brief Used to silence warnings about unused variables */
@@ -86,6 +85,12 @@ void df_log_impl(int level, const char *file, int line, const char *fmt, ...);
 
 #endif
 
+/* Math constants.
+ */
+#define DF_PI 3.1415926f
+#define DF_PI_OVER_2 1.570796f
+#define DF_PI_OVER_4 0.785398f
+
 /* Basic types */
 
 /** @brief RGBA color */
@@ -124,6 +129,24 @@ typedef union {
     float e[2];
 } df_v2;
 
+/** @brief Add two 2d vectors */
+df_v2 df_add_v2(df_v2 a, df_v2 b);
+
+/** @brief Subtract two 2d vectors */
+df_v2 df_sub_v2(df_v2 a, df_v2 b);
+
+/** @brief Calculate the dot product of 2d vectors a and b.
+ */
+float df_dot_v2(df_v2 a, df_v2 b);
+
+/** @brief Multiply a 2d vector with a scalar.
+ */
+df_v2 df_mul_v2(float t, df_v2 v);
+
+/** @brief Returns the normalized version of a 2d vector v
+ */
+df_v2 df_normalize_v2(df_v2 v);
+
 /** @brief 3d vector */
 typedef union {
     struct
@@ -200,4 +223,38 @@ typedef struct
 /** @brief Calculate the intersection between a line and a plane in 3d space */
 df_line_plane_intersection df_calc_line_plane_intersection(df_line line, df_plane plane);
 
+/** @brief A 4x4 matrix.
+ *
+ * Stored in row major order.
+ */
+typedef struct df_m4
+{
+    _Alignas(16) union {
+        float e[16];
+        __m128 vec[4];
+    };
+} df_m4;
+
+/** Accesses the element at the given row and column of a 4x4 matrix */
+#define DF_M4_AT(m, row, col) ((m).e[(row)*4 + (col)])
+
+/** @brief Matrix multiply 4x4 matrix a and b */
+df_m4 df_mul_m4(df_m4 a, df_m4 b);
+
+/** @brief Get a scale matrix */
+df_m4 df_scale(float x, float y, float z);
+
+df_m4 df_translate(float x, float y, float z);
+
+/** @brief Transform (i.e. multiply) a 3d vector v by the transformation matrix T */
+df_v3 df_transform_v3(df_m4 T, df_v3 v);
+
+/** @brief Calculate the inverse of a non-scaling transform matrix.
+ *
+ * Special fast case.
+ */
+df_m4 df_inverse_transform_no_scale(df_m4 M);
+
+/** @brief Calculate the inverse of a transform matrix */
+df_m4 df_inverse_transform(df_m4 M);
 #endif
\ No newline at end of file
diff --git a/include/defocus/camera.h b/include/defocus/camera.h
new file mode 100644
index 0000000..21a0657
--- /dev/null
+++ b/include/defocus/camera.h
@@ -0,0 +1,50 @@
+#ifndef DEFOCUS_CAMERA_H
+#define DEFOCUS_CAMERA_H
+
+#include "base.h"
+
+/** @file camera.h
+ * @brief basic camera functions
+ */
+
+/** @brief Stores information for rays in a SIMD friendly way */
+typedef struct
+{
+    /** Packs all SIMD data into a single buffer */
+    float *simd_mem;
+
+    /** Source uvs for rays */
+    df_v2 *ray_uvs;
+
+    /* Points into simd_mem */
+    float *base_x;
+    float *base_y;
+    float *base_z;
+    float *dir_x;
+    float *dir_y;
+    float *dir_z;
+
+    size_t ray_count;
+} df_ray_packet;
+
+/** @brief Free ray packet memory */
+void df_release_ray_packet(df_ray_packet *rays);
+
+/** @brief Interface for cameras. */
+typedef struct
+{
+    /** Release the camera object */
+    void (*release)(void *o);
+
+    /** Builds a ray packet of all rays that need to be evaluated to generate the image.
+     */
+    df_ray_packet (*build_ray_packet)(void *o);
+
+    /** Opaque pointer to the camera object */
+    void *o;
+} df_camera_i;
+
+df_camera_i
+df_create_perspective_camera(float image_width, float image_height, float raster_width, float raster_height);
+
+#endif
\ No newline at end of file
diff --git a/include/defocus/intrinsic_helper.h b/include/defocus/intrinsic_helper.h
new file mode 100644
index 0000000..c24b01f
--- /dev/null
+++ b/include/defocus/intrinsic_helper.h
@@ -0,0 +1,38 @@
+#ifndef DEFOCUS_INTRINSIC_HELPER_H
+#define DEFOCUS_INTRINSIC_HELPER_H
+
+/** @file intrinsic_helper.h
+ * @brief Helper macros for intrinsics
+ *
+ * Assumes that at least SS3 is available.
+ */
+
+#include <immintrin.h>
+#include <pmmintrin.h>
+
+/* Shuffle mask for _mm_shuffle_epi32 */
+#define DF_MAKE_SHUFFLE_MASK(x, y, z, w) (x | (y << 2) | (z << 4) | (w << 6))
+
+#define DF_VEC_SWIZZLE_MASK(vec, mask) _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vec), mask))
+
+#define DF_VEC_SWIZZLE(vec, x, y, z, w) DF_VEC_SWIZZLE_MASK(vec, DF_MAKE_SHUFFLE_MASK(x, y, z, w))
+
+// "broadcasts" element x [0, 1, 2, 3] to all elements of the result */
+#define DF_VEC_SWIZZLE1(vec, x) DF_VEC_SWIZZLE_MASK(vec, DF_MAKE_SHUFFLE_MASK(x, x, x, x))
+
+/* returns [vec0, vec0, vec2, vec2] */
+#define DF_VEC_SWIZZLE_0022(vec) _mm_moveldup_ps(vec)
+
+/* returns [vec1, vec1, vec3, vec3] */
+#define DF_VEC_SWIZZLE_1133(vec) _mm_movehdup_ps(vec)
+
+/* returns [vec1[x], vec1[y], vec2[z], vec2[w]] */
+#define DF_VEC_SHUFFLE(vec1, vec2, x, y, z, w) _mm_shuffle_ps(vec1, vec2, DF_MAKE_SHUFFLE_MASK(x, y, z, w))
+
+/* returns [vec1[0], vec1[1], vec2[0], vec2[1]] */
+#define DF_VEC_SHUFFLE_0101(vec1, vec2) _mm_movelh_ps(vec1, vec2)
+
+/* returns [vec1[2], vec1[3], vec2[2], vec2[3]] */
+#define DF_VEC_SHUFFLE_2323(vec1, vec2) _mm_movehl_ps(vec2, vec1)
+
+#endif
diff --git a/include/defocus/models.h b/include/defocus/models.h
index 26e72af..6f7320a 100644
--- a/include/defocus/models.h
+++ b/include/defocus/models.h
@@ -7,34 +7,71 @@
  * Camera models are usually implemented as a function taking an input image and parameters required for the model
  * and produce an output image.
  *
+ * Parameters are contained in a parameter struct, because this enables us to something similar to named parameters.
+ * We can also do default parameters, by leaving some values set to zero.
+ *
+ * @code{c}
+ * df_thin_lense((df_thin_lense){
+ *     .focal_length = 42.f,
+ *     .aperture = 21.f,
+ *     .focal_distance = 150.f,
+ *     .out_distance = 200.f},
+ *     in_image,
+ *     out_image);
+ * @endcode
+ *
  * All camera models use the same coordinate space: The camera looks along the positive z-axis in a right-handed
  * coordinate system.  (-> positive y is down, positive x is right.)
+
  */
 
 #include "image.h"
 
+/** Parameters for the pinhole model. */
+typedef struct df_pinhole_params
+{
+    float focal_length; /**< the cameras focal length in millimeters. */
+    float orig_z;       /**< the distance of the input image from the camera. */
+    float new_z;        /**< the distance of the generated output image. */
+} df_pinhole_params;
+
 /** @brief Simple pinhole camera model.
  *
  * The simplest possible(?) camera model.
  * This function takes an input image at a known distance from the camera and moves it to a new distance.
  *
- * @param in_image input image
- * @param focal_length the cameras focal length in millimeters.
- * @param orig_z the distance of the input image from the camera.
- * @param new_z the distance of the generated output image.
+ * @param params model parameters.
+ * @param in_image input image.
  * @param out_image the output image.
  */
-void df_pinhole(const df_image *in_image, float focal_length, float orig_z, float new_z, df_image *out_image);
+void df_pinhole(df_pinhole_params params, const df_image *in_image, df_image *out_image);
+
+/** Parameters for the thin lense model.
+ */
+typedef struct df_thin_lense_params
+{
+    /** The cameras focal length in millimeters. */
+    float focal_length;
+
+    /** The aperture size in millimeters */
+    float aperture;
+
+    /** The distance between the plane of focus and the camera lens, in millimeters */
+    float focal_distance;
+
+    /** The distance between the output image and the camera lens, in millimeters */
+    float out_distance;
+
+    /** The number of samples per pixel. Leave at 0 for the default value (64). */
+    int sample_count;
+} df_thin_lense_params;
 
 /** @brief Thin-lense model.
  *
+ * @param params model parameters
  * @param in_image input image
- * @param focal_length the cameras focal length in millimeters.
- * @param aperture the aperture size in millimeters.
- * @param orig_z the distnce of the input image from the camera.
  * @param out_image the output image.
  */
-void df_thin_lense(
-    const df_image *in_image, float focal_length, float aperture, float orig_z, float new_z, df_image *out_image);
+void df_thin_lense(df_thin_lense_params params, const df_image *in_image, df_image *out_image);
 
 #endif
diff --git a/include/defocus/scene.h b/include/defocus/scene.h
new file mode 100644
index 0000000..89cbe02
--- /dev/null
+++ b/include/defocus/scene.h
@@ -0,0 +1,37 @@
+#ifndef DEFOCUS_SCENE_H
+#define DEFOCUS_SCENE_H
+
+#include "base.h"
+#include "image.h"
+#include "camera.h"
+
+/** @file scene.h
+ * @brief scene structure */
+
+/** Opaque scene structure */
+typedef struct df_scene df_scene;
+
+df_scene *df_create_scene();
+
+void df_release_scene(df_scene *scene);
+
+void df_scene_add_plane(df_scene *scene, df_plane plane, df_image *texture);
+
+df_image *df_scene_generate_image(df_scene *scene, df_camera_i camera);
+
+#if 0
+/* Usage example */
+
+df_camera_i camera = df_create_perspective_camera(...);
+df_scene* scene = df_create_scene();
+
+df_scene_add_plane(..., in_image);
+
+df_image *out_image = df_scene_generate_image(scene, camera);
+
+df_release_scene(scene);
+camera->release(camera->o);
+
+#endif
+
+#endif
diff --git a/lib/camera.c b/lib/camera.c
new file mode 100644
index 0000000..988951e
--- /dev/null
+++ b/lib/camera.c
@@ -0,0 +1,53 @@
+#include <defocus/camera.h>
+
+#include <string.h>
+
+/* ********************************************************
+ *
+ * Ray Packet Functions
+ *
+ * ********************************************************/
+
+void df_release_ray_packet(df_ray_packet *rays)
+{
+    free(rays->simd_mem);
+    free(rays->ray_uvs);
+
+    memset(rays, 0, sizeof(*rays));
+}
+
+/* ********************************************************
+ *
+ * Perspective Camrea
+ *
+ * ********************************************************/
+
+typedef struct
+{
+    float focal_dist;
+    float lens_radius;
+
+    df_m4 screen_to_raster;
+    df_m4 raster_to_screen;
+    df_m4 raster_to_camera;
+
+} df_perspective_camera;
+
+static void pc_release(void *o) { df_perspective_camera *camera = o; }
+
+static df_ray_packet pc_build_ray_packet(void *o) { df_perspective_camera *camera = o; }
+
+df_camera_i df_create_perspective_camera(float image_width, float image_height, float raster_width, float raster_height)
+{
+    df_perspective_camera *camera = malloc(sizeof(*camera));
+
+    camera->screen_to_raster = df_scale(raster_width, raster_height, 1.f);
+    camera->screen_to_raster =
+        df_mul_m4(camera->screen_to_raster, df_scale(1.f / image_width, -1.f / image_height, 1.f));
+    camera->screen_to_raster = df_mul_m4(camera->screen_to_raster, df_translate(0.f, -image_height, 0.f));
+
+    camera->raster_to_screen = df_inverse_transform(camera->screen_to_raster);
+
+    df_camera_i iface = {.release = pc_release, .build_ray_packet = pc_build_ray_packet, .o = camera};
+    return iface;
+}
diff --git a/lib/math.c b/lib/math.c
index bad6c3a..204d85b 100644
--- a/lib/math.c
+++ b/lib/math.c
@@ -1,6 +1,37 @@
 #include <defocus/base.h>
+#include <defocus/intrinsic_helper.h>
 
 #include <math.h>
+#include <immintrin.h>
+#include <pmmintrin.h>
+
+df_v2 df_add_v2(df_v2 a, df_v2 b)
+{
+    df_v2 v = {a.x + b.x, a.y + b.y};
+    return v;
+}
+
+df_v2 df_sub_v2(df_v2 a, df_v2 b)
+{
+    df_v2 v = {a.x - b.x, a.y - b.y};
+    return v;
+}
+
+float df_dot_v2(df_v2 a, df_v2 b) { return a.x * b.x + a.y * b.y; }
+
+df_v2 df_mul_v2(float t, df_v2 v)
+{
+    df_v2 r = {t * v.x, t * v.y};
+    return r;
+}
+
+df_v2 df_normalize_v2(df_v2 v)
+{
+    float len_square = df_dot_v2(v, v);
+    float len = sqrtf(len_square);
+    df_v2 n = {v.x / len, v.y / len};
+    return n;
+}
 
 df_v3 df_add_v3(df_v3 a, df_v3 b)
 {
@@ -57,4 +88,150 @@ df_line_plane_intersection df_calc_line_plane_intersection(df_line line, df_plan
             return intersection;
         }
     }
-}
\ No newline at end of file
+}
+
+df_m4 df_mul_m4(df_m4 a, df_m4 b)
+{
+    /* Super simple, we could probably do it a lot better via SIMD. */
+    df_m4 p;
+    for (int row = 0; row < 4; ++row) {
+        for (int col = 0; col < 4; ++col) {
+            DF_M4_AT(p, row, col) = 0.f;
+            for (int i = 0; i < 4; ++i) {
+                DF_M4_AT(p, row, col) += DF_M4_AT(a, row, i) * DF_M4_AT(b, i, col);
+            }
+        }
+    }
+    return p;
+}
+
+df_m4 df_scale(float x, float y, float z)
+{
+    /* clang-format off */
+    df_m4 s = {{
+          x, 0.f, 0.f, 0.f,
+        0.f,   y, 0.f, 0.f,
+        0.f, 0.f,   z, 0.f,
+        0.f, 0.f, 0.f, 1.f,
+    }};
+    /* clang-format on */
+    return s;
+}
+
+df_m4 df_translate(float x, float y, float z)
+{
+    /* clang-format off */
+    df_m4 t = {{
+        1.f, 0.f, 0.f, x,
+        0.f, 1.f, 0.f, y,
+        0.f, 0.f, 1.f, z,
+        0.f, 0.f, 0.f, 1.f,
+    }};
+    /* clang-format on */
+    return t;
+}
+
+/** SSE3 version of a horizontal sum:
+ * computes the sum of 4 32bit floats inside v
+ */
+static float hsum(__m128 v)
+{
+    /* v = [v0, v1, v2, v3] */
+
+    /* shuf = [v1, v1, v3, v3] */
+    __m128 shuf = _mm_movehdup_ps(v);
+
+    /* sum = [v0 + v1, 2*v1, v2 + v3, 2*v3] */
+    __m128 sum = _mm_add_ps(v, shuf);
+
+    /* shuf = [v2 + v3, 2*v3, v3, v3] */
+    shuf = _mm_movehl_ps(shuf, sum);
+
+    /* sum = [v0 + v1 + v2 + v3, ...] */
+    sum = _mm_add_ss(sum, shuf);
+
+    return _mm_cvtss_f32(sum);
+}
+
+df_v3 df_transform_v3(df_m4 T, df_v3 v)
+{
+    df_v3 transf;
+    _Alignas(16) float tmp_v[4] = {v.x, v.y, v.z, 1.f};
+    __m128 homov = _mm_load_ps(tmp_v);
+
+    __m128 row0 = _mm_load_ps(&T.e[0]);
+    __m128 row1 = _mm_load_ps(&T.e[4]);
+    __m128 row2 = _mm_load_ps(&T.e[8]);
+
+    __m128 prod = _mm_mul_ps(row0, homov);
+    transf.x = hsum(prod);
+    prod = _mm_mul_ps(row1, homov);
+    transf.y = hsum(prod);
+    prod = _mm_mul_ps(row2, homov);
+    transf.z = hsum(prod);
+
+    return transf;
+}
+
+/* Fast 4x4 matrix inverse via SIMD adapted from:
+ * https://lxjk.github.io/2017/09/03/Fast-4x4-Matrix-Inverse-with-SSE-SIMD-Explained.html
+ */
+
+/* Only works if M is a transform matrix without scale:
+ *
+ * | R T |
+ * | 0 1 |
+ */
+df_m4 df_inverse_transform_no_scale(df_m4 M)
+{
+    df_m4 I = {0.f};
+
+    /* transpose 3x3, we know that m03 = m13 = m23 = 0 */
+    __m128 t0 = DF_VEC_SHUFFLE_0101(M.vec[0], M.vec[1]); /* 00, 01, 10, 11 */
+    __m128 t1 = DF_VEC_SHUFFLE_2323(M.vec[0], M.vec[1]); /* 02, 03, 12, 13 */
+    I.vec[0] = DF_VEC_SHUFFLE(t0, M.vec[2], 0, 2, 0, 3); /* 00, 01, 20, 23(=0) */
+    I.vec[1] = DF_VEC_SHUFFLE(t0, M.vec[2], 1, 3, 1, 3); /* 01, 11, 21, 23(=0) */
+    I.vec[2] = DF_VEC_SHUFFLE(t1, M.vec[2], 0, 2, 2, 3); /* 02, 12, 22, 23(=0) */
+
+    /* last */
+    I.vec[3] = _mm_mul_ps(I.vec[0], DF_VEC_SWIZZLE1(M.vec[3], 0));
+    I.vec[3] = _mm_add_ps(I.vec[3], _mm_mul_ps(I.vec[1], DF_VEC_SWIZZLE1(M.vec[3], 1)));
+    I.vec[3] = _mm_add_ps(I.vec[3], _mm_mul_ps(I.vec[2], DF_VEC_SWIZZLE1(M.vec[3], 2)));
+    I.vec[3] = _mm_sub_ps(_mm_setr_ps(0.f, 0.f, 0.f, 1.f), I.vec[3]);
+
+    return I;
+}
+
+df_m4 df_inverse_transform(df_m4 M)
+{
+#define SMALL_NUMBER (1.e-8f)
+
+    df_m4 I = {0.f};
+
+    /* transpose 3x3, we know that m03 = m13 = m23 = 0 */
+    __m128 t0 = DF_VEC_SHUFFLE_0101(M.vec[0], M.vec[1]); /* 00, 01, 10, 11 */
+    __m128 t1 = DF_VEC_SHUFFLE_2323(M.vec[0], M.vec[1]); /* 02, 03, 12, 13 */
+    I.vec[0] = DF_VEC_SHUFFLE(t0, M.vec[2], 0, 2, 0, 3); /* 00, 01, 20, 23(=0) */
+    I.vec[1] = DF_VEC_SHUFFLE(t0, M.vec[2], 1, 3, 1, 3); /* 01, 11, 21, 23(=0) */
+    I.vec[2] = DF_VEC_SHUFFLE(t1, M.vec[2], 0, 2, 2, 3); /* 02, 12, 22, 23(=0) */
+
+    /* divide by the squared scale */
+    __m128 size_sqr = _mm_mul_ps(I.vec[0], I.vec[0]);
+    size_sqr = _mm_add_ps(size_sqr, _mm_mul_ps(I.vec[1], I.vec[1]));
+    size_sqr = _mm_add_ps(size_sqr, _mm_mul_ps(I.vec[2], I.vec[2]));
+
+    __m128 r_size_sqr = _mm_div_ps(_mm_set1_ps(1.f), size_sqr);
+
+    I.vec[0] = _mm_mul_ps(I.vec[0], r_size_sqr);
+    I.vec[1] = _mm_mul_ps(I.vec[1], r_size_sqr);
+    I.vec[2] = _mm_mul_ps(I.vec[2], r_size_sqr);
+
+    /* last */
+    I.vec[3] = _mm_mul_ps(I.vec[0], DF_VEC_SWIZZLE1(M.vec[3], 0));
+    I.vec[3] = _mm_add_ps(I.vec[3], _mm_mul_ps(I.vec[1], DF_VEC_SWIZZLE1(M.vec[3], 1)));
+    I.vec[3] = _mm_add_ps(I.vec[3], _mm_mul_ps(I.vec[2], DF_VEC_SWIZZLE1(M.vec[3], 2)));
+    I.vec[3] = _mm_sub_ps(_mm_setr_ps(0.f, 0.f, 0.f, 1.f), I.vec[3]);
+
+    return I;
+#undef SMALL_NUMBER
+}
diff --git a/lib/pinhole.c b/lib/pinhole.c
index af2538d..40e3b49 100644
--- a/lib/pinhole.c
+++ b/lib/pinhole.c
@@ -4,7 +4,7 @@
 
 #include <math.h>
 
-void df_pinhole(const df_image *in_image, float focal_length, float orig_z, float new_z, df_image *out_image)
+void df_pinhole(df_pinhole_params params, const df_image *in_image, df_image *out_image)
 {
     /* orig_z is the z-coordinate of the original image plane (=> in_image is located there)
      * new_z  is the z-coordinate of the output image plane (=> out_image is located there)
@@ -25,8 +25,8 @@ void df_pinhole(const df_image *in_image, float focal_length, float orig_z, floa
     int out_w, out_h;
     df_get_image_size(out_image, &out_w, &out_h);
 
-    double in_z_over_f = orig_z / focal_length;
-    double out_f_over_z = focal_length / new_z;
+    double in_z_over_f = params.orig_z / params.focal_length;
+    double out_f_over_z = params.focal_length / params.new_z;
 
     int prev_out_iy = -1;
     for (int iy = 0; iy < h; ++iy) {
diff --git a/lib/thin_lense.c b/lib/thin_lense.c
index 1c048b2..fc74ce3 100644
--- a/lib/thin_lense.c
+++ b/lib/thin_lense.c
@@ -1,6 +1,129 @@
+#include <defocus/base.h>
 #include <defocus/models.h>
+#include <defocus/image.h>
 
-void df_thin_lense(
-    const df_image *in_image, float focal_length, float aperture, float orig_z, float new_z, df_image *out_image)
+#include <math.h>
+#include <stdlib.h>
+
+/* The thin lense model, implemented as a quasi-raytracer. */
+
+/** Map uv coordinates in [0, 1) to the unit circle centered at (0, 0). */
+static df_v2 concentric_map_disk(df_v2 in_p)
 {
+    df_v2 offset = df_sub_v2(df_mul_v2(2.f, in_p), (df_v2){{1.f, 1.f}});
+    if (offset.u == 0.f && offset.v == 0.f)
+        return offset;
+
+    float theta, r;
+    if (fabsf(offset.u) > fabsf(offset.v)) {
+        r = offset.u;
+        theta = DF_PI_OVER_4 * (offset.v / offset.u);
+    } else {
+        r = offset.v;
+        theta = DF_PI_OVER_2 - DF_PI_OVER_4 * (offset.u / offset.v);
+    }
+    return df_mul_v2(r, (df_v2){{cosf(theta), sinf(theta)}});
+}
+
+void df_thin_lense(df_thin_lense_params params, const df_image *in_image, df_image *out_image)
+{
+    /* We do the following:
+     * - Orthographic projection (i.e. don't do anything really except flip the y axis)
+     * - For each pixel:
+     *   - Transform from raster space into camera space
+     *   - Generate ray from pixel to plane of focus through lense center
+     *   - For each sample:
+     *     - Choose random point on lense
+     *     - Trace ray from lense point through the above intersection point to the image.
+     *   - Sum all samples
+     */
+
+    int sample_count = (params.sample_count > 0) ? params.sample_count : 64;
+
+    int w, h;
+    df_get_image_size(out_image, &w, &h);
+
+    /* TODO(Kevin): It would be pretty trivial to
+     * a) parallelize this, and
+     * b) use SIMD for multiple rays
+     */
+    for (int y = 0; y < h; ++y) {
+        float cam_y = (float)(y - h) / 2.f;
+        for (int x = 0; x < w; ++x) {
+            float cam_x = (float)(x - w) / 2.f;
+
+            df_v3 ray_dir = {{cam_x, cam_y, 1.f}};
+            ray_dir = df_normalize_v3(ray_dir);
+            ray_dir.z = 1.f;
+
+            /* Calculate the intersection with the plane of focus */
+            df_v3 focus_p = df_mul_v3(params.focal_distance, ray_dir);
+
+            uint32_t color[4] = {0, 0, 0, 0};
+            for (int sample_idx = 0; sample_idx < sample_count; ++sample_idx) {
+                df_v2 lens_uv = {(float)(rand() % 1024) / 1024.f, (float)(rand() % 1024) / 1024.f};
+                df_v2 lens_p = df_mul_v2(params.aperture, concentric_map_disk(lens_uv));
+
+                ray_dir.x = focus_p.x - lens_p.x;
+                ray_dir.y = focus_p.y - lens_p.y;
+                ray_dir.z = focus_p.z;
+                ray_dir = df_normalize_v3(ray_dir);
+
+                df_v3 sample_p = df_mul_v3(params.focal_distance, ray_dir);
+
+                int img_x = (int)sample_p.x + w / 2;
+                int img_y = (int)sample_p.y + h / 2;
+
+                df_color sample_color = df_get_image_pixel(in_image, img_x, img_y);
+
+                for (int i = 0; i < 4; ++i)
+                    color[i] += (uint32_t)sample_color.e[i];
+            }
+
+            df_color pixel_color = {
+                {color[0] / sample_count, color[1] / sample_count, color[2] / sample_count, color[3] / sample_count}};
+
+            df_set_image_pixel(out_image, x, y, pixel_color);
+        }
+    }
+
+#if 0
+    /* The guassian lens equation 1/z' - 1/z = 1/f
+     * with z' := distance between film and lens,
+     *      z  := focal distance, i.e. the distance between lens and the plane of focus
+     *      f  := focal length of the lens
+     *
+     * gives us: z' = fz / (f + z) */
+    float film_dist = (params.focal_length * params.focal_distance) / (params.focal_length + params.focal_distance);
+
+    int sample_count = (params.sample_count > 0) ? params.sample_count : 64;
+
+    int w, h;
+    df_get_image_size(out_image, &w, &h);
+
+    for (int y = 0; y < h; ++y) {
+        float v = (float)y / (float)h;
+        for (int x = 0; x < w; ++x) {
+            float u = (float)x / (float)w;
+            df_v2 img_p = {u, v};
+            float color[4] = {0, 0, 0, 0};
+
+            df_v3 focus_ray = {};
+            focus_ray.z = 1.f;
+            focus_ray.x = 0.f;
+            focus_ray.y = 0.f;
+
+            for (int sample_idx = 0; sample_idx < sample_count; ++sample_idx) {
+                /* Generate a random sample on the lense */
+                df_v2 sample_p = {(float)(rand() % 1024) / 1024.f, (float)(rand() % 1024) / 1024.f};
+                df_v2 lens_p = df_mul_v2(params.aperture, concentric_map_disk(sample_p));
+
+                /* Compute the intersection point on the plane of focus. */
+                /* TODO: Use a ray from img_p to the center of the lens,
+                 * trace to focal_distance.
+                 */
+            }
+        }
+    }
+#endif
 }
\ No newline at end of file
diff --git a/meson.build b/meson.build
index 84f5252..47460e2 100644
--- a/meson.build
+++ b/meson.build
@@ -5,9 +5,12 @@ incdir = include_directories('include', '3p')
 cc = meson.get_compiler('c')
 m_dep = cc.find_library('m', required: false)
 
+add_project_arguments([ '-msse3', '-Wno-missing-braces' ], language: 'c')
+
 lib = library('df', 
               'lib/log.c',
               'lib/math.c',
+              'lib/camera.c',
               'lib/pinhole.c',
               'lib/image.c',
               'lib/color.c',
@@ -16,4 +19,10 @@ lib = library('df',
               dependencies: m_dep,
               version: '0.1.0',
               soversion: '0')
-executable('defocus', 'bin/defocus.c', include_directories: incdir, link_with: lib)
\ No newline at end of file
+
+# Command Line Executable
+executable('defocus', 'bin/defocus.c', include_directories: incdir, link_with: lib)
+
+# Test driver
+munit_dep = dependency('munit', fallback: ['munit', 'munit_dep'])
+executable('tests', 'tests/tests.c', include_directories: incdir, link_with: lib, dependencies: munit_dep)
\ No newline at end of file
diff --git a/subprojects/munit.wrap b/subprojects/munit.wrap
new file mode 100644
index 0000000..c53b275
--- /dev/null
+++ b/subprojects/munit.wrap
@@ -0,0 +1,4 @@
+[wrap-git]
+directory=munit
+url=https://github.com/nemequ/munit/
+revision=head
diff --git a/tests/tests.c b/tests/tests.c
new file mode 100644
index 0000000..d476c94
--- /dev/null
+++ b/tests/tests.c
@@ -0,0 +1,67 @@
+#include "munit.h"
+#include <defocus/defocus.h>
+
+static MunitResult test_transforms_identity(const MunitParameter params[], void *user_data)
+{
+    /* Check that the identity matrix does what it's supposed to do. */
+    /* clang-format off */
+    df_m4 identity = {
+        {1.f, 0.f, 0.f, 0.f,
+         0.f, 1.f, 0.f, 0.f,
+         0.f, 0.f, 1.f, 0.f,
+         0.f, 0.f, 0.f, 1.f}
+    };
+    /* clang-format on */
+
+    df_v3 v = {1.f, 2.f, 3.f};
+
+    df_v3 identity_transform = df_transform_v3(identity, v);
+
+    munit_assert_double_equal(v.x, identity_transform.x, 6);
+    munit_assert_double_equal(v.y, identity_transform.y, 6);
+    munit_assert_double_equal(v.z, identity_transform.z, 6);
+
+    return MUNIT_OK;
+}
+
+static MunitResult test_transforms_translation(const MunitParameter params[], void *user_data)
+{
+    /* Check that the identity matrix does what it's supposed to do. */
+    /* clang-format off */
+    df_m4 translate = {
+        {1.f, 0.f, 0.f,  1.f,
+         0.f, 1.f, 0.f, -1.f,
+         0.f, 0.f, 1.f,  .5f,
+         0.f, 0.f, 0.f,  1.f}
+    };
+    /* clang-format on */
+
+    df_v3 v = {1.f, 2.f, 3.f};
+
+    df_v3 tv = df_transform_v3(translate, v);
+
+    munit_assert_double_equal(tv.x, v.x + 1.f, 6);
+    munit_assert_double_equal(tv.y, v.y - 1.f, 6);
+    munit_assert_double_equal(tv.z, v.z + .5f, 6);
+
+    return MUNIT_OK;
+}
+
+static MunitTest _tests[] = {
+    {(char *)"/math/transforms/identity", test_transforms_identity, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL},
+    {(char *)"/math/transforms/translation", test_transforms_translation, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL},
+    {NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL}};
+
+static MunitSuite _test_suite = {
+    "",
+    _tests,
+    NULL,
+    1,
+    MUNIT_SUITE_OPTION_NONE,
+};
+
+int main(int argc, char **argv)
+{
+    munit_suite_main(&_test_suite, NULL, argc, argv);
+    return 0;
+}