Commits

Sepehr Taghdisian committed 40f4a7a

csm optimization, lod improvements

Comments (0)

Files changed (19)

data/shaders/glsl/csm.gs.glsl

 out vec2 gso_coord;
 #endif
 
+layout(std140) uniform cb_frame_gs   /* constants read by this geometry shader */
+{
+    vec4 c_cascade_planes[4*_CASCADE_CNT_]; /* 4 planes per cascade, cascade N uses [4*N .. 4*N+3] */
+};
+
+/* tests the three vertices of a triangle against a single plane
+ * the per-vertex value is dot(v.xyz, plane.xyz) + plane.w
+ * returns 1 if at least one vertex gives a value >= 0
+ * returns 0 (not intersected) only when all three values are negative */
+uint test_tri_singleplane(vec4 v0, vec4 v1, vec4 v2, vec4 plane)
+{
+    float i1 = dot(v0.xyz, plane.xyz) + plane.w;
+    float i2 = dot(v1.xyz, plane.xyz) + plane.w;
+    float i3 = dot(v2.xyz, plane.xyz) + plane.w;
+    return uint(i1 >= 0) | uint(i2 >= 0) | uint(i3 >= 0);
+}
+
+bool test_tri_planes(vec4 plane1, vec4 plane2, vec4 plane3, vec4 plane4, 
+    vec4 v0, vec4 v1, vec4 v2)
+{   /* true only if the triangle passes test_tri_singleplane for every one of the 4 planes */
+    uint t1 = test_tri_singleplane(v0, v1, v2, plane1);
+    uint t2 = test_tri_singleplane(v0, v1, v2, plane2);
+    uint t3 = test_tri_singleplane(v0, v1, v2, plane3);
+    uint t4 = test_tri_singleplane(v0, v1, v2, plane4);
+    return bool(t1 & t2 & t3 & t4);    /* each result is 0 or 1, so a single 0 rejects the triangle */
+}
+
+/* */
 void main()
 {
     /* generate 3 triangles for each input triangle and send them to 3 views (cascade) */
-    /* view #1 */
-    gl_Layer = 0;
-    for (int i = 0; i < 3; i++)    {
-        gl_Position = verts[i].pos0;
+    /* tri#1 -> cascade #1 */
+    if (test_tri_planes(c_cascade_planes[0], c_cascade_planes[1], c_cascade_planes[2],
+        c_cascade_planes[3], verts[0].pos0, verts[1].pos0, verts[2].pos0))
+    {
+        gl_Layer = 0;
+        for (uint i = 0; i < 3; i++)    {
+            gl_Position = verts[i].pos0;
 #if defined(_ALPHAMAP_)
-        gso_coord = verts[i].coord;
+            gso_coord = verts[i].coord;
 #endif
-        EmitVertex();
+            EmitVertex();
+        }
+        EndPrimitive();
     }
-    EndPrimitive();
 
-    /* view #2 */
-    gl_Layer = 1;
-    for (int i = 0; i < 3; i++)    {
-        gl_Position = verts[i].pos1;
+    /* tri#2 -> cascade #2 */
+    if (test_tri_planes(c_cascade_planes[4], c_cascade_planes[5], c_cascade_planes[6],
+        c_cascade_planes[7], verts[0].pos1, verts[1].pos1, verts[2].pos1))
+    {
+        gl_Layer = 1;
+        for (uint i = 0; i < 3; i++)    {
+            gl_Position = verts[i].pos1;
 #if defined(_ALPHAMAP_)
-        gso_coord = verts[i].coord;
+            gso_coord = verts[i].coord;
 #endif
-        EmitVertex();
+            EmitVertex();
+        }
+        EndPrimitive();
     }
-    EndPrimitive();
 
-    /* view #3 */
-    gl_Layer = 2;
-    for (int i = 0; i < 3; i++)    {
-        gl_Position = verts[i].pos2;
+    /* tri #3 -> cascade #3 */
+    if (test_tri_planes(c_cascade_planes[8], c_cascade_planes[9], c_cascade_planes[10],
+        c_cascade_planes[11], verts[0].pos2, verts[1].pos2, verts[2].pos2))
+    {    
+        gl_Layer = 2;
+        for (uint i = 0; i < 3; i++)    {
+            gl_Position = verts[i].pos2;
 #if defined(_ALPHAMAP_)
-        gso_coord = verts[i].coord;
+            gso_coord = verts[i].coord;
 #endif
-        EmitVertex();
+            EmitVertex();
+        }
+        EndPrimitive();
     }
-    EndPrimitive();
 }
 
 

data/shaders/glsl/occ-prev.ps.glsl

 /* textures */
 uniform sampler2D s_depth;
 
+#if defined(_EXTRA_)
+uniform sampler2D s_depth_ext;
+#endif
+
 /* uniforms */
 uniform vec2 c_camprops; /* x = near, y = far */
 
     float depth_zbuff = textureLod(s_depth, vec2(vso_coord.x, 1.0 - vso_coord.y), 0);
     float depth = (2.0f * c_camprops.x)/(c_camprops.y  + c_camprops.x - 
         depth_zbuff*(c_camprops.y - c_camprops.x));
-    pso_color = vec4(depth, depth, depth, 1);
+    vec4 color = vec4(depth, depth, depth, 1);
+
+#if defined(_EXTRA_)
+    float depth_zbuff_ext = textureLod(s_depth_ext, vec2(vso_coord.x, 1.0 - vso_coord.y), 0);
+    float depth_ext = (2.0f * c_camprops.x)/(c_camprops.y  + c_camprops.x - 
+        depth_zbuff_ext*(c_camprops.y - c_camprops.x));
+    if (depth_ext < depth)
+        color *= vec4(depth_ext, 0, 0, 1);
+#endif
+
+    pso_color = color;
 }

data/shaders/hlsl/csm.gs.hlsl

 #endif
 };
 
+cbuffer cb_frame_gs   /* constants read by this geometry shader */
+{
+    float4 c_cascade_planes[4*_CASCADE_CNT_]; /* 4 planes per cascade, cascade N uses [4*N .. 4*N+3] */
+};
+
+/* tests the three vertices of a triangle against a single plane
+ * the per-vertex value is dot(v.xyz, plane.xyz) + plane.w
+ * returns non-zero if at least one vertex gives a value >= 0
+ * returns 0 (not intersected) only when all three values are negative */
+uint test_tri_singleplane(float4 v0, float4 v1, float4 v2, float4 plane)
+{
+    float i1 = dot(v0.xyz, plane.xyz) + plane.w;
+    float i2 = dot(v1.xyz, plane.xyz) + plane.w;
+    float i3 = dot(v2.xyz, plane.xyz) + plane.w;
+    return (i1 >= 0) | (i2 >= 0) | (i3 >= 0);
+}
+
+uint test_tri_planes(float4 plane1, float4 plane2, float4 plane3, float4 plane4, 
+    float4 v0, float4 v1, float4 v2)
+{   /* non-zero only if the triangle passes test_tri_singleplane for every one of the 4 planes */
+    uint t1 = test_tri_singleplane(v0, v1, v2, plane1);
+    uint t2 = test_tri_singleplane(v0, v1, v2, plane2);
+    uint t3 = test_tri_singleplane(v0, v1, v2, plane3);
+    uint t4 = test_tri_singleplane(v0, v1, v2, plane4);
+    return (t1 & t2 & t3 & t4);    /* a single 0 result rejects the triangle */
+}
+
 [maxvertexcount(9)]
 void main(triangle vso i[3], inout TriangleStream<gso> tris)
 {
     o[2].coord = i[2].coord;
 #endif
 
-    /* tri #1 -> view 1 */
-    o[0].rt_idx = 0;
-    o[1].rt_idx = 0;
-    o[2].rt_idx = 0;
-    o[0].pos = i[0].pos0;
-    o[1].pos = i[1].pos0;
-    o[2].pos = i[2].pos0;
-    tris.Append(o[0]);
-    tris.Append(o[1]);
-    tris.Append(o[2]);
-    tris.RestartStrip();
+    /* tri #1 -> cascade 1 */
+    if (test_tri_planes(c_cascade_planes[0], c_cascade_planes[1], c_cascade_planes[2],
+        c_cascade_planes[3], i[0].pos0, i[1].pos0, i[2].pos0))
+    {
+        o[0].rt_idx = 0;
+        o[1].rt_idx = 0;
+        o[2].rt_idx = 0;
+        o[0].pos = i[0].pos0;
+        o[1].pos = i[1].pos0;
+        o[2].pos = i[2].pos0;
+        tris.Append(o[0]);
+        tris.Append(o[1]);
+        tris.Append(o[2]);
+        tris.RestartStrip();
+    }
 
-    /* tri #2 -> view 2 */
-    o[0].rt_idx = 1;
-    o[1].rt_idx = 1;
-    o[2].rt_idx = 1;
-    o[0].pos = i[0].pos1;
-    o[1].pos = i[1].pos1;
-    o[2].pos = i[2].pos1;
-    tris.Append(o[0]);
-    tris.Append(o[1]);
-    tris.Append(o[2]);
-    tris.RestartStrip();
+    /* tri #2 -> cascade 2 */
+    if (test_tri_planes(c_cascade_planes[4], c_cascade_planes[5], c_cascade_planes[6],
+        c_cascade_planes[7], i[0].pos1, i[1].pos1, i[2].pos1))
+    {
+        o[0].rt_idx = 1;
+        o[1].rt_idx = 1;
+        o[2].rt_idx = 1;
+        o[0].pos = i[0].pos1;
+        o[1].pos = i[1].pos1;
+        o[2].pos = i[2].pos1;
+        tris.Append(o[0]);
+        tris.Append(o[1]);
+        tris.Append(o[2]);
+        tris.RestartStrip();
+    }
 
-    /* tri #3 -> view 3 */
-    o[0].rt_idx = 2;
-    o[1].rt_idx = 2;
-    o[2].rt_idx = 2;
-    o[0].pos = i[0].pos2;
-    o[1].pos = i[1].pos2;
-    o[2].pos = i[2].pos2;
-    tris.Append(o[0]);
-    tris.Append(o[1]);
-    tris.Append(o[2]);
-    tris.RestartStrip();
+    /* tri #3 -> cascade 3 */
+    if (test_tri_planes(c_cascade_planes[8], c_cascade_planes[9], c_cascade_planes[10],
+        c_cascade_planes[11], i[0].pos2, i[1].pos2, i[2].pos2))
+    {
+        o[0].rt_idx = 2;
+        o[1].rt_idx = 2;
+        o[2].rt_idx = 2;
+        o[0].pos = i[0].pos2;
+        o[1].pos = i[1].pos2;
+        o[2].pos = i[2].pos2;
+        tris.Append(o[0]);
+        tris.Append(o[1]);
+        tris.Append(o[2]);
+        tris.RestartStrip();
+    }
 }
 
 

data/shaders/hlsl/occ-prev.ps.hlsl

 Texture2D<float> t_depth;
 SamplerState s_depth;
 
+#if defined(_EXTRA_)
+Texture2D<float> t_depth_ext;
+SamplerState s_depth_ext;
+#endif
+
 /* uniforms */
 float2 c_camprops;  /* x = near, y = far */
 
     float depth_zbuff = t_depth.SampleLevel(s_depth, input.coord, 0);
     float depth = (2.0f * c_camprops.x)/(c_camprops.y  + c_camprops.x - 
         depth_zbuff*(c_camprops.y - c_camprops.x));
-    return float4(depth, depth, depth, 1);
+    float4 color = float4(depth, depth, depth, 1);
+
+#if defined(_EXTRA_)
+    float depth_zbuff_ext = t_depth_ext.SampleLevel(s_depth_ext, input.coord, 0);
+    float depth_ext = (2.0f * c_camprops.x)/(c_camprops.y  + c_camprops.x - 
+        depth_zbuff_ext*(c_camprops.y - c_camprops.x));
+    [flatten]
+    if (depth_ext < depth)
+        color *= float4(depth_ext, 0, 0, 1);
+#endif
+
+    return color;
 }

src/core/linked-list.h

 #endif
 
 /**
- * add item to the linked-list
+ * add item to the linked-list, this function adds the list_item to the head of the list \n
+ * so linked_list pointer will be swapped with new item
  * @param plist pointer to the root item of the list (can be NULL)
  * @param item new item to be added
  * @param data custom data pointer, mostly owner of the list 'item'

src/engine/camera.c

     vec3_muls(&yaxis_far_scaled, &yaxis, far_plane_h);
 
     struct vec4f tmpv;
-    /* corners, start from minimum corner point and winds clockwise near quad */
+    /* corners, start from bottom-left corner point and winds clockwise */
+    /* near quad */
     vec3_setv(&corners[0], vec3_sub(&tmpv, &near_center, 
         vec3_add(&tmpv, &xaxis_near_scaled, &yaxis_near_scaled)));
     vec3_setv(&corners[1], vec3_sub(&tmpv, &near_center,

src/engine/cmp-mgr.c

 
 	/* before render stage, we pass active cmdqueue as param */
     switch (stage_id)   {
-    case CMP_UPDATE_STAGE1:
-        param = scn_getcam();
-        break;
     case CMP_UPDATE_STAGE4:
         param = gfx_get_cmdqueue(0);
         break;
         obj->xform_cmp = hdl;
     else if (type == cmp_bounds_type)
         obj->bounds_cmp = hdl;
-    else if (type == cmp_model_type)
+    else if (type == cmp_model_type)    {
         obj->model_cmp = hdl;
+        obj->model_shadow_cmp = hdl;
+    }
 }
 
 void cmp_zeroobj(struct cmp_obj* obj)
     obj->model_cmp = INVALID_HANDLE;
     obj->anim_cmp = INVALID_HANDLE;
     obj->rbody_cmp = INVALID_HANDLE;
+    obj->model_shadow_cmp = INVALID_HANDLE;
 }
 
 cmptype_t cmp_gettype(cmp_t c)

src/engine/cmp-types.h

     cmphandle_t model_cmp;
     cmphandle_t anim_cmp;
     cmphandle_t rbody_cmp;
+    cmphandle_t model_shadow_cmp;
 };
 
 

src/engine/components/cmp-lodmodel.c

     struct allocator* alloc, struct allocator* tmp_alloc, bool_t update_bounds);
 void lodmodel_buildidxs(struct cmp_lodmodel* lodmodel);
 cmphandle_t lodmodel_switchmodel(cmphandle_t cur_hdl, cmphandle_t new_hdl);
+result_t lodmodel_loadmodel(struct cmp_obj* obj, cmphandle_t model_hdl, const char* filepath,
+    struct allocator* alloc, struct allocator* tmp_alloc, bool_t update_bounds);
 
 /*************************************************************************************************
  * inlines 
     memset(&params, 0x00, sizeof(params));
 
     params.name = "lod-model";
-    params.stride = sizeof(struct cmp_model);
+    params.stride = sizeof(struct cmp_lodmodel);
     params.create_func = cmp_lodmodel_create;
     params.destroy_func = cmp_lodmodel_destroy;
     params.grow_cnt = 300;
     m->scheme_id = lod_findmodelscheme(m->scheme_name);
 
     host_obj->model_cmp = INVALID_HANDLE;
+    host_obj->model_shadow_cmp = INVALID_HANDLE;
     return RET_OK;
 }
 
         m->lod_idxs[i] = 0;
     }
     host_obj->model_cmp = INVALID_HANDLE;
+    host_obj->model_shadow_cmp = INVALID_HANDLE;
 }
 
 bool_t cmp_lodmodel_applylod(cmphandle_t lodmdl_hdl, const struct vec3f* campos)
     if (dot_d < l*l)    {
         host->model_cmp = lodmodel_switchmodel(host->model_cmp, 
             m->models[m->lod_idxs[LOD_INDEX_LOW]]);
-
         return TRUE;
     }   else    {
         return FALSE;   /* too far, doesn't get rendered anymore */
     }
 }
 
+/* same as normal applylod, but uses 1 level lower LOD for every range (faster for shadows)
+ * the selected model handle is stored in host->model_shadow_cmp:
+ *   inside high_range   -> LOD_INDEX_MED
+ *   inside medium_range -> LOD_INDEX_LOW
+ *   inside low_range    -> LOD_INDEX_LOW (same level as the medium range)
+ * every range is extended by the bounds radius (ws_s.r) and compared in squared form
+ * @param lodmdl_hdl handle of the lod-model component instance
+ * @param campos position that the distance to the object bounds is measured from
+ * @return TRUE if a model was selected, FALSE if the object is beyond low_range + radius */
+bool_t cmp_lodmodel_applylod_shadow(cmphandle_t lodmdl_hdl, const struct vec3f* campos)
+{
+    struct cmp_obj* host = cmp_getinstancehost(lodmdl_hdl);
+    struct cmp_lodmodel* m = cmp_getinstancedata(lodmdl_hdl);
+    struct cmp_bounds* b = cmp_getinstancedata(host->bounds_cmp);
+    const struct lod_model_scheme* scheme = lod_getmodelscheme(m->scheme_id);
+
+    /* calculate distance factors to the bounds of object */
+    struct vec3f d;
+    vec3_setf(&d, campos->x - b->ws_s.x, campos->y - b->ws_s.y, campos->z - b->ws_s.z);
+    fl32 r = b->ws_s.r;
+    fl32 dot_d = vec3_dot(&d, &d);  /* squared distance, compared against squared ranges below */
+
+    /* test high detail */
+    fl32 l = scheme->high_range + r;
+    if (dot_d < l*l)    {
+        host->model_shadow_cmp = lodmodel_switchmodel(host->model_shadow_cmp, 
+            m->models[m->lod_idxs[LOD_INDEX_MED]]);
+        return TRUE;
+    }
+
+    /* test medium detail */
+    l = scheme->medium_range + r;
+    if (dot_d < l*l)    {
+        host->model_shadow_cmp = lodmodel_switchmodel(host->model_shadow_cmp, 
+            m->models[m->lod_idxs[LOD_INDEX_LOW]]);
+        return TRUE;
+    }
+
+    /* test low detail */
+    l = scheme->low_range + r;
+    if (dot_d < l*l)    {
+        host->model_shadow_cmp = lodmodel_switchmodel(host->model_shadow_cmp, 
+            m->models[m->lod_idxs[LOD_INDEX_LOW]]);
+        return TRUE;
+    }   else    {
+        return FALSE;   /* too far, doesn't get rendered anymore */
+    }
+}
+
+
 cmphandle_t lodmodel_switchmodel(cmphandle_t cur_hdl, cmphandle_t new_hdl)
 {
+    if (cur_hdl == new_hdl)
+        return cur_hdl;
+
     struct cmp_model* cur = cmp_getinstancedata(cur_hdl);
     struct cmp_model* n = cmp_getinstancedata(new_hdl);
 
     if (m->models[LOD_INDEX_HIGH] == INVALID_HANDLE)
         return RET_FAIL;
     obj->model_cmp = m->models[LOD_INDEX_HIGH];
+    obj->model_shadow_cmp = m->models[LOD_INDEX_HIGH];
     return lodmodel_loadmodel(obj, m->models[LOD_INDEX_HIGH], m->filepath_hi, alloc, 
         tmp_alloc, TRUE);
 }
     return r;
 }
 
-result_t lodmodel_loadmodel(struct cmp_obj* obj, cmphandle_t model_hdl, const char* filepath,
-    struct allocator* alloc, struct allocator* tmp_alloc, bool_t update_bounds)
-{
-    struct cmp_model* mdl = cmp_getinstancedata(model_hdl);
-    strcpy(mdl->filepath, filepath);
-    if (!update_bounds)
-        BIT_ADD(mdl->flags, CMP_MODELFLAG_NOBOUNDUPDATE);
-    BIT_ADD(mdl->flags, CMP_MODELFLAG_ISLOD);
-    return cmp_model_modify(obj, alloc, tmp_alloc, mdl, model_hdl);
-}
-
 void lodmodel_buildidxs(struct cmp_lodmodel* lodmodel)
 {
     if (lodmodel_checkmodel(lodmodel, LOD_INDEX_LOW))    {
     lodmodel->lod_idxs[LOD_INDEX_HIGH] = LOD_INDEX_HIGH;
 }
 
+result_t lodmodel_loadmodel(struct cmp_obj* obj, cmphandle_t model_hdl, const char* filepath,
+    struct allocator* alloc, struct allocator* tmp_alloc, bool_t update_bounds)
+{   /* sets filepath and flags on the model component model_hdl, then applies them with cmp_model_modify */
+    struct cmp_model* mdl = cmp_getinstancedata(model_hdl);
+    strcpy(mdl->filepath, filepath);    /* NOTE(review): unbounded copy, assumes filepath fits the destination - confirm */
+    if (!update_bounds)
+        BIT_ADD(mdl->flags, CMP_MODELFLAG_NOBOUNDUPDATE);
+    BIT_ADD(mdl->flags, CMP_MODELFLAG_ISLOD);   /* always set here, scene code checks this flag before applying LOD */
+    return cmp_model_modify(obj, alloc, tmp_alloc, mdl, model_hdl);    /* result of the modify call is the result of the load */
+}
+
 result_t cmp_lodmodel_modify_shadows(struct cmp_obj* obj, struct allocator* alloc,
     struct allocator* tmp_alloc, void* data, cmphandle_t cur_hdl)
 {

src/engine/components/cmp-lodmodel.h

 
 /* used by scene-mgr */
 bool_t cmp_lodmodel_applylod(cmphandle_t lodmdl_hdl, const struct vec3f* campos);
+bool_t cmp_lodmodel_applylod_shadow(cmphandle_t lodmdl_hdl, const struct vec3f* campos);
 
 #ifdef __cplusplus
 }

src/engine/components/cmp-model.c

 {
 	struct cmp_model* m = data;
 	m->model_hdl = INVALID_HANDLE;
-    if (host_obj != NULL)
+    if (host_obj != NULL)   {
 	    host_obj->model_cmp = hdl;
+        host_obj->model_shadow_cmp = hdl;
+    }
 	return RET_OK;
 }
 
 void cmp_model_destroy(struct cmp_obj* host_obj, void* data, cmphandle_t hdl)
 {
 	cmp_model_destroydata(host_obj, data, hdl, TRUE);
-    if (host_obj != NULL)
+    if (host_obj != NULL)   {
 	    host_obj->model_cmp = INVALID_HANDLE;
+        host_obj->model_shadow_cmp = INVALID_HANDLE;
+    }
 }
 
 /* update node xforms */

src/engine/gfx-occ.c

     uint32 prev_shaderid;
     gfx_sampler sampl_point;
 
+#if defined(_OCCDEMO_)
+    fl32* zbuff_ext;  /* for drawing occludees */
+    gfx_texture tex_ext; 
+#endif
+
     struct mat4f viewport;
     struct mat4f viewprojvp;  /* world to viewport matrix */
 
         err_printn(__FILE__, __LINE__, RET_OUTOFMEMORY);
         return RET_OUTOFMEMORY;
     }
+#if defined(_OCCDEMO_)
+    g_occ.zbuff_ext = ALIGNED_ALLOC(size*sizeof(fl32), MID_GFX);
+    if (g_occ.zbuff_ext == NULL)    {
+        err_printn(__FILE__, __LINE__, RET_OUTOFMEMORY);
+        return RET_OUTOFMEMORY;
+    }
+#endif
 
     /* create preview buffers/shaders for dev-mode */
     if (BIT_CHECK(eng_get_params()->flags, ENG_FLAG_DEV))   {
             {GFX_INPUTELEMENT_ID_POSITION, "vsi_pos"},
             {GFX_INPUTELEMENT_ID_TEXCOORD0, "vsi_coord"}
         };
+
+#if defined(_OCCDEMO_)
+        const struct gfx_shader_define defines[] = {"_EXTRA_", "1"};
+        uint32 define_cnt = 1;
+#else
+        const struct gfx_shader_define* defines = NULL;
+        uint32 define_cnt = 0;
+#endif
         g_occ.prev_shaderid = gfx_shader_load("occ-prev", eng_get_lsralloc(), 
-            "shaders/fsq.vs", "shaders/occ-prev.ps", NULL, bindings, 2, NULL, 0, NULL);
+            "shaders/fsq.vs", "shaders/occ-prev.ps", NULL, bindings, 2, defines, define_cnt, NULL);
         if (g_occ.prev_shaderid == 0)   {
             err_print(__FILE__, __LINE__, "occ-init failed: could not load preview shader");
             return RET_FAIL;
 {
     if (g_occ.zbuff != NULL)
         ALIGNED_FREE(g_occ.zbuff);
+#if defined(_OCCDEMO_)
+    if (g_occ.zbuff_ext != NULL)
+        ALIGNED_FREE(g_occ.zbuff_ext);
+#endif
     if (g_occ.prev_shaderid != 0)   
         gfx_shader_unload(g_occ.prev_shaderid);
     if (g_occ.sampl_point != NULL)
     if (g_occ.prev_rt == NULL)
         return RET_FAIL;
 
+#if defined(_OCCDEMO_)
+    g_occ.tex_ext = gfx_create_texture(GFX_TEXTURE_2D, width, height, 1, GFX_FORMAT_R32_FLOAT, 
+        1, 1, data.size, &data, GFX_MEMHINT_DYNAMIC);
+    if (g_occ.tex_ext == NULL)
+        return RET_FAIL;
+#endif
+
     return RET_OK;
 }
 
         gfx_destroy_texture(g_occ.prev_tex);
     if (g_occ.tex != NULL)
         gfx_destroy_texture(g_occ.tex);
+#if defined(_OCCDEMO_)
+    if (g_occ.tex_ext != NULL)
+        gfx_destroy_texture(g_occ.tex_ext);
+#endif
+
 }
 
 void gfx_occ_setmatrices(const struct mat4f* viewproj)
 {
     ASSERT(g_occ.zbuff != NULL);
     occ_clearzbuff(g_occ.zbuff, g_occ.zbuff_width*g_occ.zbuff_height);
+#if defined(_OCCDEMO_)
+    occ_clearzbuff(g_occ.zbuff_ext, g_occ.zbuff_width*g_occ.zbuff_height);
+#endif
     memset(&g_occ.stats, 0x00, sizeof(struct gfx_occ_stats));
+
 }
 
 void occ_clearzbuff(fl32* zbuff, int32 pixel_cnt)
 
     A_ALIGNED_FREE(tmp_alloc, verts);
     g_occ.stats.occ_obj_cnt ++;
+    g_occ.stats.occ_tri_cnt += occ->tri_cnt;
 }
 
 /* reference: http://www.opengl.org/wiki/Vertex_Transformation */
     if (v0->w > 1.0f || v1->w > 1.0f || v2->w > 1.0f)
         return;
 
-    g_occ.stats.occ_tri_cnt ++;
-
     /* extract the stuff we need from the triangle */
     struct vec2i vs[3];
     simd_t z0 = _mm_set1_ps(1.0f - v0->z);
 
     /* update texture data */
     gfx_texture_update(cmdqueue, g_occ.tex, g_occ.zbuff);
+#if defined(_OCCDEMO_)
+    gfx_texture_update(cmdqueue, g_occ.tex_ext, g_occ.zbuff_ext);
+#endif
 
     /* render depth preview */
     gfx_output_setrendertarget(cmdqueue, g_occ.prev_rt);
     gfx_shader_bindconstants(cmdqueue, shader);
     gfx_shader_bindsamplertexture(cmdqueue, shader, SHADER_NAME(s_depth), g_occ.sampl_point, 
         g_occ.tex);
+#if defined(_OCCDEMO_)
+    gfx_shader_bindsamplertexture(cmdqueue, shader, SHADER_NAME(s_depth_ext), g_occ.sampl_point, 
+        g_occ.tex_ext);
+#endif
     gfx_draw_fullscreenquad();
 }
 
     struct vec3f tmp;
     fl32 sum = 0.0f;
 
+    g_occ.stats.test_obj_cnt ++;
+    g_occ.stats.test_tri_cnt += 2;
+
     vec3_sub(&pts[0], center, vec3_add(&tmp, xaxis, yaxis));
     vec3_add(&pts[1], center, vec3_sub(&tmp, yaxis, xaxis));
     vec3_add(&pts[2], center, vec3_add(&tmp, xaxis, yaxis));
 
     occ_transform_verts_noworld(pts, pts, 4, &g_occ.viewprojvp);
     sum += occ_testtri(&pts[0], &pts[1], &pts[2]);
+#if !defined(_OCCDEMO_)
     if (sum > OCC_THRESHOLD)
         return TRUE;
+#endif
     sum += occ_testtri(&pts[2], &pts[3], &pts[0]);
-#if 0
-    /* for debugging */
-    occ_drawtri(&pts[0], &pts[1], &pts[2]);
-    occ_drawtri(&pts[2], &pts[3], &pts[0]);
-#endif
     return sum > OCC_THRESHOLD;
 }
 
 fl32 occ_testtri(const struct vec3f* v0, const struct vec3f* v1, const struct vec3f* v2)
 {
     fl32* buff = g_occ.zbuff;
+#if defined(_OCCDEMO_)
+    fl32* buff_ext = g_occ.zbuff_ext;
+#endif
     int32 w = g_occ.zbuff_width;
     int32 h = g_occ.zbuff_height;
 
             depth = _mm_add_ps(depth, _mm_mul_ps(_mm_cvtepi32_ps(w1), z1));
             depth = _mm_add_ps(depth, _mm_mul_ps(_mm_cvtepi32_ps(w2), z2));
 
-            /* write to buffer (with the help of masks
+            /* count zbuffer writes
              * if (mask[lane] == 0 AND prev_depth > depth) then pixel will not be written */
             simd_t prev_depth = _mm_load_ps(&buff[x_idx]);
             simd_t depth_mask = _mm_cmplt_ps(depth, prev_depth);
             test = _mm_hadd_ps(test, test);
             test = _mm_hadd_ps(test, test);
             cnt4 = _mm_add_ss(cnt4, test);
+
+#if defined(_OCCDEMO_)
+            /* demo only: write to secondary buffer for preview */
+            _mm_store_ps(&buff_ext[x_idx], depth);
+#endif
         }
 
         w0_row = _mm_add_epi32(w0_row, _mm_load_si128((simd4i_t*)e12[1].n));
         w2_row = _mm_add_epi32(w2_row, _mm_load_si128((simd4i_t*)e01[1].n));
         _mm_store_ss(&cnt, cnt4);
 
+#if !defined(_OCCDEMO_)
         /* early exit (have to do more tests in terms of performance) */
         if (cnt > OCC_THRESHOLD)
             return cnt;
+#endif
     }
 
     return cnt;
     return r;
 }
 
+/**
+ * batching algorithm:
+ * data:
+ * batch(shader_id #1) --> batch_node(unique_id #1)/subidx --> linked_list(instances)
+ *                      batch_node(unique_id #2)/subidx --> linked_list(instances)
+ * batch(shader_id #2) --> batch_node(unique_id #1)/subidx --> linked_list(instances)
+ *                      batch_node(unique_id #2)/subidx --> linked_list(instances)
+ * method: incoming item ...
+ *   1) first we look in the shader table for shader_id, if not found, create new batch
+ *   2) look in batch's unique_id table, if not found, create a new empty batch_node (see data), else ...
+ *   3) add new batch_node to batch_item's nodes
+ *   4) if the batch_node exists, check if we reach instance_cnt limit in the linked_list of nodes
+ *   5) if there is no room for more items, add new batch_node to the linked_list of the first batch node
+ */
 void gfx_renderpass_additem_tosubpass(struct allocator* alloc, 
     struct gfx_renderpass_sub* rpdata,
     enum cmp_obj_type objtype, 
     struct hashtable_item* subitem = hashtable_chained_find(&bitem->uid_table, unique_id);
     struct gfx_batch_node* bnode;
     if (subitem != NULL)    {
-        bnode = (struct gfx_batch_node*)subitem->value;
-        /* if we are out of max-instances limit, add another batch to the linked-list of current 
-         * also add it to linked-list of the first batch node for recursing  */
+        struct gfx_batch_node* bnode_first = (struct gfx_batch_node*)subitem->value;
+        /* the first bnode in the list, is always the last node that we added, so ...
+         * check for instance_cnt of the last_node and see if we need more bnodes */
+        bnode = (bnode_first->bll != NULL) ? bnode_first->bll->data : bnode_first;
+
         if (bnode->instance_cnt >= GFX_INSTANCES_MAX)   {
-            struct gfx_batch_node* bnode_first = (struct gfx_batch_node*)subitem->value;
         	struct gfx_batch_node* bnode_new = 
                 A_ALLOC(alloc, sizeof(struct gfx_batch_node), MID_GFX);
         	ASSERT(bnode_new);
             gfx_batch_initnode(alloc, bnode_new, unique_id, sub_idx, ritem);
             list_add(&bnode_first->bll, &bnode_new->lnode, bnode_new);
-            subitem->value = (uptr_t)bnode_new;
             bnode = bnode_new;
         }
     }   else    {

src/engine/renderpaths/gfx-csm.c

 	gfx_texture prev_tex[CSM_CASCADE_CNT];
     uint32 shader_cnt;
     struct csm_shader shaders[CSM_SHADER_CNT];
+    struct vec4f cascade_planes[CSM_CASCADE_CNT*4];  /* 4 planes for each cascade instead of 6 (sized by cascade count, not shader count) */
     uint32 prev_shader;
     struct gfx_cblock* cb_frame;
     struct gfx_cblock* cb_xforms;
+    struct gfx_cblock* cb_frame_gs;
     gfx_rasterstate rs_bias;
     gfx_rasterstate rs_bias_doublesided;
     gfx_depthstencilstate ds_depth;
 			err_print(__FILE__, __LINE__, "gfx-csm init failed: could not create prev buffers");
 			return RET_FAIL;
 		}
+
         /* console commands */
         con_register_cmd("gfx_debugcsm", csm_console_debugcsm, NULL, "gfx_debugcsm [1*/0]");
 	}
         gfx_shader_get(g_csm->shaders[0].shader_id), "cb_frame");
     g_csm->cb_xforms = gfx_shader_create_cblock(lsr_alloc, tmp_alloc, 
         gfx_shader_get(g_csm->shaders[0].shader_id), "cb_xforms");
-    if (g_csm->cb_frame == NULL || g_csm->cb_xforms == NULL)    {
+    g_csm->cb_frame_gs = gfx_shader_create_cblock(lsr_alloc, tmp_alloc,
+        gfx_shader_get(g_csm->shaders[0].shader_id), "cb_frame_gs");
+    if (g_csm->cb_frame == NULL || g_csm->cb_xforms == NULL || g_csm->cb_frame_gs == NULL)    {
         err_print(__FILE__, __LINE__, "gfx-csm init failed: could not create cblocks");
         return RET_FAIL;
     }
             gfx_shader_destroy_cblock(g_csm->cb_frame);
         if (g_csm->cb_xforms != NULL)
             gfx_shader_destroy_cblock(g_csm->cb_xforms);
+        if (g_csm->cb_frame_gs != NULL)
+            gfx_shader_destroy_cblock(g_csm->cb_frame_gs);
 
         csm_unload_prev_shaders();
         csm_unload_shaders();
     gfx_output_clearrendertarget(cmdqueue, g_csm->shadow_rt, NULL, 1.0f, 0, GFX_CLEAR_DEPTH);
 
     struct gfx_cblock* cb_frame = g_csm->cb_frame;
+    struct gfx_cblock* cb_frame_gs = g_csm->cb_frame_gs;
 
     gfx_cb_set4mv(cb_frame, SHADER_NAME(c_cascade_mats), g_csm->cascade_vps, CSM_CASCADE_CNT);
     gfx_shader_updatecblock(cmdqueue, cb_frame);
 
+    gfx_cb_set4fv(cb_frame_gs, SHADER_NAME(c_cascade_planes), g_csm->cascade_planes, 
+        4*CSM_CASCADE_CNT);
+    gfx_shader_updatecblock(cmdqueue, cb_frame_gs);
+
     for (uint32 i = 0; i < batch_cnt; i++)  {
         struct gfx_batch_item* bitem = &batch_items[i];
         struct gfx_shader* shader = gfx_shader_get(bitem->shader_id);
     struct gfx_model_geo* geo = &gmodel->geos[mesh->geo_id];
     gfx_sampler sampler = gfx_get_globalsampler();
 
-    const struct gfx_cblock* cblocks[] = {g_csm->cb_frame, g_csm->cb_xforms};
-    gfx_shader_bindcblocks(cmdqueue, shader, cblocks, 2);
+    const struct gfx_cblock* cblocks[] = {g_csm->cb_frame, g_csm->cb_xforms, g_csm->cb_frame_gs};
+    gfx_shader_bindcblocks(cmdqueue, shader, cblocks, 3);
 
     /* set diffuse texture for alpha-test */
     if (bnode->sub_idx != INVALID_INDEX)    {
         gfx_destroy_sampler(g_csm->sampl_linear);
 }
 
+void csm_calc_cascadeplanes(struct vec4f* planes, const struct plane vp_planes[6], 
+    const struct mat4f* cmat)
+{
+    /* fills planes[0..3] with the right, left, top and bottom planes of vp_planes (in that
+     * order), each one transformed by the transposed inverse of cmat
+     * the near and far planes of vp_planes are not used */
+    const struct plane* p;
+    struct vec4f pv;
+    struct mat4f mt;
+
+    mat4_inv(&mt, cmat);
+    mat4_transpose_self(&mt);
+
+    /* right plane */
+    p = &vp_planes[CAM_FRUSTUM_RIGHT];
+    vec4_setf(&pv, p->nx, p->ny, p->nz, p->d);
+    vec4_transform(&planes[0], &pv, &mt);
+
+    /* left plane */
+    p = &vp_planes[CAM_FRUSTUM_LEFT];
+    vec4_setf(&pv, p->nx, p->ny, p->nz, p->d);
+    vec4_transform(&planes[1], &pv, &mt);
+
+    /* top plane */
+    p = &vp_planes[CAM_FRUSTUM_TOP];
+    vec4_setf(&pv, p->nx, p->ny, p->nz, p->d);
+    vec4_transform(&planes[2], &pv, &mt);
+
+    /* bottom plane */
+    p = &vp_planes[CAM_FRUSTUM_BOTTOM];
+    vec4_setf(&pv, p->nx, p->ny, p->nz, p->d);
+    vec4_transform(&planes[3], &pv, &mt);
+}
+
 void gfx_csm_prepare(const struct gfx_view_params* params, const struct vec3f* light_dir, 
     const struct aabb* world_bounds)
 {
 
     /* calculate cascades */
     struct frustum f;   /* frustum points for cascades */
+    struct plane vp_planes[6];
 
     for (uint32 i = 0; i < CSM_CASCADE_CNT; i++)    {
         cam_calc_frustumcorners(params->cam, (struct vec3f*)f.points, &splits[i], &splits[i+1]);
         csm_calc_minsphere(&g_csm->cascades[i].bounds, &f, &params->view, &view_inv);
-        memcpy(&g_csm->cascade_frusts[i], &f, sizeof(f));
+        memcpy(&g_csm->cascade_frusts[i], &f, sizeof(f));        
 
         /* cascade matrixes: first we find two extreme points of the world, related to cascade */
         struct vec3f scenter;
 
         /* calculate final matrix */
         mat3_mul4(&g_csm->cascade_vps[i], &g_csm->cascades[i].view, &g_csm->cascades[i].proj);
+
+        cam_calc_frustumplanes(vp_planes, &g_csm->cascade_vps[i]);
+        csm_calc_cascadeplanes(&g_csm->cascade_planes[i*4], vp_planes, &g_csm->cascade_vps[i]);
+
         csm_round_mat(&g_csm->cascade_vps[i], &g_csm->cascade_vps[i], g_csm->shadowmap_size);
 
         mat4_mul(&g_csm->shadow_mats[i], 

src/engine/scene-mgr.c

     struct array* models, const struct gfx_view_params* params, out uint32* obj_idx);
 uint32 scene_add_light(struct cmp_obj* obj, uint32 bounds_idx, uint32 item_idx, struct array* mats, 
     struct array* lights, const struct gfx_view_params* params, out uint32* obj_idx);
+uint32 scene_add_model_shadow(struct cmp_obj* obj, uint32 bounds_idx, uint32 item_idx, 
+    struct array* mats, struct array* models, const struct gfx_view_params* params, 
+        out uint32* obj_idx);
 
 struct scn_render_model* scene_create_rendermodels(struct allocator* alloc, struct array* models,
     struct mat3f* mats, struct sphere* bounds, uint32 item_offset, out uint32* pcnt);
     for (uint32 i = 0; i < spatial_culled_cnt; i++) {
         if (culls[i]) {
             struct cmp_obj* obj = spatial_culled_objs[i];
-            item_idx += scene_add_model(obj, i, item_idx, &tmp_mats, &tmp_models, params, &obj_idx);
+            item_idx += scene_add_model_shadow(obj, i, item_idx, &tmp_mats, &tmp_models, params, 
+                &obj_idx);
         }  /* endif: not culled */ 
     }
 
     return gmodel->renderable_cnt;
 }
 
+uint32 scene_add_model_shadow(struct cmp_obj* obj, uint32 bounds_idx, uint32 item_idx, 
+    struct array* mats, struct array* models, const struct gfx_view_params* params, 
+    out uint32* obj_idx)
+{   /* adds the renderables of the shadow model (obj->model_shadow_cmp) to models/mats */
+    bool_t vis = TRUE;
+    struct cmp_model* m = cmp_getinstancedata(obj->model_shadow_cmp);
+
+    /* apply LOD if model is owned by LOD component */
+    if (BIT_CHECK(m->flags, CMP_MODELFLAG_ISLOD))   {
+        vis = cmp_lodmodel_applylod_shadow(cmp_findinstance(obj->chain, cmp_lodmodel_type),
+            &params->cam_pos);
+        if (!vis)
+            return 0;   /* out of LOD range: nothing is added and obj_idx is left untouched */
+        /* refetch model, because it may be changed by LOD */
+        m = cmp_getinstancedata(obj->model_shadow_cmp);
+    }
+
+    struct gfx_model* gmodel = rs_get_model(m->model_hdl);
+    ASSERT(gmodel != NULL);
+
+    /* one render-model and one matrix entry per renderable node */
+    for (uint32 i = 0, cnt = gmodel->renderable_cnt; i < cnt; i++)  {
+        struct scn_render_model* rmodel = arr_add(models);
+        struct mat3f* rmat = arr_add(mats);
+        if (rmodel == NULL || rmat == NULL)
+            return 0;   /* NOTE(review): entries added by earlier iterations stay in the arrays - confirm intended */
+
+        uint32 node_idx = gmodel->renderable_idxs[i];
+
+        /* render-model */
+        rmodel->model_hdl = obj->model_shadow_cmp;
+        rmodel->sun_shadows = !m->exclude_shadows;
+        rmodel->gmodel = gmodel;
+        rmodel->inst = m->model_inst;
+        rmodel->mat_idx = item_idx + i;     /* index of the matching entry in mats */
+        rmodel->bounds_idx = bounds_idx;
+        rmodel->node_idx = node_idx;
+
+        /* world-space transform matrix */
+        struct cmp_xform* xf = cmp_getinstancedata(m->xforms[node_idx]);
+        mat3_setm(rmat, &xf->ws_mat);
+    }
+
+    (*obj_idx) ++;
+
+    return gmodel->renderable_cnt;     /* number of items added, caller advances item_idx by this */
+}
+
 
 void scn_setcam(struct camera* cam)
 {

src/vs2010/engine.vcxproj

       <DisableLanguageExtensions>false</DisableLanguageExtensions>
       <RuntimeTypeInfo>false</RuntimeTypeInfo>
       <CompileAs>CompileAsC</CompileAs>
-      <PreprocessorDefinitions>_ENGINE_EXPORT_;_SIMD_SSE_;_WIN_;FULL_VERSION="0.3.0";_PROFILE_;_D3D_;_ENABLEASSERT_;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>_ENGINE_EXPORT_;_SIMD_SSE_;_WIN_;FULL_VERSION="0.3.0";_PROFILE_;_ENABLEASSERT_;_D3D_;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <DisableCilkKeywords>true</DisableCilkKeywords>
       <DisableIntelLangExtensions>true</DisableIntelLangExtensions>
       <C99Support>true</C99Support>

test-data/subclips.json

+{
+	"clips": [
+		{
+			"name": "walk",
+			"start": 0,
+			"end": -1
+		},
+		{
+			"name": "talk",
+			"start": 100,
+			"end": -1
+		}
+	]
+}

test-data/test3.lua

 	"test-data/barrel.h3dm",
 	"test-data/wagon.h3dm"}
 
-for i = 1, 50 do
-	midx = core.randRangeInt(1, 4)
+	points = {
+	{x=40, z=40},
+	{x=-40, z=40},
+	{x=-40, z=-40},
+	{x=40, z=-40}
+}
+
+for i = 1, 4 do
+	name = "house" .. i
+	obj = s:createModel(name, models[1])
+	yaw = core.randRangeFloat(0, 360)
+	obj:move(points[i].x, 0, points[i].z)
+end
+
+for i = 1, 200 do
+	midx = core.randRangeInt(2, 4)
 	name = "obj" .. i
 	obj = s:createModel(name, models[midx])
 	x = core.randRangeFloat(-40, 40)
 	obj:move(x, 0, z)
 	obj:rotate(0, yaw, 0)
 end
---[[
-for i=0, 3 do
+
+for i=1, 2 do
 	name = "statue" .. i
-	obj = s:createModel(name, "test-data/statue.h3dm")
+	obj = s:createModelLod(name, "test-data/statue.h3dm", 
+	"test-data/statue-md.h3dm", "test-data/statue-lo.h3dm")
 
-	x = core.randf_range(-40, 40)
-	z = core.randf_range(-40, 40)
-	yaw = core.randf_range(0, 360)
+	x = core.randRangeFloat(-40, 40)
+	z = core.randRangeFloat(-40, 40)
+	yaw = core.randRangeFloat(0, 360)
 	
 	obj:move(x, 0, z)
 	obj:rotate(0, yaw, 0)
 end
---]]
+
 eng.setSunDir(1, -0.8, 0)
+eng.setSunIntensity(1)

tools/h3dimport-gui.py

                 "-fps", str(self.edit_fps.text())]
         if g_verbosemode:
             args.extend(["-v"])
-        print args
         QtGui.QApplication.setOverrideCursor(QtGui.QCursor(QtCore.Qt.WaitCursor))
         r = subprocess.call(args)
         QtGui.QApplication.restoreOverrideCursor()
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.