Commits

spencercw committed 8cb8386

Optimise Direct3D 11 renderer by rendering directly into the texture buffer.

  • Participants
  • Parent commits f7c2590

Comments (0)

Files changed (8)

gb_emulator/include/gb_emulator/gb_video.h

 	 * \param gbc The \c gbc flag passed to draw().
 	 * \param unscaledBuffer The buffer to fill with the processed but unscaled image. This must be
 	 * \c WIDTH * \c HEIGHT in size.
-	 * \param scaledBuffer The buffer to fill with the processed and upscaled image. This must be
-	 * \c SCALED_WIDTH * \c SCALED_HEIGHT in size.
+	 * \param scaledBuffer The buffer to fill with the processed and upscaled image. This must hold at
+	 * least \c stride * \c SCALED_HEIGHT pixels.
+	 * \param stride The row width of \c scaledBuffer, in pixels. This must be at least \c SCALED_WIDTH.
 	 */
 	static void postProcess(const uint16_t *pixelBuffer, bool gbc, uint32_t *unscaledBuffer,
-		uint32_t *scaledBuffer);
+		uint32_t *scaledBuffer, unsigned stride = SCALED_WIDTH);
 
 private:
 	enum DrawType

gb_emulator/include/gb_emulator/gb_video_d3d11.h

 	// used.
 	bool gbcBound_;
 
-	// RGB pixel buffers. These is only used in software rendering mode.
+	// RGB pixel buffer. This is only used in software rendering mode.
 	boost::scoped_array<uint32_t> unscaledPixelBuffer_;
-	boost::scoped_array<uint32_t> scaledPixelBuffer_;
 
 	// Window message procedure
 	static LRESULT CALLBACK windowProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam);

gb_emulator/src/gb_video.cpp

 }
 
 void GbVideo::postProcess(const uint16_t *pixelBuffer, bool gbc, uint32_t *unscaledBuffer,
-	uint32_t *scaledBuffer)
+	uint32_t *scaledBuffer, unsigned stride)
 {
 	// Convert the colour space to RGB
 	for (unsigned y = 0; y != HEIGHT; ++y)
 	}
 
 	// Upscale the image
-	hq4x_32(unscaledBuffer, scaledBuffer, WIDTH, HEIGHT);
+	hq4x_32(unscaledBuffer, scaledBuffer, WIDTH, HEIGHT, stride);
 }
 
 void GbVideo::load(const GbVideoData &data)

gb_emulator/src/gb_video_d3d11.cpp

 	if (renderer_ == GbConfig::RENDER_SOFTWARE)
 	{
 		unscaledPixelBuffer_.reset(new uint32_t[WIDTH * HEIGHT]);
-		scaledPixelBuffer_.reset(new uint32_t[SCALED_WIDTH * SCALED_HEIGHT]);
 	}
 
 	// Register the window class
 			}
 		}
 	}
-	else
-	{
-		// Perform post-processing in software
-		postProcess(pixelBuffer, gbc, unscaledPixelBuffer_.get(), scaledPixelBuffer_.get());
-	}
 
-	// Fill the texture to display
+	// Map the texture into our address space
 	D3D11_MAPPED_SUBRESOURCE resource;
 	HRESULT hr = deviceContext_->Map(texture_, 0, D3D11_MAP_WRITE_DISCARD, 0, &resource);
 	if (FAILED(hr))
 		throw runtime_error("Failed to map texture: " + lexical_cast<string>(hr));
 	}
 
-	// Copy the data into the texture. We have to do it line-by-line because the width of the pixel
-	// buffer does not match that of the texture.
 	if (renderer_ == GbConfig::RENDER_HARDWARE)
 	{
+		// Copy the data into the texture
 		uint16_t *buf = reinterpret_cast<uint16_t *>(resource.pData);
 		for (unsigned y = 0; y != HEIGHT; ++y)
 		{
 	}
 	else
 	{
+		// Perform post-processing in software
 		uint32_t *buf = reinterpret_cast<uint32_t *>(resource.pData);
-		for (unsigned y = 0; y != SCALED_HEIGHT; ++y)
-		{
-			memcpy(&buf[y * 1024], &scaledPixelBuffer_[y * SCALED_WIDTH],
-				SCALED_WIDTH * sizeof(uint32_t));
-		}
+		postProcess(pixelBuffer, gbc, unscaledPixelBuffer_.get(), buf, 1024);
 	}
+
 	deviceContext_->Unmap(texture_, 0);
 
 	// Select the vertex buffer

third_party/hqx/include/hqx.h

 #endif
 
 HQX_API void HQX_CALLCONV hqxInit(void);
-HQX_API void HQX_CALLCONV hq2x_32( uint32_t * src, uint32_t * dest, int width, int height );
-HQX_API void HQX_CALLCONV hq3x_32( uint32_t * src, uint32_t * dest, int width, int height );
-HQX_API void HQX_CALLCONV hq4x_32( uint32_t * src, uint32_t * dest, int width, int height );
+HQX_API void HQX_CALLCONV hq2x_32( uint32_t * src, uint32_t * dest, int width, int height, int stride );
+HQX_API void HQX_CALLCONV hq3x_32( uint32_t * src, uint32_t * dest, int width, int height, int stride );
+HQX_API void HQX_CALLCONV hq4x_32( uint32_t * src, uint32_t * dest, int width, int height, int stride );
 
 #if defined(__cplusplus)
 }

third_party/hqx/src/hq2x.c

 #define PIXEL11_90    Interp9(dp+dpL+1, w[5], w[6], w[8]);
 #define PIXEL11_100   Interp10(dp+dpL+1, w[5], w[6], w[8]);
 
-HQX_API void HQX_CALLCONV hq2x_32( uint32_t * sp, uint32_t * dp, int Xres, int Yres )
+HQX_API void HQX_CALLCONV hq2x_32( uint32_t * sp, uint32_t * dp, int Xres, int Yres, int dpL )
 {
     int  i, j, k;
     int  prevline, nextline;
     uint32_t  w[10];
-    int dpL = Xres * 2;
 
     //   +----+----+----+
     //   |    |    |    |
             sp++;
             dp += 2;
         }
-        dp += dpL;
+        dp -= Xres * 2;
+        dp += dpL * 2;
     }
 }

third_party/hqx/src/hq3x.c

 #define PIXEL22_5   Interp5(dp+dpL+dpL+2, w[6], w[8]);
 #define PIXEL22_C   *(dp+dpL+dpL+2) = w[5];
 
-HQX_API void HQX_CALLCONV hq3x_32( uint32_t * sp, uint32_t * dp, int Xres, int Yres )
+HQX_API void HQX_CALLCONV hq3x_32( uint32_t * sp, uint32_t * dp, int Xres, int Yres, int dpL )
 {
     int  i, j, k;
     int  prevline, nextline;
     uint32_t  w[10];
-    int dpL = Xres * 3;
 
     //   +----+----+----+
     //   |    |    |    |
             sp++;
             dp += 3;
         }
-        dp += (dpL * 2);
-    }
+        dp -= Xres * 3;
+        dp += dpL * 3;
+	}
 }

third_party/hqx/src/hq4x.c

 #define PIXEL33_81    Interp8(dp+dpL+dpL+dpL+3, w[5], w[6]);
 #define PIXEL33_82    Interp8(dp+dpL+dpL+dpL+3, w[5], w[8]);
 
-HQX_API void HQX_CALLCONV hq4x_32( uint32_t * sp, uint32_t * dp, int Xres, int Yres )
+HQX_API void HQX_CALLCONV hq4x_32( uint32_t * sp, uint32_t * dp, int Xres, int Yres, int dpL )
 {
     int  i, j, k;
     int  prevline, nextline;
     uint32_t w[10];
-    int dpL = Xres * 4;
 
     //   +----+----+----+
     //   |    |    |    |
             sp++;
             dp += 4;
         }
-        dp += (dpL * 3);
+        dp -= Xres * 4;
+        dp += dpL * 4;
     }
 }