BC4BC5.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548
  1. //-------------------------------------------------------------------------------------
  2. // BC4BC5.cpp
  3. //
  4. // Block-compression (BC) functionality for BC4 and BC5 (DirectX 10 texture compression)
  5. //
  6. // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
  7. // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
  8. // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
  9. // PARTICULAR PURPOSE.
  10. //
  11. // Copyright (c) Microsoft Corporation. All rights reserved.
  12. //
  13. // http://go.microsoft.com/fwlink/?LinkId=248926
  14. //-------------------------------------------------------------------------------------
  15. #include "directxtexp.h"
  16. #include "BC.h"
  17. namespace DirectX
  18. {
  19. //------------------------------------------------------------------------------------
  20. // Constants
  21. //------------------------------------------------------------------------------------
  // These stay preprocessor macros (not const values) because the SAL
  // annotations on the functions in this file reference them.
  #define BLOCK_LEN 4                         // edge length of one block, in texels
  #define BLOCK_SIZE (BLOCK_LEN * BLOCK_LEN)  // total number of texels in a 4x4 block
  27. //------------------------------------------------------------------------------------
  28. // Structures
  29. //-------------------------------------------------------------------------------------
  30. #pragma warning(push)
  31. #pragma warning(disable : 4201)
  32. // BC4U/BC5U
  // One 8-byte block with unsigned 8-bit endpoints.
  // Layout: red_0, red_1, then sixteen 3-bit palette indices packed into the
  // remaining 48 bits (index n lives at bit 16 + 3*n of `data`).
  struct BC4_UNORM
  {
      // Decoded value of the texel stored at position uOffset in the block.
      float R(size_t uOffset) const
      {
          return DecodeFromIndex(GetIndex(uOffset));
      }

      // Translates a 3-bit palette index into its floating-point value.
      float DecodeFromIndex(size_t uIndex) const
      {
          // Indices 0 and 1 always select the two stored endpoints.
          switch (uIndex)
          {
          case 0:
              return red_0 / 255.0f;
          case 1:
              return red_1 / 255.0f;
          default:
              break;
          }

          const float fEnd0 = red_0 / 255.0f;
          const float fEnd1 = red_1 / 255.0f;

          if (red_0 > red_1)
          {
              // Six values interpolated between the endpoints (sevenths).
              const size_t uStep = uIndex - 1;
              return (fEnd0 * (7 - uStep) + fEnd1 * uStep) / 7.0f;
          }

          // Four interpolated values (fifths) plus the two exact boundary values.
          if (uIndex == 6)
              return 0.0f;
          if (uIndex == 7)
              return 1.0f;

          const size_t uStep = uIndex - 1;
          return (fEnd0 * (5 - uStep) + fEnd1 * uStep) / 5.0f;
      }

      // Reads the 3-bit palette index of texel uOffset.
      size_t GetIndex(size_t uOffset) const
      {
          const size_t uShift = 3 * uOffset + 16;
          return static_cast<size_t>((data >> uShift) & 0x07);
      }

      // Overwrites the 3-bit palette index of texel uOffset with uIndex.
      void SetIndex(size_t uOffset, size_t uIndex)
      {
          const size_t uShift = 3 * uOffset + 16;
          const uint64_t uCleared = data & ~(static_cast<uint64_t>(0x07) << uShift);
          data = uCleared | (static_cast<uint64_t>(uIndex) << uShift);
      }

      union
      {
          struct
          {
              uint8_t red_0;
              uint8_t red_1;
              uint8_t indices[6];
          };
          uint64_t data;
      };
  };
  83. // BC4S/BC5S
  // One 8-byte block with signed 8-bit endpoints.
  // Layout: red_0, red_1, then sixteen 3-bit palette indices packed into the
  // remaining 48 bits (index n lives at bit 16 + 3*n of `data`).
  struct BC4_SNORM
  {
      // Decoded value of the texel stored at position uOffset in the block.
      float R(size_t uOffset) const
      {
          return DecodeFromIndex(GetIndex(uOffset));
      }

      // Translates a 3-bit palette index into its floating-point value.
      float DecodeFromIndex(size_t uIndex) const
      {
          // -128 is treated as -127 so that the endpoint decodes to exactly -1.0.
          const int8_t iEnd0 = (red_0 == -128) ? -127 : red_0;
          const int8_t iEnd1 = (red_1 == -128) ? -127 : red_1;
          const float fEnd0 = iEnd0 / 127.0f;
          const float fEnd1 = iEnd1 / 127.0f;

          // Indices 0 and 1 always select the two stored endpoints.
          if (uIndex == 0)
              return fEnd0;
          if (uIndex == 1)
              return fEnd1;

          // The mode test uses the raw (unclamped) endpoint bytes.
          if (red_0 > red_1)
          {
              // Six values interpolated between the endpoints (sevenths).
              const size_t uStep = uIndex - 1;
              return (fEnd0 * (7 - uStep) + fEnd1 * uStep) / 7.0f;
          }

          // Four interpolated values (fifths) plus the two exact boundary values.
          if (uIndex == 6)
              return -1.0f;
          if (uIndex == 7)
              return 1.0f;

          const size_t uStep = uIndex - 1;
          return (fEnd0 * (5 - uStep) + fEnd1 * uStep) / 5.0f;
      }

      // Reads the 3-bit palette index of texel uOffset.
      size_t GetIndex(size_t uOffset) const
      {
          const size_t uShift = 3 * uOffset + 16;
          return static_cast<size_t>((data >> uShift) & 0x07);
      }

      // Overwrites the 3-bit palette index of texel uOffset with uIndex.
      void SetIndex(size_t uOffset, size_t uIndex)
      {
          const size_t uShift = 3 * uOffset + 16;
          const uint64_t uCleared = data & ~(static_cast<uint64_t>(0x07) << uShift);
          data = uCleared | (static_cast<uint64_t>(uIndex) << uShift);
      }

      union
      {
          struct
          {
              int8_t red_0;
              int8_t red_1;
              uint8_t indices[6];
          };
          uint64_t data;
      };
  };
  136. #pragma warning(pop)
  137. //-------------------------------------------------------------------------------------
  138. // Convert a floating point value to an 8-bit SNORM
  139. //-------------------------------------------------------------------------------------
  140. static void inline FloatToSNorm( _In_ float fVal, _Out_ int8_t *piSNorm )
  141. {
  142. const uint32_t dwMostNeg = ( 1 << ( 8 * sizeof( int8_t ) - 1 ) );
  143. if( _isnan( fVal ) )
  144. fVal = 0;
  145. else
  146. if( fVal > 1 )
  147. fVal = 1; // Clamp to 1
  148. else
  149. if( fVal < -1 )
  150. fVal = -1; // Clamp to -1
  151. fVal = fVal * (int8_t) ( dwMostNeg - 1 );
  152. if( fVal >= 0 )
  153. fVal += .5f;
  154. else
  155. fVal -= .5f;
  156. *piSNorm = (int8_t) (fVal);
  157. }
  158. //------------------------------------------------------------------------------
  159. static void FindEndPointsBC4U( _In_reads_(BLOCK_SIZE) const float theTexelsU[], _Out_ uint8_t &endpointU_0, _Out_ uint8_t &endpointU_1)
  160. {
  161. // The boundary of codec for signed/unsigned format
  162. float MIN_NORM;
  163. float MAX_NORM = 1.0f;
  164. int8_t iStart, iEnd;
  165. size_t i;
  166. MIN_NORM = 0.0f;
  167. // Find max/min of input texels
  168. float fBlockMax = theTexelsU[0];
  169. float fBlockMin = theTexelsU[0];
  170. for (i = 0; i < BLOCK_SIZE; ++i)
  171. {
  172. if (theTexelsU[i]<fBlockMin)
  173. {
  174. fBlockMin = theTexelsU[i];
  175. }
  176. else if (theTexelsU[i]>fBlockMax)
  177. {
  178. fBlockMax = theTexelsU[i];
  179. }
  180. }
  181. // If there are boundary values in input texels, Should use 4 block-codec to guarantee
  182. // the exact code of the boundary values.
  183. bool bUsing4BlockCodec = ( MIN_NORM == fBlockMin || MAX_NORM == fBlockMax );
  184. // Using Optimize
  185. float fStart, fEnd;
  186. if (!bUsing4BlockCodec)
  187. {
  188. OptimizeAlpha<false>(&fStart, &fEnd, theTexelsU, 8);
  189. iStart = (uint8_t) (fStart * 255.0f);
  190. iEnd = (uint8_t) (fEnd * 255.0f);
  191. endpointU_0 = iEnd;
  192. endpointU_1 = iStart;
  193. }
  194. else
  195. {
  196. OptimizeAlpha<false>(&fStart, &fEnd, theTexelsU, 6);
  197. iStart = (uint8_t) (fStart * 255.0f);
  198. iEnd = (uint8_t) (fEnd * 255.0f);
  199. endpointU_1 = iEnd;
  200. endpointU_0 = iStart;
  201. }
  202. }
  203. static void FindEndPointsBC4S(_In_reads_(BLOCK_SIZE) const float theTexelsU[], _Out_ int8_t &endpointU_0, _Out_ int8_t &endpointU_1)
  204. {
  205. // The boundary of codec for signed/unsigned format
  206. float MIN_NORM;
  207. float MAX_NORM = 1.0f;
  208. int8_t iStart, iEnd;
  209. size_t i;
  210. MIN_NORM = -1.0f;
  211. // Find max/min of input texels
  212. float fBlockMax = theTexelsU[0];
  213. float fBlockMin = theTexelsU[0];
  214. for (i = 0; i < BLOCK_SIZE; ++i)
  215. {
  216. if (theTexelsU[i]<fBlockMin)
  217. {
  218. fBlockMin = theTexelsU[i];
  219. }
  220. else if (theTexelsU[i]>fBlockMax)
  221. {
  222. fBlockMax = theTexelsU[i];
  223. }
  224. }
  225. // If there are boundary values in input texels, Should use 4 block-codec to guarantee
  226. // the exact code of the boundary values.
  227. bool bUsing4BlockCodec = ( MIN_NORM == fBlockMin || MAX_NORM == fBlockMax );
  228. // Using Optimize
  229. float fStart, fEnd;
  230. if (!bUsing4BlockCodec)
  231. {
  232. OptimizeAlpha<true>(&fStart, &fEnd, theTexelsU, 8);
  233. FloatToSNorm(fStart, &iStart);
  234. FloatToSNorm(fEnd, &iEnd);
  235. endpointU_0 = iEnd;
  236. endpointU_1 = iStart;
  237. }
  238. else
  239. {
  240. OptimizeAlpha<true>(&fStart, &fEnd, theTexelsU, 6);
  241. FloatToSNorm(fStart, &iStart);
  242. FloatToSNorm(fEnd, &iEnd);
  243. endpointU_1 = iEnd;
  244. endpointU_0 = iStart;
  245. }
  246. }
  247. //------------------------------------------------------------------------------
  248. static inline void FindEndPointsBC5U( _In_reads_(BLOCK_SIZE) const float theTexelsU[], _In_reads_(BLOCK_SIZE) const float theTexelsV[],
  249. _Out_ uint8_t &endpointU_0, _Out_ uint8_t &endpointU_1, _Out_ uint8_t &endpointV_0, _Out_ uint8_t &endpointV_1)
  250. {
  251. //Encoding the U and V channel by BC4 codec separately.
  252. FindEndPointsBC4U( theTexelsU, endpointU_0, endpointU_1);
  253. FindEndPointsBC4U( theTexelsV, endpointV_0, endpointV_1);
  254. }
  255. static inline void FindEndPointsBC5S( _In_reads_(BLOCK_SIZE) const float theTexelsU[], _In_reads_(BLOCK_SIZE) const float theTexelsV[],
  256. _Out_ int8_t &endpointU_0, _Out_ int8_t &endpointU_1, _Out_ int8_t &endpointV_0, _Out_ int8_t &endpointV_1)
  257. {
  258. //Encoding the U and V channel by BC4 codec separately.
  259. FindEndPointsBC4S( theTexelsU, endpointU_0, endpointU_1);
  260. FindEndPointsBC4S( theTexelsV, endpointV_0, endpointV_1);
  261. }
  262. //------------------------------------------------------------------------------
  263. static void FindClosestUNORM(_Inout_ BC4_UNORM* pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const float theTexelsU[])
  264. {
  265. float rGradient[8];
  266. int i;
  267. for (i = 0; i < 8; ++i)
  268. {
  269. rGradient[i] = pBC->DecodeFromIndex(i);
  270. }
  271. for (i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  272. {
  273. size_t uBestIndex = 0;
  274. float fBestDelta = 100000;
  275. for (size_t uIndex = 0; uIndex < 8; uIndex++)
  276. {
  277. float fCurrentDelta = fabsf(rGradient[uIndex]-theTexelsU[i]);
  278. if (fCurrentDelta < fBestDelta)
  279. {
  280. uBestIndex = uIndex;
  281. fBestDelta = fCurrentDelta;
  282. }
  283. }
  284. pBC->SetIndex(i, uBestIndex);
  285. }
  286. }
  287. static void FindClosestSNORM(_Inout_ BC4_SNORM* pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const float theTexelsU[])
  288. {
  289. float rGradient[8];
  290. int i;
  291. for (i = 0; i < 8; ++i)
  292. {
  293. rGradient[i] = pBC->DecodeFromIndex(i);
  294. }
  295. for (i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  296. {
  297. size_t uBestIndex = 0;
  298. float fBestDelta = 100000;
  299. for (size_t uIndex = 0; uIndex < 8; uIndex++)
  300. {
  301. float fCurrentDelta = fabsf(rGradient[uIndex]-theTexelsU[i]);
  302. if (fCurrentDelta < fBestDelta)
  303. {
  304. uBestIndex = uIndex;
  305. fBestDelta = fCurrentDelta;
  306. }
  307. }
  308. pBC->SetIndex(i, uBestIndex);
  309. }
  310. }
  311. //=====================================================================================
  312. // Entry points
  313. //=====================================================================================
  314. //-------------------------------------------------------------------------------------
  315. // BC4 Compression
  316. //-------------------------------------------------------------------------------------
  317. _Use_decl_annotations_
  318. void D3DXDecodeBC4U( XMVECTOR *pColor, const uint8_t *pBC )
  319. {
  320. assert( pColor && pBC );
  321. static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" );
  322. auto pBC4 = reinterpret_cast<const BC4_UNORM*>(pBC);
  323. for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  324. {
  325. #pragma prefast(suppress:22103, "writing blocks in two halves confuses tool")
  326. pColor[i] = XMVectorSet( pBC4->R(i), 0, 0, 1.0f);
  327. }
  328. }
  329. _Use_decl_annotations_
  330. void D3DXDecodeBC4S(XMVECTOR *pColor, const uint8_t *pBC)
  331. {
  332. assert( pColor && pBC );
  333. static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" );
  334. auto pBC4 = reinterpret_cast<const BC4_SNORM*>(pBC);
  335. for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  336. {
  337. #pragma prefast(suppress:22103, "writing blocks in two halves confuses tool")
  338. pColor[i] = XMVectorSet( pBC4->R(i), 0, 0, 1.0f);
  339. }
  340. }
  341. _Use_decl_annotations_
  342. void D3DXEncodeBC4U( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags )
  343. {
  344. UNREFERENCED_PARAMETER( flags );
  345. assert( pBC && pColor );
  346. static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" );
  347. memset(pBC, 0, sizeof(BC4_UNORM));
  348. auto pBC4 = reinterpret_cast<BC4_UNORM*>(pBC);
  349. float theTexelsU[NUM_PIXELS_PER_BLOCK];
  350. for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  351. {
  352. theTexelsU[i] = XMVectorGetX( pColor[i] );
  353. }
  354. FindEndPointsBC4U(theTexelsU, pBC4->red_0, pBC4->red_1);
  355. FindClosestUNORM(pBC4, theTexelsU);
  356. }
  357. _Use_decl_annotations_
  358. void D3DXEncodeBC4S( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags )
  359. {
  360. UNREFERENCED_PARAMETER( flags );
  361. assert( pBC && pColor );
  362. static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" );
  363. memset(pBC, 0, sizeof(BC4_UNORM));
  364. auto pBC4 = reinterpret_cast<BC4_SNORM*>(pBC);
  365. float theTexelsU[NUM_PIXELS_PER_BLOCK];
  366. for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  367. {
  368. theTexelsU[i] = XMVectorGetX( pColor[i] );
  369. }
  370. FindEndPointsBC4S(theTexelsU, pBC4->red_0, pBC4->red_1);
  371. FindClosestSNORM(pBC4, theTexelsU);
  372. }
  373. //-------------------------------------------------------------------------------------
  374. // BC5 Compression
  375. //-------------------------------------------------------------------------------------
  376. _Use_decl_annotations_
  377. void D3DXDecodeBC5U(XMVECTOR *pColor, const uint8_t *pBC)
  378. {
  379. assert( pColor && pBC );
  380. static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" );
  381. auto pBCR = reinterpret_cast<const BC4_UNORM*>(pBC);
  382. auto pBCG = reinterpret_cast<const BC4_UNORM*>(pBC+sizeof(BC4_UNORM));
  383. for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  384. {
  385. #pragma prefast(suppress:22103, "writing blocks in two halves confuses tool")
  386. pColor[i] = XMVectorSet(pBCR->R(i), pBCG->R(i), 0, 1.0f);
  387. }
  388. }
  389. _Use_decl_annotations_
  390. void D3DXDecodeBC5S(XMVECTOR *pColor, const uint8_t *pBC)
  391. {
  392. assert( pColor && pBC );
  393. static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" );
  394. auto pBCR = reinterpret_cast<const BC4_SNORM*>(pBC);
  395. auto pBCG = reinterpret_cast<const BC4_SNORM*>(pBC+sizeof(BC4_SNORM));
  396. for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  397. {
  398. #pragma prefast(suppress:22103, "writing blocks in two halves confuses tool")
  399. pColor[i] = XMVectorSet(pBCR->R(i), pBCG->R(i), 0, 1.0f);
  400. }
  401. }
  402. _Use_decl_annotations_
  403. void D3DXEncodeBC5U( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags )
  404. {
  405. UNREFERENCED_PARAMETER( flags );
  406. assert( pBC && pColor );
  407. static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" );
  408. memset(pBC, 0, sizeof(BC4_UNORM)*2);
  409. auto pBCR = reinterpret_cast<BC4_UNORM*>(pBC);
  410. auto pBCG = reinterpret_cast<BC4_UNORM*>(pBC+sizeof(BC4_UNORM));
  411. float theTexelsU[NUM_PIXELS_PER_BLOCK];
  412. float theTexelsV[NUM_PIXELS_PER_BLOCK];
  413. for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  414. {
  415. XMFLOAT4A clr;
  416. XMStoreFloat4A( &clr, pColor[i] );
  417. theTexelsU[i] = clr.x;
  418. theTexelsV[i] = clr.y;
  419. }
  420. FindEndPointsBC5U(
  421. theTexelsU,
  422. theTexelsV,
  423. pBCR->red_0,
  424. pBCR->red_1,
  425. pBCG->red_0,
  426. pBCG->red_1);
  427. FindClosestUNORM(pBCR, theTexelsU);
  428. FindClosestUNORM(pBCG, theTexelsV);
  429. }
  430. _Use_decl_annotations_
  431. void D3DXEncodeBC5S( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags )
  432. {
  433. UNREFERENCED_PARAMETER( flags );
  434. assert( pBC && pColor );
  435. static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" );
  436. memset(pBC, 0, sizeof(BC4_UNORM)*2);
  437. auto pBCR = reinterpret_cast<BC4_SNORM*>(pBC);
  438. auto pBCG = reinterpret_cast<BC4_SNORM*>(pBC+sizeof(BC4_SNORM));
  439. float theTexelsU[NUM_PIXELS_PER_BLOCK];
  440. float theTexelsV[NUM_PIXELS_PER_BLOCK];
  441. for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
  442. {
  443. XMFLOAT4A clr;
  444. XMStoreFloat4A( &clr, pColor[i] );
  445. theTexelsU[i] = clr.x;
  446. theTexelsV[i] = clr.y;
  447. }
  448. FindEndPointsBC5S(
  449. theTexelsU,
  450. theTexelsV,
  451. pBCR->red_0,
  452. pBCR->red_1,
  453. pBCG->red_0,
  454. pBCG->red_1);
  455. FindClosestSNORM(pBCR, theTexelsU);
  456. FindClosestSNORM(pBCG, theTexelsV);
  457. }
  458. } // namespace