Co-authored-by: sbird <sbird@no.tld> Co-authored-by: Hedara <hedara90@gmail.com> Co-authored-by: DizzyEggg <jajkodizzy@wp.pl>
1394 lines
45 KiB
C
1394 lines
45 KiB
C
#include "global.h"
|
|
#include "malloc.h"
|
|
#include "data.h"
|
|
#include "decompress.h"
|
|
#include "decompress_error_handler.h"
|
|
#include "pokemon.h"
|
|
#include "pokemon_sprite_visualizer.h"
|
|
#include "text.h"
|
|
#include "menu.h"
|
|
|
|
// === WARNING === WARNING === WARNING ===
|
|
// === No user serviceable code before ===
|
|
// === the SpecialPokePic function, do ===
|
|
// === not modify code unless magician ===
|
|
// === WARNING === WARNING === WARNING ===
|
|
|
|
static void SmolDecompressData(const struct SmolHeader *header, const u32 *data, void *dest);
|
|
static void SmolDecompressTilemap(const struct SmolTilemapHeader *header, const u32 *data, u16 *dest);
|
|
|
|
static bool32 isModeLoEncoded(enum CompressionMode mode);
|
|
static bool32 isModeSymEncoded(enum CompressionMode mode);
|
|
static bool32 isModeSymDelta(enum CompressionMode mode);
|
|
|
|
|
|
// Field accessors for one packed tANS decode-table entry (layout below).
// The argument is parenthesised so that compound expressions such as
// TABLE_READ_K(a | b) are fully evaluated before the field is extracted.
#define TABLE_READ_K(tableVal)      ((tableVal) & 7)
#define TABLE_READ_SYMBOL(tableVal) (((tableVal) & 0xFF) >> 3)
#define TABLE_READ_Y(tableVal)      (((tableVal) >> 8) & 0xFF)
#define TABLE_READ_MASK(tableVal)   ((tableVal) >> 16)

/*
Layout is the following (lowest bits first):
u32 kVal:3;    // bits 0-2
u32 symbol:5;  // bits 3-7, set in BuildDecompressionTable
u32 yVal:8;    // bits 8-15
u32 mask:8;    // bits 16 and up (read back with a plain shift, no masking)
*/

// Packs k, y and mask into one entry; the symbol bits are left clear.
#define SET_TABLE_ENTRY(k, y, mask) (((k) & 7) | ((y) << 8) | ((mask) << 16))
|
|
|
|
static IWRAM_DATA u32 sWorkingYkTable[TANS_TABLE_SIZE] = {0};
|
|
|
|
// Helper struct to build the tANS decode tables without having to do calculations at run-time
|
|
// Mask Table is 0, 1, 3, 7, 15, 31, 63.
|
|
static const u32 sYkTemplate[2*TANS_TABLE_SIZE] = {
|
|
[0] = 0,
|
|
[1] = SET_TABLE_ENTRY(6, (1 << 6) - 64, 63),
|
|
[2] = SET_TABLE_ENTRY(5, (2 << 5) - 64, 31),
|
|
[3] = SET_TABLE_ENTRY(5, (3 << 5) - 64, 31),
|
|
[4] = SET_TABLE_ENTRY(4, (4 << 4) - 64, 15),
|
|
[5] = SET_TABLE_ENTRY(4, (5 << 4) - 64, 15),
|
|
[6] = SET_TABLE_ENTRY(4, (6 << 4) - 64, 15),
|
|
[7] = SET_TABLE_ENTRY(4, (7 << 4) - 64, 15),
|
|
[8] = SET_TABLE_ENTRY(3, (8 << 3) - 64, 7),
|
|
[9] = SET_TABLE_ENTRY(3, (9 << 3) - 64, 7),
|
|
[10] = SET_TABLE_ENTRY(3, (10 << 3) - 64, 7),
|
|
[11] = SET_TABLE_ENTRY(3, (11 << 3) - 64, 7),
|
|
[12] = SET_TABLE_ENTRY(3, (12 << 3) - 64, 7),
|
|
[13] = SET_TABLE_ENTRY(3, (13 << 3) - 64, 7),
|
|
[14] = SET_TABLE_ENTRY(3, (14 << 3) - 64, 7),
|
|
[15] = SET_TABLE_ENTRY(3, (15 << 3) - 64, 7),
|
|
[16] = SET_TABLE_ENTRY(2, (16 << 2) - 64, 3),
|
|
[17] = SET_TABLE_ENTRY(2, (17 << 2) - 64, 3),
|
|
[18] = SET_TABLE_ENTRY(2, (18 << 2) - 64, 3),
|
|
[19] = SET_TABLE_ENTRY(2, (19 << 2) - 64, 3),
|
|
[20] = SET_TABLE_ENTRY(2, (20 << 2) - 64, 3),
|
|
[21] = SET_TABLE_ENTRY(2, (21 << 2) - 64, 3),
|
|
[22] = SET_TABLE_ENTRY(2, (22 << 2) - 64, 3),
|
|
[23] = SET_TABLE_ENTRY(2, (23 << 2) - 64, 3),
|
|
[24] = SET_TABLE_ENTRY(2, (24 << 2) - 64, 3),
|
|
[25] = SET_TABLE_ENTRY(2, (25 << 2) - 64, 3),
|
|
[26] = SET_TABLE_ENTRY(2, (26 << 2) - 64, 3),
|
|
[27] = SET_TABLE_ENTRY(2, (27 << 2) - 64, 3),
|
|
[28] = SET_TABLE_ENTRY(2, (28 << 2) - 64, 3),
|
|
[29] = SET_TABLE_ENTRY(2, (29 << 2) - 64, 3),
|
|
[30] = SET_TABLE_ENTRY(2, (30 << 2) - 64, 3),
|
|
[31] = SET_TABLE_ENTRY(2, (31 << 2) - 64, 3),
|
|
[32] = SET_TABLE_ENTRY(1, (32 << 1) - 64, 1),
|
|
[33] = SET_TABLE_ENTRY(1, (33 << 1) - 64, 1),
|
|
[34] = SET_TABLE_ENTRY(1, (34 << 1) - 64, 1),
|
|
[35] = SET_TABLE_ENTRY(1, (35 << 1) - 64, 1),
|
|
[36] = SET_TABLE_ENTRY(1, (36 << 1) - 64, 1),
|
|
[37] = SET_TABLE_ENTRY(1, (37 << 1) - 64, 1),
|
|
[38] = SET_TABLE_ENTRY(1, (38 << 1) - 64, 1),
|
|
[39] = SET_TABLE_ENTRY(1, (39 << 1) - 64, 1),
|
|
[40] = SET_TABLE_ENTRY(1, (40 << 1) - 64, 1),
|
|
[41] = SET_TABLE_ENTRY(1, (41 << 1) - 64, 1),
|
|
[42] = SET_TABLE_ENTRY(1, (42 << 1) - 64, 1),
|
|
[43] = SET_TABLE_ENTRY(1, (43 << 1) - 64, 1),
|
|
[44] = SET_TABLE_ENTRY(1, (44 << 1) - 64, 1),
|
|
[45] = SET_TABLE_ENTRY(1, (45 << 1) - 64, 1),
|
|
[46] = SET_TABLE_ENTRY(1, (46 << 1) - 64, 1),
|
|
[47] = SET_TABLE_ENTRY(1, (47 << 1) - 64, 1),
|
|
[48] = SET_TABLE_ENTRY(1, (48 << 1) - 64, 1),
|
|
[49] = SET_TABLE_ENTRY(1, (49 << 1) - 64, 1),
|
|
[50] = SET_TABLE_ENTRY(1, (50 << 1) - 64, 1),
|
|
[51] = SET_TABLE_ENTRY(1, (51 << 1) - 64, 1),
|
|
[52] = SET_TABLE_ENTRY(1, (52 << 1) - 64, 1),
|
|
[53] = SET_TABLE_ENTRY(1, (53 << 1) - 64, 1),
|
|
[54] = SET_TABLE_ENTRY(1, (54 << 1) - 64, 1),
|
|
[55] = SET_TABLE_ENTRY(1, (55 << 1) - 64, 1),
|
|
[56] = SET_TABLE_ENTRY(1, (56 << 1) - 64, 1),
|
|
[57] = SET_TABLE_ENTRY(1, (57 << 1) - 64, 1),
|
|
[58] = SET_TABLE_ENTRY(1, (58 << 1) - 64, 1),
|
|
[59] = SET_TABLE_ENTRY(1, (59 << 1) - 64, 1),
|
|
[60] = SET_TABLE_ENTRY(1, (60 << 1) - 64, 1),
|
|
[61] = SET_TABLE_ENTRY(1, (61 << 1) - 64, 1),
|
|
[62] = SET_TABLE_ENTRY(1, (62 << 1) - 64, 1),
|
|
[63] = SET_TABLE_ENTRY(1, (63 << 1) - 64, 1),
|
|
[64] = SET_TABLE_ENTRY(0, 64 - 64, 0),
|
|
[65] = SET_TABLE_ENTRY(0, 65 - 64, 0),
|
|
[66] = SET_TABLE_ENTRY(0, 66 - 64, 0),
|
|
[67] = SET_TABLE_ENTRY(0, 67 - 64, 0),
|
|
[68] = SET_TABLE_ENTRY(0, 68 - 64, 0),
|
|
[69] = SET_TABLE_ENTRY(0, 69 - 64, 0),
|
|
[70] = SET_TABLE_ENTRY(0, 70 - 64, 0),
|
|
[71] = SET_TABLE_ENTRY(0, 71 - 64, 0),
|
|
[72] = SET_TABLE_ENTRY(0, 72 - 64, 0),
|
|
[73] = SET_TABLE_ENTRY(0, 73 - 64, 0),
|
|
[74] = SET_TABLE_ENTRY(0, 74 - 64, 0),
|
|
[75] = SET_TABLE_ENTRY(0, 75 - 64, 0),
|
|
[76] = SET_TABLE_ENTRY(0, 76 - 64, 0),
|
|
[77] = SET_TABLE_ENTRY(0, 77 - 64, 0),
|
|
[78] = SET_TABLE_ENTRY(0, 78 - 64, 0),
|
|
[79] = SET_TABLE_ENTRY(0, 79 - 64, 0),
|
|
[80] = SET_TABLE_ENTRY(0, 80 - 64, 0),
|
|
[81] = SET_TABLE_ENTRY(0, 81 - 64, 0),
|
|
[82] = SET_TABLE_ENTRY(0, 82 - 64, 0),
|
|
[83] = SET_TABLE_ENTRY(0, 83 - 64, 0),
|
|
[84] = SET_TABLE_ENTRY(0, 84 - 64, 0),
|
|
[85] = SET_TABLE_ENTRY(0, 85 - 64, 0),
|
|
[86] = SET_TABLE_ENTRY(0, 86 - 64, 0),
|
|
[87] = SET_TABLE_ENTRY(0, 87 - 64, 0),
|
|
[88] = SET_TABLE_ENTRY(0, 88 - 64, 0),
|
|
[89] = SET_TABLE_ENTRY(0, 89 - 64, 0),
|
|
[90] = SET_TABLE_ENTRY(0, 90 - 64, 0),
|
|
[91] = SET_TABLE_ENTRY(0, 91 - 64, 0),
|
|
[92] = SET_TABLE_ENTRY(0, 92 - 64, 0),
|
|
[93] = SET_TABLE_ENTRY(0, 93 - 64, 0),
|
|
[94] = SET_TABLE_ENTRY(0, 94 - 64, 0),
|
|
[95] = SET_TABLE_ENTRY(0, 95 - 64, 0),
|
|
[96] = SET_TABLE_ENTRY(0, 96 - 64, 0),
|
|
[97] = SET_TABLE_ENTRY(0, 97 - 64, 0),
|
|
[98] = SET_TABLE_ENTRY(0, 98 - 64, 0),
|
|
[99] = SET_TABLE_ENTRY(0, 99 - 64, 0),
|
|
[100] = SET_TABLE_ENTRY(0, 100 - 64, 0),
|
|
[101] = SET_TABLE_ENTRY(0, 101 - 64, 0),
|
|
[102] = SET_TABLE_ENTRY(0, 102 - 64, 0),
|
|
[103] = SET_TABLE_ENTRY(0, 103 - 64, 0),
|
|
[104] = SET_TABLE_ENTRY(0, 104 - 64, 0),
|
|
[105] = SET_TABLE_ENTRY(0, 105 - 64, 0),
|
|
[106] = SET_TABLE_ENTRY(0, 106 - 64, 0),
|
|
[107] = SET_TABLE_ENTRY(0, 107 - 64, 0),
|
|
[108] = SET_TABLE_ENTRY(0, 108 - 64, 0),
|
|
[109] = SET_TABLE_ENTRY(0, 109 - 64, 0),
|
|
[110] = SET_TABLE_ENTRY(0, 110 - 64, 0),
|
|
[111] = SET_TABLE_ENTRY(0, 111 - 64, 0),
|
|
[112] = SET_TABLE_ENTRY(0, 112 - 64, 0),
|
|
[113] = SET_TABLE_ENTRY(0, 113 - 64, 0),
|
|
[114] = SET_TABLE_ENTRY(0, 114 - 64, 0),
|
|
[115] = SET_TABLE_ENTRY(0, 115 - 64, 0),
|
|
[116] = SET_TABLE_ENTRY(0, 116 - 64, 0),
|
|
[117] = SET_TABLE_ENTRY(0, 117 - 64, 0),
|
|
[118] = SET_TABLE_ENTRY(0, 118 - 64, 0),
|
|
[119] = SET_TABLE_ENTRY(0, 119 - 64, 0),
|
|
[120] = SET_TABLE_ENTRY(0, 120 - 64, 0),
|
|
[121] = SET_TABLE_ENTRY(0, 121 - 64, 0),
|
|
[122] = SET_TABLE_ENTRY(0, 122 - 64, 0),
|
|
[123] = SET_TABLE_ENTRY(0, 123 - 64, 0),
|
|
[124] = SET_TABLE_ENTRY(0, 124 - 64, 0),
|
|
[125] = SET_TABLE_ENTRY(0, 125 - 64, 0),
|
|
[126] = SET_TABLE_ENTRY(0, 126 - 64, 0),
|
|
[127] = SET_TABLE_ENTRY(0, 127 - 64, 0),
|
|
};
|
|
|
|
// Checks if `ptr` is likely LZ77 data
|
|
// Checks word-alignment, min/max size, and header byte
|
|
// Returns uncompressed size if true, 0 otherwise
|
|
u32 IsLZ77Data(const void *ptr, u32 minSize, u32 maxSize)
|
|
{
|
|
const u8 *data = ptr;
|
|
u32 size;
|
|
// Compressed data must be word aligned
|
|
if (((u32)ptr) & 3)
|
|
return 0;
|
|
// Check LZ77 header byte
|
|
// See https://problemkaputt.de/gbatek.htm#biosdecompressionfunctions
|
|
if (data[0] != 0x10)
|
|
return 0;
|
|
|
|
// Read 24-bit uncompressed size
|
|
size = data[1] | (data[2] << 8) | (data[3] << 16);
|
|
if (size >= minSize && size <= maxSize)
|
|
return size;
|
|
return 0;
|
|
}
|
|
|
|
static inline u32 DoLoadCompressedSpriteSheet(const struct CompressedSpriteSheet *src, void *buffer)
|
|
{
|
|
struct SpriteSheet dest;
|
|
|
|
dest.data = buffer;
|
|
dest.size = src->size;
|
|
dest.tag = src->tag;
|
|
return LoadSpriteSheet(&dest);
|
|
}
|
|
|
|
u32 LoadCompressedSpriteSheet(const struct CompressedSpriteSheet *src)
|
|
{
|
|
void *buffer = malloc_and_decompress(src->data, NULL);
|
|
u32 ret = DoLoadCompressedSpriteSheet(src, buffer);
|
|
Free(buffer);
|
|
|
|
return ret;
|
|
}
|
|
|
|
u32 LoadCompressedSpriteSheetOverrideBuffer(const struct CompressedSpriteSheet *src, void *buffer)
|
|
{
|
|
DecompressDataWithHeaderWram(src->data, buffer);
|
|
return DoLoadCompressedSpriteSheet(src, buffer);
|
|
}
|
|
|
|
// This can be used for either compressed or uncompressed sprite sheets
|
|
u32 LoadCompressedSpriteSheetByTemplate(const struct SpriteTemplate *template, s32 offset)
|
|
{
|
|
struct SpriteTemplate myTemplate;
|
|
struct SpriteFrameImage myImage;
|
|
u32 size;
|
|
|
|
// Check for LZ77 header and read uncompressed size, or fallback if not compressed (zero size)
|
|
if (IsCompressedData(template->images->data))
|
|
{
|
|
size = GetDecompressedDataSize(template->images->data);
|
|
void *buffer = malloc_and_decompress(template->images->data, NULL);
|
|
myImage.data = buffer;
|
|
myImage.size = size + offset;
|
|
myTemplate.images = &myImage;
|
|
myTemplate.tileTag = template->tileTag;
|
|
|
|
u32 ret = LoadSpriteSheetByTemplate(&myTemplate, 0, offset);
|
|
Free(buffer);
|
|
return ret;
|
|
}
|
|
return LoadSpriteSheetByTemplate(template, 0, offset);
|
|
|
|
}
|
|
|
|
void DecompressPicFromTable(const struct CompressedSpriteSheet *src, void *buffer)
|
|
{
|
|
DecompressDataWithHeaderWram(src->data, buffer);
|
|
}
|
|
|
|
// Thin adapter around LoadSpecialPokePic: same arguments, with `isFrontPic` moved from the
// first to the last position.
void HandleLoadSpecialPokePic(bool32 isFrontPic, void *dest, s32 species, u32 personality)
{
    LoadSpecialPokePic(dest, species, personality, isFrontPic);
}
|
|
|
|
// Wrapper function for all decompression calls using formats with headers
|
|
// calls the correct decompression function depending on the header
|
|
// VRAM version
|
|
void DecompressDataWithHeaderVram(const u32 *src, void *dest)
|
|
{
|
|
union CompressionHeader header;
|
|
CpuCopy32(src, &header, 8);
|
|
switch (header.smol.mode)
|
|
{
|
|
case MODE_LZ77:
|
|
LZ77UnCompVram(src, dest);
|
|
break;
|
|
case IS_TILEMAP:
|
|
SmolDecompressTilemap(&header.smolTilemap, &src[2], dest);
|
|
break;
|
|
case BASE_ONLY:
|
|
case ENCODE_SYMS:
|
|
case ENCODE_DELTA_SYMS:
|
|
case ENCODE_LO:
|
|
case ENCODE_BOTH:
|
|
case ENCODE_BOTH_DELTA_SYMS:
|
|
SmolDecompressData(&header.smol, &src[2], dest);
|
|
break;
|
|
default:
|
|
DecompressionError(src, HEADER_ERROR);
|
|
}
|
|
}
|
|
|
|
// Wrapper function for all decompression calls using formats with headers
|
|
// calls the correct decompression function depending on the header
|
|
// WRAM version
|
|
void DecompressDataWithHeaderWram(const u32 *src, void *dest)
|
|
{
|
|
union CompressionHeader header;
|
|
CpuCopy32(src, &header, 8);
|
|
switch (header.smol.mode)
|
|
{
|
|
case MODE_LZ77:
|
|
FastLZ77UnCompWram(src, dest);
|
|
break;
|
|
case IS_TILEMAP:
|
|
SmolDecompressTilemap(&header.smolTilemap, &src[2], dest);
|
|
break;
|
|
case BASE_ONLY:
|
|
case ENCODE_SYMS:
|
|
case ENCODE_DELTA_SYMS:
|
|
case ENCODE_LO:
|
|
case ENCODE_BOTH:
|
|
case ENCODE_BOTH_DELTA_SYMS:
|
|
SmolDecompressData(&header.smol, &src[2], dest);
|
|
break;
|
|
default:
|
|
DecompressionError(src, HEADER_ERROR);
|
|
}
|
|
}
|
|
|
|
// Token repetition helpers: REPn(X) expands to n copies of X.
#define REP0(X)
#define REP1(X)  X
#define REP2(X)  X X
#define REP3(X)  X X X
#define REP4(X)  X X X X
#define REP5(X)  X X X X X
#define REP6(X)  X X X X X X
#define REP7(X)  X X X X X X X
#define REP8(X)  X X X X X X X X
#define REP9(X)  X X X X X X X X X
#define REP10(X) X X X X X X X X X X

// REP(TENS, ONES, X) expands to (10 * TENS + ONES) copies of X.
// TENS and ONES are pasted onto "REP", so they must be literal numbers in the range 0-10.
#define REP(TENS,ONES,X) \
    REP##TENS(REP10(X)) \
    REP##ONES(X)
|
|
|
|
// Unpack packed tANS encoded data symbol frequences into their individual parts
|
|
static __attribute__((always_inline)) inline void UnpackFrequenciesLoop(const u32 *packedFreqs, u16 *freqs, u32 i)
|
|
{
|
|
// Loop unpack
|
|
freqs[i*5 + 0] = (packedFreqs[i] >> (6*0)) & PACKED_FREQ_MASK;
|
|
freqs[i*5 + 1] = (packedFreqs[i] >> (6*1)) & PACKED_FREQ_MASK;
|
|
freqs[i*5 + 2] = (packedFreqs[i] >> (6*2)) & PACKED_FREQ_MASK;
|
|
freqs[i*5 + 3] = (packedFreqs[i] >> (6*3)) & PACKED_FREQ_MASK;
|
|
freqs[i*5 + 4] = (packedFreqs[i] >> (6*4)) & PACKED_FREQ_MASK;
|
|
|
|
freqs[15] += (packedFreqs[i] & PARTIAL_FREQ_MASK) >> (30 - 2*i);
|
|
}
|
|
|
|
// Expands the three packed frequency words into the 16 entries of `freqs`.
// freqs[15] is cleared first because each word only contributes a part of it.
static __attribute__((always_inline)) inline void UnpackFrequencies(const u32 *packedFreqs, u16 *freqs)
{
    freqs[15] = 0;

    for (u32 word = 0; word < 3; word++)
        UnpackFrequenciesLoop(packedFreqs, freqs, word);
}
|
|
|
|
// This is a small function, so we can store it in IWRAM for improved performance and don't need to worry about it taking too much precious IWRAM space.
|
|
ARM_FUNC __attribute__((section(".iwram.code"))) __attribute__((noinline)) static void CopyTable(u32 *dst, const u32 *src, u32 size, u32 orrVal)
|
|
{
|
|
for (u32 i = 0; i < size; i++) {
|
|
*dst++ = (*src++) | orrVal;
|
|
}
|
|
}
|
|
|
|
// Build the tANS decompression table from the specified frequencies and the precomputed helper struct
|
|
__attribute__((optimize("-O3"))) static void BuildDecompressionTable(const u32 *packedFreqs, u32 *table)
|
|
{
|
|
u16 freqs[16];
|
|
|
|
UnpackFrequencies(packedFreqs, freqs);
|
|
|
|
for (u8 i = 0; i < 16; i++)
|
|
{
|
|
const u32 *srcTemplate;
|
|
|
|
switch (freqs[i]) {
|
|
case 0:
|
|
break;
|
|
default: {
|
|
srcTemplate = &sYkTemplate[freqs[i]];
|
|
CopyTable(table, srcTemplate, freqs[i], i << 3);
|
|
table += freqs[i];
|
|
srcTemplate += freqs[i];
|
|
break;
|
|
}
|
|
case 1:
|
|
srcTemplate = &sYkTemplate[1];
|
|
REP(0, 1, *table++ = *srcTemplate++ | (i << 3);)
|
|
break;
|
|
case 2:
|
|
srcTemplate = &sYkTemplate[2];
|
|
REP(0, 2, *table++ = (*srcTemplate++) | (i << 3);)
|
|
break;
|
|
case 3:
|
|
srcTemplate = &sYkTemplate[3];
|
|
REP(0, 3, *table++ = (*srcTemplate++) | (i << 3);)
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
static IWRAM_DATA u8 sBitIndex = 0;
|
|
static IWRAM_DATA const u32 *sDataPtr = 0;
|
|
static IWRAM_DATA u32 sCurrState = 0;
|
|
|
|
// 33 because of FastUnsafeCopy32, we divide by 4 because the buffer is an array of u32
|
|
#define FUNC_BUFFER_SIZE(funcStart, funcEnd)(((u32)(funcEnd) - (u32)(funcStart) + 33) / 4)
|
|
|
|
extern void FastUnsafeCopy32(void *, const void *, u32 size);
|
|
|
|
// Dark Egg magic
|
|
static inline void CopyFuncToIwram(void *funcBuffer, const void *funcStartAddress, const void *funcEndAdress)
|
|
{
|
|
FastUnsafeCopy32(funcBuffer, funcStartAddress, funcEndAdress - funcStartAddress);
|
|
}
|
|
|
|
// Why the decode loops are built from macros and unrolled by hand:
// one decode step consumes at most 6 bits (the largest k in sYkTemplate). Right after a new
// 32-bit word has been fetched the bit index is therefore at most 5, and the next four steps
// cannot push it to 32. Those steps can skip the "bitIndex >= 32" test entirely.
// This matters for large images: when the loops run several thousand times, every comparison
// saved adds up.

// One decode step for plain (non-delta) data.
// Expects these names in the expanding scope: ykTable, sCurrState, symbol, currK, currBits, bitIndex.
// Looks up the entry for the current state, ORs its symbol into nibble `nibble` of `symbol`,
// then derives the next state from the entry's y value plus the masked bits at bitIndex and
// advances bitIndex by k. The caller is responsible for refilling (LOOP_BITADVANCE).
#define LOT_LOOP_MAIN(nibble) \
{ \
    u32 ykVals = ykTable[sCurrState]; \
    symbol |= TABLE_READ_SYMBOL(ykVals) << ((nibble)*4); \
    currK = TABLE_READ_K(ykVals); \
    sCurrState = TABLE_READ_Y(ykVals); \
    sCurrState += (currBits >> bitIndex) & TABLE_READ_MASK(ykVals); \
    bitIndex += currK; \
}

// The same for all the loops.
// Fetches the next bitstream word after bitIndex has reached or passed 32. If the last read
// straddled the word boundary (bitIndex is non-zero after subtracting 32), the bits that were
// still missing are taken from the bottom of the new word and added to the state at the
// position where the previous word ran out.
// Expects: data, currBits, bitIndex, currK, sCurrState.
#define LOOP_BITADVANCE() \
{ \
    currBits = *data++; \
    bitIndex -= 32; \
    if (bitIndex != 0) \
    { \
        sCurrState += (currBits & ((1u << bitIndex) - 1)) << (currK - bitIndex); \
    } \
}
|
|
// Inner loop of tANS decoding for Lengths and Offset data for decompression instructions, uses u8 data sizes
|
|
// Basic process for decoding a tANS encoded value is to read the current symbol from the decoding table, then calculate the next state
|
|
// from the y and k values for the current state and add the value read from the next k bits in the bitstream
|
|
ARM_FUNC __attribute__((flatten, noinline, no_reorder)) __attribute__((optimize("-O3"))) static void DecodeLOtANSLoop(const u32 *data, u32 *ykTable, u8 *resultVec, u8 *resultVecEnd)
|
|
{
|
|
u32 currBits = *data++;
|
|
u32 bitIndex = sBitIndex;
|
|
u16 *resultVec_u16 = (u16 *) resultVec;
|
|
|
|
do
|
|
{
|
|
u32 currK;
|
|
u32 symbol = 0;
|
|
|
|
{
|
|
LOT_LOOP_MAIN(0);
|
|
if (bitIndex >= 32)
|
|
{
|
|
LOOP_BITADVANCE();
|
|
|
|
LOT_LOOP_MAIN(1);
|
|
LOT_LOOP_MAIN(2);
|
|
LOT_LOOP_MAIN(3);
|
|
|
|
goto LOOP_STORE;
|
|
}
|
|
}
|
|
{
|
|
LOT_LOOP_MAIN(1);
|
|
if (bitIndex >= 32)
|
|
{
|
|
LOOP_BITADVANCE();
|
|
|
|
LOT_LOOP_MAIN(2);
|
|
LOT_LOOP_MAIN(3);
|
|
|
|
goto LOOP_STORE;
|
|
}
|
|
}
|
|
{
|
|
LOT_LOOP_MAIN(2);
|
|
if (bitIndex >= 32)
|
|
{
|
|
LOOP_BITADVANCE();
|
|
|
|
LOT_LOOP_MAIN(3);
|
|
|
|
goto LOOP_STORE;
|
|
}
|
|
}
|
|
{
|
|
LOT_LOOP_MAIN(3);
|
|
if (bitIndex >= 32)
|
|
{
|
|
LOOP_BITADVANCE();
|
|
|
|
*resultVec_u16++ = symbol;
|
|
if (resultVec_u16 >= (u16 *) resultVecEnd)
|
|
break;
|
|
|
|
symbol = 0;
|
|
|
|
LOT_LOOP_MAIN(0);
|
|
LOT_LOOP_MAIN(1);
|
|
LOT_LOOP_MAIN(2);
|
|
LOT_LOOP_MAIN(3);
|
|
}
|
|
}
|
|
|
|
LOOP_STORE:
|
|
*resultVec_u16++ = symbol;
|
|
} while (resultVec_u16 < (u16 *) resultVecEnd);
|
|
|
|
sBitIndex = bitIndex;
|
|
sDataPtr = data - 1;
|
|
}
|
|
|
|
// Dark Egg magic
|
|
ARM_FUNC __attribute__((no_reorder)) static void SwitchToArmCallLOtANS(const u32 *data, u32 *ykTable, void *resultVec, void *resultVecEnd, void (*decodeFunction)(const u32 *data, u32 *ykTable, void *resultVec, void *resultVecEnd))
|
|
{
|
|
decodeFunction(data, ykTable, resultVec, resultVecEnd);
|
|
}
|
|
|
|
// Function that decodes tANS encoded LO data, resulting data is u8 values
|
|
static void DecodeLOtANS(const u32 *data, const u32 *pFreqs, u8 *resultVec, u32 count)
|
|
{
|
|
BuildDecompressionTable(pFreqs, sWorkingYkTable);
|
|
|
|
// We want to store in packs of 2, so count needs to be divisible by 2
|
|
u32 remainingCount = count % 2;
|
|
|
|
u32 funcBuffer[FUNC_BUFFER_SIZE(DecodeLOtANSLoop, SwitchToArmCallLOtANS)];
|
|
|
|
CopyFuncToIwram(funcBuffer, DecodeLOtANSLoop, SwitchToArmCallLOtANS);
|
|
SwitchToArmCallLOtANS(data, sWorkingYkTable, resultVec, &resultVec[count - remainingCount], (void *) funcBuffer);
|
|
|
|
if (remainingCount)
|
|
{
|
|
u32 currBits = *sDataPtr;
|
|
u32 symbol = 0;
|
|
for (u32 currNibble = 0; currNibble < 2; currNibble++)
|
|
{
|
|
u32 ykVals = sWorkingYkTable[sCurrState];
|
|
symbol |= TABLE_READ_SYMBOL(ykVals) << (currNibble*4);
|
|
u32 currK = TABLE_READ_K(ykVals);
|
|
sCurrState = TABLE_READ_Y(ykVals);
|
|
sCurrState += (currBits >> sBitIndex) & TABLE_READ_MASK(ykVals);
|
|
sBitIndex += currK;
|
|
if (sBitIndex >= 32)
|
|
{
|
|
currBits = *(++sDataPtr);
|
|
sBitIndex -= 32;
|
|
if (sBitIndex != 0)
|
|
{
|
|
sCurrState += (currBits & ((1u << sBitIndex) - 1)) << (currK - sBitIndex);
|
|
}
|
|
}
|
|
}
|
|
resultVec[count - remainingCount] = symbol;
|
|
}
|
|
}
|
|
|
|
// The reason this function is UNUSED, because it's currently exactly the same as `DecodeLOtANSLoop`(as it was optimized out for halfwords and not bytes as it's technically designed).
|
|
// If ever DecodeLOtANSLoop or DecodeSymtANSLoop were to change make sure to uncomment the 'CopyFuncToIwram' call.
|
|
|
|
ARM_FUNC __attribute__((flatten, noinline, no_reorder)) __attribute__((optimize("-O3"))) UNUSED static void DecodeSymtANSLoop(const u32 *data, u32 *ykTable, u16 *resultVec, u16 *resultVecEnd)
|
|
{
|
|
u32 currBits = *data++;
|
|
u32 bitIndex = sBitIndex;
|
|
|
|
do
|
|
{
|
|
u32 symbol = 0;
|
|
for (u32 currNibble = 0; currNibble < 4; currNibble++)
|
|
{
|
|
u32 ykVals = ykTable[sCurrState];
|
|
symbol |= TABLE_READ_SYMBOL(ykVals) << (currNibble*4);
|
|
u32 currK = TABLE_READ_K(ykVals);
|
|
sCurrState = TABLE_READ_Y(ykVals);
|
|
sCurrState += (currBits >> bitIndex) & TABLE_READ_MASK(ykVals);
|
|
bitIndex += currK;
|
|
if (bitIndex >= 32)
|
|
{
|
|
currBits = *data++;
|
|
bitIndex -= 32;
|
|
if (bitIndex != 0)
|
|
{
|
|
sCurrState += (currBits & ((1u << bitIndex) - 1)) << (currK - bitIndex);
|
|
}
|
|
}
|
|
}
|
|
*resultVec++ = symbol;
|
|
} while (resultVec < resultVecEnd);
|
|
|
|
sBitIndex = bitIndex;
|
|
sDataPtr = data - 1;
|
|
}
|
|
|
|
// Trampoline for symbol decoding: forwards its arguments to `decodeFunction`, which
// DecodeSymtANS points at the buffer holding the copied decode loop.
// Its address is also the end marker named in the commented-out copy of DecodeSymtANSLoop.
ARM_FUNC __attribute__((no_reorder)) static void SwitchToArmCallDecodeSymtANS(const u32 *data, u32 *ykTable, u16 *resultVec, u16 *resultVecEnd, void (*decodeFunction)(const u32 *data, u32 *ykTable, u16 *resultVec, u16 *resultVecEnd))
{
    (*decodeFunction)(data, ykTable, resultVec, resultVecEnd);
}
|
|
|
|
// Decodes `count` non-delta tANS encoded symbols (u16 each) from `data` into `resultVec`.
// The loop that is copied and executed is DecodeLOtANSLoop: it already emits one halfword per
// four nibbles, which is exactly what symbol decoding needs.
static void DecodeSymtANS(const u32 *data, const u32 *pFreqs, u16 *resultVec, u32 count)
{
    u16 *resultEnd = &resultVec[count];

    BuildDecompressionTable(pFreqs, sWorkingYkTable);

    u32 funcBuffer[FUNC_BUFFER_SIZE(DecodeLOtANSLoop, SwitchToArmCallLOtANS)];

    // Switch back to this copy (and size funcBuffer to match) if the two loops ever diverge:
    // CopyFuncToIwram(funcBuffer, DecodeSymtANSLoop, SwitchToArmCallDecodeSymtANS);
    CopyFuncToIwram(funcBuffer, DecodeLOtANSLoop, SwitchToArmCallLOtANS);
    SwitchToArmCallDecodeSymtANS(data, sWorkingYkTable, resultVec, resultEnd, (void *) funcBuffer);
}
|
|
|
|
// One decode step for delta encoded symbol data.
// Expects these names in the expanding scope: ykTable, sCurrState, symbol, currSymbol, currK,
// currBits, bitIndex.
// The symbol read from the table is a delta: it is added to the running value `currSymbol`
// (kept to 4 bits) and the result is OR'ed into nibble `nibble` of `symbol`. The next state
// is derived as in LOT_LOOP_MAIN; refilling the bitstream is left to the caller.
#define ANS_LOOP_MAIN(nibble) \
{ \
    u32 ykVals = ykTable[sCurrState]; \
    currK = TABLE_READ_K(ykVals); \
    currSymbol = (currSymbol + TABLE_READ_SYMBOL(ykVals)) & 0xf; \
    symbol |= currSymbol << ((nibble)*4); \
    sCurrState = TABLE_READ_Y(ykVals); \
    sCurrState += ((currBits >> bitIndex) & TABLE_READ_MASK(ykVals)); \
    bitIndex += currK; \
}
|
|
|
|
// Inner loop of tANS decoding for delta encoded symbol data, uses u16 data size
|
|
// Basic process for decoding a tANS encoded value is to read the current symbol from the decoding table, then calculate the next state
|
|
// from the y and k values for the current state and add the value read from the next k bits in the bitstream
|
|
ARM_FUNC __attribute__((flatten, noinline, no_reorder)) __attribute__((optimize("-O3"))) u32 DecodeSymDeltatANSLoop(const u32 *data, u32 *ykTable, u16 *resultVec, u16 *resultVecEnd)
|
|
{
|
|
u32 currBits = *data++;
|
|
u32 currSymbol = 0;
|
|
u32 bitIndex = sBitIndex;
|
|
u32 * resultVec_32 = (u32*)(resultVec); // Since we're doing 2 symbols at one time we store as word which is faster than storing two halfwords.
|
|
|
|
do
|
|
{
|
|
u32 symbol = 0;
|
|
u32 currK;
|
|
|
|
{
|
|
ANS_LOOP_MAIN(0);
|
|
|
|
if (bitIndex >= 32)
|
|
{
|
|
LOOP_BITADVANCE();
|
|
|
|
ANS_LOOP_MAIN(1);
|
|
ANS_LOOP_MAIN(2);
|
|
ANS_LOOP_MAIN(3);
|
|
ANS_LOOP_MAIN(4);
|
|
|
|
goto NIBBLE_5;
|
|
}
|
|
}
|
|
{
|
|
ANS_LOOP_MAIN(1);
|
|
|
|
if (bitIndex >= 32)
|
|
{
|
|
LOOP_BITADVANCE();
|
|
|
|
ANS_LOOP_MAIN(2);
|
|
ANS_LOOP_MAIN(3);
|
|
ANS_LOOP_MAIN(4);
|
|
ANS_LOOP_MAIN(5);
|
|
|
|
goto NIBBLE_6;
|
|
}
|
|
}
|
|
{
|
|
ANS_LOOP_MAIN(2);
|
|
|
|
if (bitIndex >= 32)
|
|
{
|
|
LOOP_BITADVANCE();
|
|
|
|
ANS_LOOP_MAIN(3);
|
|
ANS_LOOP_MAIN(4);
|
|
ANS_LOOP_MAIN(5);
|
|
ANS_LOOP_MAIN(6);
|
|
|
|
goto NIBBLE_7;
|
|
}
|
|
}
|
|
{
|
|
ANS_LOOP_MAIN(3);
|
|
|
|
if (bitIndex >= 32)
|
|
{
|
|
LOOP_BITADVANCE();
|
|
|
|
ANS_LOOP_MAIN(4);
|
|
ANS_LOOP_MAIN(5);
|
|
ANS_LOOP_MAIN(6);
|
|
ANS_LOOP_MAIN(7);
|
|
|
|
goto LOOP_STORE;
|
|
}
|
|
}
|
|
NIBBLE_4:
|
|
{
|
|
ANS_LOOP_MAIN(4);
|
|
|
|
if (bitIndex >= 32)
|
|
{
|
|
LOOP_BITADVANCE();
|
|
|
|
ANS_LOOP_MAIN(5);
|
|
ANS_LOOP_MAIN(6);
|
|
ANS_LOOP_MAIN(7);
|
|
|
|
goto LOOP_STORE;
|
|
}
|
|
}
|
|
NIBBLE_5:
|
|
{
|
|
ANS_LOOP_MAIN(5);
|
|
|
|
if (bitIndex >= 32)
|
|
{
|
|
LOOP_BITADVANCE();
|
|
|
|
ANS_LOOP_MAIN(6);
|
|
ANS_LOOP_MAIN(7);
|
|
|
|
goto LOOP_STORE;
|
|
}
|
|
}
|
|
NIBBLE_6:
|
|
{
|
|
ANS_LOOP_MAIN(6);
|
|
|
|
if (bitIndex >= 32)
|
|
{
|
|
LOOP_BITADVANCE();
|
|
|
|
ANS_LOOP_MAIN(7);
|
|
|
|
goto LOOP_STORE;
|
|
}
|
|
}
|
|
NIBBLE_7:
|
|
{
|
|
ANS_LOOP_MAIN(7);
|
|
|
|
if (bitIndex >= 32)
|
|
{
|
|
LOOP_BITADVANCE();
|
|
|
|
*resultVec_32++ = (symbol);
|
|
if (resultVec_32 >= (u32 *) resultVecEnd)
|
|
break;
|
|
|
|
symbol = 0;
|
|
|
|
ANS_LOOP_MAIN(0);
|
|
ANS_LOOP_MAIN(1);
|
|
ANS_LOOP_MAIN(2);
|
|
ANS_LOOP_MAIN(3);
|
|
|
|
goto NIBBLE_4;
|
|
}
|
|
}
|
|
|
|
LOOP_STORE:
|
|
*resultVec_32++ = (symbol);
|
|
|
|
} while (resultVec_32 < (u32 *) resultVecEnd);
|
|
sBitIndex = bitIndex;
|
|
sDataPtr = data - 1;
|
|
return currSymbol;
|
|
}
|
|
|
|
// Dark Egg magic
|
|
ARM_FUNC __attribute__((no_reorder)) static u32 SwitchToArmCallSymDeltaANS(const u32 *data, u32 *ykTable, u16 *resultVec, u16 *resultVecEnd, u32 (*decodeFunction)(const u32 *data, u32 *ykTable, u16 *resultVec, u16 *resultVecEnd))
|
|
{
|
|
return decodeFunction(data, ykTable, resultVec, resultVecEnd);
|
|
}
|
|
|
|
// Decodes `count` delta encoded tANS symbols (u16 each) from `data` into `resultVec`, using
// the frequency table at `pFreqs`.
// Symbols are decoded in pairs by a copy of DecodeSymDeltatANSLoop placed in a local buffer;
// a trailing odd symbol is decoded here, continuing the delta chain with the value the loop
// returned.
// The loop body always executes at least once, so it is only entered when there is at least
// one full pair. Otherwise (count == 1) it would decode two symbols that do not exist and
// leave the tANS state and output wrong for the single real symbol.
static void DecodeSymDeltatANS(const u32 *data, const u32 *pFreqs, u16 *resultVec, u32 count)
{
    BuildDecompressionTable(pFreqs, sWorkingYkTable);

    // We want to store in packs of 2, so the loop only gets the even part of count
    u32 remainingCount = count % 2;
    u32 pairedCount = count - remainingCount;
    u32 currSymbol = 0;

    if (pairedCount != 0)
    {
        u32 funcBuffer[FUNC_BUFFER_SIZE(DecodeSymDeltatANSLoop, SwitchToArmCallSymDeltaANS)];

        CopyFuncToIwram(funcBuffer, DecodeSymDeltatANSLoop, SwitchToArmCallSymDeltaANS);
        currSymbol = SwitchToArmCallSymDeltaANS(data, sWorkingYkTable, resultVec, &resultVec[pairedCount], (void *) funcBuffer);
    }
    else
    {
        // Nothing was consumed: the cursor the loop would have published is still `data`
        sDataPtr = data;
    }

    if (remainingCount)
    {
        // These names are required by ANS_LOOP_MAIN and LOOP_BITADVANCE
        u32 currK;
        u32 *ykTable = sWorkingYkTable;
        u32 bitIndex = sBitIndex;
        u32 symbol = 0;
        u32 currBits;

        data = sDataPtr;
        currBits = *data++;

        // Four nibbles for the last symbol; after a refill no further check is needed
        ANS_LOOP_MAIN(0);
        if (bitIndex >= 32)
        {
            LOOP_BITADVANCE();

            ANS_LOOP_MAIN(1);
            ANS_LOOP_MAIN(2);
            ANS_LOOP_MAIN(3);

            goto STORE_LAST;
        }

        ANS_LOOP_MAIN(1);
        if (bitIndex >= 32)
        {
            LOOP_BITADVANCE();

            ANS_LOOP_MAIN(2);
            ANS_LOOP_MAIN(3);

            goto STORE_LAST;
        }

        ANS_LOOP_MAIN(2);
        if (bitIndex >= 32)
        {
            LOOP_BITADVANCE();

            ANS_LOOP_MAIN(3);

            goto STORE_LAST;
        }

        // Final nibble: nothing is decoded after it, so no refill is attempted
        ANS_LOOP_MAIN(3);

    STORE_LAST:
        resultVec[pairedCount] = symbol;
        sBitIndex = bitIndex;
    }
}
|
|
|
|
// Writes `value` into `size` consecutive halfwords starting at `_dst`.
// `size` is a count of halfwords, not bytes.
static __attribute__((always_inline)) inline void Fill16(u16 value, void *_dst, u32 size)
{
    u16 *cursor = _dst;
    u16 *const end = cursor + size;

    while (cursor != end)
        *cursor++ = value;
}
|
|
|
|
// Copies `size` halfwords from `_src` to `_dst`, lowest address first.
// `size` is a count of halfwords, not bytes.
static __attribute__((always_inline)) inline void Copy16(const void *_src, void *_dst, u32 size)
{
    const u16 *in = _src;
    u16 *out = _dst;
    const u16 *const inEnd = in + size;

    while (in != inEnd)
        *out++ = *in++;
}
|
|
|
|
// Function to decode the instructions into the actual decompressed data
|
|
// Basic process:
|
|
// Read length from the loVec, 1 or 2 bytes as indicated by the last bit in the first byte
|
|
// Read offset from the loVec, 1 or 2 bytes as indicated by the last bit in the first byte
|
|
// If length is not 0 and offset is not 1:
|
|
// Insert the current value from the Symbol vector into current result position and advance symbol vector by 1
|
|
// Copy <length> values from <offset> values back in the result vector
|
|
// If length is not 0 and offser is 1:
|
|
// Insert the current value from the Symbol vector into current result position <length> times, then advance symbol vector by 1
|
|
// If length is 0:
|
|
// Insert <offset> number of symbols from the symbol vector into the result vector and advance the symbol vector position by <offset>
|
|
ARM_FUNC __attribute__((flatten, noinline, no_reorder)) __attribute__((optimize("-O3"))) static void DecodeInstructions(u32 headerLoSize, const u8 *loVec, const u16 *symVec, u16 *dest)
|
|
{
|
|
const u8 *loVecEnd = loVec + headerLoSize;
|
|
do
|
|
{
|
|
u32 currOffset, currLength;
|
|
|
|
if (loVec[0] & CONTINUE_BIT)
|
|
{
|
|
currLength = (loVec[0] & FIRST_LO_MASK) | (loVec[1] << 7);
|
|
currOffset = loVec[2] & FIRST_LO_MASK;
|
|
if (loVec[2] & CONTINUE_BIT)
|
|
{
|
|
currOffset |= loVec[3] << 7;
|
|
loVec += 4;
|
|
}
|
|
else
|
|
{
|
|
loVec += 3;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
currLength = loVec[0] & FIRST_LO_MASK;
|
|
currOffset = loVec[1] & FIRST_LO_MASK;
|
|
|
|
if (loVec[1] & CONTINUE_BIT)
|
|
{
|
|
currOffset |= (loVec[2] << 7);
|
|
loVec += 3;
|
|
}
|
|
else
|
|
{
|
|
loVec += 2;
|
|
}
|
|
}
|
|
|
|
if (currLength != 0)
|
|
{
|
|
u16 symVecVal = *symVec;
|
|
*dest++ = *symVec++;
|
|
if (currOffset == 1)
|
|
{
|
|
Fill16(symVecVal, dest, currLength);
|
|
dest += currLength;
|
|
}
|
|
else
|
|
{
|
|
// Copy16 is slower in this case.
|
|
u16 *from = dest - currOffset;
|
|
u16 *to = dest + currLength;
|
|
do {
|
|
*dest++ = *from++;
|
|
} while (dest != to);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
Copy16(symVec, dest, currOffset);
|
|
dest += currOffset;
|
|
symVec += currOffset;
|
|
}
|
|
} while (loVec < loVecEnd);
|
|
}
|
|
|
|
// Dark Egg magic
|
|
ARM_FUNC __attribute__((no_reorder)) static void SwitchToArmCallDecodeInstructions(u32 headerLoSize, const u8 *loVec, const u16 *symVec, void *dest, void (*decodeFunction)(u32 headerLoSize, const u8 *loVec, const u16 *symVec, void *dest))
|
|
{
|
|
decodeFunction(headerLoSize, loVec, symVec, dest);
|
|
}
|
|
|
|
// Dark Egg magic
|
|
static void DecodeInstructionsIwram(u32 headerLoSize, const u8 *loVec, const u16 *symVec, void *dest)
|
|
{
|
|
u32 funcBuffer[FUNC_BUFFER_SIZE(DecodeInstructions, SwitchToArmCallDecodeInstructions)];
|
|
|
|
CopyFuncToIwram(funcBuffer, DecodeInstructions, SwitchToArmCallDecodeInstructions);
|
|
SwitchToArmCallDecodeInstructions(headerLoSize, loVec, symVec, dest, (void *) funcBuffer);
|
|
}
|
|
|
|
// Entrance point for smol compressed data
|
|
static void SmolDecompressData(const struct SmolHeader *header, const u32 *data, void *dest)
|
|
{
|
|
// This is apparently needed due to Game Freak sending bullshit down the decompression pipeline
|
|
if (header->loSize == 0 || header->symSize == 0)
|
|
return;
|
|
const u8 *leftoverPos = (u8 *)data;
|
|
|
|
sCurrState = header->initialState;
|
|
|
|
u32 headerLoSize = header->loSize;
|
|
u32 headerSymSize = header->symSize;
|
|
|
|
const u32 *pLoFreqs = NULL;
|
|
const u32 *pSymFreqs = NULL;
|
|
|
|
// Use different decoding flows depending on which mode the data is compressed with
|
|
switch (header->mode)
|
|
{
|
|
case BASE_ONLY: // Used by .fastSmol, there is no encoding there, so we can quickly decode all the instructions and quit.
|
|
DecodeInstructionsIwram(headerLoSize, leftoverPos + headerSymSize*2, (void *) leftoverPos, dest);
|
|
return;
|
|
case ENCODE_LO:
|
|
pLoFreqs = &data[0];
|
|
sDataPtr = &data[3];
|
|
break;
|
|
case ENCODE_DELTA_SYMS:
|
|
case ENCODE_SYMS:
|
|
pSymFreqs = &data[0];
|
|
sDataPtr = &data[3];
|
|
break;
|
|
case ENCODE_BOTH:
|
|
case ENCODE_BOTH_DELTA_SYMS:
|
|
pLoFreqs = &data[0];
|
|
pSymFreqs = &data[3];
|
|
sDataPtr = &data[6];
|
|
break;
|
|
}
|
|
|
|
bool32 loEncoded = isModeLoEncoded(header->mode);
|
|
bool32 symEncoded = isModeSymEncoded(header->mode);
|
|
bool32 symDelta = isModeSymDelta(header->mode);
|
|
|
|
// Everything needs to be aligned.
|
|
u32 alignedLoSize = header->loSize % 2 == 1 ? headerLoSize + 1 : headerLoSize;
|
|
u32 alignedSymSize = header->symSize % 2 == 1 ? headerSymSize + 1 : headerSymSize;
|
|
void *memoryAlloced = Alloc((alignedSymSize*2) + alignedLoSize);
|
|
u16 *symVec = memoryAlloced;
|
|
u8 *loVec = memoryAlloced + alignedSymSize*2;
|
|
|
|
sBitIndex = 0;
|
|
// Decode tANS encoded LO data, mode 3, 4 and 5
|
|
if (loEncoded)
|
|
{
|
|
DecodeLOtANS(sDataPtr, pLoFreqs, loVec, headerLoSize);
|
|
leftoverPos += 12;
|
|
}
|
|
// Decode tANS encoded symbol data, mode 1, 2, 4 and 5
|
|
if (symEncoded)
|
|
{
|
|
// Symbols are delta encoded, mode 2 and 5
|
|
if (symDelta)
|
|
DecodeSymDeltatANS(sDataPtr, pSymFreqs, symVec, headerSymSize);
|
|
// Symbols are not delta encoded, mode 1 and 4
|
|
else
|
|
DecodeSymtANS(sDataPtr, pSymFreqs, symVec, headerSymSize);
|
|
leftoverPos += 12;
|
|
}
|
|
|
|
// If not both of lo and sym data are tANS encoded, data that isn't entropy encoded exists
|
|
// This is stored after the 32-bit aligned bitstream
|
|
if (loEncoded || symEncoded)
|
|
leftoverPos += 4*header->bitstreamSize;
|
|
|
|
// Copy the not entropy encoded symbol data to the symbol buffer
|
|
// Symbol data is u16 aligned
|
|
if (symEncoded == FALSE)
|
|
{
|
|
symVec = (void *) leftoverPos;
|
|
leftoverPos += headerSymSize*2;
|
|
}
|
|
|
|
// Copy the not entropy encoded lo data to the lo buffer
|
|
// Despite the individual lo values being u8 aligned, the entire vector for the u8 values is u16 aligned
|
|
if (loEncoded == FALSE)
|
|
{
|
|
loVec = (void *) leftoverPos;
|
|
}
|
|
|
|
// Actually decode the final data from loVec and symVec
|
|
DecodeInstructionsIwram(headerLoSize, loVec, symVec, dest);
|
|
|
|
Free(memoryAlloced);
|
|
}
|
|
|
|
// Undoes delta encoding of a tile-number array in place: each entry is replaced by the
// running sum of itself and every entry before it, truncated to 16 bits on store.
//   tileNumbers: array to rewrite.
//   arraySize:   number of u16 entries; 0 is valid and leaves the array untouched.
// SmolDecompressTilemap copies this routine into a local buffer and calls the copy.
// NOTE(review): it presumably has to stay free of calls to other functions so the copy
// keeps working - confirm before adding any.
ARM_FUNC __attribute__((flatten, noinline, no_reorder)) __attribute__((optimize("-O3"))) static void DeltaDecodeTileNumbers(u16 *tileNumbers, u32 arraySize)
{
    u32 prevVal = 0;
    u32 remainder = arraySize % 8;
    // End of the part that is handled eight entries at a time
    u16 *dst = tileNumbers + (arraySize - remainder);

    // The condition is tested before the first pass. With a do/while, an arraySize
    // below 8 made dst equal to the start pointer, so the first pass wrote 8 entries
    // and the exit test could never match again.
    while (tileNumbers != dst)
    {
        prevVal += *tileNumbers;
        *tileNumbers++ = prevVal;

        prevVal += *tileNumbers;
        *tileNumbers++ = prevVal;

        prevVal += *tileNumbers;
        *tileNumbers++ = prevVal;

        prevVal += *tileNumbers;
        *tileNumbers++ = prevVal;

        prevVal += *tileNumbers;
        *tileNumbers++ = prevVal;

        prevVal += *tileNumbers;
        *tileNumbers++ = prevVal;

        prevVal += *tileNumbers;
        *tileNumbers++ = prevVal;

        prevVal += *tileNumbers;
        *tileNumbers++ = prevVal;
    }

    // Trailing entries that do not fill a whole group of eight
    for (u32 i = 0; i < remainder; i++)
    {
        prevVal += *tileNumbers;
        *tileNumbers++ = prevVal;
    }
}
|
|
|
|
// ARM_FUNC wrapper that simply forwards tileNumbers and arraySize to decodeFunction.
// SmolDecompressTilemap passes its local copy of DeltaDecodeTileNumbers as decodeFunction,
// and also hands this function to FUNC_BUFFER_SIZE/CopyFuncToIwram together with
// DeltaDecodeTileNumbers - presumably as the end marker of that routine, which would be
// why both carry no_reorder (verify against those macros).
ARM_FUNC __attribute__((no_reorder)) static void SwitchToArmCallDecodeTileNumbers(u16 *tileNumbers, u32 arraySize, void (*decodeFunction)(u16 *tileNumbers, u32 arraySize))
{
    (*decodeFunction)(tileNumbers, arraySize);
}
|
|
|
|
// Decompresses a smol tilemap into dest in two passes:
//   1. DecodeInstructionsIwram expands the lo/symbol vectors stored in data,
//   2. a local copy of DeltaDecodeTileNumbers turns the result into running sums.
//   header: tilemap header; symSize, tileNumberSize and tilemapSize are read.
//   data:   payload, symbol vector first and lo vector after it.
//   dest:   output tilemap, header->tilemapSize/2 entries of u16 are delta decoded.
static void SmolDecompressTilemap(const struct SmolTilemapHeader *header, const u32 *data, u16 *dest)
{
    // Symbols take 2 bytes each; an odd symbol count is followed by 2 bytes of padding
    // before the lo vector starts.
    u32 symbolBytes = header->symSize*2 + 2*(header->symSize % 2);
    u16 *symbols = (u16 *)data;
    u8 *loValues = (u8 *)data + symbolBytes;

    DecodeInstructionsIwram(header->tileNumberSize, loValues, symbols, dest);

    // tilemapSize is halved to get the number of u16 entries
    u32 entryCount = header->tilemapSize/2;

    // Copy the delta decoder into a buffer on the stack and run it from there
    u32 funcBuffer[FUNC_BUFFER_SIZE(DeltaDecodeTileNumbers, SwitchToArmCallDecodeTileNumbers)];

    CopyFuncToIwram(funcBuffer, DeltaDecodeTileNumbers, SwitchToArmCallDecodeTileNumbers);
    SwitchToArmCallDecodeTileNumbers(dest, entryCount, (void *) funcBuffer);
}
|
|
|
|
// Helper functions for determining modes
|
|
static bool32 isModeLoEncoded(enum CompressionMode mode)
|
|
{
|
|
if (mode == ENCODE_LO
|
|
|| mode == ENCODE_BOTH
|
|
|| mode == ENCODE_BOTH_DELTA_SYMS)
|
|
return TRUE;
|
|
return FALSE;
|
|
}
|
|
|
|
static bool32 isModeSymEncoded(enum CompressionMode mode)
|
|
{
|
|
if (mode == ENCODE_SYMS
|
|
|| mode == ENCODE_DELTA_SYMS
|
|
|| mode == ENCODE_BOTH
|
|
|| mode == ENCODE_BOTH_DELTA_SYMS)
|
|
return TRUE;
|
|
return FALSE;
|
|
}
|
|
|
|
static bool32 isModeSymDelta(enum CompressionMode mode)
|
|
{
|
|
if (mode == ENCODE_DELTA_SYMS
|
|
|| mode == ENCODE_BOTH_DELTA_SYMS)
|
|
return TRUE;
|
|
return FALSE;
|
|
}
|
|
|
|
// Decompresses the front or back picture of a species into dest.
//   dest:        buffer receiving the decompressed picture.
//   species:     species id; passed through SanitizeSpeciesId, and SPECIES_UNOWN is
//                replaced by the result of GetUnownSpeciesId(personality).
//   personality: used for the Unown lookup, the female check and the Spinda spots.
//   isFrontPic:  non-zero selects the front picture, zero the back picture.
// Picture choice, first match wins:
//   1. with P_GENDER_DIFFERENCES: the female picture, if the species has one and
//      IsPersonalityFemale reports TRUE,
//   2. the regular picture of the species, if it is not NULL,
//   3. the picture of SPECIES_NONE.
void LoadSpecialPokePic(void *dest, s32 species, u32 personality, bool8 isFrontPic)
{
    const void *pic = NULL;

    species = SanitizeSpeciesId(species);
    if (species == SPECIES_UNOWN)
        species = GetUnownSpeciesId(personality);

#if P_GENDER_DIFFERENCES
    {
        const void *femalePic = isFrontPic ? gSpeciesInfo[species].frontPicFemale
                                           : gSpeciesInfo[species].backPicFemale;

        // The gender is only queried when a female picture actually exists
        if (femalePic != NULL && IsPersonalityFemale(species, personality))
            pic = femalePic;
    }
#endif

    if (pic == NULL)
        pic = isFrontPic ? gSpeciesInfo[species].frontPic : gSpeciesInfo[species].backPic;

    // Species without a picture of their own fall back to SPECIES_NONE
    if (pic == NULL)
        pic = isFrontPic ? gSpeciesInfo[SPECIES_NONE].frontPic : gSpeciesInfo[SPECIES_NONE].backPic;

    DecompressDataWithHeaderWram(pic, dest);

    // Spinda front pictures get their spots drawn on top, once per flag value
    if (species == SPECIES_SPINDA && isFrontPic)
    {
        DrawSpindaSpots(personality, dest, FALSE);
        DrawSpindaSpots(personality, dest, TRUE);
    }
}
|
|
|
|
// Same as DecompressDataWithHeaderWram, except that the source is given through one
// extra level of indirection: *src is the compressed data, dest the output buffer.
void Unused_DecompressDataWithHeaderWramIndirect(const void **src, void *dest)
{
    const void *compressed = *src;

    DecompressDataWithHeaderWram(compressed, dest);
}
|
|
|
|
// This function appears to emulate behaviour found in the GB(C) versions regarding how
// the Pokemon images are stitched together to be displayed on the battle screen.
// Given "compacted" tiles, an object count and a bounding box/object size, it places the
// tiles in such a way that each object ends up centered in a 8x8 tile canvas.
//   object_size:  edge length of one object in tiles.
//   object_count: number of objects to process.
//   src_tiles:    compacted input tiles, 32 bytes per tile.
//   dest_tiles:   output canvases; a canvas row of 8 tiles is 256 bytes.
// Odd sizes are shifted by half a tile; the pointer skips below only cover sizes 5 and 7.
// Even sizes are copied row by row, and only size 6 gets a cleared one-tile border.
static void UNUSED StitchObjectsOn8x8Canvas(s32 object_size, s32 object_count, u8 *src_tiles, u8 *dest_tiles)
{
    const u8 *in = src_tiles;
    u8 *out = dest_tiles;
    s32 obj, row, col, n;

    if (object_size & 1)
    {
        // Object size is odd
        u8 bottom_off = (object_size >> 1) + 4;

        for (obj = 0; obj < object_count; obj++)
        {
            // Clear all unused rows of tiles plus the half-tile required due to centering
            for (row = 0; row < 8 - object_size; row++)
            {
                s32 band = (row >> 1) << 8;
                // Even steps clear the top half of the top tile and the bottom half of the bottom tile.
                // Odd steps clear the bottom half of the top tile and the top half of the tile
                // following the bottom tile.
                s32 top_shift = (row % 2 == 0) ? 0 : 16;
                s32 bottom_shift = (row % 2 == 0) ? 16 : 256;

                for (col = 0; col < 8; col++)
                {
                    u8 *top = out + band + (col << 5) + top_shift;
                    u8 *bottom = out + (bottom_off << 8) + band + (col << 5) + bottom_shift;

                    for (n = 0; n < 16; n++)
                    {
                        top[n] = 0;
                        bottom[n] = 0;
                    }
                }
            }

            // Clear the columns to the left and right that wont be used completely
            // Unlike the previous loops, this will clear the later used space as well
            for (col = 0; col < 2; col++)
            {
                for (row = 0; row < 8; row++)
                {
                    u8 *left = out + (row << 8) + (col << 5);
                    u8 *right = left + 192;

                    for (n = 0; n < 32; n++)
                    {
                        left[n] = 0;
                        right[n] = 0;
                    }
                }
            }

            // Skip the top row and first tile on the second row for objects of size 5
            if (object_size == 5)
                out += 0x120;

            // Copy tile data
            for (row = 0; row < object_size; row++)
            {
                for (col = 0; col < object_size; col++)
                {
                    for (n = 0; n < 4; n++)
                    {
                        const u8 *from = in + (n << 2);
                        u8 *to = out + (n << 2);

                        // Offset the tile by +4px in both x and y directions
                        to[18] = from[0];
                        to[19] = from[1];
                        to[48] = from[2];
                        to[49] = from[3];

                        to[258] = from[16];
                        to[259] = from[17];
                        to[288] = from[18];
                        to[289] = from[19];
                    }
                    in += 32;
                    out += 32;
                }

                // At the end of a row, skip enough tiles to get to the beginning of the next row
                switch (object_size)
                {
                case 7:
                    out += 0x20;
                    break;
                case 5:
                    out += 0x60;
                    break;
                }
            }

            // Skip remaining unused space to go to the beginning of the next object
            switch (object_size)
            {
            case 7:
                out += 0x100;
                break;
            case 5:
                out += 0x1e0;
                break;
            }
        }
    }
    else
    {
        // Object size is even
        // For objects of size 6, the first and last row and column will be cleared
        // While the remaining space will be filled with actual data
        const s32 border_row_bytes = (object_size == 6) ? 256 : 0;
        const s32 border_tile_bytes = (object_size == 6) ? 32 : 0;

        for (obj = 0; obj < object_count; obj++)
        {
            // Cleared row of tiles above the object
            for (n = 0; n < border_row_bytes; n++)
                *out++ = 0;

            for (row = 0; row < object_size; row++)
            {
                // Cleared tile to the left of the row
                for (n = 0; n < border_tile_bytes; n++)
                    *out++ = 0;

                // Copy tile data
                for (n = 0; n < 32 * object_size; n++)
                    *out++ = *in++;

                // Cleared tile to the right of the row
                for (n = 0; n < border_tile_bytes; n++)
                    *out++ = 0;
            }

            // Cleared row of tiles below the object
            for (n = 0; n < border_row_bytes; n++)
                *out++ = 0;
        }
    }
}
|
|
|
|
u32 GetDecompressedDataSize(const u32 *ptr)
|
|
{
|
|
union CompressionHeader *header = (union CompressionHeader *)ptr;
|
|
switch (header->smol.mode)
|
|
{
|
|
case MODE_LZ77:
|
|
return header->lz77.size;
|
|
case IS_TILEMAP:
|
|
return header->smolTilemap.tilemapSize;
|
|
default:
|
|
return header->smol.imageSize*SMOL_IMAGE_SIZE_MULTIPLIER;
|
|
}
|
|
}
|
|
|
|
bool32 IsCompressedData(const u32 *ptr)
|
|
{
|
|
u32 size;
|
|
union CompressionHeader *header = (union CompressionHeader *)ptr;
|
|
switch (header->smol.mode)
|
|
{
|
|
case MODE_LZ77:
|
|
return IsLZ77Data(ptr, TILE_SIZE_4BPP, MAX_DECOMPRESSION_BUFFER_SIZE);
|
|
case BASE_ONLY:
|
|
case ENCODE_SYMS:
|
|
case ENCODE_DELTA_SYMS:
|
|
case ENCODE_LO:
|
|
case ENCODE_BOTH:
|
|
case ENCODE_BOTH_DELTA_SYMS:
|
|
size = GetDecompressedDataSize(ptr);
|
|
if (size % TILE_SIZE_4BPP == 0 && size < MAX_DECOMPRESSION_BUFFER_SIZE)
|
|
return TRUE;
|
|
break;
|
|
case IS_FRAME_CONTAINER:
|
|
// No implemented yet
|
|
case IS_TILEMAP:
|
|
// Has to use another assumption
|
|
default:
|
|
// Is not one of these cases, it's not compressed data
|
|
return FALSE;
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
bool8 LoadCompressedSpriteSheetUsingHeap(const struct CompressedSpriteSheet *src)
|
|
{
|
|
struct SpriteSheet dest;
|
|
void *buffer;
|
|
|
|
buffer = AllocZeroed(GetDecompressedDataSize(&src->data[0]));
|
|
DecompressDataWithHeaderWram(src->data, buffer);
|
|
|
|
dest.data = buffer;
|
|
dest.size = src->size;
|
|
dest.tag = src->tag;
|
|
|
|
LoadSpriteSheet(&dest);
|
|
Free(buffer);
|
|
return FALSE;
|
|
}
|
|
|
|
extern const u32 LZ77UnCompWRAMOptimized[];
|
|
extern const u32 LZ77UnCompWRAMOptimized_end[];
|
|
|
|
ARM_FUNC static void SwitchToArmCallFastLZ77(const u32 *src, void *dest, void (*funcPtr)(const u32 *src, void *dest))
|
|
{
|
|
funcPtr(src, dest);
|
|
}
|
|
|
|
void FastLZ77UnCompWram(const u32 *src, void *dest)
|
|
{
|
|
u32 funcBuffer[200];
|
|
|
|
CopyFuncToIwram(funcBuffer, LZ77UnCompWRAMOptimized, LZ77UnCompWRAMOptimized_end);
|
|
SwitchToArmCallFastLZ77(src, dest, (void *) funcBuffer);
|
|
}
|