/* LzmaSpec.c -- LZMA Reference Decoder | |
2013-07-28 : Igor Pavlov : Public domain */ | |
// This code implements LZMA file decoding according to LZMA specification. | |
// This code is not optimized for speed. | |
#include <stdio.h> | |
#ifdef _MSC_VER | |
#pragma warning(disable : 4710) // function not inlined | |
#pragma warning(disable : 4996) // This function or variable may be unsafe | |
#endif | |
typedef unsigned char Byte; | |
typedef unsigned short UInt16; | |
#ifdef _LZMA_UINT32_IS_ULONG | |
typedef unsigned long UInt32; | |
#else | |
typedef unsigned int UInt32; | |
#endif | |
#if defined(_MSC_VER) || defined(__BORLANDC__) | |
typedef unsigned __int64 UInt64; | |
#else | |
typedef unsigned long long int UInt64; | |
#endif | |
struct CInputStream | |
{ | |
FILE *File; | |
UInt64 Processed; | |
void Init() { Processed = 0; } | |
Byte ReadByte() | |
{ | |
int c = getc(File); | |
if (c < 0) | |
throw "Unexpected end of file"; | |
Processed++; | |
return (Byte)c; | |
} | |
}; | |
struct COutStream | |
{ | |
FILE *File; | |
UInt64 Processed; | |
void Init() { Processed = 0; } | |
void WriteByte(Byte b) | |
{ | |
if (putc(b, File) == EOF) | |
throw "File writing error"; | |
Processed++; | |
} | |
}; | |
class COutWindow | |
{ | |
Byte *Buf; | |
UInt32 Pos; | |
UInt32 Size; | |
bool IsFull; | |
public: | |
unsigned TotalPos; | |
COutStream OutStream; | |
COutWindow(): Buf(NULL) {} | |
~COutWindow() { delete []Buf; } | |
void Create(UInt32 dictSize) | |
{ | |
Buf = new Byte[dictSize]; | |
Pos = 0; | |
Size = dictSize; | |
IsFull = false; | |
TotalPos = 0; | |
} | |
void PutByte(Byte b) | |
{ | |
TotalPos++; | |
Buf[Pos++] = b; | |
if (Pos == Size) | |
{ | |
Pos = 0; | |
IsFull = true; | |
} | |
OutStream.WriteByte(b); | |
} | |
Byte GetByte(UInt32 dist) const | |
{ | |
return Buf[dist <= Pos ? Pos - dist : Size - dist + Pos]; | |
} | |
void CopyMatch(UInt32 dist, unsigned len) | |
{ | |
for (; len > 0; len--) | |
PutByte(GetByte(dist)); | |
} | |
bool CheckDistance(UInt32 dist) const | |
{ | |
return dist <= Pos || IsFull; | |
} | |
bool IsEmpty() const | |
{ | |
return Pos == 0 && !IsFull; | |
} | |
}; | |
#define kNumBitModelTotalBits 11 | |
#define kNumMoveBits 5 | |
typedef UInt16 CProb; | |
#define PROB_INIT_VAL ((1 << kNumBitModelTotalBits) / 2) | |
#define INIT_PROBS(p) \ | |
{ for (unsigned i = 0; i < sizeof(p) / sizeof(p[0]); i++) p[i] = PROB_INIT_VAL; } | |
class CRangeDecoder | |
{ | |
UInt32 Range; | |
UInt32 Code; | |
void Normalize(); | |
public: | |
CInputStream *InStream; | |
bool Corrupted; | |
void Init(); | |
bool IsFinishedOK() const { return Code == 0; } | |
UInt32 DecodeDirectBits(unsigned numBits); | |
unsigned DecodeBit(CProb *prob); | |
}; | |
void CRangeDecoder::Init() | |
{ | |
Corrupted = false; | |
if (InStream->ReadByte() != 0) | |
Corrupted = true; | |
Range = 0xFFFFFFFF; | |
Code = 0; | |
for (int i = 0; i < 4; i++) | |
Code = (Code << 8) | InStream->ReadByte(); | |
if (Code == Range) | |
Corrupted = true; | |
} | |
#define kTopValue ((UInt32)1 << 24) | |
void CRangeDecoder::Normalize() | |
{ | |
if (Range < kTopValue) | |
{ | |
Range <<= 8; | |
Code = (Code << 8) | InStream->ReadByte(); | |
} | |
} | |
UInt32 CRangeDecoder::DecodeDirectBits(unsigned numBits) | |
{ | |
UInt32 res = 0; | |
do | |
{ | |
Range >>= 1; | |
Code -= Range; | |
UInt32 t = 0 - ((UInt32)Code >> 31); | |
Code += Range & t; | |
if (Code == Range) | |
Corrupted = true; | |
Normalize(); | |
res <<= 1; | |
res += t + 1; | |
} | |
while (--numBits); | |
return res; | |
} | |
unsigned CRangeDecoder::DecodeBit(CProb *prob) | |
{ | |
unsigned v = *prob; | |
UInt32 bound = (Range >> kNumBitModelTotalBits) * v; | |
unsigned symbol; | |
if (Code < bound) | |
{ | |
v += ((1 << kNumBitModelTotalBits) - v) >> kNumMoveBits; | |
Range = bound; | |
symbol = 0; | |
} | |
else | |
{ | |
v -= v >> kNumMoveBits; | |
Code -= bound; | |
Range -= bound; | |
symbol = 1; | |
} | |
*prob = (CProb)v; | |
Normalize(); | |
return symbol; | |
} | |
unsigned BitTreeReverseDecode(CProb *probs, unsigned numBits, CRangeDecoder *rc) | |
{ | |
unsigned m = 1; | |
unsigned symbol = 0; | |
for (unsigned i = 0; i < numBits; i++) | |
{ | |
unsigned bit = rc->DecodeBit(&probs[m]); | |
m <<= 1; | |
m += bit; | |
symbol |= (bit << i); | |
} | |
return symbol; | |
} | |
template <unsigned NumBits> | |
class CBitTreeDecoder | |
{ | |
CProb Probs[(unsigned)1 << NumBits]; | |
public: | |
void Init() | |
{ | |
INIT_PROBS(Probs); | |
} | |
unsigned Decode(CRangeDecoder *rc) | |
{ | |
unsigned m = 1; | |
for (unsigned i = 0; i < NumBits; i++) | |
m = (m << 1) + rc->DecodeBit(&Probs[m]); | |
return m - ((unsigned)1 << NumBits); | |
} | |
unsigned ReverseDecode(CRangeDecoder *rc) | |
{ | |
return BitTreeReverseDecode(Probs, NumBits, rc); | |
} | |
}; | |
#define kNumPosBitsMax 4 | |
#define kNumStates 12 | |
#define kNumLenToPosStates 4 | |
#define kNumAlignBits 4 | |
#define kStartPosModelIndex 4 | |
#define kEndPosModelIndex 14 | |
#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) | |
#define kMatchMinLen 2 | |
class CLenDecoder | |
{ | |
CProb Choice; | |
CProb Choice2; | |
CBitTreeDecoder<3> LowCoder[1 << kNumPosBitsMax]; | |
CBitTreeDecoder<3> MidCoder[1 << kNumPosBitsMax]; | |
CBitTreeDecoder<8> HighCoder; | |
public: | |
void Init() | |
{ | |
Choice = PROB_INIT_VAL; | |
Choice2 = PROB_INIT_VAL; | |
HighCoder.Init(); | |
for (unsigned i = 0; i < (1 << kNumPosBitsMax); i++) | |
{ | |
LowCoder[i].Init(); | |
MidCoder[i].Init(); | |
} | |
} | |
unsigned Decode(CRangeDecoder *rc, unsigned posState) | |
{ | |
if (rc->DecodeBit(&Choice) == 0) | |
return LowCoder[posState].Decode(rc); | |
if (rc->DecodeBit(&Choice2) == 0) | |
return 8 + MidCoder[posState].Decode(rc); | |
return 16 + HighCoder.Decode(rc); | |
} | |
}; | |
unsigned UpdateState_Literal(unsigned state) | |
{ | |
if (state < 4) return 0; | |
else if (state < 10) return state - 3; | |
else return state - 6; | |
} | |
unsigned UpdateState_Match (unsigned state) { return state < 7 ? 7 : 10; } | |
unsigned UpdateState_Rep (unsigned state) { return state < 7 ? 8 : 11; } | |
unsigned UpdateState_ShortRep(unsigned state) { return state < 7 ? 9 : 11; } | |
#define LZMA_DIC_MIN (1 << 12) | |
class CLzmaDecoder | |
{ | |
public: | |
CRangeDecoder RangeDec; | |
COutWindow OutWindow; | |
bool markerIsMandatory; | |
unsigned lc, pb, lp; | |
UInt32 dictSize; | |
UInt32 dictSizeInProperties; | |
void DecodeProperties(const Byte *properties) | |
{ | |
unsigned d = properties[0]; | |
if (d >= (9 * 5 * 5)) | |
throw "Incorrect LZMA properties"; | |
lc = d % 9; | |
d /= 9; | |
pb = d / 5; | |
lp = d % 5; | |
dictSizeInProperties = 0; | |
for (int i = 0; i < 4; i++) | |
dictSizeInProperties |= (UInt32)properties[i + 1] << (8 * i); | |
dictSize = dictSizeInProperties; | |
if (dictSize < LZMA_DIC_MIN) | |
dictSize = LZMA_DIC_MIN; | |
} | |
CLzmaDecoder(): LitProbs(NULL) {} | |
~CLzmaDecoder() { delete []LitProbs; } | |
void Create() | |
{ | |
OutWindow.Create(dictSize); | |
CreateLiterals(); | |
} | |
int Decode(bool unpackSizeDefined, UInt64 unpackSize); | |
private: | |
CProb *LitProbs; | |
void CreateLiterals() | |
{ | |
LitProbs = new CProb[(UInt32)0x300 << (lc + lp)]; | |
} | |
void InitLiterals() | |
{ | |
UInt32 num = (UInt32)0x300 << (lc + lp); | |
for (UInt32 i = 0; i < num; i++) | |
LitProbs[i] = PROB_INIT_VAL; | |
} | |
void DecodeLiteral(unsigned state, UInt32 rep0) | |
{ | |
unsigned prevByte = 0; | |
if (!OutWindow.IsEmpty()) | |
prevByte = OutWindow.GetByte(1); | |
unsigned symbol = 1; | |
unsigned litState = ((OutWindow.TotalPos & ((1 << lp) - 1)) << lc) + (prevByte >> (8 - lc)); | |
CProb *probs = &LitProbs[(UInt32)0x300 * litState]; | |
if (state >= 7) | |
{ | |
unsigned matchByte = OutWindow.GetByte(rep0 + 1); | |
do | |
{ | |
unsigned matchBit = (matchByte >> 7) & 1; | |
matchByte <<= 1; | |
unsigned bit = RangeDec.DecodeBit(&probs[((1 + matchBit) << 8) + symbol]); | |
symbol = (symbol << 1) | bit; | |
if (matchBit != bit) | |
break; | |
} | |
while (symbol < 0x100); | |
} | |
while (symbol < 0x100) | |
symbol = (symbol << 1) | RangeDec.DecodeBit(&probs[symbol]); | |
OutWindow.PutByte((Byte)(symbol - 0x100)); | |
} | |
CBitTreeDecoder<6> PosSlotDecoder[kNumLenToPosStates]; | |
CBitTreeDecoder<kNumAlignBits> AlignDecoder; | |
CProb PosDecoders[1 + kNumFullDistances - kEndPosModelIndex]; | |
void InitDist() | |
{ | |
for (unsigned i = 0; i < kNumLenToPosStates; i++) | |
PosSlotDecoder[i].Init(); | |
AlignDecoder.Init(); | |
INIT_PROBS(PosDecoders); | |
} | |
unsigned DecodeDistance(unsigned len) | |
{ | |
unsigned lenState = len; | |
if (lenState > kNumLenToPosStates - 1) | |
lenState = kNumLenToPosStates - 1; | |
unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec); | |
if (posSlot < 4) | |
return posSlot; | |
unsigned numDirectBits = (unsigned)((posSlot >> 1) - 1); | |
UInt32 dist = ((2 | (posSlot & 1)) << numDirectBits); | |
if (posSlot < kEndPosModelIndex) | |
dist += BitTreeReverseDecode(PosDecoders + dist - posSlot, numDirectBits, &RangeDec); | |
else | |
{ | |
dist += RangeDec.DecodeDirectBits(numDirectBits - kNumAlignBits) << kNumAlignBits; | |
dist += AlignDecoder.ReverseDecode(&RangeDec); | |
} | |
return dist; | |
} | |
CProb IsMatch[kNumStates << kNumPosBitsMax]; | |
CProb IsRep[kNumStates]; | |
CProb IsRepG0[kNumStates]; | |
CProb IsRepG1[kNumStates]; | |
CProb IsRepG2[kNumStates]; | |
CProb IsRep0Long[kNumStates << kNumPosBitsMax]; | |
CLenDecoder LenDecoder; | |
CLenDecoder RepLenDecoder; | |
void Init() | |
{ | |
InitLiterals(); | |
InitDist(); | |
INIT_PROBS(IsMatch); | |
INIT_PROBS(IsRep); | |
INIT_PROBS(IsRepG0); | |
INIT_PROBS(IsRepG1); | |
INIT_PROBS(IsRepG2); | |
INIT_PROBS(IsRep0Long); | |
LenDecoder.Init(); | |
RepLenDecoder.Init(); | |
} | |
}; | |
#define LZMA_RES_ERROR 0 | |
#define LZMA_RES_FINISHED_WITH_MARKER 1 | |
#define LZMA_RES_FINISHED_WITHOUT_MARKER 2 | |
int CLzmaDecoder::Decode(bool unpackSizeDefined, UInt64 unpackSize) | |
{ | |
Init(); | |
RangeDec.Init(); | |
UInt32 rep0 = 0, rep1 = 0, rep2 = 0, rep3 = 0; | |
unsigned state = 0; | |
for (;;) | |
{ | |
if (unpackSizeDefined && unpackSize == 0 && !markerIsMandatory) | |
if (RangeDec.IsFinishedOK()) | |
return LZMA_RES_FINISHED_WITHOUT_MARKER; | |
unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1); | |
if (RangeDec.DecodeBit(&IsMatch[(state << kNumPosBitsMax) + posState]) == 0) | |
{ | |
if (unpackSizeDefined && unpackSize == 0) | |
return LZMA_RES_ERROR; | |
DecodeLiteral(state, rep0); | |
state = UpdateState_Literal(state); | |
unpackSize--; | |
continue; | |
} | |
unsigned len; | |
if (RangeDec.DecodeBit(&IsRep[state]) != 0) | |
{ | |
if (unpackSizeDefined && unpackSize == 0) | |
return LZMA_RES_ERROR; | |
if (OutWindow.IsEmpty()) | |
return LZMA_RES_ERROR; | |
if (RangeDec.DecodeBit(&IsRepG0[state]) == 0) | |
{ | |
if (RangeDec.DecodeBit(&IsRep0Long[(state << kNumPosBitsMax) + posState]) == 0) | |
{ | |
state = UpdateState_ShortRep(state); | |
OutWindow.PutByte(OutWindow.GetByte(rep0 + 1)); | |
unpackSize--; | |
continue; | |
} | |
} | |
else | |
{ | |
UInt32 dist; | |
if (RangeDec.DecodeBit(&IsRepG1[state]) == 0) | |
dist = rep1; | |
else | |
{ | |
if (RangeDec.DecodeBit(&IsRepG2[state]) == 0) | |
dist = rep2; | |
else | |
{ | |
dist = rep3; | |
rep3 = rep2; | |
} | |
rep2 = rep1; | |
} | |
rep1 = rep0; | |
rep0 = dist; | |
} | |
len = RepLenDecoder.Decode(&RangeDec, posState); | |
state = UpdateState_Rep(state); | |
} | |
else | |
{ | |
rep3 = rep2; | |
rep2 = rep1; | |
rep1 = rep0; | |
len = LenDecoder.Decode(&RangeDec, posState); | |
state = UpdateState_Match(state); | |
rep0 = DecodeDistance(len); | |
if (rep0 == 0xFFFFFFFF) | |
return RangeDec.IsFinishedOK() ? | |
LZMA_RES_FINISHED_WITH_MARKER : | |
LZMA_RES_ERROR; | |
if (unpackSizeDefined && unpackSize == 0) | |
return LZMA_RES_ERROR; | |
if (rep0 >= dictSize || !OutWindow.CheckDistance(rep0)) | |
return LZMA_RES_ERROR; | |
} | |
len += kMatchMinLen; | |
bool isError = false; | |
if (unpackSizeDefined && unpackSize < len) | |
{ | |
len = (unsigned)unpackSize; | |
isError = true; | |
} | |
OutWindow.CopyMatch(rep0 + 1, len); | |
unpackSize -= len; | |
if (isError) | |
return LZMA_RES_ERROR; | |
} | |
} | |
static void Print(const char *s) | |
{ | |
fputs(s, stdout); | |
} | |
static void PrintError(const char *s) | |
{ | |
fputs(s, stderr); | |
} | |
#define CONVERT_INT_TO_STR(charType, tempSize) \ | |
void ConvertUInt64ToString(UInt64 val, char *s) | |
{ | |
char temp[32]; | |
unsigned i = 0; | |
while (val >= 10) | |
{ | |
temp[i++] = (char)('0' + (unsigned)(val % 10)); | |
val /= 10; | |
} | |
*s++ = (char)('0' + (unsigned)val); | |
while (i != 0) | |
{ | |
i--; | |
*s++ = temp[i]; | |
} | |
*s = 0; | |
} | |
void PrintUInt64(const char *title, UInt64 v) | |
{ | |
Print(title); | |
Print(" : "); | |
char s[32]; | |
ConvertUInt64ToString(v, s); | |
Print(s); | |
Print(" bytes \n"); | |
} | |
int main2(int numArgs, const char *args[]) | |
{ | |
Print("\nLZMA Reference Decoder 9.31 : Igor Pavlov : Public domain : 2013-02-06\n"); | |
if (numArgs == 1) | |
Print("\nUse: lzmaSpec a.lzma outFile"); | |
if (numArgs != 3) | |
throw "you must specify two parameters"; | |
CInputStream inStream; | |
inStream.File = fopen(args[1], "rb"); | |
inStream.Init(); | |
if (inStream.File == 0) | |
throw "Can't open input file"; | |
CLzmaDecoder lzmaDecoder; | |
lzmaDecoder.OutWindow.OutStream.File = fopen(args[2], "wb+"); | |
lzmaDecoder.OutWindow.OutStream.Init(); | |
if (inStream.File == 0) | |
throw "Can't open output file"; | |
Byte header[13]; | |
int i; | |
for (i = 0; i < 13; i++) | |
header[i] = inStream.ReadByte(); | |
lzmaDecoder.DecodeProperties(header); | |
printf("\nlc=%d, lp=%d, pb=%d", lzmaDecoder.lc, lzmaDecoder.lp, lzmaDecoder.pb); | |
printf("\nDictionary Size in properties = %u", lzmaDecoder.dictSizeInProperties); | |
printf("\nDictionary Size for decoding = %u", lzmaDecoder.dictSize); | |
UInt64 unpackSize = 0; | |
bool unpackSizeDefined = false; | |
for (i = 0; i < 8; i++) | |
{ | |
Byte b = header[5 + i]; | |
if (b != 0xFF) | |
unpackSizeDefined = true; | |
unpackSize |= (UInt64)b << (8 * i); | |
} | |
lzmaDecoder.markerIsMandatory = !unpackSizeDefined; | |
Print("\n"); | |
if (unpackSizeDefined) | |
PrintUInt64("Uncompressed Size", unpackSize); | |
else | |
Print("End marker is expected\n"); | |
lzmaDecoder.RangeDec.InStream = &inStream; | |
Print("\n"); | |
lzmaDecoder.Create(); | |
// we support the streams that have uncompressed size and marker. | |
int res = lzmaDecoder.Decode(unpackSizeDefined, unpackSize); | |
PrintUInt64("Read ", inStream.Processed); | |
PrintUInt64("Written ", lzmaDecoder.OutWindow.OutStream.Processed); | |
if (res == LZMA_RES_ERROR) | |
throw "LZMA decoding error"; | |
else if (res == LZMA_RES_FINISHED_WITHOUT_MARKER) | |
Print("Finished without end marker"); | |
else if (res == LZMA_RES_FINISHED_WITH_MARKER) | |
{ | |
if (unpackSizeDefined) | |
{ | |
if (lzmaDecoder.OutWindow.OutStream.Processed != unpackSize) | |
throw "Finished with end marker before than specified size"; | |
Print("Warning: "); | |
} | |
Print("Finished with end marker"); | |
} | |
else | |
throw "Internal Error"; | |
Print("\n"); | |
if (lzmaDecoder.RangeDec.Corrupted) | |
{ | |
Print("\nWarning: LZMA stream is corrupted\n"); | |
} | |
return 0; | |
} | |
int | |
#ifdef _MSC_VER | |
__cdecl | |
#endif | |
main(int numArgs, const char *args[]) | |
{ | |
try { return main2(numArgs, args); } | |
catch (const char *s) | |
{ | |
PrintError("\nError:\n"); | |
PrintError(s); | |
PrintError("\n"); | |
return 1; | |
} | |
catch(...) | |
{ | |
PrintError("\nError\n"); | |
return 1; | |
} | |
} |