blob: 104afe374a1e822eae9a7f7debb471e8abcf5980 [file] [log] [blame]
/*
* LZMAEncoderNormal
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.lzma;
import org.tukaani.xz.lz.LZEncoder;
import org.tukaani.xz.lz.Matches;
import org.tukaani.xz.rangecoder.RangeEncoder;
final class LZMAEncoderNormal extends LZMAEncoder {
private static final int OPTS = 4096;
private static final int EXTRA_SIZE_BEFORE = OPTS;
private static final int EXTRA_SIZE_AFTER = OPTS;
private final Optimum[] opts = new Optimum[OPTS];
private int optCur = 0;
private int optEnd = 0;
private Matches matches;
// These are fields solely to avoid allocating the objects again and
// again on each function call.
private final int[] repLens = new int[REPS];
private final State nextState = new State();
static int getMemoryUsage(int dictSize, int extraSizeBefore, int mf) {
return LZEncoder.getMemoryUsage(dictSize,
Math.max(extraSizeBefore, EXTRA_SIZE_BEFORE),
EXTRA_SIZE_AFTER, MATCH_LEN_MAX, mf)
+ OPTS * 64 / 1024;
}
LZMAEncoderNormal(RangeEncoder rc, int lc, int lp, int pb,
int dictSize, int extraSizeBefore,
int niceLen, int mf, int depthLimit) {
super(rc, LZEncoder.getInstance(dictSize,
Math.max(extraSizeBefore,
EXTRA_SIZE_BEFORE),
EXTRA_SIZE_AFTER,
niceLen, MATCH_LEN_MAX,
mf, depthLimit),
lc, lp, pb, dictSize, niceLen);
for (int i = 0; i < OPTS; ++i)
opts[i] = new Optimum();
}
public void reset() {
optCur = 0;
optEnd = 0;
super.reset();
}
/**
* Converts the opts array from backward indexes to forward indexes.
* Then it will be simple to get the next symbol from the array
* in later calls to <code>getNextSymbol()</code>.
*/
private int convertOpts() {
optEnd = optCur;
int optPrev = opts[optCur].optPrev;
do {
Optimum opt = opts[optCur];
if (opt.prev1IsLiteral) {
opts[optPrev].optPrev = optCur;
opts[optPrev].backPrev = -1;
optCur = optPrev--;
if (opt.hasPrev2) {
opts[optPrev].optPrev = optPrev + 1;
opts[optPrev].backPrev = opt.backPrev2;
optCur = optPrev;
optPrev = opt.optPrev2;
}
}
int temp = opts[optPrev].optPrev;
opts[optPrev].optPrev = optCur;
optCur = optPrev;
optPrev = temp;
} while (optCur > 0);
optCur = opts[0].optPrev;
back = opts[optCur].backPrev;
return optCur;
}
int getNextSymbol() {
// If there are pending symbols from an earlier call to this
// function, return those symbols first.
if (optCur < optEnd) {
int len = opts[optCur].optPrev - optCur;
optCur = opts[optCur].optPrev;
back = opts[optCur].backPrev;
return len;
}
assert optCur == optEnd;
optCur = 0;
optEnd = 0;
back = -1;
if (readAhead == -1)
matches = getMatches();
// Get the number of bytes available in the dictionary, but
// not more than the maximum match length. If there aren't
// enough bytes remaining to encode a match at all, return
// immediately to encode this byte as a literal.
int avail = Math.min(lz.getAvail(), MATCH_LEN_MAX);
if (avail < MATCH_LEN_MIN)
return 1;
// Get the lengths of repeated matches.
int repBest = 0;
for (int rep = 0; rep < REPS; ++rep) {
repLens[rep] = lz.getMatchLen(reps[rep], avail);
if (repLens[rep] < MATCH_LEN_MIN) {
repLens[rep] = 0;
continue;
}
if (repLens[rep] > repLens[repBest])
repBest = rep;
}
// Return if the best repeated match is at least niceLen bytes long.
if (repLens[repBest] >= niceLen) {
back = repBest;
skip(repLens[repBest] - 1);
return repLens[repBest];
}
// Initialize mainLen and mainDist to the longest match found
// by the match finder.
int mainLen = 0;
int mainDist = 0;
if (matches.count > 0) {
mainLen = matches.len[matches.count - 1];
mainDist = matches.dist[matches.count - 1];
// Return if it is at least niceLen bytes long.
if (mainLen >= niceLen) {
back = mainDist + REPS;
skip(mainLen - 1);
return mainLen;
}
}
int curByte = lz.getByte(0);
int matchByte = lz.getByte(reps[0] + 1);
// If the match finder found no matches and this byte cannot be
// encoded as a repeated match (short or long), we must be return
// to have the byte encoded as a literal.
if (mainLen < MATCH_LEN_MIN && curByte != matchByte
&& repLens[repBest] < MATCH_LEN_MIN)
return 1;
int pos = lz.getPos();
int posState = pos & posMask;
// Calculate the price of encoding the current byte as a literal.
{
int prevByte = lz.getByte(1);
int literalPrice = literalEncoder.getPrice(curByte, matchByte,
prevByte, pos, state);
opts[1].set1(literalPrice, 0, -1);
}
int anyMatchPrice = getAnyMatchPrice(state, posState);
int anyRepPrice = getAnyRepPrice(anyMatchPrice, state);
// If it is possible to encode this byte as a short rep, see if
// it is cheaper than encoding it as a literal.
if (matchByte == curByte) {
int shortRepPrice = getShortRepPrice(anyRepPrice,
state, posState);
if (shortRepPrice < opts[1].price)
opts[1].set1(shortRepPrice, 0, 0);
}
// Return if there is neither normal nor long repeated match. Use
// a short match instead of a literal if is is possible and cheaper.
optEnd = Math.max(mainLen, repLens[repBest]);
if (optEnd < MATCH_LEN_MIN) {
assert optEnd == 0 : optEnd;
back = opts[1].backPrev;
return 1;
}
// Update the lookup tables for distances and lengths before using
// those price calculation functions. (The price function above
// don't need these tables.)
updatePrices();
// Initialize the state and reps of this position in opts[].
// updateOptStateAndReps() will need these to get the new
// state and reps for the next byte.
opts[0].state.set(state);
System.arraycopy(reps, 0, opts[0].reps, 0, REPS);
// Initialize the prices for latter opts that will be used below.
for (int i = optEnd; i >= MATCH_LEN_MIN; --i)
opts[i].reset();
// Calculate the prices of repeated matches of all lengths.
for (int rep = 0; rep < REPS; ++rep) {
int repLen = repLens[rep];
if (repLen < MATCH_LEN_MIN)
continue;
int longRepPrice = getLongRepPrice(anyRepPrice, rep,
state, posState);
do {
int price = longRepPrice + repLenEncoder.getPrice(repLen,
posState);
if (price < opts[repLen].price)
opts[repLen].set1(price, 0, rep);
} while (--repLen >= MATCH_LEN_MIN);
}
// Calculate the prices of normal matches that are longer than rep0.
{
int len = Math.max(repLens[0] + 1, MATCH_LEN_MIN);
if (len <= mainLen) {
int normalMatchPrice = getNormalMatchPrice(anyMatchPrice,
state);
// Set i to the index of the shortest match that is
// at least len bytes long.
int i = 0;
while (len > matches.len[i])
++i;
while (true) {
int dist = matches.dist[i];
int price = getMatchAndLenPrice(normalMatchPrice,
dist, len, posState);
if (price < opts[len].price)
opts[len].set1(price, 0, dist + REPS);
if (len == matches.len[i])
if (++i == matches.count)
break;
++len;
}
}
}
avail = Math.min(lz.getAvail(), OPTS - 1);
// Get matches for later bytes and optimize the use of LZMA symbols
// by calculating the prices and picking the cheapest symbol
// combinations.
while (++optCur < optEnd) {
matches = getMatches();
if (matches.count > 0
&& matches.len[matches.count - 1] >= niceLen)
break;
--avail;
++pos;
posState = pos & posMask;
updateOptStateAndReps();
anyMatchPrice = opts[optCur].price
+ getAnyMatchPrice(opts[optCur].state, posState);
anyRepPrice = getAnyRepPrice(anyMatchPrice, opts[optCur].state);
calc1BytePrices(pos, posState, avail, anyRepPrice);
if (avail >= MATCH_LEN_MIN) {
int startLen = calcLongRepPrices(pos, posState,
avail, anyRepPrice);
if (matches.count > 0)
calcNormalMatchPrices(pos, posState, avail,
anyMatchPrice, startLen);
}
}
return convertOpts();
}
/**
* Updates the state and reps for the current byte in the opts array.
*/
private void updateOptStateAndReps() {
int optPrev = opts[optCur].optPrev;
assert optPrev < optCur;
if (opts[optCur].prev1IsLiteral) {
--optPrev;
if (opts[optCur].hasPrev2) {
opts[optCur].state.set(opts[opts[optCur].optPrev2].state);
if (opts[optCur].backPrev2 < REPS)
opts[optCur].state.updateLongRep();
else
opts[optCur].state.updateMatch();
} else {
opts[optCur].state.set(opts[optPrev].state);
}
opts[optCur].state.updateLiteral();
} else {
opts[optCur].state.set(opts[optPrev].state);
}
if (optPrev == optCur - 1) {
// Must be either a short rep or a literal.
assert opts[optCur].backPrev == 0 || opts[optCur].backPrev == -1;
if (opts[optCur].backPrev == 0)
opts[optCur].state.updateShortRep();
else
opts[optCur].state.updateLiteral();
System.arraycopy(opts[optPrev].reps, 0,
opts[optCur].reps, 0, REPS);
} else {
int back;
if (opts[optCur].prev1IsLiteral && opts[optCur].hasPrev2) {
optPrev = opts[optCur].optPrev2;
back = opts[optCur].backPrev2;
opts[optCur].state.updateLongRep();
} else {
back = opts[optCur].backPrev;
if (back < REPS)
opts[optCur].state.updateLongRep();
else
opts[optCur].state.updateMatch();
}
if (back < REPS) {
opts[optCur].reps[0] = opts[optPrev].reps[back];
int rep;
for (rep = 1; rep <= back; ++rep)
opts[optCur].reps[rep] = opts[optPrev].reps[rep - 1];
for (; rep < REPS; ++rep)
opts[optCur].reps[rep] = opts[optPrev].reps[rep];
} else {
opts[optCur].reps[0] = back - REPS;
System.arraycopy(opts[optPrev].reps, 0,
opts[optCur].reps, 1, REPS - 1);
}
}
}
/**
* Calculates prices of a literal, a short rep, and literal + rep0.
*/
private void calc1BytePrices(int pos, int posState,
int avail, int anyRepPrice) {
// This will be set to true if using a literal or a short rep.
boolean nextIsByte = false;
int curByte = lz.getByte(0);
int matchByte = lz.getByte(opts[optCur].reps[0] + 1);
// Try a literal.
int literalPrice = opts[optCur].price
+ literalEncoder.getPrice(curByte, matchByte, lz.getByte(1),
pos, opts[optCur].state);
if (literalPrice < opts[optCur + 1].price) {
opts[optCur + 1].set1(literalPrice, optCur, -1);
nextIsByte = true;
}
// Try a short rep.
if (matchByte == curByte && (opts[optCur + 1].optPrev == optCur
|| opts[optCur + 1].backPrev != 0)) {
int shortRepPrice = getShortRepPrice(anyRepPrice,
opts[optCur].state,
posState);
if (shortRepPrice <= opts[optCur + 1].price) {
opts[optCur + 1].set1(shortRepPrice, optCur, 0);
nextIsByte = true;
}
}
// If neither a literal nor a short rep was the cheapest choice,
// try literal + long rep0.
if (!nextIsByte && matchByte != curByte && avail > MATCH_LEN_MIN) {
int lenLimit = Math.min(niceLen, avail - 1);
int len = lz.getMatchLen(1, opts[optCur].reps[0], lenLimit);
if (len >= MATCH_LEN_MIN) {
nextState.set(opts[optCur].state);
nextState.updateLiteral();
int nextPosState = (pos + 1) & posMask;
int price = literalPrice
+ getLongRepAndLenPrice(0, len,
nextState, nextPosState);
int i = optCur + 1 + len;
while (optEnd < i)
opts[++optEnd].reset();
if (price < opts[i].price)
opts[i].set2(price, optCur, 0);
}
}
}
/**
* Calculates prices of long rep and long rep + literal + rep0.
*/
private int calcLongRepPrices(int pos, int posState,
int avail, int anyRepPrice) {
int startLen = MATCH_LEN_MIN;
int lenLimit = Math.min(avail, niceLen);
for (int rep = 0; rep < REPS; ++rep) {
int len = lz.getMatchLen(opts[optCur].reps[rep], lenLimit);
if (len < MATCH_LEN_MIN)
continue;
while (optEnd < optCur + len)
opts[++optEnd].reset();
int longRepPrice = getLongRepPrice(anyRepPrice, rep,
opts[optCur].state, posState);
for (int i = len; i >= MATCH_LEN_MIN; --i) {
int price = longRepPrice
+ repLenEncoder.getPrice(i, posState);
if (price < opts[optCur + i].price)
opts[optCur + i].set1(price, optCur, rep);
}
if (rep == 0)
startLen = len + 1;
int len2Limit = Math.min(niceLen, avail - len - 1);
int len2 = lz.getMatchLen(len + 1, opts[optCur].reps[rep],
len2Limit);
if (len2 >= MATCH_LEN_MIN) {
// Rep
int price = longRepPrice
+ repLenEncoder.getPrice(len, posState);
nextState.set(opts[optCur].state);
nextState.updateLongRep();
// Literal
int curByte = lz.getByte(len, 0);
int matchByte = lz.getByte(0); // lz.getByte(len, len)
int prevByte = lz.getByte(len, 1);
price += literalEncoder.getPrice(curByte, matchByte, prevByte,
pos + len, nextState);
nextState.updateLiteral();
// Rep0
int nextPosState = (pos + len + 1) & posMask;
price += getLongRepAndLenPrice(0, len2,
nextState, nextPosState);
int i = optCur + len + 1 + len2;
while (optEnd < i)
opts[++optEnd].reset();
if (price < opts[i].price)
opts[i].set3(price, optCur, rep, len, 0);
}
}
return startLen;
}
/**
* Calculates prices of a normal match and normal match + literal + rep0.
*/
private void calcNormalMatchPrices(int pos, int posState, int avail,
int anyMatchPrice, int startLen) {
// If the longest match is so long that it would not fit into
// the opts array, shorten the matches.
if (matches.len[matches.count - 1] > avail) {
matches.count = 0;
while (matches.len[matches.count] < avail)
++matches.count;
matches.len[matches.count++] = avail;
}
if (matches.len[matches.count - 1] < startLen)
return;
while (optEnd < optCur + matches.len[matches.count - 1])
opts[++optEnd].reset();
int normalMatchPrice = getNormalMatchPrice(anyMatchPrice,
opts[optCur].state);
int match = 0;
while (startLen > matches.len[match])
++match;
for (int len = startLen; ; ++len) {
int dist = matches.dist[match];
// Calculate the price of a match of len bytes from the nearest
// possible distance.
int matchAndLenPrice = getMatchAndLenPrice(normalMatchPrice,
dist, len, posState);
if (matchAndLenPrice < opts[optCur + len].price)
opts[optCur + len].set1(matchAndLenPrice,
optCur, dist + REPS);
if (len != matches.len[match])
continue;
// Try match + literal + rep0. First get the length of the rep0.
int len2Limit = Math.min(niceLen, avail - len - 1);
int len2 = lz.getMatchLen(len + 1, dist, len2Limit);
if (len2 >= MATCH_LEN_MIN) {
nextState.set(opts[optCur].state);
nextState.updateMatch();
// Literal
int curByte = lz.getByte(len, 0);
int matchByte = lz.getByte(0); // lz.getByte(len, len)
int prevByte = lz.getByte(len, 1);
int price = matchAndLenPrice
+ literalEncoder.getPrice(curByte, matchByte,
prevByte, pos + len,
nextState);
nextState.updateLiteral();
// Rep0
int nextPosState = (pos + len + 1) & posMask;
price += getLongRepAndLenPrice(0, len2,
nextState, nextPosState);
int i = optCur + len + 1 + len2;
while (optEnd < i)
opts[++optEnd].reset();
if (price < opts[i].price)
opts[i].set3(price, optCur, dist + REPS, len, 0);
}
if (++match == matches.count)
break;
}
}
}