| /* |
| * LZEncoder |
| * |
| * Authors: Lasse Collin <lasse.collin@tukaani.org> |
| * Igor Pavlov <http://7-zip.org/> |
| * |
| * This file has been put into the public domain. |
| * You can do whatever you want with this file. |
| */ |
| |
| package org.tukaani.xz.lz; |
| |
| import java.io.OutputStream; |
| import java.io.IOException; |
| |
| public abstract class LZEncoder { |
| public static final int MF_HC4 = 0x04; |
| public static final int MF_BT4 = 0x14; |
| |
| /** |
| * Number of bytes to keep available before the current byte |
| * when moving the LZ window. |
| */ |
| private final int keepSizeBefore; |
| |
| /** |
| * Number of bytes that must be available, the current byte included, |
| * to make hasEnoughData return true. Flushing and finishing are |
| * naturally exceptions to this since there cannot be any data after |
| * the end of the uncompressed input. |
| */ |
| private final int keepSizeAfter; |
| |
| final int matchLenMax; |
| final int niceLen; |
| |
| final byte[] buf; |
| |
| int readPos = -1; |
| private int readLimit = -1; |
| private boolean finishing = false; |
| private int writePos = 0; |
| private int pendingSize = 0; |
| |
| static void normalize(int[] positions, int normalizationOffset) { |
| for (int i = 0; i < positions.length; ++i) { |
| if (positions[i] <= normalizationOffset) |
| positions[i] = 0; |
| else |
| positions[i] -= normalizationOffset; |
| } |
| } |
| |
| /** |
| * Gets the size of the LZ window buffer that needs to be allocated. |
| */ |
| private static int getBufSize( |
| int dictSize, int extraSizeBefore, int extraSizeAfter, |
| int matchLenMax) { |
| int keepSizeBefore = extraSizeBefore + dictSize; |
| int keepSizeAfter = extraSizeAfter + matchLenMax; |
| int reserveSize = Math.min(dictSize / 2 + (256 << 10), 512 << 20); |
| return keepSizeBefore + keepSizeAfter + reserveSize; |
| } |
| |
| /** |
| * Gets approximate memory usage of the LZEncoder base structure and |
| * the match finder as kibibytes. |
| */ |
| public static int getMemoryUsage( |
| int dictSize, int extraSizeBefore, int extraSizeAfter, |
| int matchLenMax, int mf) { |
| // Buffer size + a little extra |
| int m = getBufSize(dictSize, extraSizeBefore, extraSizeAfter, |
| matchLenMax) / 1024 + 10; |
| |
| switch (mf) { |
| case MF_HC4: |
| m += HC4.getMemoryUsage(dictSize); |
| break; |
| |
| case MF_BT4: |
| m += BT4.getMemoryUsage(dictSize); |
| break; |
| |
| default: |
| throw new IllegalArgumentException(); |
| } |
| |
| return m; |
| } |
| |
| /** |
| * Creates a new LZEncoder. |
| * <p> |
| * @param dictSize dictionary size |
| * |
| * @param extraSizeBefore |
| * number of bytes to keep available in the |
| * history in addition to dictSize |
| * |
| * @param extraSizeAfter |
| * number of bytes that must be available |
| * after current position + matchLenMax |
| * |
| * @param niceLen if a match of at least <code>niceLen</code> |
| * bytes is found, be happy with it and don't |
| * stop looking for longer matches |
| * |
| * @param matchLenMax don't test for matches longer than |
| * <code>matchLenMax</code> bytes |
| * |
| * @param mf match finder ID |
| * |
| * @param depthLimit match finder search depth limit |
| */ |
| public static LZEncoder getInstance( |
| int dictSize, int extraSizeBefore, int extraSizeAfter, |
| int niceLen, int matchLenMax, int mf, int depthLimit) { |
| switch (mf) { |
| case MF_HC4: |
| return new HC4(dictSize, extraSizeBefore, extraSizeAfter, |
| niceLen, matchLenMax, depthLimit); |
| |
| case MF_BT4: |
| return new BT4(dictSize, extraSizeBefore, extraSizeAfter, |
| niceLen, matchLenMax, depthLimit); |
| } |
| |
| throw new IllegalArgumentException(); |
| } |
| |
| /** |
| * Creates a new LZEncoder. See <code>getInstance</code>. |
| */ |
| LZEncoder(int dictSize, int extraSizeBefore, int extraSizeAfter, |
| int niceLen, int matchLenMax) { |
| buf = new byte[getBufSize(dictSize, extraSizeBefore, extraSizeAfter, |
| matchLenMax)]; |
| |
| keepSizeBefore = extraSizeBefore + dictSize; |
| keepSizeAfter = extraSizeAfter + matchLenMax; |
| |
| this.matchLenMax = matchLenMax; |
| this.niceLen = niceLen; |
| } |
| |
| /** |
| * Sets a preset dictionary. If a preset dictionary is wanted, this |
| * function must be called immediately after creating the LZEncoder |
| * before any data has been encoded. |
| */ |
| public void setPresetDict(int dictSize, byte[] presetDict) { |
| assert !isStarted(); |
| assert writePos == 0; |
| |
| if (presetDict != null) { |
| // If the preset dictionary buffer is bigger than the dictionary |
| // size, copy only the tail of the preset dictionary. |
| int copySize = Math.min(presetDict.length, dictSize); |
| int offset = presetDict.length - copySize; |
| System.arraycopy(presetDict, offset, buf, 0, copySize); |
| writePos += copySize; |
| skip(copySize); |
| } |
| } |
| |
| /** |
| * Moves data from the end of the buffer to the beginning, discarding |
| * old data and making space for new input. |
| */ |
| private void moveWindow() { |
| // Align the move to a multiple of 16 bytes. LZMA2 needs this |
| // because it uses the lowest bits from readPos to get the |
| // alignment of the uncompressed data. |
| int moveOffset = (readPos + 1 - keepSizeBefore) & ~15; |
| int moveSize = writePos - moveOffset; |
| System.arraycopy(buf, moveOffset, buf, 0, moveSize); |
| |
| readPos -= moveOffset; |
| readLimit -= moveOffset; |
| writePos -= moveOffset; |
| } |
| |
| /** |
| * Copies new data into the LZEncoder's buffer. |
| */ |
| public int fillWindow(byte[] in, int off, int len) { |
| assert !finishing; |
| |
| // Move the sliding window if needed. |
| if (readPos >= buf.length - keepSizeAfter) |
| moveWindow(); |
| |
| // Try to fill the dictionary buffer. If it becomes full, |
| // some of the input bytes may be left unused. |
| if (len > buf.length - writePos) |
| len = buf.length - writePos; |
| |
| System.arraycopy(in, off, buf, writePos, len); |
| writePos += len; |
| |
| // Set the new readLimit but only if there's enough data to allow |
| // encoding of at least one more byte. |
| if (writePos >= keepSizeAfter) |
| readLimit = writePos - keepSizeAfter; |
| |
| processPendingBytes(); |
| |
| // Tell the caller how much input we actually copied into |
| // the dictionary. |
| return len; |
| } |
| |
| /** |
| * Process pending bytes remaining from preset dictionary initialization |
| * or encoder flush operation. |
| */ |
| private void processPendingBytes() { |
| // After flushing or setting a preset dictionary there will be |
| // pending data that hasn't been ran through the match finder yet. |
| // Run it through the match finder now if there is enough new data |
| // available (readPos < readLimit) that the encoder may encode at |
| // least one more input byte. This way we don't waste any time |
| // looping in the match finder (and marking the same bytes as |
| // pending again) if the application provides very little new data |
| // per write call. |
| if (pendingSize > 0 && readPos < readLimit) { |
| readPos -= pendingSize; |
| int oldPendingSize = pendingSize; |
| pendingSize = 0; |
| skip(oldPendingSize); |
| assert pendingSize < oldPendingSize; |
| } |
| } |
| |
| /** |
| * Returns true if at least one byte has already been run through |
| * the match finder. |
| */ |
| public boolean isStarted() { |
| return readPos != -1; |
| } |
| |
| /** |
| * Marks that all the input needs to be made available in |
| * the encoded output. |
| */ |
| public void setFlushing() { |
| readLimit = writePos - 1; |
| processPendingBytes(); |
| } |
| |
| /** |
| * Marks that there is no more input remaining. The read position |
| * can be advanced until the end of the data. |
| */ |
| public void setFinishing() { |
| readLimit = writePos - 1; |
| finishing = true; |
| processPendingBytes(); |
| } |
| |
| /** |
| * Tests if there is enough input available to let the caller encode |
| * at least one more byte. |
| */ |
| public boolean hasEnoughData(int alreadyReadLen) { |
| return readPos - alreadyReadLen < readLimit; |
| } |
| |
| public void copyUncompressed(OutputStream out, int backward, int len) |
| throws IOException { |
| out.write(buf, readPos + 1 - backward, len); |
| } |
| |
| /** |
| * Get the number of bytes available, including the current byte. |
| * <p> |
| * Note that the result is undefined if <code>getMatches</code> or |
| * <code>skip</code> hasn't been called yet and no preset dictionary |
| * is being used. |
| */ |
| public int getAvail() { |
| assert isStarted(); |
| return writePos - readPos; |
| } |
| |
| /** |
| * Gets the lowest four bits of the absolute offset of the current byte. |
| * Bits other than the lowest four are undefined. |
| */ |
| public int getPos() { |
| return readPos; |
| } |
| |
| /** |
| * Gets the byte from the given backward offset. |
| * <p> |
| * The current byte is at <code>0</code>, the previous byte |
| * at <code>1</code> etc. To get a byte at zero-based distance, |
| * use <code>getByte(dist + 1)<code>. |
| * <p> |
| * This function is equivalent to <code>getByte(0, backward)</code>. |
| */ |
| public int getByte(int backward) { |
| return buf[readPos - backward] & 0xFF; |
| } |
| |
| /** |
| * Gets the byte from the given forward minus backward offset. |
| * The forward offset is added to the current position. This lets |
| * one read bytes ahead of the current byte. |
| */ |
| public int getByte(int forward, int backward) { |
| return buf[readPos + forward - backward] & 0xFF; |
| } |
| |
| /** |
| * Get the length of a match at the given distance. |
| * |
| * @param dist zero-based distance of the match to test |
| * @param lenLimit don't test for a match longer than this |
| * |
| * @return length of the match; it is in the range [0, lenLimit] |
| */ |
| public int getMatchLen(int dist, int lenLimit) { |
| int backPos = readPos - dist - 1; |
| int len = 0; |
| |
| while (len < lenLimit && buf[readPos + len] == buf[backPos + len]) |
| ++len; |
| |
| return len; |
| } |
| |
| /** |
| * Get the length of a match at the given distance and forward offset. |
| * |
| * @param forward forward offset |
| * @param dist zero-based distance of the match to test |
| * @param lenLimit don't test for a match longer than this |
| * |
| * @return length of the match; it is in the range [0, lenLimit] |
| */ |
| public int getMatchLen(int forward, int dist, int lenLimit) { |
| int curPos = readPos + forward; |
| int backPos = curPos - dist - 1; |
| int len = 0; |
| |
| while (len < lenLimit && buf[curPos + len] == buf[backPos + len]) |
| ++len; |
| |
| return len; |
| } |
| |
| /** |
| * Verifies that the matches returned by the match finder are valid. |
| * This is meant to be used in an assert statement. This is totally |
| * useless for actual encoding since match finder's results should |
| * naturally always be valid if it isn't broken. |
| * |
| * @param matches return value from <code>getMatches</code> |
| * |
| * @return true if matches are valid, false if match finder is broken |
| */ |
| public boolean verifyMatches(Matches matches) { |
| int lenLimit = Math.min(getAvail(), matchLenMax); |
| |
| for (int i = 0; i < matches.count; ++i) |
| if (getMatchLen(matches.dist[i], lenLimit) != matches.len[i]) |
| return false; |
| |
| return true; |
| } |
| |
| /** |
| * Moves to the next byte, checks if there is enough input available, |
| * and returns the amount of input available. |
| * |
| * @param requiredForFlushing |
| * minimum number of available bytes when |
| * flushing; encoding may be continued with |
| * new input after flushing |
| * @param requiredForFinishing |
| * minimum number of available bytes when |
| * finishing; encoding must not be continued |
| * after finishing or the match finder state |
| * may be corrupt |
| * |
| * @return the number of bytes available or zero if there |
| * is not enough input available |
| */ |
| int movePos(int requiredForFlushing, int requiredForFinishing) { |
| assert requiredForFlushing >= requiredForFinishing; |
| |
| ++readPos; |
| int avail = writePos - readPos; |
| |
| if (avail < requiredForFlushing) { |
| if (avail < requiredForFinishing || !finishing) { |
| ++pendingSize; |
| avail = 0; |
| } |
| } |
| |
| return avail; |
| } |
| |
| /** |
| * Runs match finder for the next byte and returns the matches found. |
| */ |
| public abstract Matches getMatches(); |
| |
| /** |
| * Skips the given number of bytes in the match finder. |
| */ |
| public abstract void skip(int len); |
| } |