| /* |
| * LZMA2Options |
| * |
| * Author: Lasse Collin <lasse.collin@tukaani.org> |
| * |
| * This file has been put into the public domain. |
| * You can do whatever you want with this file. |
| */ |
| |
| package org.tukaani.xz; |
| |
| import java.io.InputStream; |
| import java.io.IOException; |
| import org.tukaani.xz.lz.LZEncoder; |
| import org.tukaani.xz.lzma.LZMAEncoder; |
| |
| /** |
| * LZMA2 compression options. |
| * <p> |
| * While this allows setting the LZMA2 compression options in detail, |
| * often you only need <code>LZMA2Options()</code> or |
| * <code>LZMA2Options(int)</code>. |
| */ |
| public class LZMA2Options extends FilterOptions { |
| /** |
| * Minimum valid compression preset level is 0. |
| */ |
| public static final int PRESET_MIN = 0; |
| |
| /** |
| * Maximum valid compression preset level is 9. |
| */ |
| public static final int PRESET_MAX = 9; |
| |
| /** |
| * Default compression preset level is 6. |
| */ |
| public static final int PRESET_DEFAULT = 6; |
| |
| /** |
| * Minimum dictionary size is 4 KiB. |
| */ |
| public static final int DICT_SIZE_MIN = 4096; |
| |
| /** |
| * Maximum dictionary size for compression is 768 MiB. |
| * <p> |
| * The decompressor supports bigger dictionaries, up to almost 2 GiB. |
| * With HC4 the encoder would support dictionaries bigger than 768 MiB. |
| * The 768 MiB limit comes from the current implementation of BT4 where |
| * we would otherwise hit the limits of signed ints in array indexing. |
| * <p> |
| * If you really need bigger dictionary for decompression, |
| * use {@link LZMA2InputStream} directly. |
| */ |
| public static final int DICT_SIZE_MAX = 768 << 20; |
| |
| /** |
| * The default dictionary size is 8 MiB. |
| */ |
| public static final int DICT_SIZE_DEFAULT = 8 << 20; |
| |
| /** |
| * Maximum value for lc + lp is 4. |
| */ |
| public static final int LC_LP_MAX = 4; |
| |
| /** |
| * The default number of literal context bits is 3. |
| */ |
| public static final int LC_DEFAULT = 3; |
| |
| /** |
| * The default number of literal position bits is 0. |
| */ |
| public static final int LP_DEFAULT = 0; |
| |
| /** |
| * Maximum value for pb is 4. |
| */ |
| public static final int PB_MAX = 4; |
| |
| /** |
| * The default number of position bits is 2. |
| */ |
| public static final int PB_DEFAULT = 2; |
| |
| /** |
| * Compression mode: uncompressed. |
| * The data is wrapped into a LZMA2 stream without compression. |
| */ |
| public static final int MODE_UNCOMPRESSED = 0; |
| |
| /** |
| * Compression mode: fast. |
| * This is usually combined with a hash chain match finder. |
| */ |
| public static final int MODE_FAST = LZMAEncoder.MODE_FAST; |
| |
| /** |
| * Compression mode: normal. |
| * This is usually combined with a binary tree match finder. |
| */ |
| public static final int MODE_NORMAL = LZMAEncoder.MODE_NORMAL; |
| |
| /** |
| * Minimum value for <code>niceLen</code> is 8. |
| */ |
| public static final int NICE_LEN_MIN = 8; |
| |
| /** |
| * Maximum value for <code>niceLen</code> is 273. |
| */ |
| public static final int NICE_LEN_MAX = 273; |
| |
| /** |
| * Match finder: Hash Chain 2-3-4 |
| */ |
| public static final int MF_HC4 = LZEncoder.MF_HC4; |
| |
| /** |
| * Match finder: Binary tree 2-3-4 |
| */ |
| public static final int MF_BT4 = LZEncoder.MF_BT4; |
| |
| private static final int[] presetToDictSize = { |
| 1 << 18, 1 << 20, 1 << 21, 1 << 22, 1 << 22, |
| 1 << 23, 1 << 23, 1 << 24, 1 << 25, 1 << 26 }; |
| |
| private static final int[] presetToDepthLimit = { 4, 8, 24, 48 }; |
| |
| private int dictSize; |
| private byte[] presetDict = null; |
| private int lc; |
| private int lp; |
| private int pb; |
| private int mode; |
| private int niceLen; |
| private int mf; |
| private int depthLimit; |
| |
| /** |
| * Creates new LZMA2 options and sets them to the default values. |
| * This is equivalent to <code>LZMA2Options(PRESET_DEFAULT)</code>. |
| */ |
| public LZMA2Options() { |
| try { |
| setPreset(PRESET_DEFAULT); |
| } catch (UnsupportedOptionsException e) { |
| assert false; |
| throw new RuntimeException(); |
| } |
| } |
| |
| /** |
| * Creates new LZMA2 options and sets them to the given preset. |
| * |
| * @throws UnsupportedOptionsException |
| * <code>preset</code> is not supported |
| */ |
| public LZMA2Options(int preset) throws UnsupportedOptionsException { |
| setPreset(preset); |
| } |
| |
| /** |
| * Creates new LZMA2 options and sets them to the given custom values. |
| * |
| * @throws UnsupportedOptionsException |
| * unsupported options were specified |
| */ |
| public LZMA2Options(int dictSize, int lc, int lp, int pb, int mode, |
| int niceLen, int mf, int depthLimit) |
| throws UnsupportedOptionsException { |
| setDictSize(dictSize); |
| setLcLp(lc, lp); |
| setPb(pb); |
| setMode(mode); |
| setNiceLen(niceLen); |
| setMatchFinder(mf); |
| setDepthLimit(depthLimit); |
| } |
| |
| /** |
| * Sets the compression options to the given preset. |
| * <p> |
| * The presets 0-3 are fast presets with medium compression. |
| * The presets 4-6 are fairly slow presets with high compression. |
| * The default preset (<code>PRESET_DEFAULT</code>) is 6. |
| * <p> |
| * The presets 7-9 are like the preset 6 but use bigger dictionaries |
| * and have higher compressor and decompressor memory requirements. |
| * Unless the uncompressed size of the file exceeds 8 MiB, |
| * 16 MiB, or 32 MiB, it is waste of memory to use the |
| * presets 7, 8, or 9, respectively. |
| * |
| * @throws UnsupportedOptionsException |
| * <code>preset</code> is not supported |
| */ |
| public void setPreset(int preset) throws UnsupportedOptionsException { |
| if (preset < 0 || preset > 9) |
| throw new UnsupportedOptionsException( |
| "Unsupported preset: " + preset); |
| |
| lc = LC_DEFAULT; |
| lp = LP_DEFAULT; |
| pb = PB_DEFAULT; |
| dictSize = presetToDictSize[preset]; |
| |
| if (preset <= 3) { |
| mode = MODE_FAST; |
| mf = MF_HC4; |
| niceLen = preset <= 1 ? 128 : NICE_LEN_MAX; |
| depthLimit = presetToDepthLimit[preset]; |
| } else { |
| mode = MODE_NORMAL; |
| mf = MF_BT4; |
| niceLen = (preset == 4) ? 16 : (preset == 5) ? 32 : 64; |
| depthLimit = 0; |
| } |
| } |
| |
| /** |
| * Sets the dictionary size in bytes. |
| * <p> |
| * The dictionary (or history buffer) holds the most recently seen |
| * uncompressed data. Bigger dictionary usually means better compression. |
| * However, using a dictioanary bigger than the size of the uncompressed |
| * data is waste of memory. |
| * <p> |
| * Any value in the range [DICT_SIZE_MIN, DICT_SIZE_MAX] is valid, |
| * but sizes of 2^n and 2^n + 2^(n-1) bytes are somewhat |
| * recommended. |
| * |
| * @throws UnsupportedOptionsException |
| * <code>dictSize</code> is not supported |
| */ |
| public void setDictSize(int dictSize) throws UnsupportedOptionsException { |
| if (dictSize < DICT_SIZE_MIN) |
| throw new UnsupportedOptionsException( |
| "LZMA2 dictionary size must be at least 4 KiB: " |
| + dictSize + " B"); |
| |
| if (dictSize > DICT_SIZE_MAX) |
| throw new UnsupportedOptionsException( |
| "LZMA2 dictionary size must not exceed " |
| + (DICT_SIZE_MAX >> 20) + " MiB: " + dictSize + " B"); |
| |
| this.dictSize = dictSize; |
| } |
| |
| /** |
| * Gets the dictionary size in bytes. |
| */ |
| public int getDictSize() { |
| return dictSize; |
| } |
| |
| /** |
| * Sets a preset dictionary. Use null to disable the use of |
| * a preset dictionary. By default there is no preset dictionary. |
| * <p> |
| * <b>The .xz format doesn't support a preset dictionary for now. |
| * Do not set a preset dictionary unless you use raw LZMA2.</b> |
| * <p> |
| * Preset dictionary can be useful when compressing many similar, |
| * relatively small chunks of data independently from each other. |
| * A preset dictionary should contain typical strings that occur in |
| * the files being compressed. The most probable strings should be |
| * near the end of the preset dictionary. The preset dictionary used |
| * for compression is also needed for decompression. |
| */ |
| public void setPresetDict(byte[] presetDict) { |
| this.presetDict = presetDict; |
| } |
| |
| /** |
| * Gets the preset dictionary. |
| */ |
| public byte[] getPresetDict() { |
| return presetDict; |
| } |
| |
| /** |
| * Sets the number of literal context bits and literal position bits. |
| * <p> |
| * The sum of <code>lc</code> and <code>lp</code> is limited to 4. |
| * Trying to exceed it will throw an exception. This function lets |
| * you change both at the same time. |
| * |
| * @throws UnsupportedOptionsException |
| * <code>lc</code> and <code>lp</code> |
| * are invalid |
| */ |
| public void setLcLp(int lc, int lp) throws UnsupportedOptionsException { |
| if (lc < 0 || lp < 0 || lc > LC_LP_MAX || lp > LC_LP_MAX |
| || lc + lp > LC_LP_MAX) |
| throw new UnsupportedOptionsException( |
| "lc + lp must not exceed " + LC_LP_MAX + ": " |
| + lc + " + " + lp); |
| |
| this.lc = lc; |
| this.lp = lp; |
| } |
| |
| /** |
| * Sets the number of literal context bits. |
| * <p> |
| * All bytes that cannot be encoded as matches are encoded as literals. |
| * That is, literals are simply 8-bit bytes that are encoded one at |
| * a time. |
| * <p> |
| * The literal coding makes an assumption that the highest <code>lc</code> |
| * bits of the previous uncompressed byte correlate with the next byte. |
| * For example, in typical English text, an upper-case letter is often |
| * followed by a lower-case letter, and a lower-case letter is usually |
| * followed by another lower-case letter. In the US-ASCII character set, |
| * the highest three bits are 010 for upper-case letters and 011 for |
| * lower-case letters. When <code>lc</code> is at least 3, the literal |
| * coding can take advantage of this property in the uncompressed data. |
| * <p> |
| * The default value (3) is usually good. If you want maximum compression, |
| * try <code>setLc(4)</code>. Sometimes it helps a little, and sometimes it |
| * makes compression worse. If it makes it worse, test for example |
| * <code>setLc(2)</code> too. |
| * |
| * @throws UnsupportedOptionsException |
| * <code>lc</code> is invalid, or the sum |
| * of <code>lc</code> and <code>lp</code> |
| * exceed LC_LP_MAX |
| */ |
| public void setLc(int lc) throws UnsupportedOptionsException { |
| setLcLp(lc, lp); |
| } |
| |
| /** |
| * Sets the number of literal position bits. |
| * <p> |
| * This affets what kind of alignment in the uncompressed data is |
| * assumed when encoding literals. See {@link #setPb(int) setPb} for |
| * more information about alignment. |
| * |
| * @throws UnsupportedOptionsException |
| * <code>lp</code> is invalid, or the sum |
| * of <code>lc</code> and <code>lp</code> |
| * exceed LC_LP_MAX |
| */ |
| public void setLp(int lp) throws UnsupportedOptionsException { |
| setLcLp(lc, lp); |
| } |
| |
| /** |
| * Gets the number of literal context bits. |
| */ |
| public int getLc() { |
| return lc; |
| } |
| |
| /** |
| * Gets the number of literal position bits. |
| */ |
| public int getLp() { |
| return lp; |
| } |
| |
| /** |
| * Sets the number of position bits. |
| * <p> |
| * This affects what kind of alignment in the uncompressed data is |
| * assumed in general. The default (2) means four-byte alignment |
| * (2^<code>pb</code> = 2^2 = 4), which is often a good choice when |
| * there's no better guess. |
| * <p> |
| * When the alignment is known, setting the number of position bits |
| * accordingly may reduce the file size a little. For example with text |
| * files having one-byte alignment (US-ASCII, ISO-8859-*, UTF-8), using |
| * <code>setPb(0)</code> can improve compression slightly. For UTF-16 |
| * text, <code>setPb(1)</code> is a good choice. If the alignment is |
| * an odd number like 3 bytes, <code>setPb(0)</code> might be the best |
| * choice. |
| * <p> |
| * Even though the assumed alignment can be adjusted with |
| * <code>setPb</code> and <code>setLp</code>, LZMA2 still slightly favors |
| * 16-byte alignment. It might be worth taking into account when designing |
| * file formats that are likely to be often compressed with LZMA2. |
| * |
| * @throws UnsupportedOptionsException |
| * <code>pb</code> is invalid |
| */ |
| public void setPb(int pb) throws UnsupportedOptionsException { |
| if (pb < 0 || pb > PB_MAX) |
| throw new UnsupportedOptionsException( |
| "pb must not exceed " + PB_MAX + ": " + pb); |
| |
| this.pb = pb; |
| } |
| |
| /** |
| * Gets the number of position bits. |
| */ |
| public int getPb() { |
| return pb; |
| } |
| |
| /** |
| * Sets the compression mode. |
| * <p> |
| * This specifies the method to analyze the data produced by |
| * a match finder. The default is <code>MODE_FAST</code> for presets |
| * 0-3 and <code>MODE_NORMAL</code> for presets 4-9. |
| * <p> |
| * Usually <code>MODE_FAST</code> is used with Hash Chain match finders |
| * and <code>MODE_NORMAL</code> with Binary Tree match finders. This is |
| * also what the presets do. |
| * <p> |
| * The special mode <code>MODE_UNCOMPRESSED</code> doesn't try to |
| * compress the data at all (and doesn't use a match finder) and will |
| * simply wrap it in uncompressed LZMA2 chunks. |
| * |
| * @throws UnsupportedOptionsException |
| * <code>mode</code> is not supported |
| */ |
| public void setMode(int mode) throws UnsupportedOptionsException { |
| if (mode < MODE_UNCOMPRESSED || mode > MODE_NORMAL) |
| throw new UnsupportedOptionsException( |
| "Unsupported compression mode: " + mode); |
| |
| this.mode = mode; |
| } |
| |
| /** |
| * Gets the compression mode. |
| */ |
| public int getMode() { |
| return mode; |
| } |
| |
| /** |
| * Sets the nice length of matches. |
| * Once a match of at least <code>niceLen</code> bytes is found, |
| * the algorithm stops looking for better matches. Higher values tend |
| * to give better compression at the expense of speed. The default |
| * depends on the preset. |
| * |
| * @throws UnsupportedOptionsException |
| * <code>niceLen</code> is invalid |
| */ |
| public void setNiceLen(int niceLen) throws UnsupportedOptionsException { |
| if (niceLen < NICE_LEN_MIN) |
| throw new UnsupportedOptionsException( |
| "Minimum nice length of matches is " |
| + NICE_LEN_MIN + " bytes: " + niceLen); |
| |
| if (niceLen > NICE_LEN_MAX) |
| throw new UnsupportedOptionsException( |
| "Maximum nice length of matches is " + NICE_LEN_MAX |
| + ": " + niceLen); |
| |
| this.niceLen = niceLen; |
| } |
| |
| /** |
| * Gets the nice length of matches. |
| */ |
| public int getNiceLen() { |
| return niceLen; |
| } |
| |
| /** |
| * Sets the match finder type. |
| * <p> |
| * Match finder has a major effect on compression speed, memory usage, |
| * and compression ratio. Usually Hash Chain match finders are faster |
| * than Binary Tree match finders. The default depends on the preset: |
| * 0-3 use <code>MF_HC4</code> and 4-9 use <code>MF_BT4</code>. |
| * |
| * @throws UnsupportedOptionsException |
| * <code>mf</code> is not supported |
| */ |
| public void setMatchFinder(int mf) throws UnsupportedOptionsException { |
| if (mf != MF_HC4 && mf != MF_BT4) |
| throw new UnsupportedOptionsException( |
| "Unsupported match finder: " + mf); |
| |
| this.mf = mf; |
| } |
| |
| /** |
| * Gets the match finder type. |
| */ |
| public int getMatchFinder() { |
| return mf; |
| } |
| |
| /** |
| * Sets the match finder search depth limit. |
| * <p> |
| * The default is a special value of <code>0</code> which indicates that |
| * the depth limit should be automatically calculated by the selected |
| * match finder from the nice length of matches. |
| * <p> |
| * Reasonable depth limit for Hash Chain match finders is 4-100 and |
| * 16-1000 for Binary Tree match finders. Using very high values can |
| * make the compressor extremely slow with some files. Avoid settings |
| * higher than 1000 unless you are prepared to interrupt the compression |
| * in case it is taking far too long. |
| * |
| * @throws UnsupportedOptionsException |
| * <code>depthLimit</code> is invalid |
| */ |
| public void setDepthLimit(int depthLimit) |
| throws UnsupportedOptionsException { |
| if (depthLimit < 0) |
| throw new UnsupportedOptionsException( |
| "Depth limit cannot be negative: " + depthLimit); |
| |
| this.depthLimit = depthLimit; |
| } |
| |
| /** |
| * Gets the match finder search depth limit. |
| */ |
| public int getDepthLimit() { |
| return depthLimit; |
| } |
| |
| public int getEncoderMemoryUsage() { |
| return (mode == MODE_UNCOMPRESSED) |
| ? UncompressedLZMA2OutputStream.getMemoryUsage() |
| : LZMA2OutputStream.getMemoryUsage(this); |
| } |
| |
| public FinishableOutputStream getOutputStream(FinishableOutputStream out) { |
| if (mode == MODE_UNCOMPRESSED) |
| return new UncompressedLZMA2OutputStream(out); |
| |
| return new LZMA2OutputStream(out, this); |
| } |
| |
| /** |
| * Gets how much memory the LZMA2 decoder will need to decompress the data |
| * that was encoded with these options and stored in a .xz file. |
| * <p> |
| * The returned value may bigger than the value returned by a direct call |
| * to {@link LZMA2InputStream#getMemoryUsage(int)} if the dictionary size |
| * is not 2^n or 2^n + 2^(n-1) bytes. This is because the .xz |
| * headers store the dictionary size in such a format and other values |
| * are rounded up to the next such value. Such rounding is harmess except |
| * it might waste some memory if an unsual dictionary size is used. |
| * <p> |
| * If you use raw LZMA2 streams and unusual dictioanary size, call |
| * {@link LZMA2InputStream#getMemoryUsage} directly to get raw decoder |
| * memory requirements. |
| */ |
| public int getDecoderMemoryUsage() { |
| // Round the dictionary size up to the next 2^n or 2^n + 2^(n-1). |
| int d = dictSize - 1; |
| d |= d >>> 2; |
| d |= d >>> 3; |
| d |= d >>> 4; |
| d |= d >>> 8; |
| d |= d >>> 16; |
| return LZMA2InputStream.getMemoryUsage(d + 1); |
| } |
| |
| public InputStream getInputStream(InputStream in) throws IOException { |
| return new LZMA2InputStream(in, dictSize); |
| } |
| |
| FilterEncoder getFilterEncoder() { |
| return new LZMA2Encoder(this); |
| } |
| |
| public Object clone() { |
| try { |
| return super.clone(); |
| } catch (CloneNotSupportedException e) { |
| assert false; |
| throw new RuntimeException(); |
| } |
| } |
| } |