blob: d30badaa4163202e840e15a8f9d640bd84e66d7f [file] [log] [blame]
/*
* Copyright (C) 2010 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package java.util.regex;
import java.util.ArrayList;
import java.util.List;
/**
* Used to make {@code String.split} fast (and to help {@code Pattern.split} too).
* @hide
*/
public class Splitter {
// The RI allows regular expressions beginning with ] or }, but that's probably a bug.
private static final String METACHARACTERS = "\\?*+[](){}^$.|";
private Splitter() {
}
/**
* Returns a result equivalent to {@code s.split(separator, limit)} if it's able
* to compute it more cheaply than ICU, or null if the caller should fall back to
* using ICU.
*/
public static String[] fastSplit(String re, String input, int limit) {
// Can we do it cheaply?
int len = re.length();
if (len == 0) {
return null;
}
char ch = re.charAt(0);
if (len == 1 && METACHARACTERS.indexOf(ch) == -1) {
// We're looking for a single non-metacharacter. Easy.
} else if (len == 2 && ch == '\\') {
// We're looking for a quoted character.
// Quoted metacharacters are effectively single non-metacharacters.
ch = re.charAt(1);
if (METACHARACTERS.indexOf(ch) == -1) {
return null;
}
} else {
return null;
}
// We can do this cheaply...
// Unlike Perl, which considers the result of splitting the empty string to be the empty
// array, Java returns an array containing the empty string.
if (input.isEmpty()) {
return new String[] { "" };
}
// Collect text preceding each occurrence of the separator, while there's enough space.
ArrayList<String> list = new ArrayList<String>();
int maxSize = limit <= 0 ? Integer.MAX_VALUE : limit;
int begin = 0;
int end;
while ((end = input.indexOf(ch, begin)) != -1 && list.size() + 1 < maxSize) {
list.add(input.substring(begin, end));
begin = end + 1;
}
return finishSplit(list, input, begin, maxSize, limit);
}
public static String[] split(Pattern pattern, String re, String input, int limit) {
String[] fastResult = fastSplit(re, input, limit);
if (fastResult != null) {
return fastResult;
}
// Unlike Perl, which considers the result of splitting the empty string to be the empty
// array, Java returns an array containing the empty string.
if (input.isEmpty()) {
return new String[] { "" };
}
// Collect text preceding each occurrence of the separator, while there's enough space.
ArrayList<String> list = new ArrayList<String>();
int maxSize = limit <= 0 ? Integer.MAX_VALUE : limit;
Matcher matcher = new Matcher(pattern, input);
int begin = 0;
while (matcher.find() && list.size() + 1 < maxSize) {
list.add(input.substring(begin, matcher.start()));
begin = matcher.end();
}
return finishSplit(list, input, begin, maxSize, limit);
}
private static String[] finishSplit(List<String> list, String input, int begin, int maxSize, int limit) {
// Add trailing text.
if (begin < input.length()) {
list.add(input.substring(begin));
} else if (limit != 0) { // No point adding the empty string if limit == 0, just to remove it below.
list.add("");
}
// Remove all trailing empty matches in the limit == 0 case.
if (limit == 0) {
int i = list.size() - 1;
while (i >= 0 && list.get(i).isEmpty()) {
list.remove(i);
i--;
}
}
// Convert to an array.
return list.toArray(new String[list.size()]);
}
}