因为字符比较是从右往左比较的,所以第一层循环 needle.length + 1 <= i < haystack.length。
1 2 3 4 5 6 7 8 9 10 11 12 13
start=needle.length - 1 end=haystack.length - 1 + + | | | | v-------------------------------------------------------------------v-> 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ | t | h | i | s | | i | s | | a | | s | i | m | p | l | e | | e | x | a | m | p | l | e | +-------------------------------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ +---------------------------+ | e | x | a | m | p | l | e | +---+---+---+---+---+---+---+
第二层循环中i变量表示坏字符的位置、j表示搜索坏字符开始位置
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
i + | v 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ | t | h | i | s | | i | s | | a | | s | i | m | p | l | e | | e | x | a | m | p | l | e | +-------------------------------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ +---------------------------+ | e | x | a | m | p | l | e | +---+---+---+---+---+---+-+-+ ^ | + j
/** * Returns the index within this string of the first occurrence of the * specified substring. If it is not a substring, return -1. * * There is no Galil because it only generates one match. * * @param haystack The string to be scanned * @param needle The target string to search * @return The start index of the substring */ publicstaticintindexOf(char[] haystack, char[] needle) { if (needle.length == 0) { return0; } int charTable[] = makeCharTable(needle); int offsetTable[] = makeOffsetTable(needle); for (inti= needle.length - 1, j; i < haystack.length;) { for (j = needle.length - 1; needle[j] == haystack[i]; --i, --j) { if (j == 0) { return i; } } // i += needle.length - j; // For naive method i += Math.max(offsetTable[needle.length - 1 - j], charTable[haystack[i]]); } return -1; }
/** * Makes the jump table based on the mismatched character information. */ privatestaticint[] makeCharTable(char[] needle) { finalintALPHABET_SIZE= Character.MAX_VALUE + 1; // 65536 int[] table = newint[ALPHABET_SIZE]; for (inti=0; i < table.length; ++i) { table[i] = needle.length; } for (inti=0; i < needle.length - 2; ++i) { table[needle[i]] = needle.length - 1 - i; } return table; }
/** * Makes the jump table based on the scan offset which mismatch occurs. * (bad character rule). */ privatestaticint[] makeOffsetTable(char[] needle) { int[] table = newint[needle.length]; intlastPrefixPosition= needle.length; for (inti= needle.length; i > 0; --i) { if (isPrefix(needle, i)) { lastPrefixPosition = i; } table[needle.length - i] = lastPrefixPosition - i + needle.length; } for (inti=0; i < needle.length - 1; ++i) { intslen= suffixLength(needle, i); table[slen] = needle.length - 1 - i + slen; } return table; }
/** * Is needle[p:end] a prefix of needle? */ privatestaticbooleanisPrefix(char[] needle, int p) { for (inti= p, j = 0; i < needle.length; ++i, ++j) { if (needle[i] != needle[j]) { returnfalse; } } returntrue; }
/** * Returns the maximum length of the substring ends at p and is a suffix. * (good suffix rule) */ privatestaticintsuffixLength(char[] needle, int p) { intlen=0; for (inti= p, j = needle.length - 1; i >= 0 && needle[i] == needle[j]; --i, --j) { len += 1; } return len; }
e a b c b c f
+---+ +---+---+---+---+---+---+---+
a | 0 | | 1 | 0 | 0 | 0 | 0 | 0 | a
+-------+ +-----------------------+
b | 0 | 0 | | 2 | 0 | 1 | 0 | 0 | b
+-----------+ --------------------+
c | 0 | 0 | 0 | | 3 | 0 | 2 | 0 | c
+---------------+ ----------------+
d | 0 | 0 | 0 | 0 | | 0 | 0 | 0 | d
+-------------------+ +-----------+
e | 1 | 0 | 0 | 0 | 0 | | 0 | 0 | e
+---+---+---+---+---+-------+ +---+---+
e a b c b c f
+ e | +--+ e a b c b c f a | 1| +---+---+---+---+---+---+---+ +-----+ | 1 | 0 | 0 | 0 | 0 | 0 | a b | 0| 2| +-----------------------+ +--------+ | 2 | 0 | 1 | 0 | 0 | b c | 0| 0| 3| --------------------+ 翻 折 +-----------+ | 3 | 0 | 2 | 0 | c +------------> b | 0| 1| 0| 0| ----------------+ +--------------+ | 0 | 0 | 0 | d c | 0| 0| 2| 0| 0| +-----------+ +--------------+ | 0 | 0 | e f | 0| 0| 0| 0| 0| +---+---+ +--------------+ a b c d e
+ e | +--+ a | 1| +---+ +-----+ a | 0 | b | 0| 2| +-------+ +--------+ b | 0 | 0 | c | 0| 0| 3| +-----------+ +-----------+ c | 0 | 0 | 0 | b | 0| 1| 0| 0| +---------------+ +--------------+ d | 0 | 0 | 0 | 0 | c | 0| 0| 2| 0| 0| +-------------------+ +--------------+ e | 1 | 0 | 0 | 0 | 0 | f | 0| 0| 0| 0| 0| +---+---+---+---+---+------ +--------------+ e a b c b c f a b c d e