- 数据结构与算法
- DSA - 首页
- DSA - 概述
- DSA - 环境搭建
- DSA - 算法基础
- DSA - 渐进分析
- 数据结构
- DSA - 数据结构基础
- DSA - 数据结构和类型
- DSA - 数组数据结构
- 链表
- DSA - 链表数据结构
- DSA - 双向链表数据结构
- DSA - 循环链表数据结构
- 栈与队列
- DSA - 栈数据结构
- DSA - 表达式解析
- DSA - 队列数据结构
- 搜索算法
- DSA - 搜索算法
- DSA - 线性搜索算法
- DSA - 二分搜索算法
- DSA - 插值搜索
- DSA - 跳跃搜索算法
- DSA - 指数搜索
- DSA - 斐波那契搜索
- DSA - 子列表搜索
- DSA - 哈希表
- 排序算法
- DSA - 排序算法
- DSA - 冒泡排序算法
- DSA - 插入排序算法
- DSA - 选择排序算法
- DSA - 归并排序算法
- DSA - 希尔排序算法
- DSA - 堆排序
- DSA - 桶排序算法
- DSA - 计数排序算法
- DSA - 基数排序算法
- DSA - 快速排序算法
- 图数据结构
- DSA - 图数据结构
- DSA - 深度优先遍历
- DSA - 广度优先遍历
- DSA - 生成树
- 树数据结构
- DSA - 树数据结构
- DSA - 树的遍历
- DSA - 二叉搜索树
- DSA - AVL 树
- DSA - 红黑树
- DSA - B 树
- DSA - B+ 树
- DSA - 伸展树
- DSA - 字典树
- DSA - 堆数据结构
- 递归
- DSA - 递归算法
- DSA - 使用递归实现汉诺塔
- DSA - 使用递归实现斐波那契数列
- 分治法
- DSA - 分治法
- DSA - 最大最小问题
- DSA - Strassen 矩阵乘法
- DSA - Karatsuba 算法
- 贪心算法
- DSA - 贪心算法
- DSA - 旅行商问题(贪心算法)
- DSA - Prim 最小生成树
- DSA - Kruskal 最小生成树
- DSA - Dijkstra 最短路径算法
- DSA - 地图着色算法
- DSA - 分数背包问题
- DSA - 带截止日期的作业排序
- DSA - 最优合并模式算法
- 动态规划
- DSA - 动态规划
- DSA - 矩阵链乘法
- DSA - Floyd-Warshall 算法
- DSA - 0-1 背包问题
- DSA - 最长公共子序列算法
- DSA - 旅行商问题(动态规划)
- 近似算法
- DSA - 近似算法
- DSA - 顶点覆盖算法
- DSA - 集合覆盖问题
- DSA - 旅行商问题(近似算法)
- 随机化算法
- DSA - 随机化算法
- DSA - 随机化快速排序算法
- DSA - Karger 最小割算法
- DSA - Fisher-Yates 洗牌算法
- DSA 有用资源
- DSA - 问答
- DSA - 快速指南
- DSA - 有用资源
- DSA - 讨论
Kasai 算法
Kasai 算法 用于根据给定的后缀数组和文本构建最长公共前缀 (LCP) 数组。构建 LCP 数组后,可以有效地在给定文本中搜索模式。我们已经讨论了几种可以有效解决模式匹配问题的算法,包括 KMP 算法、Boyer-Moore 算法和 Rabin-Karp 算法。在本教程中,我们将探讨 Kasai 算法。
Kasai 算法的工作原理?
要理解Kasai 算法,我们首先需要学习该算法的两个核心概念:
后缀数组 - 这是一个数组,按字典顺序存储给定文本中所有后缀的起始索引。
LCP 数组 - 顾名思义,两个字符串的最长公共前缀 (简称 LCP) 是最长的既是两个字符串前缀的字符串。
Kasai 算法基于以下观察结果:
如果从位置i 和j 开始的两个后缀的 LCP 为k,则从i+1 和j+1 开始的后缀的 LCP 至少为k-1,除非其中一个是后缀数组中的最后一个后缀。这是因为在移除第一个字符后,后缀中字符的相对顺序保持不变,除非它们到达文本的结尾。因此,我们可以利用此属性在后缀数组的线性扫描中计算 LCP 值,从第一个后缀开始,并将当前 LCP 值保存在变量k 中。
每当我们移动到下一个后缀对时,我们将k减 1,然后只要位置i+k 和j+k 处的字符匹配,就递增k。为了找到下一个后缀对,我们使用一个反向数组,它将每个后缀索引映射到其在后缀数组中的位置。
让我们考虑 Kasai 算法的输入输出场景:
Input: string: "AABAAABCEDBABCDDEBC" Output: Suffix Array: 0 1 9 3 8 2 14 10 4 11 5 15 7 12 13 6 Common Prefix Array: 1 2 3 0 4 1 2 2 0 1 1 1 1 0 1 0
示例
以下示例在不同的编程语言中实际演示了 Kasai 算法。
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
// Defining a structure to represent suffix
struct suffixes {
int index;
int rank[2];
};
// function to compare two suffixes
int compare(const void* a, const void* b) {
struct suffixes* suf1 = (struct suffixes*)a;
struct suffixes* suf2 = (struct suffixes*)b;
// If first rank is same
if(suf1->rank[0] == suf2->rank[0]) {
// Compare second rank
return (suf1->rank[1] < suf2->rank[1]) ? -1 : 1;
}else {
return (suf1->rank[0] < suf2->rank[0]) ? -1 : 1;
}
}
// function to build suffix array
int* createSuffArray(char* orgnlString, int n) {
struct suffixes suffArray[n];
for (int i = 0; i < n; i++) {
suffArray[i].index = i;
// Rank based on character itself
suffArray[i].rank[0] = orgnlString[i] - 'a';
// Next rank is next character
suffArray[i].rank[1] = ((i+1)<n)?(orgnlString[i+1]-'a'):-1;
}
// Sorting the suffixes
qsort(suffArray, n, sizeof(struct suffixes), compare);
int index[n];
for (int k = 4; k < 2*n; k = k*2) {
int currRank = 0;
int prevRank = suffArray[0].rank[0];
suffArray[0].rank[0] = currRank;
index[suffArray[0].index] = 0;
// to assign rank and index values to first suffix
for (int i = 1; i < n; i++) {
if (suffArray[i].rank[0] == prevRank && suffArray[i].rank[1] == suffArray[i-1].rank[1]) {
prevRank = suffArray[i].rank[0];
// If same as previous rank, assign the same new rank
suffArray[i].rank[0] = currRank;
} else{
prevRank = suffArray[i].rank[0];
// increment rank and assign
suffArray[i].rank[0] = ++currRank;
}
index[suffArray[i].index] = i;
}
for (int i = 0; i < n; i++) {
int nextIndex = suffArray[i].index + k/2;
suffArray[i].rank[1] = (nextIndex < n)? suffArray[index[nextIndex]].rank[0]: -1;
}
qsort(suffArray, n, sizeof(struct suffixes), compare);
}
// to store indexes of all sorted suffixes
int* suffixVector = (int*)malloc(n * sizeof(int));
for (int i = 0; i < n; i++)
suffixVector[i] = suffArray[i].index;
return suffixVector;
}
// applying Kasai's algorithm to build LCP array
int* kasaiAlgorithm(char* orgnlString, int* suffixVector, int n) {
// To store lcp array
int* longPrefix = (int*)malloc(n * sizeof(int));
// To store inverse of suffix array elements
int* suffixInverse = (int*)malloc(n * sizeof(int));
// to fill values in suffixInverse[] array
for (int i=0; i < n; i++)
suffixInverse[suffixVector[i]] = i;
int k = 0;
for (int i=0; i<n; i++) {
if (suffixInverse[i] == n-1) {
k = 0;
continue;
}
int j = suffixVector[suffixInverse[i]+1];
while (i+k<n && j+k<n && orgnlString[i+k]==orgnlString[j+k])
k++;
longPrefix[suffixInverse[i]] = k;
if (k>0)
k--;
}
return longPrefix;
}
void displayArray(int* vec, int n) {
for (int i = 0; i < n; i++)
printf("%d ", vec[i]);
printf("\n");
}
int main() {
char orgnlString[] = "AAABCAEAAABCBDDAAAABC";
int n = strlen(orgnlString);
int* suffArray = createSuffArray(orgnlString, n);
printf("Suffix Array is: \n");
displayArray(suffArray, n);
// calling function to build LCP array
int* prefixCommn = kasaiAlgorithm(orgnlString, suffArray, n);
// Print the LCP array
printf("Common Prefix Array is: \n");
displayArray(prefixCommn, n);
return 0;
}
#include<iostream>
#include<vector>
#include<algorithm>
using namespace std;
// Defining a structure to represent suffix
struct suffixes {
int index;
int rank[2];
};
// function to compare two suffixes
bool compare(suffixes suf1, suffixes suf2) {
// If first rank is same
if(suf1.rank[0] == suf2.rank[0]) {
// Compare second rank
if(suf1.rank[1] < suf2.rank[1])
return true;
else
return false;
}else {
if(suf1.rank[0] < suf2.rank[0])
return true;
else
return false;
}
}
// function to build suffix array
vector<int> createSuffArray(string orgnlString) {
int n = orgnlString.size();
suffixes suffArray[n];
for (int i = 0; i < n; i++) {
suffArray[i].index = i;
// Rank based on character itself
suffArray[i].rank[0] = orgnlString[i] - 'a';
// Next rank is next character
suffArray[i].rank[1] = ((i+1)<n)?(orgnlString[i+1]-'a'):-1;
}
// Sorting the suffixes
sort(suffArray, suffArray+n, compare);
int index[n];
for (int k = 4; k < 2*n; k = k*2) {
int currRank = 0;
int prevRank = suffArray[0].rank[0];
suffArray[0].rank[0] = currRank;
index[suffArray[0].index] = 0;
// to assign rank and index values to first suffix
for (int i = 1; i < n; i++) {
if (suffArray[i].rank[0] == prevRank && suffArray[i].rank[1] == suffArray[i-1].rank[1]) {
prevRank = suffArray[i].rank[0];
// If same as previous rank, assign the same new rank
suffArray[i].rank[0] = currRank;
} else{
prevRank = suffArray[i].rank[0];
// increment rank and assign
suffArray[i].rank[0] = ++currRank;
}
index[suffArray[i].index] = i;
}
for (int i = 0; i < n; i++) {
int nextIndex = suffArray[i].index + k/2;
suffArray[i].rank[1] = (nextIndex < n)? suffArray[index[nextIndex]].rank[0]: -1;
}
sort(suffArray, suffArray+n, compare);
}
// to store indexes of all sorted suffixes
vector<int>suffixVector;
for (int i = 0; i < n; i++)
suffixVector.push_back(suffArray[i].index);
return suffixVector;
}
// applying Kasai's algorithm to build LCP array
vector<int> kasaiAlgorithm(string orgnlString, vector<int> suffixVector) {
int n = suffixVector.size();
// To store lcp array
vector<int> longPrefix(n, 0);
// To store inverse of suffix array elements
vector<int> suffixInverse(n, 0);
// to fill values in suffixInverse[] array
for (int i=0; i < n; i++)
suffixInverse[suffixVector[i]] = i;
int k = 0;
for (int i=0; i<n; i++) {
if (suffixInverse[i] == n-1) {
k = 0;
continue;
}
int j = suffixVector[suffixInverse[i]+1];
while (i+k<n && j+k<n && orgnlString[i+k]==orgnlString[j+k])
k++;
longPrefix[suffixInverse[i]] = k;
if (k>0)
k--;
}
return longPrefix;
}
void displayArray(vector<int> vec) {
vector<int>::iterator it;
for (it = vec.begin(); it < vec.end() ; it++)
cout << *it << " ";
cout << endl;
}
int main() {
string orgnlString = "AAABCAEAAABCBDDAAAABC";
vector<int>suffArray = createSuffArray(orgnlString);
int n = suffArray.size();
cout<< "Suffix Array is: "<<endl;
displayArray(suffArray);
// calling function to build LCP array
vector<int>prefixCommn = kasaiAlgorithm(orgnlString, suffArray);
// Print the LCP array
cout<< "Common Prefix Array is: "<<endl;
displayArray(prefixCommn);
}
import java.util.Arrays;
public class Main {
// Defining a class to represent suffix
static class suffixes {
int index;
int[] rank = new int[2];
}
// method to compare two suffixes
static int compare(suffixes suf1, suffixes suf2) {
// If first rank is same
if (suf1.rank[0] == suf2.rank[0]) {
// Compare second rank
if (suf1.rank[1] < suf2.rank[1])
return -1;
else
return 1;
} else {
if (suf1.rank[0] < suf2.rank[0])
return -1;
else
return 1;
}
}
// method to build suffix array
static int[] createSuffArray(String orgnlString) {
int n = orgnlString.length();
suffixes[] suffArray = new suffixes[n];
for (int i = 0; i < n; i++) {
suffArray[i] = new suffixes();
suffArray[i].index = i;
// Rank based on character itself
suffArray[i].rank[0] = orgnlString.charAt(i) - 'a';
// Next rank is next character
suffArray[i].rank[1] = ((i + 1) < n) ? (orgnlString.charAt(i + 1) - 'a') : -1;
}
// Sorting the suffixes
Arrays.sort(suffArray, Main::compare);
int[] index = new int[n];
for (int k = 4; k < 2 * n; k = k * 2) {
int currRank = 0;
int prevRank = suffArray[0].rank[0];
suffArray[0].rank[0] = currRank;
index[suffArray[0].index] = 0;
// to assign rank and index values to first suffix
for (int i = 1; i < n; i++) {
if (suffArray[i].rank[0] == prevRank && suffArray[i].rank[1] == suffArray[i - 1].rank[1]) {
prevRank = suffArray[i].rank[0];
// If same as previous rank, assign the same new rank
suffArray[i].rank[0] = currRank;
} else {
prevRank = suffArray[i].rank[0];
// increment rank and assign
suffArray[i].rank[0] = ++currRank;
}
index[suffArray[i].index] = i;
}
for (int i = 0; i < n; i++) {
int nextIndex = suffArray[i].index + k / 2;
suffArray[i].rank[1] = (nextIndex < n) ? suffArray[index[nextIndex]].rank[0] : -1;
}
Arrays.sort(suffArray, Main::compare);
}
// to store indexes of all sorted suffixes
int[] suffixVector = new int[n];
for (int i = 0; i < n; i++)
suffixVector[i] = suffArray[i].index;
return suffixVector;
}
// applying Kasai's algorithm to build LCP array
static int[] kasaiAlgorithm(String orgnlString, int[] suffixVector) {
int n = suffixVector.length;
// To store lcp array
int[] longPrefix = new int[n];
// To store inverse of suffix array elements
int[] suffixInverse = new int[n];
// to fill values in suffixInverse[] array
for (int i = 0; i < n; i++)
suffixInverse[suffixVector[i]] = i;
int k = 0;
for (int i = 0; i < n; i++) {
if (suffixInverse[i] == n - 1) {
k = 0;
continue;
}
int j = suffixVector[suffixInverse[i] + 1];
while (i + k < n && j + k < n && orgnlString.charAt(i + k) == orgnlString.charAt(j + k))
k++;
longPrefix[suffixInverse[i]] = k;
if (k > 0)
k--;
}
return longPrefix;
}
static void displayArray(int[] vec) {
for (int i : vec)
System.out.print(i + " ");
System.out.println();
}
public static void main(String[] args) {
String orgnlString = "AAABCAEAAABCBDDAAAABC";
int[] suffArray = createSuffArray(orgnlString);
System.out.println("Suffix Array is: ");
displayArray(suffArray);
// calling method to build LCP array
int[] prefixCommn = kasaiAlgorithm(orgnlString, suffArray);
// Print the LCP array
System.out.println("Common Prefix Array is: ");
displayArray(prefixCommn);
}
}
# Defining a class to represent suffix
class Suffix:
def __init__(self):
self.index = 0
self.rank = [0, 0]
# function to compare two suffixes
def compare(a, b):
if a.rank[0] == b.rank[0]:
if a.rank[1] < b.rank[1]:
return -1
else:
return 1
else:
if a.rank[0] < b.rank[0]:
return -1
else:
return 1
# function to build suffix array
def createSuffArray(orgnlString):
n = len(orgnlString)
suffArray = [Suffix() for _ in range(n)]
for i in range(n):
suffArray[i].index = i
suffArray[i].rank[0] = ord(orgnlString[i]) - ord('a')
suffArray[i].rank[1] = ord(orgnlString[i + 1]) - ord('a') if ((i + 1) < n) else -1
suffArray = sorted(suffArray, key=lambda x: (x.rank[0], x.rank[1]))
ind = [0]*n
k = 4
while k < 2*n:
rank = 0
prev_rank = suffArray[0].rank[0]
suffArray[0].rank[0] = rank
ind[suffArray[0].index] = 0
for i in range(1, n):
if suffArray[i].rank[0] == prev_rank and suffArray[i].rank[1] == suffArray[i - 1].rank[1]:
prev_rank = suffArray[i].rank[0]
suffArray[i].rank[0] = rank
else:
prev_rank = suffArray[i].rank[0]
rank += 1
suffArray[i].rank[0] = rank
ind[suffArray[i].index] = i
for i in range(n):
nextIndex = suffArray[i].index + k//2
suffArray[i].rank[1] = suffArray[ind[nextIndex]].rank[0] if (nextIndex < n) else -1
suffArray = sorted(suffArray, key=lambda x: (x.rank[0], x.rank[1]))
k *= 2
suffixVector = [0]*n
for i in range(n):
suffixVector[i] = suffArray[i].index
return suffixVector
# applying Kasai's algorithm to build LCP array
def kasaiAlgorithm(orgnlString, suffixVector):
n = len(suffixVector)
longPrefix = [0]*n
suffixInverse = [0]*n
for i in range(n):
suffixInverse[suffixVector[i]] = i
k = 0
for i in range(n):
if suffixInverse[i] == n - 1:
k = 0
continue
j = suffixVector[suffixInverse[i] + 1]
while i + k < n and j + k < n and orgnlString[i + k] == orgnlString[j + k]:
k += 1
longPrefix[suffixInverse[i]] = k
if k > 0:
k -= 1
return longPrefix
# Function to print an array
def displayArray(vec):
for i in vec:
print(i, end=' ')
print()
def main():
orgnlString = "AAABCAEAAABCBDDAAAABC"
suffArray = createSuffArray(orgnlString)
print("Suffix Array is: ")
displayArray(suffArray)
prefixCommn = kasaiAlgorithm(orgnlString, suffArray)
print("Common Prefix Array is: ")
displayArray(prefixCommn)
if __name__ == "__main__":
main()
输出
Suffix Array is: 15 0 7 16 17 1 8 2 9 18 5 19 3 10 12 4 11 20 14 13 6 Common Prefix Array is: 3 5 5 2 4 4 4 3 3 3 0 2 2 2 0 1 1 1 1 0 0
广告