- 数据结构与算法
- DSA - 首页
- DSA - 概述
- DSA - 环境搭建
- DSA - 算法基础
- DSA - 渐进分析
- 数据结构
- DSA - 数据结构基础
- DSA - 数据结构和类型
- DSA - 数组数据结构
- 链表
- DSA - 链表数据结构
- DSA - 双向链表数据结构
- DSA - 循环链表数据结构
- 栈与队列
- DSA - 栈数据结构
- DSA - 表达式解析
- DSA - 队列数据结构
- 搜索算法
- DSA - 搜索算法
- DSA - 线性搜索算法
- DSA - 二分搜索算法
- DSA - 插值搜索
- DSA - 跳跃搜索算法
- DSA - 指数搜索
- DSA - 斐波那契搜索
- DSA - 子列表搜索
- DSA - 哈希表
- 排序算法
- DSA - 排序算法
- DSA - 冒泡排序算法
- DSA - 插入排序算法
- DSA - 选择排序算法
- DSA - 归并排序算法
- DSA - 希尔排序算法
- DSA - 堆排序
- DSA - 桶排序算法
- DSA - 计数排序算法
- DSA - 基数排序算法
- DSA - 快速排序算法
- 图数据结构
- DSA - 图数据结构
- DSA - 深度优先遍历
- DSA - 广度优先遍历
- DSA - 生成树
- 树数据结构
- DSA - 树数据结构
- DSA - 树的遍历
- DSA - 二叉搜索树
- DSA - AVL 树
- DSA - 红黑树
- DSA - B 树
- DSA - B+ 树
- DSA - 伸展树
- DSA - 字典树
- DSA - 堆数据结构
- 递归
- DSA - 递归算法
- DSA - 使用递归实现汉诺塔
- DSA - 使用递归实现斐波那契数列
- 分治法
- DSA - 分治法
- DSA - 最大最小问题
- DSA - Strassen 矩阵乘法
- DSA - Karatsuba 算法
- 贪心算法
- DSA - 贪心算法
- DSA - 旅行商问题(贪心算法)
- DSA - Prim 最小生成树
- DSA - Kruskal 最小生成树
- DSA - Dijkstra 最短路径算法
- DSA - 地图着色算法
- DSA - 分数背包问题
- DSA - 带截止日期的作业排序
- DSA - 最佳合并模式算法
- 动态规划
- DSA - 动态规划
- DSA - 矩阵链乘法
- DSA - Floyd-Warshall 算法
- DSA - 0-1 背包问题
- DSA - 最长公共子序列算法
- DSA - 旅行商问题(动态规划)
- 近似算法
- DSA - 近似算法
- DSA - 顶点覆盖算法
- DSA - 集合覆盖问题
- DSA - 旅行商问题(近似算法)
- 随机化算法
- DSA - 随机化算法
- DSA - 随机化快速排序算法
- DSA - Karger 最小割算法
- DSA - Fisher-Yates 洗牌算法
- DSA 有用资源
- DSA - 问答
- DSA - 快速指南
- DSA - 有用资源
- DSA - 讨论
Knuth-Morris-Pratt 算法
用于模式匹配的 KMP 算法
KMP 算法用于解决模式匹配问题,模式匹配问题是在文本中查找给定模式的所有出现位置的任务。在查找多个模式时非常有用。例如,如果文本是“aabbaaccaabbaadde”,模式是“aabaa”,则模式在文本中出现了两次,分别位于索引 0 和 8 处。
此问题的朴素解决方案是从最左边的位置开始,向右移动,将模式与文本的每个可能的子字符串进行比较。这需要 O(n*m) 时间,其中 'n' 是文本的长度,'m' 是模式的长度。
当我们处理长文本文档时,蛮力法和朴素方法可能会导致冗余比较。为了避免这种冗余,Knuth、Morris 和 Pratt 开发了一种线性序列匹配算法,称为 KMP 模式匹配算法。它也称为 Knuth Morris Pratt 模式匹配算法。
KMP 算法如何工作?
KMP 算法从左到右开始搜索操作。它使用前缀函数来避免在搜索模式时进行不必要的比较。此函数存储迄今为止匹配的字符数,称为LPS 值。KMP 算法涉及以下步骤:
定义前缀函数。
滑动模式以进行比较。
如果所有字符都匹配,则表示已找到匹配项。
如果没有匹配,则使用前缀函数跳过不必要的比较。如果与不匹配字符之前的字符的 LPS 值为“0”,则从模式的索引 0 开始与文本中的下一个字符进行比较。但是,如果 LPS 值大于“0”,则从等于之前不匹配字符的 LPS 值的索引值开始比较。
KMP 算法需要 O(n + m) 时间和 O(m) 空间。它比朴素解决方案更快,因为它跳过了冗余比较,并且每个文本字符最多只比较一次。
让我们通过一个示例了解模式匹配问题的输入输出场景:
Input: main String: "AAAABCAAAABCBAAAABC" pattern: "AAABC" Output: Pattern found at position: 1 Pattern found at position: 7 Pattern found at position: 14
示例
以下示例从实践上说明了用于模式匹配的 KMP 算法。
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
// function to find prefix
void prefixSearch(char* pat, int m, int* pps) {
int length = 0;
// array to store prefix
pps[0] = 0;
int i = 1;
while(i < m) {
// to check if the current character matches the previous character
if(pat[i] == pat[length]) {
// increment the length
length++;
// store the length in the prefix array
pps[i] = length;
}else {
if(length != 0) {
// to update length of previous prefix length
length = pps[length - 1];
i--;
} else
// if the length is 0, store 0 in the prefix array
pps[i] = 0;
}
i++; // incrementing i
}
}
// function to search for pattern
void patrnSearch(char* orgnString, char* patt, int m, int *locArray, int *loc) {
int n, i = 0, j = 0;
n = strlen(orgnString);
// array to store the prefix values
int* prefixArray = (int*)malloc(m * sizeof(int)); // allocate memory for the prefix array
// calling prefix function to fill the prefix array
prefixSearch(patt, m, prefixArray);
*loc = 0; // initialize the location index
while(i < n) {
// checking if main string character matches pattern string character
if(orgnString[i] == patt[j]) {
// increment both i and j
i++;
j++;
}
// if j and m are equal pattern is found
if(j == m) {
// store the location of the pattern
locArray[*loc] = i-j;
(*loc)++; // increment the location index
// update j to the previous prefix value
j = prefixArray[j-1];
// checking if i is less than n and the current characters do not match
}else if(i < n && patt[j] != orgnString[i]) {
if(j != 0)
// update j to the previous prefix value
j = prefixArray[j-1];
// if j is zero
else
i++; // increment i
}
}
free(prefixArray); // free the memory of the prefix array
}
int main() {
// declare the original text
char* orgnStr = "AAAABCAEAAABCBDDAAAABC";
// pattern to be found
char* patrn = "AAABC";
// get the size of the pattern
int m = strlen(patrn);
// array to store the locations of the pattern
int locationArray[strlen(orgnStr)];
// to store the number of locations
int index;
// calling pattern search function
patrnSearch(orgnStr, patrn, m, locationArray, &index);
// to loop through location array
for(int i = 0; i<index; i++) {
// print the location of the pattern
printf("Pattern found at location: %d\n", locationArray[i]);
}
}
#include<iostream>
using namespace std;
// function to find prefix
void prefixSearch(string pattern, int m, int storePrefx[]) {
int length = 0;
// array to store prefix
storePrefx[0] = 0;
int i = 1;
while(i < m) {
// to check if the current character matches the previous character
if(pattern[i] == pattern[length]) {
// increment the length
length++;
// store the length in the prefix array
storePrefx[i] = length;
}else {
if(length != 0) {
// to update length of previous prefix length
length = storePrefx[length - 1];
i--;
} else
// if the length is 0, store 0 in the prefix array
storePrefx[i] = 0;
}
i++; // incrementing i
}
}
// function to search for pattern
void patrnSearch(string orgnString, string patt, int *locArray, int &loc) {
int n, m, i = 0, j = 0;
n = orgnString.size();
m = patt.size();
// array to store the prefix values
int prefixArray[m];
// calling prefix function to fill the prefix array
prefixSearch(patt, m, prefixArray);
loc = 0; // initialize the location index
while(i < n) {
// checking if main string character matches pattern string character
if(orgnString[i] == patt[j]) {
// increment both i and j
i++;
j++;
}
// if j and m are equal pattern is found
if(j == m) {
// store the location of the pattern
locArray[loc] = i-j;
loc++; // increment the location index
// update j to the previous prefix value
j = prefixArray[j-1];
// checking if i is less than n and the current characters do not match
}else if(i < n && patt[j] != orgnString[i]) {
if(j != 0)
// update j to the previous prefix value
j = prefixArray[j-1];
// if j is zero
else
i++; // increment i
}
}
}
int main() {
// declare the original text
string orgnStr = "AAAABCAEAAABCBDDAAAABC";
// pattern to be found
string patrn = "AAABC";
// array to store the locations of the pattern
int locationArray[orgnStr.size()];
// to store the number of locations
int index;
// calling pattern search function
patrnSearch(orgnStr, patrn, locationArray, index);
// to loop through location array
for(int i = 0; i<index; i++) {
// print the location of the pattern
cout << "Pattern found at location: " <<locationArray[i] << endl;
}
}
import java.io.*;
// class to implement the KMP algorithm
public class KMPalgo {
// function to find prefix
public static void prefixSearch(String pat, int m, int[] storePrefx) {
int length = 0;
// array to store prefix
storePrefx[0] = 0;
int i = 1;
while (i < m) {
// to check if the current character matches the previous character
if (pat.charAt(i) == pat.charAt(length)) {
// increment the length
length++;
// store the length in the prefix array
storePrefx[i] = length;
} else {
if (length != 0) {
// to update length of previous prefix length
length = storePrefx[length - 1];
i--;
} else
// if the length is 0, store 0 in the prefix array
storePrefx[i] = 0;
}
i++; // incrementing i
}
}
// function to search for pattern
public static int patrnSearch(String orgnString, String patt, int[] locArray) {
int n, m, i = 0, j = 0;
n = orgnString.length();
m = patt.length();
// array to store the prefix values
int[] prefixArray = new int[m]; // allocate memory for the prefix array
// calling prefix function to fill the prefix array
prefixSearch(patt, m, prefixArray);
int loc = 0; // initialize the location index
while (i < n) {
// checking if main string character matches pattern string character
if (orgnString.charAt(i) == patt.charAt(j)) {
// increment both i and j
i++;
j++;
}
// if j and m are equal pattern is found
if (j == m) {
// store the location of the pattern
locArray[loc] = i - j;
loc++; // increment the location index
// update j to the previous prefix value
j = prefixArray[j - 1];
// checking if i is less than n and the current characters do not match
} else if (i < n && patt.charAt(j) != orgnString.charAt(i)) {
if (j != 0)
// update j to the previous prefix value
j = prefixArray[j - 1];
// if j is zero
else
i++; // increment i
}
}
return loc;
}
public static void main(String[] args) throws IOException {
// declare the original text
String orgnStr = "AAAABCAEAAABCBDDAAAABC";
// pattern to be found
String patrn = "AAABC";
// array to store the locations of the pattern
int[] locationArray = new int[orgnStr.length()];
// calling pattern search function
int index = patrnSearch(orgnStr, patrn, locationArray);
// to loop through location array
for (int i = 0; i < index; i++) {
// print the location of pattern
System.out.println("Pattern found at location: " + locationArray[i]);
}
}
}
# function to find prefix
def prefix_search(pattern, m, store_prefx):
length = 0
# array to store prefix
store_prefx[0] = 0
i = 1
while i < m:
# to check if the current character matches the previous character
if pattern[i] == pattern[length]:
# increment the length
length += 1
# store the length in the prefix array
store_prefx[i] = length
else:
if length != 0:
# to update length of previous prefix length
length = store_prefx[length - 1]
i -= 1
else:
# if the length is 0, store 0 in the prefix array
store_prefx[i] = 0
i += 1 # incrementing i
# function to search for pattern
def pattern_search(orgn_string, patt, loc_array):
n = len(orgn_string)
m = len(patt)
i = j = loc = 0
# array to store the prefix values
prefix_array = [0] * m
# calling prefix function to fill the prefix array
prefix_search(patt, m, prefix_array)
while i < n:
# checking if main string character matches pattern string character
if orgn_string[i] == patt[j]:
# increment both i and j
i += 1
j += 1
# if j and m are equal pattern is found
if j == m:
# store the location of the pattern
loc_array[loc] = i - j
loc += 1 # increment the location index
# update j to the previous prefix value
j = prefix_array[j - 1]
# checking if i is less than n and the current characters do not match
elif i < n and patt[j] != orgn_string[i]:
if j != 0:
# update j to the previous prefix value
j = prefix_array[j - 1]
else:
i += 1 # increment i
return loc
# main function
def main():
# declare the original text
orgn_str = "AAAABCAEAAABCBDDAAAABC"
# pattern to be found
patrn = "AAABC"
# array to store the locations of the pattern
location_array = [0] * len(orgn_str)
# calling pattern search function
index = pattern_search(orgn_str, patrn, location_array)
# to loop through location array
for i in range(index):
# print the location of the pattern
print("Pattern found at location:", location_array[i])
# call the main function
if __name__ == "__main__":
main()
输出
Pattern found at location: 1 Pattern found at location: 8 Pattern found at location: 17
广告