- 数据结构与算法
- DSA - 首页
- DSA - 概述
- DSA - 环境搭建
- DSA - 算法基础
- DSA - 渐进分析
- 数据结构
- DSA - 数据结构基础
- DSA - 数据结构和类型
- DSA - 数组数据结构
- 链表
- DSA - 链表数据结构
- DSA - 双向链表数据结构
- DSA - 循环链表数据结构
- 栈与队列
- DSA - 栈数据结构
- DSA - 表达式解析
- DSA - 队列数据结构
- 搜索算法
- DSA - 搜索算法
- DSA - 线性搜索算法
- DSA - 二分搜索算法
- DSA - 插值搜索
- DSA - 跳跃搜索算法
- DSA - 指数搜索
- DSA - 斐波那契搜索
- DSA - 子列表搜索
- DSA - 哈希表
- 排序算法
- DSA - 排序算法
- DSA - 冒泡排序算法
- DSA - 插入排序算法
- DSA - 选择排序算法
- DSA - 归并排序算法
- DSA - 希尔排序算法
- DSA - 堆排序
- DSA - 桶排序算法
- DSA - 计数排序算法
- DSA - 基数排序算法
- DSA - 快速排序算法
- 图数据结构
- DSA - 图数据结构
- DSA - 深度优先遍历
- DSA - 广度优先遍历
- DSA - 生成树
- 树数据结构
- DSA - 树数据结构
- DSA - 树的遍历
- DSA - 二叉搜索树
- DSA - AVL 树
- DSA - 红黑树
- DSA - B 树
- DSA - B+ 树
- DSA - 伸展树
- DSA - 字典树
- DSA - 堆数据结构
- 递归
- DSA - 递归算法
- DSA - 使用递归实现汉诺塔
- DSA - 使用递归实现斐波那契数列
- 分治法
- DSA - 分治法
- DSA - 最大最小问题
- DSA - Strassen 矩阵乘法
- DSA - Karatsuba 算法
- 贪心算法
- DSA - 贪心算法
- DSA - 旅行商问题(贪心算法)
- DSA - Prim 最小生成树
- DSA - Kruskal 最小生成树
- DSA - Dijkstra 最短路径算法
- DSA - 地图着色算法
- DSA - 分数背包问题
- DSA - 带截止日期的作业排序
- DSA - 最佳合并模式算法
- 动态规划
- DSA - 动态规划
- DSA - 矩阵链乘法
- DSA - Floyd-Warshall 算法
- DSA - 0-1 背包问题
- DSA - 最长公共子序列算法
- DSA - 旅行商问题(动态规划)
- 近似算法
- DSA - 近似算法
- DSA - 顶点覆盖算法
- DSA - 集合覆盖问题
- DSA - 旅行商问题(近似算法)
- 随机化算法
- DSA - 随机化算法
- DSA - 随机化快速排序算法
- DSA - Karger 最小割算法
- DSA - Fisher-Yates 洗牌算法
- DSA 有用资源
- DSA - 问答
- DSA - 快速指南
- DSA - 有用资源
- DSA - 讨论
Knuth-Morris-Pratt 算法
用于模式匹配的 KMP 算法
KMP 算法用于解决模式匹配问题,模式匹配问题是在文本中查找给定模式的所有出现位置的任务。在查找多个模式时非常有用。例如,如果文本是“aabbaaccaabbaadde”,模式是“aabaa”,则模式在文本中出现了两次,分别位于索引 0 和 8 处。
此问题的朴素解决方案是从最左边的位置开始,向右移动,将模式与文本的每个可能的子字符串进行比较。这需要 O(n*m) 时间,其中 'n' 是文本的长度,'m' 是模式的长度。
当我们处理长文本文档时,蛮力法和朴素方法可能会导致冗余比较。为了避免这种冗余,Knuth、Morris 和 Pratt 开发了一种线性序列匹配算法,称为 KMP 模式匹配算法。它也称为 Knuth Morris Pratt 模式匹配算法。
KMP 算法如何工作?
KMP 算法从左到右开始搜索操作。它使用前缀函数来避免在搜索模式时进行不必要的比较。此函数存储迄今为止匹配的字符数,称为LPS 值。KMP 算法涉及以下步骤:
定义前缀函数。
滑动模式以进行比较。
如果所有字符都匹配,则表示已找到匹配项。
如果没有匹配,则使用前缀函数跳过不必要的比较。如果与不匹配字符之前的字符的 LPS 值为“0”,则从模式的索引 0 开始与文本中的下一个字符进行比较。但是,如果 LPS 值大于“0”,则从等于之前不匹配字符的 LPS 值的索引值开始比较。
KMP 算法需要 O(n + m) 时间和 O(m) 空间。它比朴素解决方案更快,因为它跳过了冗余比较,并且每个文本字符最多只比较一次。
让我们通过一个示例了解模式匹配问题的输入输出场景:
Input: main String: "AAAABCAAAABCBAAAABC" pattern: "AAABC" Output: Pattern found at position: 1 Pattern found at position: 7 Pattern found at position: 14
示例
以下示例从实践上说明了用于模式匹配的 KMP 算法。
#include<stdio.h> #include<stdlib.h> #include<string.h> // function to find prefix void prefixSearch(char* pat, int m, int* pps) { int length = 0; // array to store prefix pps[0] = 0; int i = 1; while(i < m) { // to check if the current character matches the previous character if(pat[i] == pat[length]) { // increment the length length++; // store the length in the prefix array pps[i] = length; }else { if(length != 0) { // to update length of previous prefix length length = pps[length - 1]; i--; } else // if the length is 0, store 0 in the prefix array pps[i] = 0; } i++; // incrementing i } } // function to search for pattern void patrnSearch(char* orgnString, char* patt, int m, int *locArray, int *loc) { int n, i = 0, j = 0; n = strlen(orgnString); // array to store the prefix values int* prefixArray = (int*)malloc(m * sizeof(int)); // allocate memory for the prefix array // calling prefix function to fill the prefix array prefixSearch(patt, m, prefixArray); *loc = 0; // initialize the location index while(i < n) { // checking if main string character matches pattern string character if(orgnString[i] == patt[j]) { // increment both i and j i++; j++; } // if j and m are equal pattern is found if(j == m) { // store the location of the pattern locArray[*loc] = i-j; (*loc)++; // increment the location index // update j to the previous prefix value j = prefixArray[j-1]; // checking if i is less than n and the current characters do not match }else if(i < n && patt[j] != orgnString[i]) { if(j != 0) // update j to the previous prefix value j = prefixArray[j-1]; // if j is zero else i++; // increment i } } free(prefixArray); // free the memory of the prefix array } int main() { // declare the original text char* orgnStr = "AAAABCAEAAABCBDDAAAABC"; // pattern to be found char* patrn = "AAABC"; // get the size of the pattern int m = strlen(patrn); // array to store the locations of the pattern int locationArray[strlen(orgnStr)]; // to store the number of locations int index; // calling pattern search function patrnSearch(orgnStr, patrn, m, locationArray, &index); // to loop through location array for(int i = 0; i<index; i++) { // print the location of the pattern printf("Pattern found at location: %d\n", locationArray[i]); } }
#include<iostream> using namespace std; // function to find prefix void prefixSearch(string pattern, int m, int storePrefx[]) { int length = 0; // array to store prefix storePrefx[0] = 0; int i = 1; while(i < m) { // to check if the current character matches the previous character if(pattern[i] == pattern[length]) { // increment the length length++; // store the length in the prefix array storePrefx[i] = length; }else { if(length != 0) { // to update length of previous prefix length length = storePrefx[length - 1]; i--; } else // if the length is 0, store 0 in the prefix array storePrefx[i] = 0; } i++; // incrementing i } } // function to search for pattern void patrnSearch(string orgnString, string patt, int *locArray, int &loc) { int n, m, i = 0, j = 0; n = orgnString.size(); m = patt.size(); // array to store the prefix values int prefixArray[m]; // calling prefix function to fill the prefix array prefixSearch(patt, m, prefixArray); loc = 0; // initialize the location index while(i < n) { // checking if main string character matches pattern string character if(orgnString[i] == patt[j]) { // increment both i and j i++; j++; } // if j and m are equal pattern is found if(j == m) { // store the location of the pattern locArray[loc] = i-j; loc++; // increment the location index // update j to the previous prefix value j = prefixArray[j-1]; // checking if i is less than n and the current characters do not match }else if(i < n && patt[j] != orgnString[i]) { if(j != 0) // update j to the previous prefix value j = prefixArray[j-1]; // if j is zero else i++; // increment i } } } int main() { // declare the original text string orgnStr = "AAAABCAEAAABCBDDAAAABC"; // pattern to be found string patrn = "AAABC"; // array to store the locations of the pattern int locationArray[orgnStr.size()]; // to store the number of locations int index; // calling pattern search function patrnSearch(orgnStr, patrn, locationArray, index); // to loop through location array for(int i = 0; i<index; i++) { // print the location of the pattern cout << "Pattern found at location: " <<locationArray[i] << endl; } }
import java.io.*; // class to implement the KMP algorithm public class KMPalgo { // function to find prefix public static void prefixSearch(String pat, int m, int[] storePrefx) { int length = 0; // array to store prefix storePrefx[0] = 0; int i = 1; while (i < m) { // to check if the current character matches the previous character if (pat.charAt(i) == pat.charAt(length)) { // increment the length length++; // store the length in the prefix array storePrefx[i] = length; } else { if (length != 0) { // to update length of previous prefix length length = storePrefx[length - 1]; i--; } else // if the length is 0, store 0 in the prefix array storePrefx[i] = 0; } i++; // incrementing i } } // function to search for pattern public static int patrnSearch(String orgnString, String patt, int[] locArray) { int n, m, i = 0, j = 0; n = orgnString.length(); m = patt.length(); // array to store the prefix values int[] prefixArray = new int[m]; // allocate memory for the prefix array // calling prefix function to fill the prefix array prefixSearch(patt, m, prefixArray); int loc = 0; // initialize the location index while (i < n) { // checking if main string character matches pattern string character if (orgnString.charAt(i) == patt.charAt(j)) { // increment both i and j i++; j++; } // if j and m are equal pattern is found if (j == m) { // store the location of the pattern locArray[loc] = i - j; loc++; // increment the location index // update j to the previous prefix value j = prefixArray[j - 1]; // checking if i is less than n and the current characters do not match } else if (i < n && patt.charAt(j) != orgnString.charAt(i)) { if (j != 0) // update j to the previous prefix value j = prefixArray[j - 1]; // if j is zero else i++; // increment i } } return loc; } public static void main(String[] args) throws IOException { // declare the original text String orgnStr = "AAAABCAEAAABCBDDAAAABC"; // pattern to be found String patrn = "AAABC"; // array to store the locations of the pattern int[] locationArray = new int[orgnStr.length()]; // calling pattern search function int index = patrnSearch(orgnStr, patrn, locationArray); // to loop through location array for (int i = 0; i < index; i++) { // print the location of pattern System.out.println("Pattern found at location: " + locationArray[i]); } } }
# function to find prefix def prefix_search(pattern, m, store_prefx): length = 0 # array to store prefix store_prefx[0] = 0 i = 1 while i < m: # to check if the current character matches the previous character if pattern[i] == pattern[length]: # increment the length length += 1 # store the length in the prefix array store_prefx[i] = length else: if length != 0: # to update length of previous prefix length length = store_prefx[length - 1] i -= 1 else: # if the length is 0, store 0 in the prefix array store_prefx[i] = 0 i += 1 # incrementing i # function to search for pattern def pattern_search(orgn_string, patt, loc_array): n = len(orgn_string) m = len(patt) i = j = loc = 0 # array to store the prefix values prefix_array = [0] * m # calling prefix function to fill the prefix array prefix_search(patt, m, prefix_array) while i < n: # checking if main string character matches pattern string character if orgn_string[i] == patt[j]: # increment both i and j i += 1 j += 1 # if j and m are equal pattern is found if j == m: # store the location of the pattern loc_array[loc] = i - j loc += 1 # increment the location index # update j to the previous prefix value j = prefix_array[j - 1] # checking if i is less than n and the current characters do not match elif i < n and patt[j] != orgn_string[i]: if j != 0: # update j to the previous prefix value j = prefix_array[j - 1] else: i += 1 # increment i return loc # main function def main(): # declare the original text orgn_str = "AAAABCAEAAABCBDDAAAABC" # pattern to be found patrn = "AAABC" # array to store the locations of the pattern location_array = [0] * len(orgn_str) # calling pattern search function index = pattern_search(orgn_str, patrn, location_array) # to loop through location array for i in range(index): # print the location of the pattern print("Pattern found at location:", location_array[i]) # call the main function if __name__ == "__main__": main()
输出
Pattern found at location: 1 Pattern found at location: 8 Pattern found at location: 17
广告