Z算法



用于模式匹配的Z算法

Z算法是一种线性时间字符串匹配算法,用于在字符串中搜索给定模式。其目的是搜索字符串中给定模式的所有出现。Z算法依赖于Z数组来查找模式出现。Z数组是一个整数数组,存储模式与文本任何子字符串之间最长公共前缀的长度。它与字符串的长度相同。

Z算法如何工作?

Z算法通过构建一个名为Z数组的辅助数组来工作,该数组存储给定文本与文本任何子字符串之间最长公共前缀的长度。此数组中的每个索引都存储匹配字符的数量,从第0个索引到当前索引。

Z算法需要以下步骤:

  • 首先,将模式和给定字符串合并在一起。我们还需要在两者之间添加一个特殊字符,该字符不在任何指定的字符串中。假设我们使用美元符号($)作为特殊字符。

  • 然后,为这个新创建的字符串构建Z数组。

  • 现在,检查Z数组的每个索引以查找其值是否与正在搜索的模式的长度匹配。如果值和长度匹配,则将模式标记为已找到。

  • 在最后一步中,从模式长度+1中减去索引号,这将导致模式的索引。

下图说明了上述方法:

Z-Algorithm

让我们了解输入输出场景:

Input:
Main String: "ABAAABCDBBABCDDEBCABC" 
Pattern: "ABC"
Output:
Pattern found at position: 4
Pattern found at position: 10
Pattern found at position: 18

在上述场景中,我们正在主字符串“ABAAABCDBBABCDDEBCABC”中查找模式“ABC”。我们将检查主字符串中的每个位置并记下我们在哪里找到匹配项。我们在位置4、10和18找到了模式“ABC”。

示例

以下是演示各种编程语言中Z算法的示例:

#include <stdio.h>
#include <string.h>
// function to fill Z array 
void fillZArray(const char* conStr, int zArr[]) {
   int n = strlen(conStr);
   int windLeft, windRight, k;
   // Initialize the window size to 0
   windLeft = windRight = 0; 
   // iterating over the characters of the new string
   for (int i = 1; i < n; i++) {
      // checking if current index is greater than right bound of window
      if (i > windRight) {
         // reset the window size to 0 and position it at the current index
         windLeft = windRight = i; 
         // extend right bound of window as long as characters match
         while (windRight < n && conStr[windRight - windLeft] == conStr[windRight]) {
             windRight++; 
         }
         // setting the Z value for the current index
         zArr[i] = windRight - windLeft;
         // decrementing right bound 
         windRight--;
      } else {
         // calculating corresponding index in window
         k = i - windLeft;
         // if Z value at corresponding index is less than remaining interval
         if (zArr[k] < windRight - i + 1) {
             zArr[i] = zArr[k]; 
         } else {
            // reset left bound of window to current index
            windLeft = i;
            // extend right bound of window as long as characters match
            while (windRight < n && conStr[windRight - windLeft] == conStr[windRight]) {
               windRight++;
            }
            // Setting the Z value for the current index
            zArr[i] = windRight - windLeft;
            // Decrement the right bound of the window
            windRight--;
         }
      }
   }
}
// function to implement the Z algorithm for pattern searching
void zAlgorithm(const char* mainString, const char* pattern, int array[], int *index) {
   // concatenate the pattern, a special character, and the main string
   char concatedStr[strlen(mainString) + strlen(pattern) + 1];
   strcpy(concatedStr, pattern);
   strcat(concatedStr, "$");
   strcat(concatedStr, mainString); 
   int patLen = strlen(pattern);
   int len = strlen(concatedStr);
   // Initialize the Z array
   int zArr[len];
   // Fill the Z array 
   fillZArray(concatedStr, zArr);
   // iterate over the Z array
   for (int i = 0; i < len; i++) {
      // if Z value equals length of the pattern, the pattern is found
      if (zArr[i] == patLen) {
         (*index)++;
         array[(*index)] = i - patLen - 1;
      }
   }
}
int main() {
   const char* mainString = "ABAAABCDBBABCDDEBCABC";
   const char* pattern = "ABC";
   // Initialize the location array and the index
   int locArray[strlen(mainString)];
   int index = -1;
   // Calling the Z algorithm function
   zAlgorithm(mainString, pattern, locArray, &index);
   // to print the result
   for (int i = 0; i <= index; i++) {
      printf("Pattern found at position: %d\n", locArray[i]);
   }
   return 0;
}

输出

Pattern found at position: 4
Pattern found at position: 10
Pattern found at position: 18
#include<iostream>
using namespace std;
// function to fill Z array 
void fillZArray(string conStr, int zArr[]) {
   int n = conStr.size();
   int windLeft, windRight, k;
   // initially window size is 0
   windLeft = windRight = 0;    
   // iterating over the characters of the new string
   for(int i = 1; i < n; i++) {
      // checking if current index is greater than right bound of window
      if(i > windRight) {
	     // reset the window size to 0 and position it at the current index
         windLeft = windRight = i; 
		 // extend right bound of window as long as characters match	
         while(windRight < n && conStr[windRight-windLeft] == conStr[windRight]) {
            windRight++;    
         }
		 // setting the Z value for the current index
         zArr[i] = windRight-windLeft;
		 // decrementing right bound 
         windRight--;
      }else {
	     // calculating corresponding index in window
         k = i-windLeft;
		 // if Z value at corresponding index is less than remaining interval
         if(zArr[k] < windRight-i+1)
            zArr[i] = zArr[k];    
         else {
		    // reset left bound of window to current index
            windLeft = i;
			// extend right bound of window as long as characters match
            while(windRight < n && conStr[windRight - windLeft] == conStr[windRight]) {
               windRight++;
            }
			// Setting the Z value for the current index
            zArr[i] = windRight - windLeft;
			// Decrement the right bound of the window
            windRight--;
         }
      }
   }
}
// function to implement the Z algorithm for pattern searching
void zAlgorithm(string mainString, string pattern, int array[], int *index) {
   // concatenate the pattern, a special character, and the main string
   string concatedStr = pattern + "$" + mainString;    
   int patLen = pattern.size();
   int len = concatedStr.size();
   // Initialize the Z array
   int zArr[len];
   // Fill the Z array 
   fillZArray(concatedStr, zArr);
   // iterate over the Z array
   for(int i = 0; i<len; i++) {
       // if Z value equals length of the pattern, the pattern is found
      if(zArr[i] == patLen) {
         (*index)++;
         array[(*index)] = i - patLen -1;
      }
   }
}
int main() {
   string mainString = "ABAAABCDBBABCDDEBCABC";
   string pattern = "ABC";
   // Initialize the location array and the index
   int locArray[mainString.size()];
   int index = -1;
   // Calling the Z algorithm function
   zAlgorithm(mainString, pattern, locArray, &index);
   // to print the result
   for(int i = 0; i <= index; i++) {
      cout << "Pattern found at position: " << locArray[i]<<endl;
   }
}

输出

Pattern found at position: 4
Pattern found at position: 10
Pattern found at position: 18
public class ZAlgorithm {
   // method to fill Z array    
   public static void fillZArray(String conStr, int[] zArr) {
      int n = conStr.length();
      int windLeft, windRight, k;
      // initially window size is 0
      windLeft = windRight = 0; 
      // iterating over the characters of the new string
      for (int i = 1; i < n; i++) {
         // checking if current index is greater than right bound of window
         if (i > windRight) {
            // reset the window size to 0 and position it at the current index
            windLeft = windRight = i;
            while (windRight < n && conStr.charAt(windRight - windLeft) == conStr.charAt(windRight)) {
               windRight++; 
            }
            // setting the Z value for the current index
            zArr[i] = windRight - windLeft;
            windRight--;
         } else {
            k = i - windLeft;
            if (zArr[k] < windRight - i + 1)
               zArr[i] = zArr[k]; 
            else {
               windLeft = i;
               while (windRight < n && conStr.charAt(windRight - windLeft) == conStr.charAt(windRight)) {
                  windRight++;
               }
               zArr[i] = windRight - windLeft;
               windRight--;
            }
         }
      }
   }
   // method to implement the Z algorithm for pattern searching
   public static void zAlgorithm(String mainString, String pattern, int[] array) {
      // concatenate the pattern, a special character, and the main string
      String concatedStr = pattern + "$" + mainString; 
      int patLen = pattern.length();
      int len = concatedStr.length();
      // Initialize the Z array
      int[] zArr = new int[len];
      // Fill the Z array 
      fillZArray(concatedStr, zArr);
      int index = -1;
      // iterate over the Z array
      for (int i = 0; i < len; i++) {
         // if Z value equals length of the pattern, the pattern is found
         if (zArr[i] == patLen) {
            index++;
            array[index] = i - patLen - 1;
         }
      }
      // Print the results
      for (int i = 0; i <= index; i++) {
         System.out.println("Pattern found at position: " + array[i]);
      }
   }
   public static void main(String[] args) {
      String mainString = "ABAAABCDBBABCDDEBCABC";
      String pattern = "ABC";
      // Initialize the location array and the index
      int[] locArray = new int[mainString.length()];
      // Calling the Z algorithm method
      zAlgorithm(mainString, pattern, locArray);
   }
}

输出

Pattern found at position: 4
Pattern found at position: 10
Pattern found at position: 18
# function to fill Z array 
def fillZArray(conStr, zArr):
    n = len(conStr)
    windLeft, windRight, k = 0, 0, 0  
    # iterating over the characters of the new string
    for i in range(1, n):
        if i > windRight:
            windLeft, windRight = i, i  
            while windRight < n and conStr[windRight - windLeft] == conStr[windRight]:
                windRight += 1  
            zArr[i] = windRight - windLeft
            windRight -= 1
        else:
            k = i - windLeft
            if zArr[k] < windRight - i + 1:
                zArr[i] = zArr[k] 
            else:
                windLeft = i
                while windRight < n and conStr[windRight - windLeft] == conStr[windRight]:
                    windRight += 1
                zArr[i] = windRight - windLeft
                windRight -= 1
# function to implement the Z algorithm for pattern searching
def zAlgorithm(mainString, pattern, array):
    concatedStr = pattern + "$" + mainString  
    patLen = len(pattern)
    length = len(concatedStr)
    zArr = [0] * length
    fillZArray(concatedStr, zArr)
    index = -1
    for i in range(length):
        if zArr[i] == patLen:
            index += 1
            array[index] = i - patLen - 1
    return index, array
def main():
    mainString = "ABAAABCDBBABCDDEBCABC"
    pattern = "ABC"
    locArray = [0] * len(mainString)
    index, locArray = zAlgorithm(mainString, pattern, locArray)
    for i in range(index + 1):
        print("Pattern found at position:", locArray[i])
if __name__ == "__main__":
    main()

输出

Pattern found at position: 4
Pattern found at position: 10
Pattern found at position: 18

Z算法的复杂度

Z算法用于模式搜索,其运行时间为线性时间。因此,其时间复杂度为O(m + n),其中n是被搜索字符串的长度,m是被搜索模式的长度。

广告