有限自动机的有效构造



什么是有限自动机?

有限自动机是一种用于识别输入文本中模式的计算数学模型。它由一组状态和它们之间的转换组成。这种机器可以接受或拒绝输入字符串,这表示它只有两种状态。

用于模式匹配的有限自动机

要使用有限自动机进行模式匹配,我们需要构建一个仅接受与给定模式匹配的字符串的FA机器。假设我们创建了一个有限自动机,它有四个状态,即q0、q1、q2和q3。初始状态将是'q0',最终状态是'q3'。转换用模式的字符标记。

现在,我们从起始状态开始匹配,并从左到右读取字符串,根据输入符号遵循转换。如果我们在读取整个字符串后到达最终状态,则字符串与模式匹配。否则,字符串与模式不匹配。

以下是输入输出场景,以增强我们对问题的理解:

Input:
main String: "ABAAABCDBBABCDDEBCABC" 
pattern: "ABC"
Output:
Pattern found at position: 4
Pattern found at position: 10
Pattern found at position: 18

给定一个输入字符串“ABAAABCDBBABCDDEBCABC”和模式“ABC”,我们使用有限自动机程序在字符串中搜索模式的出现。程序在索引位置4、10和18处识别模式。

示例

现在,让我们使用有限自动机方法来实现不同的编程语言解决模式匹配问题:

#include <stdio.h>
#include <string.h>
#define MAXCHAR 256
// to fill Finite Automata transition table based on given pattern
void fillTransitionTable(const char *pattern, int transTable[][MAXCHAR]) {
   int longPS = 0;
   // initializing the first state with 0 for all characters
   for (int i = 0; i < MAXCHAR; i++) {
      transTable[0][i] = 0; 
   }
   // For the first character of pattern, move to the first state
   transTable[0][(int)pattern[0]] = 1;  
   // For rest of the pattern's characters, create states using prefix and suffix
   for (int i = 1; i <= strlen(pattern); i++) {
      // Copy values from LPS length to the current state
      for (int j = 0; j < MAXCHAR; j++)  
         transTable[i][j] = transTable[longPS][j];
      // For the current pattern character, move to the next state
      transTable[i][(int)pattern[i]] = i + 1;
      // Updating LPS length for the next state
      if (i < strlen(pattern))
         longPS = transTable[longPS][(int)pattern[i]];  
   }
}
// to search for the pattern in main string using Finite Automata approach
void FAPatternSearch(const char *mainString, const char *pattern, int array[], int *index) {
   int patLen = strlen(pattern);
   int strLen = strlen(mainString);
   // Creating transition table for the pattern
   int transTable[patLen + 1][MAXCHAR];  
   fillTransitionTable(pattern, transTable);
   int presentState = 0;
   // iterating over the main string
   for (int i = 0; i <= strLen; i++) {
      // Move to the next state if the transition is possible
      presentState = transTable[presentState][(int)mainString[i]]; 
      // If the present state is the final state, the pattern is found
      if (presentState == patLen) {  
         (*index)++;
         array[(*index)] = i - patLen + 1;
      }
   }
}
int main() {
   const char *mainString = "ABAAABCDBBABCDDEBCABC";
   // pattern to be searched
   const char *pattern = "ABC";
   int locArray[strlen(mainString)];
   int index = -1;
   // Calling the pattern search function
   FAPatternSearch(mainString, pattern, locArray, &index);
   // to print the result
   for (int i = 0; i <= index; i++) {
      printf("Pattern found at position: %d\n", locArray[i]);
   }
   return 0;
}
#include<iostream>
#define MAXCHAR 256
using namespace std;
// to fill Finite Automata transition table based on given pattern
void fillTransitionTable(string pattern, int transTable[][MAXCHAR]) {
   int longPS = 0;
   // initializing the first state with 0 for all characters
   for (int i = 0; i < MAXCHAR; i++) {
      transTable[0][i] = 0;        
   }
   // For the first character of pattern, move to the first state
   transTable[0][pattern[0]] = 1;  
   // For rest of the characters, create states using prefix and suffix
   for (int i = 1; i<= pattern.size(); i++) {
      // Copy values from LPS length to the current state
      for (int j = 0; j < MAXCHAR ; j++)    
         transTable[i][j] = transTable[longPS][j];
      // For the current pattern character, move to the next state     
      transTable[i][pattern[i]] = i + 1;
      // Updating LPS length for the next state
      if (i < pattern.size())
         longPS = transTable[longPS][pattern[i]];
   }
}
// to search for the pattern in main string using Finite Automata approach
void FAPatternSearch(string mainString, string pattern, int array[], int *index) {
   int patLen = pattern.size();
   int strLen = mainString.size();
   // Creating transition table for the pattern
   int transTable[patLen+1][MAXCHAR];     
   fillTransitionTable(pattern, transTable);
   int presentState = 0;
   // iterating over the main string
   for(int i = 0; i<=strLen; i++) {
      // Move to the next state if the transition is possible 
      presentState = transTable[presentState][mainString[i]];    
      // If the present state is the final state, the pattern is found
      if(presentState == patLen) {    
         (*index)++;
         array[(*index)] = i - patLen + 1 ;
      }
   }
}
int main() {
   string mainString = "ABAAABCDBBABCDDEBCABC";
   // pattern to be searched
   string pattern = "ABC";
   int locArray[mainString.size()];
   int index = -1;
   // Calling the pattern search function
   FAPatternSearch(mainString, pattern, locArray, &index);
   // to print the result
   for(int i = 0; i <= index; i++) {
      cout << "Pattern found at position: " << locArray[i]<<endl;
   }
}
public class Main {
   static final int MAXCHAR = 256;
   // to fill Finite Automata transition table based on given pattern
   public static void fillTransitionTable(String pattern, int[][] transTable) {
      int longPS = 0;
      // initializing the first state with 0 for all characters
      for (int i = 0; i < MAXCHAR; i++) {
         transTable[0][i] = 0;  
      }
      // For the first character of pattern, move to the first state
      transTable[0][pattern.charAt(0)] = 1;  
      // For rest of the characters, create states using prefix and suffix
      for (int i = 1; i < pattern.length(); i++) {  
         // Copy values from LPS length to the current state
         for (int j = 0; j < MAXCHAR; j++)  
            transTable[i][j] = transTable[longPS][j];
         // For the current pattern character, move to the next state
         transTable[i][pattern.charAt(i)] = i + 1;
         // Updating LPS length for the next state
         longPS = transTable[longPS][pattern.charAt(i)];  
      }
   }
   // to search for the pattern in main string using Finite Automata approach
   public static void FAPatternSearch(String mainString, String pattern, int[] array, int[] index) {
      int patLen = pattern.length();
      int strLen = mainString.length();
      // Creating transition table for the pattern
      int[][] transTable = new int[patLen + 1][MAXCHAR];  
      fillTransitionTable(pattern, transTable);
      int presentState = 0;
      // iterating over the main string
      for (int i = 0; i < strLen; i++) {
          // Move to the next state if the transition is possible  
         presentState = transTable[presentState][mainString.charAt(i)];  
         // If the present state is the final state, the pattern is found
         if (presentState == patLen) {  
            index[0]++;
            array[index[0]] = i - patLen + 1;
         }
      }
   }
   public static void main(String[] args) {
      String mainString = "ABAAABCDBBABCDDEBCABC";
      // pattern to be searched
      String pattern = "ABC";
      int[] locArray = new int[mainString.length()];
      int[] index = { -1 };
      // Calling the pattern search method
      FAPatternSearch(mainString, pattern, locArray, index);
      // to print the result
      for (int i = 0; i <= index[0]; i++) {
         System.out.println("Pattern found at position: " + locArray[i]);
      }
   }
}
MAXCHAR = 256
# to fill Finite Automata transition table based on given pattern
def fillTransitionTable(pattern, transTable):
    longPS = 0
    # initializing the first state with 0 for all characters
    for i in range(MAXCHAR):
        transTable[0][i] = 0
    # For the first character of pattern, move to the first state    
    transTable[0][ord(
        pattern[0])] = 1 
    # For rest of the pattern's characters, create states using prefix and suffix    
    for i in range(1, len(pattern)):
        # Copy values from LPS length to the current state
        for j in range(MAXCHAR):  
            transTable[i][j] = transTable[longPS][j]
        # For the current pattern character, move to the next state    
        transTable[i][ord(pattern[i])] = i + 1
        # Updating LPS length for the next state
        longPS = transTable[longPS][ord(
            pattern[i]
        )]  
# to search for the pattern in main string using Finite Automata approach
def FAPatternSearch(mainString, pattern, array, index):
    patLen = len(pattern)
    strLen = len(mainString)
    # create a transition table for each pattern
    transTable = [[0] * MAXCHAR for _ in range(patLen + 1)
                  ]  
    fillTransitionTable(pattern, transTable)
    presentState = 0
    # iterating over the main string
    for i in range(strLen):
        # Move to the next state if the transition is possible
        presentState = transTable[presentState][ord(
            mainString[i]
        )]  
        # If the present state is the final state, the pattern is found
        if presentState == patLen:
            index[0] += 1
            array[index[0]] = i - patLen + 1

mainString = "ABAAABCDBBABCDDEBCABC"
pattern = "ABC"
locArray = [0] * len(mainString)
index = [-1]
#Calling the pattern search function
FAPatternSearch(mainString, pattern, locArray, index)
for i in range(index[0] + 1):
    print("Pattern found at position:", locArray[i])

输出

Pattern found at position: 4
Pattern found at position: 10
Pattern found at position: 18
广告