通过仅设置一个大小为 K 的子字符串的位来最小化二进制字符串中的汉明距离
两个等长字符串之间的汉明距离是在所有位置上,一个字符串在对应位置上与另一个字符串具有不同值的个数。我们可以通过以下示例来理解这一点:
S = “ramanisgoing” T = “dishaisgoing”
这里,5 是两个字符串 S 和 T 之间的汉明距离,因为 raman 和 disha 是两个单词,它们使字符串发生变化以变得相等。
问题陈述
然而,在这个问题中,我们需要找到两个仅包含二进制数字的字符串之间的汉明距离。一个字符串将由用户提供,例如 S,另一个字符串,例如 T,并且最初,我们将假设它只包含“0”位,并且其大小与给定字符串的大小相同。我们将得到一个数字“k”,其值将表示一个子字符串可以包含的元素个数,这些元素仅包含 1 作为其元素,以便我们将该大小为 k 的子字符串放在我们字符串 (T) 的任何位置,以最小化两个子字符串 S 和 T 之间的汉明距离。
让我们尝试借助一些示例来理解这个问题。
输入
S = "100111” K = 5
输出
3
解释
由于初始字符串 T 将等于“000000”,并且字符串 T 将更改为与字符串 S 进行比较,以在 k=5 时找到最小汉明距离,如下所示:“111110”和“011111”。
100111 和 000000 将给出 4 作为它们的汉明距离。100111 和 111110 将为我们提供 3 作为它们的汉明距离,而 100111 和 011111 将给出 3 作为它们的汉明距离。
但最小汉明距离将为 3,因为 3 小于 4。因此,3 是我们的答案。
− S = "100101” K = 5 − 3
由于初始字符串 T 将等于“000000”,并且字符串 T 将更改为与字符串 S 进行比较,以在 k=5 时找到最小汉明距离,如下所示:“111110”和“011111”。
100101 和 000000 将给出 3 作为它们的汉明距离。100101 和 111110 将为我们提供 4 作为它们的汉明距离,而 100101 和 011111 将给出 4 作为它们的汉明距离。
但最小汉明距离将为 3,因为 3 小于 4。因此,3 是我们的答案。
问题解释
让我们尝试理解问题并找到它的解决方案。
方法 1:暴力解法
我们将通过更改子字符串在不同初始点和结束点的位置来更改字符串 T,以便我们可以在所有可能的字符串中获得最小的汉明距离。
示例
以下是各种编程语言中暴力解法的实现:
#include <stdio.h> #include <string.h> // Function to get minimum hamming distance through iteration int helper(char* S, int k) { // n is the size of the string int n = strlen(S); // Take another string T and initiate it with zero bits size equal to that of S char T[n+1]; memset(T, '0', sizeof(T)); T[n] = '\0'; // Take another string v to initiate it same as T char v[n+1]; memset(v, '0', sizeof(v)); v[n] = '\0'; // Define mini as the hamming distance between T and S int mini = 0; int l = 0; while (l < n) { if (S[l] != T[l]) mini++; l++; } for (int i = 0; i < n - k + 1; i++) { int j = 0, a = 0, l = 0; // Alter string v by changing bits of size k while (j < k) { v[j + i] = '1'; j++; } // Calculate hamming distance while (l < n) { if (S[l] != v[l]) a++; l++; } // Check if the previous hamming distance is greater than the current hamming distance, if yes, then replace that distance element if (mini > a) { mini = a; } // Again assign v as the T string strcpy(v, T); } // Return the minimum hamming distance found through the above iterations return mini; } int main() { // Give input string S char S[] = "100101"; // Give the value of k that is the substring size int K = 5; // Call the helper function printf("The minimum hamming distance is: %d\n", helper(S, K)); return 0; }
输出
The minimum hamming distance is: 3
#include <bits/stdc++.h> using namespace std; // Make a function to get minimum hamming distance through iteration int helper(string S,int k){ // n is the size of the string int n=S.size(); // Take another string T and initiate it with zero bits size equal to that of S string T; for(int i=0; i<n; i++) { T+="0"; } // Take another string v to initiate it same as T string v=T; // Define mini as the hamming distance between T and S int mini=0; int l=0; while(l<n) { if(S[l]!=T[l])mini++; l++; } for(int i=0; i<n-k+1; i++) { int j=0,a=0,l=0; // alter string v by changing bits of size k while(j<k) { v[j+i]='1'; j++; } // calculate hamming distance while(l<n) { if(S[l]!=v[l])a++; l++; } // Check if the previous hamming distance is greater than the current hamming distance, if yes then replace that distance element if(mini>a) { mini=a; } // Again assign v as the T string v=T; } // return the minimum hamming distance found through the above iterations return mini; } int main(){ // Give input string S string S = "100101"; // Give the value of k that is the substring size int K = 5; // Call the helper function cout << "The minimum hamming distance is: "<< helper(S,K); return 0; }
输出
The minimum hamming distance is: 3
public class Main { // Function to get minimum hamming distance through iteration static int helper(String S, int k) { // n is the size of the string int n = S.length(); // Take another string T and initiate it with zero bits size equal to that of S StringBuilder T = new StringBuilder(); for (int i = 0; i < n; i++) { T.append('0'); } // Take another string v to initiate it same as T StringBuilder v = new StringBuilder(T); // Define mini as the hamming distance between T and S int mini = 0; int l = 0; while (l < n) { if (S.charAt(l) != T.charAt(l)) mini++; l++; } for (int i = 0; i < n - k + 1; i++) { int j = 0, a = 0; // Alter string v by changing bits of size k while (j < k) { v.setCharAt(j + i, '1'); j++; } // Calculate hamming distance l = 0; // Reinitialize l while (l < n) { if (S.charAt(l) != v.charAt(l)) a++; l++; } // Check if the previous hamming distance is greater than the current hamming distance, if yes, then replace that distance element if (mini > a) { mini = a; } // Again assign v as the T string v = new StringBuilder(T); } // Return the minimum hamming distance found through the above iterations return mini; } public static void main(String[] args) { // Give input string S String S = "100101"; // Give the value of k that is the substring size int K = 5; // Call the helper function System.out.println("The minimum hamming distance is: " + helper(S, K)); } }
输出
The minimum hamming distance is: 3
def helper(S, k): n = len(S) # Take another string T and initiate it with zero bits size equal to that of S T = '0' * n # Take another string v to initiate it same as T v = T # Define mini as the hamming distance between T and S mini = 0 l = 0 while l < n: if S[l] != T[l]: mini += 1 l += 1 for i in range(n - k + 1): j, a, l = 0, 0, 0 # Alter string v by changing bits of size k while j < k: v = v[:j + i] + '1' + v[j + i + 1:] j += 1 # Calculate hamming distance while l < n: if S[l] != v[l]: a += 1 l += 1 # Check if the previous hamming distance is greater than the current hamming distance, if yes, then replace that distance element if mini > a: mini = a # Again assign v as the T string v = T # Return the minimum hamming distance found through the above iterations return mini # Give input string S S = "100101" # Give the value of k that is the substring size K = 5 # Call the helper function print("The minimum hamming distance is:", helper(S, K))
输出
The minimum hamming distance is: 3
方法 2:优化解法
算法
使用前缀和数组计算存在的 1 的个数,并将其存储为我们的最小汉明距离
遍历 S 字符串以查找字符串 S 中 K 个不同子字符串之间 1 的值。
如果 (i-1<0) 将值 v 作为 arr[i+K-1],否则,将值 v 作为 (arr[i+K-1]-arr[i-1])
通过找到先前汉明距离和当前汉明距离之间的最小值来存储最小值。
当前汉明距离可以通过对来自 K 个子字符串的零元素个数 (K - v) 和当前 S 子字符串中存在的零元素个数 (cnt - v) 的加法进行操作来找到
最后,返回总的最小距离。
示例
以下是各种编程语言中优化解法的实现:
#include <stdio.h> #include <stdlib.h> #include <string.h> // Make a helper function to get minimum hamming distance through iteration int helper(char *S, int K){ // n is the size of the string int n = strlen(S); // initialize an array of size 'n' int arr[n]; if(S[0]=='0')arr[0]=0; else arr[0]=1; // Count the number of 1's in the string S for (int i = 1; i < n; i++) { if(S[i]=='0')arr[i]=arr[i-1]; else arr[i]=arr[i-1]+1; } int cnt = arr[n - 1]; // Define mini as the hamming distance between T and S int mini = cnt; // Traverse through S to find the minimum for (int i = 0; i < n - K; i++) { int v; if(i-1==-1)v=arr[i+K-1]; else v= arr[i+K-1]-arr[i-1]; // Store the minimum if (cnt - v + (K - v) < mini) mini = cnt - v + (K - v); } // Return the minimum hamming distance return mini; } int main(){ // Give input string S char *S = "100101"; // Give the value of k that is the substring size int K = 5; // Call the helper function printf("The minimum hamming distance is: %d", helper(S,K)); return 0; }
输出
The minimum hamming distance is: 3
#include <bits/stdc++.h> using namespace std; // Make a helper function to get minimum hamming distance through iteration int helper(string S, int K){ // n is the size of the string int n = S.size(); // initialize an array of size 'n' int arr[n]; if(S[0]=='0')arr[0]=0; else arr[0]=1; // Count the number of 1's in the string S for (int i = 1; i < n; i++) { if(S[i]=='0')arr[i]=arr[i-1]; else arr[i]=arr[i-1]+1; } int cnt = arr[n - 1]; // Define mini as the hamming distance between T and S int mini = cnt; // Traverse through S to find the minimum for (int i = 0; i < n - K; i++) { int v; if(i-1==-1)v=arr[i+K-1]; else v= arr[i+K-1]-arr[i-1]; // Store the minimum mini = min(mini, cnt - v + (K - v)); } // Return the minimum hamming distance return mini; } int main(){ // Give input string S string S = "100101"; // Give the value of k that is the substring size int K = 5; // Call the helper function cout << "The minimum hamming distance is: "<< helper(S,K); return 0; }
输出
The minimum hamming distance is: 3
public class Main { // Make a helper function to get the minimum hamming distance through iteration static int helper(String S, int K) { // n is the size of the string int n = S.length(); // Initialize an array of size 'n' int[] arr = new int[n]; if (S.charAt(0) == '0') { arr[0] = 0; } else { arr[0] = 1; } // Count the number of 1's in the string S for (int i = 1; i < n; i++) { if (S.charAt(i) == '0') { arr[i] = arr[i - 1]; } else { arr[i] = arr[i - 1] + 1; } } int cnt = arr[n - 1]; // Define mini as the hamming distance between T and S int mini = cnt; // Traverse through S to find the minimum for (int i = 0; i < n - K; i++) { int v; if (i - 1 == -1) { v = arr[i + K - 1]; } else { v = arr[i + K - 1] - arr[i - 1]; } // Store the minimum mini = Math.min(mini, cnt - v + (K - v)); } // Return the minimum hamming distance return mini; } public static void main(String[] args) { // Give input string S String S = "100101"; // Give the value of K, which is the substring size int K = 5; // Call the helper function System.out.println("The minimum hamming distance is: " + helper(S, K)); } }
输出
The minimum hamming distance is: 3
def helper(S, K): # n is the size of the string n = len(S) # Initialize an array of size 'n' arr = [0] * n # Initialize the first element of the array based on the first character of S if S[0] == '0': arr[0] = 0 else: arr[0] = 1 # Count the number of 1's in the string S for i in range(1, n): if S[i] == '0': arr[i] = arr[i - 1] else: arr[i] = arr[i - 1] + 1 # Calculate the total count of 1's in the string cnt = arr[n - 1] # Initialize mini as the total count mini = cnt # Traverse through S to find the minimum hamming distance for i in range(n - K): v = 0 # Calculate the difference in counts for the sliding window if i - 1 == -1: v = arr[i + K - 1] else: v = arr[i + K - 1] - arr[i - 1] # Update mini with the minimum hamming distance mini = min(mini, cnt - v + (K - v)) # Return the minimum hamming distance return mini # Input string S S = "100101" # Value of K, which is the substring size K = 5 # Call the helper function and display the result print("The minimum hamming distance is:", helper(S, K))
输出
The minimum hamming distance is: 3
结论
在本文中,为了找到最小汉明距离,我们将首先看到一种朴素的方法,但为了提高其时间复杂度,我们将使用前缀和数组的概念,通过该概念,我们可以在一个循环中避免在不同循环中重复计数。