Problem Statement:
You are given 2 strings, you need to find the longest common substring.
Example
Example:
str 1 = abcdef
str 2 = bcxyez
Output: 2
Here the number of substring possible are:
{b, c}
{e}
The characters should be continuous then it is a substring.
So by looking at the above definition we can see that, {b, c} are continuous.
Then the next is {e}, according to the question, we need to return the longest common substring, so it is 3.
Solution
This is a variation of LCS.
similar to LCS we shall solve this problem.
1. Recursive
First we will establish the base case:
Here if any one of the string is empty we return the count.
If the char is same at an index, then we increment the count and reduce the string length by 1.
If both the char are not same, then in the first case we decrement string 1 and keep the string 2 as it is.
else
We keep the first string same and decrement the second string. While calling the function we reset the count to 0.
2. Top Down
We will create a DP array to hold the previously computed values. And if we need to calculate the same sub-problem then we check the DP array.
If the sub problem is already calculated, then we use that value, else we will store the value for next use.
3. Bottom UP
In bottom up approach, if both the strings matches we increment by one, else we will update that cell as 0.
Solution in C++
#include <algorithm>
#include <iostream>
#include <string>
#include <queue>
#include <vector>
#include <unordered_map>
#include <list>
using namespace std;
int lcs_recursion(string text1, string text2, int i, int j, int lcs_count) {
if (i <= 0 || j <= 0) {
return lcs_count;
}
int lcs_count_1 = lcs_count;
if (text1[i-1] == text2[j-1]) {
lcs_count_1 = lcs_recursion(text1, text2, i - 1, j - 1, lcs_count+1);
}
int lcs_count_2 = lcs_recursion(text1, text2, i , j - 1, 0);
int lcs_count_3 = lcs_recursion(text1, text2, i - 1, j, 0);
return max(lcs_count_1, max(lcs_count_2, lcs_count_3));
}
int top_down_util(const string &text1, const string &text2, int i, int j, int lcs_count, vector<vector <vector<int>>> &memo) {
if (i <= 0 || j <= 0) {
return lcs_count;
}
if (memo[i][j][lcs_count] != -1) {
return memo[i][j][lcs_count];
}
int lcs_count_1 = lcs_count;
if (text1[i - 1] == text2[j - 1])
lcs_count_1 = top_down_util(text1, text2, i - 1, j - 1, lcs_count+1, memo);
int lcs_count_2 = top_down_util(text1, text2, i , j - 1, 0, memo);
int lcs_count_3 = top_down_util(text1, text2, i - 1, j, 0, memo);
return memo[i][j][lcs_count] = max(lcs_count_1, max(lcs_count_2, lcs_count_3));
}
/*
In memoization, we need to have a 3D array because we need to store the count along with the strings.
As a rule of thumb, we usually need to store all the elements that are getting changed in a recursive call.
*/
int lcs_top_down_dp(string text1, string text2) {
int max_lcs_count_size = max(text1.size(), text2.size());
vector<vector <vector<int>>> memo(text1.size()+1, vector<vector<int>>(text2.size()+1, vector<int> (max_lcs_count_size+1, -1)));
return top_down_util(text1, text2, text1.size(), text2.size(), 0, memo);
}
int lcs_bottom_up(string text1, string text2) {
vector<vector<int>> dp(text1.size() + 1, vector<int>(text2.size() + 1, 0));
int result = 0;
for (int i = 0; i <= text1.size(); i++) {
for (int j = 0; j <= text2.size(); j++) {
if (i == 0 || j == 0) {
dp[i][j] = 0;
} else if (text1[i - 1] == text2[j - 1]) {
dp[i][j] = dp[i - 1][j - 1] + 1;
result = max(result, dp[i][j]);
} else {
dp[i][j] = 0;
}
}
}
return result;
}
int main()
{
string str1 = "abcdefg";
string str2 = "abscdvef";
cout << "LCS length recursion = "<<lcs_recursion(str1, str2, str1.size(), str2.size(), 0)<<endl;
cout << "LCS length top down = "<<lcs_top_down_dp(str1, str2)<<endl;
cout << "LCS length bottom up = "<<lcs_bottom_up(str1, str2)<<endl;
}
Output:
LCS length recursion = 2
LCS length top down = 2
LCS length bottom up = 2