Edit Distance In C,CPP,JAVA,PYTHON,C#,JS

Spread the love

Problem Statement: Edit Distance

Edit Distance (also known as Levenshtein distance) is a measure of the difference between two strings. It is the minimum number of operations required to convert one string into another. The operations allowed are:

  1. Insertion of a character.
  2. Deletion of a character.
  3. Substitution of one character for another.

The goal is to determine the minimum number of operations required to transform one string into another.

Example

Let’s consider the two strings:

  • Source String (s1) = “kitten”
  • Target String (s2) = “sitting”

Step-by-Step Transformation:

  1. kittensitten (Substitute ‘k’ with ‘s’)
  2. sittensittin (Remove ‘e’)
  3. sittinsitting (Insert ‘g’)

Thus, the minimum edit distance between “kitten” and “sitting” is 3.

Approach & Algorithm (Dynamic Programming)

The Dynamic Programming (DP) approach to solving the Edit Distance problem works by filling a 2D table where each cell represents the edit distance between two substrings. We fill this table iteratively, starting from the base cases where one string is empty, and then using previously computed values to compute the distances for larger substrings.

Steps:

  1. Define a 2D array dp where dp[i][j] represents the edit distance between the first i characters of string s1 and the first j characters of string s2.
  2. Initialize the base cases:
    • If one string is empty, the distance is the length of the other string (because we need to insert all the characters).
  3. For each pair of characters s1[i] and s2[j], determine if they are equal. If they are, no operation is needed, so take the value from dp[i-1][j-1]. If they are not equal, take the minimum of the three possible operations: insertion, deletion, and substitution.

Time and Space Complexity

  • Time Complexity: O(m * n), where m is the length of string s1 and n is the length of string s2. This is because we fill a table of size m x n.
  • Space Complexity: O(m * n), for the DP table. However, this can be optimized to O(min(m, n)) by using a 1D array if we only care about the current and previous rows of the table.

Code Implementations

1. C

#include <stdio.h>
#include <string.h>
#include <algorithm>
using namespace std;

int min(int a, int b, int c) {
return std::min(std::min(a, b), c);
}

int editDistance(char *s1, char *s2) {
int m = strlen(s1);
int n = strlen(s2);
int dp[m + 1][n + 1];

for (int i = 0; i <= m; i++) {
for (int j = 0; j <= n; j++) {
if (i == 0) dp[i][j] = j;
else if (j == 0) dp[i][j] = i;
else if (s1[i - 1] == s2[j - 1]) dp[i][j] = dp[i - 1][j - 1];
else dp[i][j] = min(dp[i - 1][j] + 1, dp[i][j - 1] + 1, dp[i - 1][j - 1] + 1);
}
}
return dp[m][n];
}

int main() {
char s1[] = "kitten";
char s2[] = "sitting";
printf("Edit Distance: %d\n", editDistance(s1, s2));
return 0;
}

2. C++

#include <iostream>
#include <vector>
#include <algorithm>
using namespace std;

int editDistance(string s1, string s2) {
int m = s1.size();
int n = s2.size();
vector<vector<int>> dp(m + 1, vector<int>(n + 1));

for (int i = 0; i <= m; i++) {
for (int j = 0; j <= n; j++) {
if (i == 0) dp[i][j] = j;
else if (j == 0) dp[i][j] = i;
else if (s1[i - 1] == s2[j - 1]) dp[i][j] = dp[i - 1][j - 1];
else dp[i][j] = min({dp[i - 1][j] + 1, dp[i][j - 1] + 1, dp[i - 1][j - 1] + 1});
}
}
return dp[m][n];
}

int main() {
string s1 = "kitten", s2 = "sitting";
cout << "Edit Distance: " << editDistance(s1, s2) << endl;
return 0;
}

3. Java

public class EditDistance {
public static int min(int a, int b, int c) {
return Math.min(Math.min(a, b), c);
}

public static int editDistance(String s1, String s2) {
int m = s1.length();
int n = s2.length();
int[][] dp = new int[m + 1][n + 1];

for (int i = 0; i <= m; i++) {
for (int j = 0; j <= n; j++) {
if (i == 0) dp[i][j] = j;
else if (j == 0) dp[i][j] = i;
else if (s1.charAt(i - 1) == s2.charAt(j - 1)) dp[i][j] = dp[i - 1][j - 1];
else dp[i][j] = min(dp[i - 1][j] + 1, dp[i][j - 1] + 1, dp[i - 1][j - 1] + 1);
}
}
return dp[m][n];
}

public static void main(String[] args) {
String s1 = "kitten", s2 = "sitting";
System.out.println("Edit Distance: " + editDistance(s1, s2));
}
}

4. Python

def edit_distance(s1, s2):
m, n = len(s1), len(s2)
dp = [[0] * (n + 1) for _ in range(m + 1)]

for i in range(m + 1):
for j in range(n + 1):
if i == 0:
dp[i][j] = j
elif j == 0:
dp[i][j] = i
elif s1[i - 1] == s2[j - 1]:
dp[i][j] = dp[i - 1][j - 1]
else:
dp[i][j] = min(dp[i - 1][j] + 1, dp[i][j - 1] + 1, dp[i - 1][j - 1] + 1)

return dp[m][n]

# Example usage
s1 = "kitten"
s2 = "sitting"
print("Edit Distance:", edit_distance(s1, s2))

5. C#

using System;

public class EditDistance
{
public static int Min(int a, int b, int c)
{
return Math.Min(Math.Min(a, b), c);
}

public static int EditDistanceAlgorithm(string s1, string s2)
{
int m = s1.Length, n = s2.Length;
int[,] dp = new int[m + 1, n + 1];

for (int i = 0; i <= m; i++)
{
for (int j = 0; j <= n; j++)
{
if (i == 0) dp[i, j] = j;
else if (j == 0) dp[i, j] = i;
else if (s1[i - 1] == s2[j - 1]) dp[i, j] = dp[i - 1, j - 1];
else dp[i, j] = Min(dp[i - 1, j] + 1, dp[i, j - 1] + 1, dp[i - 1, j - 1] + 1);
}
}
return dp[m, n];
}

public static void Main()
{
string s1 = "kitten", s2 = "sitting";
Console.WriteLine("Edit Distance: " + EditDistanceAlgorithm(s1, s2));
}
}

6. JavaScript

function editDistance(s1, s2) {
const m = s1.length, n = s2.length;
const dp = Array.from({ length: m + 1 }, () => Array(n + 1).fill(0));

for (let i = 0; i <= m; i++) {
for (let j = 0; j <= n; j++) {
if (i === 0) dp[i][j] = j;
else if (j === 0) dp[i][j] = i;
else if (s1[i - 1] === s2[j - 1]) dp[i][j] = dp[i - 1][j - 1];
else dp[i][j] = Math.min(dp[i - 1][j] + 1, dp[i][j - 1] + 1, dp[i - 1][j - 1] + 1);
}
}

return dp[m][n];
}

// Example usage
const s1 = "kitten", s2 = "sitting";
console.log("Edit Distance:", editDistance(s1, s2));

Conclusion

The Edit Distance problem is a classical example of dynamic programming. The time and space complexities are quadratic in terms of the lengths of the two strings. By using a 2D DP table, we can solve this problem efficiently for reasonably sized strings.