187. Repeated DNA Sequences

https://leetcode.com/problems/repeated-dna-sequences/

Approach 1: Linear-time Slice Using Substring + HashSet

1
2
3
4
5
6
7
8
9
10
11
12
class Solution {
public List<String> findRepeatedDnaSequences(String s) {
int L=10,n=s.length();
HashSet<String> seen=new HashSet<>(),output=new HashSet<>();

for(int i=0;i<=n-L;i++){
String tmp=s.substring(i,i+L);
if(!seen.add(tmp)) output.add(tmp);
}
return new ArrayList<String>(output);
}
}

Approach 2: Rabin-Karp : Constant-time Slice Using Rolling Hash\
Rolling Hash Youtube

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
class Solution {
public List<String> findRepeatedDnaSequences(String s) {
int L=10,n=s.length();
if(n<=L) return new ArrayList<String>();

Map<Character,Integer> map=new HashMap<>(){
{
put('A', 0);
put('C', 1);
put('G', 2);
put('T', 3);
}
};
// for(char ch:s.toCharArray()) map.put(ch,ch-'A'+1);

int[] strToIntArr=new int[n];
for(int i=0;i<n;i++) strToIntArr[i]=map.get(s.charAt(i));

Set<Integer> seen=new HashSet<>();
Set<String> output=new HashSet<>();
int a=4,aL=(int)Math.pow(a,L);
int h=0;
for(int start=0;start<=n-L;start++){
if(start!=0){
h=h*a-strToIntArr[start-1]*aL+strToIntArr[start+L-1];
}else{
for(int i=0;i<L;i++) h=h*a+strToIntArr[i];
}
if(!seen.add(h)) output.add(s.substring(start,start+L));
}
return new ArrayList<String>(output);
}
}

Approach 3: Bit Manipulation : Constant-time Slice Using Bitmask/
Bitmask Youtube

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class Solution {
public List<String> findRepeatedDnaSequences(String s) {
int L = 10, n = s.length();
if (n <= L) return new ArrayList<>();

// convert string to array of integers
Map<Character, Integer> toInt = new HashMap<>() {
{
put('A', 0);
put('C', 1);
put('G', 2);
put('T', 3);
}
};
int[] nums = new int[n];
for (int i = 0; i < n; ++i) nums[i] = toInt.get(s.charAt(i));

int bitmask = 0;
Set<Integer> seen = new HashSet<>();
Set<String> output = new HashSet<>();
// iterate over all sequences of length L
for (int start = 0; start < n - L + 1; ++start) {
// compute bitmask of the current sequence in O(1) time
if (start != 0) {
// left shift to free the last 2 bit
bitmask <<= 2;
// add a new 2-bits number in the last two bits
bitmask |= nums[start + L - 1];
// unset first two bits: 2L-bit and (2L + 1)-bit
bitmask &= ~(3 << 2 * L);
}
// compute hash of the first sequence in O(L) time
else {
for (int i = 0; i < L; ++i) {
bitmask <<= 2;
bitmask |= nums[i];
}
}
// update output and hashset of seen sequences
if (!seen.add(bitmask)) output.add(s.substring(start, start + L));
}
return new ArrayList<String>(output);
}
}

0%