Last active
September 4, 2015 14:22
-
-
Save junjiah/c608ece3d951a9c43be9 to your computer and use it in GitHub Desktop.
solved 'Repeated DNA Sequences' on LeetCode https://leetcode.com/problems/repeated-dna-sequences/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Rolling hash. Runtime: 92ms. | |
class Solution { | |
public: | |
vector<string> findRepeatedDnaSequences(string s) { | |
// Parameter for rolling hash. | |
const int ROLL_EXP = 5; | |
// Step size in problem description. | |
const int STEP = 10; | |
// Most significant value for the rolling hash. | |
const int MOST_SIGNIFICANT = pow(ROLL_EXP, STEP - 1); | |
int sz = s.size(); | |
if (sz <= 10) { | |
return {}; | |
} | |
vector<string> repeated; | |
unordered_map<int, int> freq; | |
int roll_hash = 0; | |
for (int i = 0; i < STEP; ++i) { | |
roll_hash = roll_hash * ROLL_EXP + repr(s[i]); | |
} | |
freq[roll_hash] = 1; | |
for (int i = 1; i <= sz - STEP; ++i) { | |
// Update the rolling hash, drop the most significant value and | |
// append a new least significant value. | |
roll_hash = (roll_hash - repr(s[i - 1]) * MOST_SIGNIFICANT) * ROLL_EXP | |
+ repr(s[i - 1 + STEP]); | |
if (freq[roll_hash]++ == 1) { | |
repeated.push_back(s.substr(i, STEP)); | |
} | |
} | |
return repeated; | |
} | |
private: | |
static inline int repr(char ch) { | |
switch (ch) { | |
case 'A': | |
return 1; | |
case 'C': | |
return 2; | |
case 'G': | |
return 3; | |
case 'T': | |
return 4; | |
} | |
} | |
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Others' code from https://leetcode.com/discuss/44689/10-lines-c-code-8-ms-passed | |
// Carefully picked hashmap size, repr for each char and rolling step. | |
// Runtime: 8ms. | |
// A potential problem is if a sequence happens more than 256 times. Fix is easy. | |
vector<string> findRepeatedDnaSequences(string s) { | |
char hashMap[1048576] = {0}; | |
vector<string> ans; | |
int len = s.size(),hashNum = 0; | |
if (len < 11) return ans; | |
for (int i = 0;i < 9;++i) | |
hashNum = hashNum << 2 | (s[i] - 'A' + 1) % 5; | |
for (int i = 9;i < len;++i) | |
if (hashMap[hashNum = (hashNum << 2 | (s[i] - 'A' + 1) % 5) & 0xfffff]++ == 1) | |
ans.push_back(s.substr(i-9,10)); | |
return ans; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment