Skip to content

Instantly share code, notes, and snippets.

@junjiah
Last active September 4, 2015 14:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save junjiah/c608ece3d951a9c43be9 to your computer and use it in GitHub Desktop.
Save junjiah/c608ece3d951a9c43be9 to your computer and use it in GitHub Desktop.
solved 'Repeated DNA Sequences' on LeetCode https://leetcode.com/problems/repeated-dna-sequences/
// Rolling hash. Runtime: 92ms.
class Solution {
public:
vector<string> findRepeatedDnaSequences(string s) {
// Parameter for rolling hash.
const int ROLL_EXP = 5;
// Step size in problem description.
const int STEP = 10;
// Most significant value for the rolling hash.
const int MOST_SIGNIFICANT = pow(ROLL_EXP, STEP - 1);
int sz = s.size();
if (sz <= 10) {
return {};
}
vector<string> repeated;
unordered_map<int, int> freq;
int roll_hash = 0;
for (int i = 0; i < STEP; ++i) {
roll_hash = roll_hash * ROLL_EXP + repr(s[i]);
}
freq[roll_hash] = 1;
for (int i = 1; i <= sz - STEP; ++i) {
// Update the rolling hash, drop the most significant value and
// append a new least significant value.
roll_hash = (roll_hash - repr(s[i - 1]) * MOST_SIGNIFICANT) * ROLL_EXP
+ repr(s[i - 1 + STEP]);
if (freq[roll_hash]++ == 1) {
repeated.push_back(s.substr(i, STEP));
}
}
return repeated;
}
private:
static inline int repr(char ch) {
switch (ch) {
case 'A':
return 1;
case 'C':
return 2;
case 'G':
return 3;
case 'T':
return 4;
}
}
};
// Others' code from https://leetcode.com/discuss/44689/10-lines-c-code-8-ms-passed
// Carefully picked hashmap size, repr for each char and rolling step.
// Runtime: 8ms.
// A potential problem is if a sequence happens more than 256 times. Fix is easy.
vector<string> findRepeatedDnaSequences(string s) {
char hashMap[1048576] = {0};
vector<string> ans;
int len = s.size(),hashNum = 0;
if (len < 11) return ans;
for (int i = 0;i < 9;++i)
hashNum = hashNum << 2 | (s[i] - 'A' + 1) % 5;
for (int i = 9;i < len;++i)
if (hashMap[hashNum = (hashNum << 2 | (s[i] - 'A' + 1) % 5) & 0xfffff]++ == 1)
ans.push_back(s.substr(i-9,10));
return ans;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment