-
-
Save samusz/694c834d11bb2e21578d08babd4f6151 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* 2-bit codes for each nucleotide; 'A' is 00 and therefore needs no macro. */
#define C_SHIFT 1 /* 'C' -> binary 01 */
#define G_SHIFT 2 /* 'G' -> binary 10 */
#define T_SHIFT 3 /* 'T' -> binary 11 */

/**
 * Pack a DNA sequence into a byte array using 2 bits per base
 * (A = 00, C = 01, G = 10, T = 11).
 *
 * Four bases are stored per byte, first base in the two most significant
 * bits. Unused trailing bits of the last byte are left as 0.
 *
 * @param readString   The DNA sequence to be encoded (upper-case A/C/G/T).
 *                     It is only read, never modified.
 * @param stringLength The number of characters in the DNA sequence.
 * @return A newly allocated array of ceil(stringLength / 4) bytes (at least
 *         one byte, so an empty sequence still yields a valid buffer) that
 *         the caller must release with free(). Returns NULL if stringLength
 *         is negative, if readString is NULL while stringLength is positive,
 *         or if the allocation fails. If a character other than A/C/G/T is
 *         met, "Invalid Alphabet" is printed and the buffer encoded so far
 *         is returned; the remaining bits stay 0.
 */
uint8_t *bitEncode(char *readString, int stringLength) {
    /* Reject arguments that cannot describe a readable sequence. */
    if (stringLength < 0 || (readString == NULL && stringLength > 0)) {
        return NULL;
    }

    size_t length = (size_t)stringLength;
    /* 2 bits per base -> 4 bases per byte, rounded up. */
    size_t requiredLength = length / 4 + (length % 4 != 0 ? 1 : 0);

    /* calloc zeroes the buffer, which already encodes every base as 'A'.
     * Always ask for at least one byte: calloc(0, ...) may legally return
     * NULL, which would be indistinguishable from an allocation failure. */
    uint8_t *binaryFormat =
        calloc(requiredLength > 0 ? requiredLength : 1, sizeof *binaryFormat);
    if (binaryFormat == NULL) {
        return NULL;
    }

    for (size_t i = 0; i < length; i++) {
        unsigned int code;

        switch (readString[i]) {
        case 'A':
            /* 'A' is 00 and the buffer is already zeroed: nothing to do. */
            continue;
        case 'C':
            code = C_SHIFT;
            break;
        case 'G':
            code = G_SHIFT;
            break;
        case 'T':
            code = T_SHIFT;
            break;
        default:
            printf("Invalid Alphabet \n");
            return binaryFormat;
        }

        /* Base i goes into byte i / 4; slot 0 occupies bits 7..6,
         * slot 3 occupies bits 1..0. */
        unsigned int shift = 6u - (unsigned int)(i % 4) * 2u;
        binaryFormat[i / 4] |= (uint8_t)(code << shift);
    }

    return binaryFormat;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment