Last active
April 22, 2023 12:30
-
-
Save gunavaran/ddf30f4a8daa97892ec294b9f454c031 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* 2-bit codes for each base; 'A' is 00 and therefore needs no macro. */
#define C_SHIFT 1 // binary 01
#define G_SHIFT 2 // binary 10
#define T_SHIFT 3 // binary 11

/**
 * Packs a DNA sequence into a bit array using two bits per base
 * (A = 00, C = 01, G = 10, T = 11). Four bases are stored per byte, the
 * first base of each group in the two most significant bits. Unused
 * trailing bits of the last byte are left as 0.
 *
 * @param readString: The DNA sequence to be encoded (upper-case A, C, G, T)
 * @param stringLength : The number of characters in the DNA sequence
 * @return uint8_t type array of bits holding ceil(stringLength / 4) bytes
 *         (one zeroed byte when stringLength is 0), allocated with calloc;
 *         the caller must free() it. Returns NULL if readString is NULL,
 *         stringLength is negative, or the allocation fails. If a character
 *         other than A/C/G/T is met, "Invalid Alphabet" is printed and the
 *         buffer encoded so far is returned (remaining bits stay 0).
 */
uint8_t *bitEncode(char *readString, int stringLength) {
    if (readString == NULL || stringLength < 0) {
        return NULL;
    }

    /* Two bits per base -> one byte per four bases, rounded up. */
    size_t requiredLength = (size_t) stringLength / 4;
    if (stringLength % 4 != 0) {
        requiredLength++;
    }

    /*
     * calloc zero-fills, which already encodes every 'A' (00).
     * Request at least one byte: a zero-size request may legally return
     * NULL, which would be indistinguishable from an allocation failure.
     */
    uint8_t *binaryFormat =
        calloc(requiredLength > 0 ? requiredLength : 1, sizeof *binaryFormat);
    if (binaryFormat == NULL) {
        return NULL;
    }

    for (int i = 0; i < stringLength; i++) {
        unsigned int code;

        switch (readString[i]) {
        case 'A':
            /* Encoded as 00; the zeroed buffer already holds it. */
            continue;
        case 'C':
            code = C_SHIFT;
            break;
        case 'G':
            code = G_SHIFT;
            break;
        case 'T':
            code = T_SHIFT;
            break;
        default:
            printf("Invalid Alphabet \n");
            return binaryFormat;
        }

        /* Base 0 of each byte sits in bits 7-6, base 3 in bits 1-0. */
        const int shift = 6 - (i % 4) * 2;
        binaryFormat[i / 4] |= (uint8_t) (code << shift);
    }

    return binaryFormat;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment