Last active
April 5, 2020 14:01
-
-
Save gireeshkbogu/bc919121e955cc439e444de8a3dce5d7 to your computer and use it in GitHub Desktop.
Simulate Omics style data for Machine Learning programs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Simulate omics-style data for machine-learning programs
import itertools
import uuid

import numpy as np
import pandas as pd
features = 700  # number of simulated feature columns
subjects = 40   # number of simulated subjects (rows)


def _random_hex_ids(count, length):
    """Return ``count`` random uppercase hex strings of ``length`` characters.

    Each identifier is the leading slice of a fresh ``uuid.uuid4().hex``
    value, so ``length`` should not exceed 32. The identifiers are random:
    collisions are very unlikely but are not explicitly prevented.
    """
    return [uuid.uuid4().hex.upper()[:length] for _ in range(count)]


# Random 9-character feature names and 6-character subject identifiers.
columns_list = _random_hex_ids(features, 9)
rows_list = _random_hex_ids(subjects, 6)

# One row per subject, one column per feature; integer values are drawn
# uniformly from [0, 999999) (the upper bound is exclusive).
df = pd.DataFrame(
    np.random.randint(0, 999999, size=(subjects, features)),
    columns=columns_list,
)

# Put the subject identifiers in the first column so they act as "rownames".
df.insert(0, 'Subject_ID', rows_list)

df.head()  # preview of the first rows when run in a notebook/REPL
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment