Certification Exam Guide
- future advances in data technology
- changes to business requirements
- awareness of current state and how to migrate the design to a future state
- data modeling
- tradeoffs
- distributed systems
import numpy as np | |
import pandas as pd | |
from scipy.sparse import csr_matrix, linalg | |
data="""h_tract w_tract total | |
01001020100 01001020200 54 | |
01001020400 01001020500 79 | |
01001020200 01001020400 11 | |
01001020100 01001020300 3 | |
01001020400 01001020100 25 |
Certification Exam Guide
1 1 44 | |
1 2 100 | |
1 3 7 | |
1 4 59 | |
1 5 58 | |
1 6 58 | |
1 7 94 | |
1 8 70 | |
1 9 95 | |
1 10 3 |
from mrjob.job import MRJob, MRStep | |
import os | |
import sys | |
class MRMatrixMultiply(MRJob): | |
def steps(self): | |
return [MRStep(mapper=self.mapper_1, reducer=self.reducer_1)] | |
def mapper_1(self, _, line): |
SELECT | |
'Regi', | |
TRUNC(A.createtimestamp) AS dt, | |
COUNT(*) AS db_regi, | |
COUNT(CASE WHEN C.user_id IS NOT NULL THEN 1 ELSE NULL END) AS segment_regi, | |
CAST(100 * COUNT(CASE WHEN C.user_id IS NOT NULL THEN 1 ELSE NULL END)/COUNT(*) AS VARCHAR) + '%' AS perc_missing | |
FROM db_userlogininfo A | |
INNER JOIN db_user B ON B.userid = A.userid | |
LEFT OUTER JOIN vroom.registration C ON C.user_id = B.uniqueid | |
WHERE |