Do Quoc Anh aqd14

## notes.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                aqd14
                / notes.md
            
            
              Created
              February 24, 2020 03:35
            
          
    Notes

column 0 = year, column 1 = month, column 2 = day
# Read the whitespace-delimited wind data file.
# The separator is a raw string so the regex escape `\s` reaches pandas
# unchanged (a plain '\s' is an invalid string escape in Python).
# parse_dates=[[0, 1, 2]] asks pandas to combine columns 0, 1 and 2 into one date column.
# NOTE(review): nested parse_dates lists are deprecated in recent pandas releases — confirm the target version.
data = pd.read_csv('./wind.data', sep=r'\s+', parse_dates=[[0, 1, 2]])

  
## notes.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                aqd14
                / notes.md
            
            
              Created
              February 22, 2020 17:40
            
          
    Notes

# Drop the listed columns (axis=1 means columns). drop() is a method call,
# so it needs parentheses; it returns a new DataFrame rather than modifying df.
df.drop(['unselected_columns'], axis=1)

  
## notes.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                aqd14
                / notes.md
            
            
              Created
              February 22, 2020 04:19
            
          
    Notes

Calculate the ratio of males, grouped by occupation
occupation_grouped = users.groupby('occupation')

# Get back a MultiIndex pandas Series containing the ratio
# for each gender in each occupation
# NOTE(review): the expression that computes this ratio is missing from the note.

  
## notes.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                aqd14
                / notes.md
            
            
              Created
              February 22, 2020 03:53
            
          
    Notes

# Encode gender as a number: 1 for 'M', 0 for anything else.
# Assume we have a 'gender' column with only M and F values

# solution 1: list comprehension over the column
users['gender_n'] = [1 if gender == 'M' else 0 for gender in users['gender']]

# solution 2: vectorized with np.where
users['gender_n'] = np.where(users['gender'] == 'M', 1, 0)

  
## notes.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                aqd14
                / notes.md
            
            
              Created
              February 21, 2020 03:13
            
          
    Notes

# Rename the aggregated columns by passing (new_name, function) tuples.
# 'std' is given as a string alias instead of np.std.
# NOTE(review): recent pandas warns when a NumPy callable is passed to groupby
# agg and substitutes its own std — confirm 'std' is the intended statistic.
grouped_pct.agg([('foo', 'mean'), ('bar', 'std')])

  
## unpacking
## Problem 1.1
### You have an N-element tuple or sequence that you would like to unpack into a collection of N variables.
```
p = (4, 5)
x, y = p # x = 4, y = 5
```

## Problem 1.2
#### You need to unpack N elements from an iterable, but the iterable may be longer than N elements, causing a “too many values to unpack” exception.

## array_vs_vector
# Defining and initializing vectors

vector<T> v1; // vector that holds objects of type T; default-constructed, so v1 is empty

vector<T> v2 (v1); // v2 is a copy of v1

vector<T> v3(n, i); // v3 has n elements, each with value i

vector<T> v4(n); // v4 has n copies of a value-initialized object

## replace_na_many_columns
# Replace NA values with the mean of the same column within each STATENAME
# group. Columns UNITID through REGION and INSTNM are excluded.
# across() replaces the superseded mutate_at(); ungroup() drops the grouping
# so it does not silently carry over into later steps.
college_data_processed <- college_data_processed %>%
    group_by(STATENAME) %>%
    mutate(across(
        c(-(UNITID:REGION), -INSTNM),
        ~ ifelse(is.na(.x), mean(.x, na.rm = TRUE), .x)
    )) %>%
    ungroup()

## difftime
# Notes

# Join dates of birth to grand_slams by name and add `age`: the number of days
# between date_of_birth and tournament_date, divided by 365.25.
grand_slam_ages <- player_dob %>%
    select(name, date_of_birth) %>%
    inner_join(grand_slams, by = "name") %>%
    mutate(
        # `units` is spelled out in full; `unit` only worked through
        # partial argument matching.
        age = as.numeric(
            difftime(tournament_date, date_of_birth, units = "days")
        ) / 365.25
    )

## group-interaction
# Box plot of age by decade, with a separate box for every
# decade/gender combination.
grand_slam_ages %>%
    # Integer division rounds each year down to the start of its decade.
    mutate(decade = (year %/% 10) * 10) %>%
    ggplot(aes(
        x = decade,
        y = age,
        fill = gender,
        group = interaction(decade, gender)
    )) +
    geom_boxplot() +
    scale_x_continuous(breaks = seq(from = 1950, to = 2021, by = 10)) +
    expand_limits(x = 2020)
	## Problem 1.1
	### You have an N-element tuple or sequence that you would like to unpack into a collection of N variables.
	```
	p = (4, 5)
	x, y = p # x = 4, y = 5
	```

	## Problem 1.2
	#### You need to unpack N elements from an iterable, but the iterable may be longer than N elements, causing a “too many values to unpack” exception.
	# Defining and initializing vectors

	vector<T> v1; // vector that holds objects of type T; default-constructed, so v1 is empty

	vector<T> v2 (v1); // v2 is a copy of v1

	vector<T> v3(n, i); // v3 has n elements, each with value i

	vector<T> v4(n); // v4 has n copies of a value-initialized object
	# Replace NA values with the mean of the same column within each STATENAME
	# group. Columns UNITID through REGION and INSTNM are excluded.
	# across() replaces the superseded mutate_at(); ungroup() drops the grouping
	# so it does not silently carry over into later steps.
	college_data_processed <- college_data_processed %>%
	    group_by(STATENAME) %>%
	    mutate(across(
	        c(-(UNITID:REGION), -INSTNM),
	        ~ ifelse(is.na(.x), mean(.x, na.rm = TRUE), .x)
	    )) %>%
	    ungroup()
	# Notes

	# Join dates of birth to grand_slams by name and add `age`: the number of days
	# between date_of_birth and tournament_date, divided by 365.25.
	grand_slam_ages <- player_dob %>%
	    select(name, date_of_birth) %>%
	    inner_join(grand_slams, by = "name") %>%
	    mutate(
	        # `units` is spelled out in full; `unit` only worked through
	        # partial argument matching.
	        age = as.numeric(
	            difftime(tournament_date, date_of_birth, units = "days")
	        ) / 365.25
	    )
	# Box plot of age by decade, with a separate box for every
	# decade/gender combination.
	grand_slam_ages %>%
	    # Integer division rounds each year down to the start of its decade.
	    mutate(decade = (year %/% 10) * 10) %>%
	    ggplot(aes(
	        x = decade,
	        y = age,
	        fill = gender,
	        group = interaction(decade, gender)
	    )) +
	    geom_boxplot() +
	    scale_x_continuous(breaks = seq(from = 1950, to = 2021, by = 10)) +
	    expand_limits(x = 2020)