Skip to content

Instantly share code, notes, and snippets.

View mepsrajput's full-sized avatar
🎯
Focusing

Pradeep Singh mepsrajput

🎯
Focusing
View GitHub Profile
@mepsrajput
mepsrajput / Dockerfile
Last active May 28, 2022 15:36
Apache Airflow / Cloud Composer
# This basically installs some dependencies, adds two SQL scripts and runs a provided SH script.
FROM apache/airflow:2.0.0-python3.7
# Switch to root for the package installation below.
USER root
# INSTALL TOOLS
# Both packages are installed in one non-interactive step: every install needs
# -y, otherwise apt-get stops at the confirmation prompt and the build aborts.
# The package lists are removed in the same layer so they do not bloat the image.
RUN apt-get update \
&& apt-get install -y libaio-dev postgresql-client \
&& rm -rf /var/lib/apt/lists/*
# Created while still root, relative to the image's working directory.
# NOTE(review): the directory will be root-owned; confirm the airflow user
# does not need to write to it.
RUN mkdir extra
# Drop back to the airflow user for everything that follows.
USER airflow
/* Run PROC MEANS on SASHELP.CARS with default options. */
/* The title stays in effect for the output of this step. */
title "Simple proc means";
proc means data=sashelp.cars;
run;
title "Select the required variables & drop the labels";
/* Select the variables & drop the labels */
PROC MEANS DATA=SASHELP.CARS nolabels;
var
from IPython.display import display
def multiFreq(dataset, variable_list):
for i in variable_list:
datax = dataset[f'{i}'].value_counts()
datay = pd.DataFrame({
f'{i}': datax.index,
'Frequency': datax.values,
'Percent': ((datax.values/datax.values.sum())*100).round(2),
'Cumulative Frequency': datax.values.cumsum(),
/* One PROC FREQ step producing a separate frequency table */
/* for each variable listed on the TABLES statement.       */
PROC FREQ DATA=hgrosser;
    TABLES GENRE MOVIE;
RUN;
# Frequency table for the GENRE column; dropna=False keeps missing values
# as their own row instead of discarding them.
datax = data['GENRE'].value_counts(dropna=False)

# Shared pieces, computed once: raw counts, their running total, and the grand total.
counts = datax.values
running = counts.cumsum()
total = counts.sum()

datay = pd.DataFrame({
    'GENRE': datax.index,
    'Frequency': counts,
    'Percent': ((counts / total) * 100).round(2),
    'Cumulative Frequency': running,
    'Cumulative Percent': ((running / total) * 100).round(2)
})
# Bare expression: displays the table when run as a notebook cell.
datay
/* Frequency table for GENRE; the MISSING option makes missing values */
/* appear as a level of their own in the table.                       */
/* NOTE(review): the other GENRE example in this file reads           */
/* data=hgrosser - confirm that Gov_C_SAS really has a GENRE column.  */
PROC FREQ DATA=Gov_C_SAS;
    TABLES GENRE / MISSING;
RUN;
@mepsrajput
mepsrajput / simple_freq_procedure.sas
Last active April 14, 2022 14:52
Simple proc freq
/* Import the CSV */
/* Gov_C is a fileref pointing at the source CSV file. */
FILENAME Gov_C "/folders/myfolders/Assignments/governors_county.csv";
/* REPLACE lets the step be re-run: without it PROC IMPORT refuses to   */
/* overwrite an existing WORK.Gov_C_SAS and the stale data set is kept. */
PROC IMPORT DATAFILE=Gov_C DBMS=CSV OUT=WORK.Gov_C_SAS REPLACE;
/* Take the variable names from the first row of the file. */
GETNAMES=YES;
RUN;
/* freq procedure */
proc freq data=Gov_C_SAS;
# Cross-tabulate county (rows) against state (columns); margins=True appends
# row and column totals, labelled 'Total'.
datab = pd.crosstab(
    index=data.county,
    columns=data.state,
    margins=True,
    margins_name='Total',
)
# Bare expression: displays the table when run as a notebook cell.
datab
/* Two-way table of county by state; NOROW, NOCOL and NOPERCENT suppress */
/* the row, column and overall percentages so only counts are printed.   */
PROC FREQ DATA=Gov_C_SAS;
    TABLES county*state / NOROW NOCOL NOPERCENT;
RUN;
# Count rows per state, then order the result by state label rather than by count.
state_counts = data['state'].value_counts()
datax = state_counts.sort_index()

# Two-column table: the state label and how often it occurs.
datay = pd.DataFrame({'state': datax.index, 'Frequency': datax.values})
# Bare expression: displays the table when run as a notebook cell.
datay