SolClover SolClover

## Imp_pandas.py
import pandas as pd

## Art001_SASst001.sas
/* Build dataset new_ds from the three inline records listed after datalines. */
data new_ds;
  /* Col_A is read as a character variable ($); Col_B and Col_C as numeric. */
  input Col_A $ Col_B Col_C;
  datalines;
  X 2 3
  Y 5 6
  Z 12 13
  ;
run;

## Art001_Python_001a.py
# Rows of the table, one inner list per record
my_data = [
    ['X', 2, 3],
    ['Y', 5, 6],
    ['Z', 12, 13],
]

# Wrap the rows in a pandas DataFrame and label the three columns
df = pd.DataFrame(data=my_data, columns=['Col_A', 'Col_B', 'Col_C'])

# Show the DataFrame (a bare name is only rendered in a notebook or REPL)
df

## Art001_Python_001b.py
# Column-oriented input: each key is a column name, each value that column's cells
my_data = {
    'Col_A': ['X', 'Y', 'Z'],
    'Col_B': [2, 5, 12],
    'Col_C': [3, 6, 13],
}

# Build the DataFrame; column labels are taken from the dictionary keys
df = pd.DataFrame(data=my_data)

# Show the DataFrame (a bare name is only rendered in a notebook or REPL)
df

## Art001_SASst002.sas
/* Read the CSV file C:\temp\test.csv into the dataset test_dataset.
   replace overwrites test_dataset if it already exists. */
proc import datafile="C:\temp\test.csv"
  out=test_dataset
  dbms=csv
  replace;
  /* Take the variable names from the first row of the file. */
  getnames=yes;
run;

## Art001_Python_002.py
# Load the whole CSV file into a new pandas DataFrame
new_df = pd.read_csv('C:/temp/test.csv')

# usecols limits which columns are read; it accepts either column names
# or zero-based column positions.

# Selecting columns by name
new_df = pd.read_csv(
    'C:/temp/test.csv',
    usecols=['Name', 'Surname', 'Height', 'Age'],
)

# Selecting columns by position
new_df = pd.read_csv(
    'C:/temp/test.csv',
    usecols=[0, 1, 2, 3],
)

## Art001_SASst003.sas
/* Create new_ds by reading every observation from old_ds. */
data new_ds;
  set old_ds;
run;

## Art001_Python_003.py
# Bind a second name to the existing DataFrame.
# Note: plain assignment copies nothing - new_df and old_df refer to the very
# same object, so in-place changes made through new_df show up in old_df too.
new_df = old_df

# To make an actual, independent copy of a DataFrame use copy().
# In this case, future changes to new_df will not have an impact on old_df.
new_df = old_df.copy()

# Print top 5 records of your new DataFrame
new_df.head()

## Art001_SASst004.sas
/* Filter on one condition: read my_ds and keep only the observations
   where Col_A >= 6, writing them to my_new_ds. */
data my_new_ds;
  set my_ds;
where Col_A>=6;
run;

/* Filter on multiple conditions using AND: keep only the observations
   where Col_A >= 6 and Col_B = 5.
   Note: the output dataset has the same name as the input, so my_ds is
   overwritten with the filtered rows. */
data my_ds;
  set my_ds;
  where Col_A>=6 and Col_B=5;
run;

## Art001_Python_004.py
# Filter on one condition
new_df = df[df['Col_A'] >= 6]

# Filter on multiple conditions using AND
# (each comparison is parenthesised because & binds tighter than >= and ==)
new_df = df[(df['Col_A'] >= 6) & (df['Col_B'] == 5)]

# Filter on multiple conditions using OR
new_df = df[(df['Col_A'] >= 6) | (df['Col_B'] == 1)]

# Create a list and use isin to filter
# (example values - replace with the values you want to keep)
my_list = [6, 7, 8]
new_df = df[df['Col_A'].isin(my_list)]
	data new_ds;
	input Col_A $ Col_B Col_C;
	datalines;
	X 2 3
	Y 5 6
	Z 12 13
	;
	run;
	# Initialise list of lists
	my_data = [['X', 2, 3], ['Y', 5, 6], ['Z', 12, 13]]

	# Create Pandas DataFrame
	df = pd.DataFrame(my_data, columns = ['Col_A', 'Col_B', 'Col_C'])

	# Print DataFrame
	df
	# Initialise dictionary of lists
	my_data = {'Col_A':['X', 'Y', 'Z'], 'Col_B':[2, 5, 12], 'Col_C':[3, 6, 13]}

	# Create DataFrame
	df = pd.DataFrame(my_data)

	# Print DataFrame
	df
	proc import datafile="C:\temp\test.csv"
	out=test_dataset
	dbms=csv
	replace;
	getnames=yes;
	run;
	# Import csv into a new Pandas DataFrame
	new_df=pd.read_csv('C:/temp/test.csv')

	# You can also specify which columns you want to read in with usecols by either
	# specifying their name or position
	# Using column names
	new_df=pd.read_csv('C:/temp/test.csv', usecols=['Name','Surname', 'Height', 'Age'])
	# Using column positions
	new_df=pd.read_csv('C:/temp/test.csv', usecols=[0,1,2,3])
	# Create new DataFrame based on old DataFrame
	# Note: plain assignment does not copy anything - new_df is just another name for the same DataFrame object.
	# Hence, your future in-place changes to new_df will affect old_df too
	new_df=old_df

	# To make an actual copy of a DataFrame use this.
	# In this case, future changes to new_df will not have an impact on old_df
	new_df=old_df.copy()

	# Print top 5 records of your new DataFrame
	/* Filter on one condition */
	data my_new_ds;
	set my_ds;
	where Col_A>=6;
	run;

	/* Filter on multiple conditions using AND */
	data my_ds;
	set my_ds;
	where Col_A>=6 and Col_B=5;
	# Filter on one condition
	new_df=df[df['Col_A']>=6]

	# Filter on multiple conditions using AND
	new_df=df[(df['Col_A']>=6) & (df['Col_B']==5)]

	# Filter on multiple conditions using OR
	new_df=df[(df['Col_A']>=6) | (df['Col_B']==1)]

	# Create a list and use isin to filter