Created
July 10, 2019 20:14
-
-
Save harpiechoise/2dd79768f2c0fab52300e7263f3b2305 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import pandas as pd

# Range of 8 consecutive daily dates. The start date is written in ISO 8601
# form so it is parsed unambiguously (no day-first / month-first guessing).
fechas = pd.date_range('2019-02-27', periods=8)

# Build an 8x4 DataFrame of random floats, indexed by the dates above.
df = pd.DataFrame(np.random.random((8, 4)), index=fechas,
                  columns=['A', 'B', 'C', 'D'])

# Extract a single column of the DataFrame as a Series.
s = df['A']

# Look up one value through one of the dates used as the index.
print(s[fechas[1]])
# Sample output (values are random, so they differ on every run):
# 0.9948947196784879

# Swapping two columns.
# Plain [] assignment works, although the original author discourages it.
df[['B', 'A']] = df[['A', 'B']]
# Recommended form: with .loc the right-hand side has to be raw values
# (to_numpy()), because pandas aligns a DataFrame right-hand side on its
# column labels before assigning, which would leave the columns unchanged.
# NOTE: running both statements swaps A and B twice, so the columns end up
# back in their original order.
df.loc[:, ['B', 'A']] = df[['A', 'B']].to_numpy()
print(df[['A', 'B']])
# Sample output:
#                    A         B
# 2019-02-27  0.319319  0.671448
# 2019-02-28  0.405093  0.960463
# 2019-03-01  0.542734  0.261184
# 2019-03-02  0.172890  0.700732
# 2019-03-03  0.665918  0.585571
# 2019-03-04  0.459141  0.068089
# 2019-03-05  0.447317  0.493405
# 2019-03-06  0.661500  0.877922
# Note: attribute access as a selection shortcut.
# Series
sa = pd.Series([1, 2, 3], index=list('abc'))

# Quick selection of an index label through attribute access.
print(sa.b)
# 2

# Quick selection of a DataFrame column through attribute access.
print(df.A)
# Sample output:
# 2019-02-27    0.319319
# 2019-02-28    0.405093
# 2019-03-01    0.542734
# 2019-03-02    0.172890
# 2019-03-03    0.665918
# 2019-03-04    0.459141
# 2019-03-05    0.447317
# 2019-03-06    0.661500
# Freq: D, Name: A, dtype: float64

# Reassign the value stored under an existing index label.
sa.a = 5
print(sa)
# a    5
# b    2
# c    3
# dtype: int64

# Attribute assignment replaces a column only if that column already exists.
df.A = list(range(len(df)))
print(df)
# Sample output:
#             A         B         C         D
# 2019-02-27  0  0.671448  0.161066  0.059666
# 2019-02-28  1  0.960463  0.822715  0.814657
# 2019-03-01  2  0.261184  0.885155  0.660747
# 2019-03-02  3  0.700732  0.935873  0.901624
# 2019-03-03  4  0.585571  0.983047  0.062070
# 2019-03-04  5  0.068089  0.737237  0.233875
# 2019-03-05  6  0.493405  0.226257  0.809233
# 2019-03-06  7  0.877922  0.786466  0.182065

# If the column does NOT exist, attribute assignment does not create it.
# pandas does not raise here: it emits the UserWarning
# "Pandas doesn't allow columns to be created via a new attribute name"
# and only sets a plain Python attribute on the object.
# The demonstration runs on a copy so that `df` keeps exactly the four
# columns A-D that the row assignment below (and every recorded output
# in this script) relies on.
demo = df.copy()
demo.X = list(range(len(demo)))  # warns; no column 'X' is created
print('X' in demo.columns)
# False
demo['X'] = list(range(len(demo)))  # correct way to add a new column
print(demo)

# Assign a whole row by position (one value per column A-D).
df.iloc[1] = [1, 2, 3, 4]
print(df)
# Sample output:
#             A          B         C          D
# 2019-02-27  0   0.671448  0.161066  0.0596664
# 2019-02-28  1          2         3          4
# 2019-03-01  2   0.261184  0.885155   0.660747
# 2019-03-02  3   0.700732  0.935873   0.901624
# 2019-03-03  4   0.585571  0.983047  0.0620701
# 2019-03-04  5  0.0680889  0.737237   0.233875
# 2019-03-05  6   0.493405  0.226257   0.809233
# 2019-03-06  7   0.877922  0.786466   0.182065
# Slices
# Series
print(s[:2])  # first 2 elements
# Sample output:
# 2019-02-27    0.686824
# 2019-02-28    0.868448
# Freq: D, Name: A, dtype: float64

print(s[::2])  # every second element
# 2019-02-27    0.686824
# 2019-03-01    0.199276
# 2019-03-03    0.805328
# 2019-03-05    0.006065
# Freq: 2D, Name: A, dtype: float64

print(s[::-1])  # all elements in reverse order
# 2019-03-06    0.764515
# 2019-03-05    0.006065
# 2019-03-04    0.120355
# 2019-03-03    0.805328
# 2019-03-02    0.966972
# 2019-03-01    0.199276
# 2019-02-28    0.868448
# 2019-02-27    0.686824
# Freq: -1D, Name: A, dtype: float64

print(s[5::-1])  # first 6 elements (positions 5 down to 0), reversed
# 2019-03-04    0.120355
# 2019-03-03    0.805328
# 2019-03-02    0.966972
# 2019-03-01    0.199276
# 2019-02-28    0.868448
# 2019-02-27    0.686824
# Freq: -1D, Name: A, dtype: float64

# As with NumPy, a slice may share memory with the original data, so take
# an explicit copy before writing into it.
s2 = s.copy()
s2[:5] = 0
print(s2)
# 2019-02-27    0.000000
# 2019-02-28    0.000000
# 2019-03-01    0.000000
# 2019-03-02    0.000000
# 2019-03-03    0.000000
# 2019-03-04    0.120355
# 2019-03-05    0.006065
# 2019-03-06    0.764515
# Freq: D, Name: A, dtype: float64

# DataFrame
print(df[:3])  # first 3 rows
#             A         B         C          D
# 2019-02-27  0  0.671448  0.161066  0.0596664
# 2019-02-28  1         2         3          4
# 2019-03-01  2  0.261184  0.885155   0.660747

print(df[2::-1])  # first 3 rows (positions 2 down to 0), reversed
#             A         B         C          D
# 2019-03-01  2  0.261184  0.885155   0.660747
# 2019-02-28  1         2         3          4
# 2019-02-27  0  0.671448  0.161066  0.0596664

# Warning: .loc is label-based, so integer *positions* are not valid slice
# bounds on this date index (label slices do work; use .iloc for positions).
# The error is caught so the rest of the script keeps running.
try:
    df.loc[2:3]
except TypeError as err:
    print('TypeError:', err)
# TypeError: cannot do slice indexing on ...
# Note 3: label-based selection with .loc
s1 = pd.Series(np.random.randn(6), index=list('abcdef'))
print(s1[3:])  # positional slice: everything from the fourth element on
# Sample output:
# d   -0.047107
# e   -0.180345
# f    0.334778
# dtype: float64

print(s1.loc['d':])  # label slice: everything from label 'd' on
# d   -0.047107
# e   -0.180345
# f    0.334778
# dtype: float64

print(s1.loc['b'])  # single value stored under label 'b'
# 0.8256215434880477

# Selecting rows of the DataFrame.
# Replace the index with one label per row. Note that 'e' comes before 'd'
# in this label list; the recorded outputs below reflect that order.
df.index = list('abcedfgh')

print(df.loc[['a', 'e', 'f'], :])  # all columns of rows a, e and f
#    A          B         C          D
# a  0   0.671448  0.161066  0.0596664
# e  3   0.700732  0.935873   0.901624
# f  5  0.0680889  0.737237   0.233875

# Label slices include both endpoints: rows a through c, columns A through B.
print(df.loc['a':'c', 'A':'B'])
#    A         B
# a  0  0.671448
# b  1         2
# c  2  0.261184

print(df.loc['a'])  # a single row, returned as a Series
# A            0
# B     0.671448
# C     0.161066
# D    0.0596664
# Name: a, dtype: object

# Boolean arrays
# For each column, tell whether the value in row 'a' is greater than 0.1.
print(df.loc['a'] > 0.1)
# A    False
# B     True
# C     True
# D    False
# Name: a, dtype: bool

# Boolean mask used as a column selector: keep only the columns whose
# value in row 'a' is greater than 0.5.
print(df.loc[:, df.loc['a'] > 0.5])
# Recorded output (truncated in the original notes):
# a 0.671448
# b 2

print(df.loc['a', 'A'])  # single cell: row 'a', column 'A'
# 0
# Position-based selection with .iloc
# `size=` asks for one random integer per index entry; without it randint
# returns a single scalar that pandas repeats for every row.
idx = list(range(0, 10, 2))
s1 = pd.Series(np.random.randint(5, size=len(idx)), index=idx)
print(s1)
# Recorded output (from a run where a single scalar was repeated on every
# row; values now differ per row and per run):
# 0    2
# 2    2
# 4    2
# 6    2
# 8    2
# dtype: int64

print(s1.iloc[:3])  # first 3 elements of a Series
# 0    2
# 2    2
# 4    2
# dtype: int64

print(df.iloc[:3])  # first 3 rows of a DataFrame
#    A         B         C          D
# a  0  0.671448  0.161066  0.0596664
# b  1         2         3          4
# c  2  0.261184  0.885155   0.660747

print(df.iloc[:3, :2])  # first 3 rows and first 2 columns
#    A         B
# a  0  0.671448
# b  1         2
# c  2  0.261184

# Second, fifth and sixth rows of the second and third columns.
print(df.iloc[[1, 4, 5], [1, 2]])
#            B         C
# b          2         3
# d   0.585571  0.983047
# f  0.0680889  0.737237

# All columns of the second and third rows (the stop position is excluded).
print(df.iloc[1:3, :])
#    A         B         C         D
# b  1         2         3         4
# c  2  0.261184  0.885155  0.660747

# Every row returned by head(), restricted to the second and third columns
# (the stop position is excluded).
print(df.head().iloc[:, 1:3])
#           B         C
# a  0.671448  0.161066
# b         2         3
# c  0.261184  0.885155
# e  0.700732  0.935873
# d  0.585571  0.983047
# Queries: selection with callables.
# Each callable receives the DataFrame being indexed and returns a valid
# indexer. The parameter is named `frame` so it does not shadow the
# module-level `df`.

# SQL equivalent:
# SELECT * FROM df WHERE B > 1
print(df.loc[lambda frame: frame.B > 1, :])
# Sample output:
#    A  B  C  D
# b  1  2  3  4

# SQL equivalent:
# SELECT A, B FROM df
print(df.loc[:, lambda frame: ['A', 'B']])
#    A          B
# a  0   0.671448
# b  1          2
# c  2   0.261184
# e  3   0.700732
# d  4   0.585571
# f  5  0.0680889
# g  6   0.493405
# h  7   0.877922

# SELECT A, B FROM df  (same columns, chosen by position)
print(df.iloc[:, lambda frame: [0, 1]])
#    A          B
# a  0   0.671448
# b  1          2
# c  2   0.261184
# e  3   0.700732
# d  4   0.585571
# f  5  0.0680889
# g  6   0.493405
# h  7   0.877922

# SELECT A FROM df  (the callable returns the first column's label)
print(df[lambda frame: frame.columns[0]])
# a    0
# b    1
# c    2
# e    3
# d    4
# f    5
# g    6
# h    7
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment