Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
- Applying `pd.to_datetime()` to a column using `mutate` seems to fail
import pandas as pd
from siuba import *
# Reproduction: calling a plain pandas function on a siuba symbol inside `mutate`.
# The frame has a single row whose date columns are stored as ISO-formatted strings.
my_data = {
'name': ["Abigail Adams"],
'birth': ["1744-11-22"],
'death': ["1818-10-28"]
}
df = pd.DataFrame(my_data)
# This pipeline will raise an error:
# TypeError: Symbolic objects can not be converted to True/False, or used with these keywords: not, and, or.
# NOTE(review): presumably `pd.to_datetime(_.birth)` is evaluated immediately, while the
# arguments to `mutate` are being built, so pandas receives the `_.birth` symbol instead
# of the actual column -- confirm against the siuba docs.
# Possible workaround (untested here): pass a function of the frame instead, e.g.
# `mutate(birth_dt = lambda d: pd.to_datetime(d.birth))`.
(
df
>> mutate(birth_dt = pd.to_datetime(_.birth))
)
import pandas as pd
from siuba import filter
from siuba import _
# Reproduction: using a non-boolean column as the condition of siuba's `filter`.
# The `dates` column holds one date string and one missing value (`pd.NA`).
my_data = pd.DataFrame({
'dates': ["1776-07-04", pd.NA]
})
# The pipeline below gives "TypeError: Cannot perform 'rand_' with a dtyped [object] array and scalar of type [bool]"
# `_.dates` selects the column itself (strings / NA), not a True/False condition per row.
# NOTE(review): if the intent is to keep rows where `dates` is present, an explicit
# boolean mask such as `filter(_.dates.notna())` should avoid the error -- untested here.
(
my_data
>> filter(_.dates)
)
from plotnine.data import mpg
from siuba import rename
# Reproduction: referring to a column named `class` through attribute access.
# This fails with an "invalid syntax" error
# `class` is a reserved keyword in Python, so `_.class` cannot be parsed; because the
# error is raised while parsing, no statement in this snippet gets to run.
# NOTE(review): item access (`_["class"]`) avoids the keyword and is the likely
# workaround -- confirm that `rename` accepts it.
# NOTE(review): the import lines of this snippet bring in `mpg` and `rename` only;
# if it is run on its own, `_` also needs `from siuba import _`.
model = (
mpg
>> rename(car_class = _.class)
)
import pandas as pd
from siuba import *
# Reproduction: a column whose name collides with a DataFrame method.
# The frame has a single integer column called "pop".
my_data = pd.DataFrame({
"pop": [1, 2, 3]
})
# The error below reports a 'method' operand: pandas DataFrames have a `pop` method,
# so attribute access `.pop` yields that method rather than the "pop" column.
# NOTE(review): item access (`_["pop"] * 2`) should select the column instead -- untested here.
(
my_data
>> mutate(pop_doubled = _.pop * 2)
) # TypeError: unsupported operand type(s) for *: 'method' and 'int'
import statsmodels.formula.api as smf
from plotnine.data import mpg
# Reproduction: a formula term that is a Python keyword.
# You'll get a syntax error, and I think it's because `class`
# is a reserved keyword in Python, even though it's also
# the name of a variable in the dataset.
# NOTE(review): statsmodels formulas are presumably parsed by patsy, which documents
# `Q("name")` for quoting variable names that are not valid Python identifiers,
# e.g. "cty ~ hwy + Q('class')" -- untested here, confirm against the patsy docs.
model = smf.ols("cty ~ hwy + class", data=mpg)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment