Skip to content

Instantly share code, notes, and snippets.

@al102964
Last active August 25, 2020 22:48
Show Gist options
  • Save al102964/cf2d0e71cebe70aba58803c70b982717 to your computer and use it in GitHub Desktop.
Save al102964/cf2d0e71cebe70aba58803c70b982717 to your computer and use it in GitHub Desktop.
from pyspark.sql import Window
import pyspark.sql.functions as f
# Hourly consumption for device 'eta': take the highest "valor" seen in each
# 60-minute window (grouped together with "tarifa") and subtract the maximum
# of the row that precedes it in "start" order.
# NOTE(review): presumably "valor" is a cumulative meter reading, which is what
# makes the difference of consecutive maxima a consumption figure — confirm.

# No partitionBy: each row is compared with its predecessor in global "start"
# order, so Spark evaluates this window in a single partition.
w = Window.orderBy("start")

consumo = (
    df.select("central", "id_dispositivo", "valor", "timestamp", "tarifa")
    .filter("id_dispositivo = 'eta'")
    .groupBy(f.window("timestamp", "60 minutes").alias("intervalo"), "tarifa")
    .agg(f.max("valor").alias("consumo_max_hora"))
    # Flatten the window struct; from here on the column is plain "start".
    .select("intervalo.start", "tarifa", "consumo_max_hora")
    # No sort is needed before the window: `w` imposes its own ordering and
    # the final sort below decides the output order.
    .withColumn("lag", f.lag("consumo_max_hora").over(w))
    # The earliest row has no predecessor, so its "lag" (and therefore its
    # "consumo") is null.
    .withColumn("consumo", f.col("consumo_max_hora") - f.col("lag"))
    .select("start", "tarifa", "consumo")
    .sort(f.desc("start"))
)
display(consumo)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment