Skip to content

Instantly share code, notes, and snippets.

@ghtmtt
Created July 7, 2020 12:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ghtmtt/9541ab703dd2465b84a093518b56e889 to your computer and use it in GitHub Desktop.
Save ghtmtt/9541ab703dd2465b84a093518b56e889 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
"""
***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************
"""
from qgis.PyQt.QtCore import QCoreApplication, QVariant
from qgis.core import (QgsProcessing,
QgsFeatureSink,
QgsCoordinateReferenceSystem,
QgsProcessingException,
QgsProcessingAlgorithm,
QgsFields,
QgsField,
QgsFeature,
QgsWkbTypes,
QgsProcessingParameterDefinition,
QgsProcessingParameterString,
QgsProcessingParameterField,
QgsProcessingParameterFeatureSource,
QgsProcessingParameterFeatureSink)
from qgis import processing
import pandas as pd
import numpy as np
class QPandasMelt(QgsProcessingAlgorithm):
    """Processing algorithm that reshapes a table from wide to long format.

    The attributes of the input layer are loaded into a pandas DataFrame,
    reshaped with ``pandas.melt`` and written to a geometry-less output
    table.
    """

    # Parameter identifiers used in the ``parameters`` mapping.
    INPUT = 'INPUT'
    ID_VARS = 'ID_VARS'
    VALUE_VARS = 'VALUE_VARS'
    VARIABLE_NAME = 'VARIABLE_NAME'
    VALUE_NAME = 'VALUE_NAME'
    OUTPUT = 'OUTPUT'

    def tr(self, string):
        """Return *string* translated in the 'Processing' context."""
        return QCoreApplication.translate('Processing', string)

    def createInstance(self):
        """Return a new instance of this algorithm."""
        return QPandasMelt()

    def name(self):
        """Return the algorithm identifier."""
        return 'qpandas_melt'

    def displayName(self):
        """Return the translated, human-readable algorithm name."""
        return self.tr('Melt Table (From Wide to Long)')

    def group(self):
        """Return the translated name of the group the algorithm belongs to."""
        return self.tr('Table Transformation')

    def groupId(self):
        """Return the identifier of the group the algorithm belongs to."""
        return 'table'

    def shortHelpString(self):
        """Return the translated help text describing the algorithm."""
        # The text is kept flush-left on purpose so that the runtime string
        # (and therefore its translation key) stays unchanged.
        return self.tr(
            """
Transforms a wide table to a long table. Choose one field as a category
and a set of fields that you want to reshape
Choosing "A" as the *id* field and "B" and "C" as the *value*
fields, the structure will change from this:
A B C
a 1 2
b 3 4
c 5 6
to this:
A variable value
a B 1
b B 3
c B 5
a C 2
b C 4
c C 6
"""
        )

    def initAlgorithm(self, config=None):
        """Declare the input, field, naming and output parameters.

        :param config: optional configuration; not used by this method.
        """
        self.addParameter(
            QgsProcessingParameterFeatureSource(
                self.INPUT,
                self.tr('Input layer'),
                # TypeVector also accepts geometry-less tables, which is
                # what a table reshaping tool is typically run on; layers
                # with geometry remain selectable.
                [QgsProcessing.TypeVector]
            )
        )
        self.addParameter(
            QgsProcessingParameterField(
                self.ID_VARS,
                self.tr("ID Variable"),
                parentLayerParameterName=self.INPUT,
                type=QgsProcessingParameterField.Any,
            )
        )
        self.addParameter(
            QgsProcessingParameterField(
                self.VALUE_VARS,
                self.tr("Value Variable(s) (at least one)"),
                parentLayerParameterName=self.INPUT,
                type=QgsProcessingParameterField.Any,
                allowMultiple=True
            )
        )

        # The two output column names are advanced options with defaults.
        variable_name = QgsProcessingParameterString(
            self.VARIABLE_NAME,
            self.tr("Name of the variable column"),
            'Variable'
        )
        variable_name.setFlags(
            variable_name.flags() | QgsProcessingParameterDefinition.FlagAdvanced
        )
        self.addParameter(variable_name)

        value_name = QgsProcessingParameterString(
            self.VALUE_NAME,
            self.tr("Name of the value column"),
            'Value'
        )
        value_name.setFlags(
            value_name.flags() | QgsProcessingParameterDefinition.FlagAdvanced
        )
        self.addParameter(value_name)

        self.addParameter(
            QgsProcessingParameterFeatureSink(
                self.OUTPUT,
                self.tr('Reshaped table')
            )
        )

    def convert_types(self, df):
        """Map every column of *df* to the QVariant type of its output field.

        :param df: pandas DataFrame whose column dtypes are inspected.
        :return: dict ``{column name: QVariant type}`` in column order.
            ``int32`` maps to ``QVariant.Int``, ``int64`` to
            ``QVariant.LongLong``, ``float32``/``float64`` to
            ``QVariant.Double`` and anything else to ``QVariant.String``.
        """
        d = {}
        for col in df.columns:
            dtype = df[col].dtype
            if dtype == np.int32:
                d[col] = QVariant.Int
            elif dtype == np.int64:
                # 64-bit integers need a 64-bit field type so that large
                # values are not squeezed into a 32-bit integer field.
                d[col] = QVariant.LongLong
            elif dtype == np.float64:
                d[col] = QVariant.Double
            elif dtype == np.float32:
                d[col] = QVariant.Double
            else:
                d[col] = QVariant.String
        return d

    def processAlgorithm(self, parameters, context, feedback):
        """Melt the input attribute table and write it to the output sink.

        :param parameters: mapping of parameter identifiers to values.
        :param context: processing context used to resolve the parameters.
        :param feedback: feedback object used for progress reporting and
            cancellation checks.
        :return: dict mapping ``OUTPUT`` to the destination identifier.
        :raises QgsProcessingException: if the source or the sink cannot be
            created.
        """
        source = self.parameterAsSource(
            parameters,
            self.INPUT,
            context
        )
        if source is None:
            raise QgsProcessingException(
                self.invalidSourceError(parameters, self.INPUT)
            )

        id_fields = self.parameterAsFields(
            parameters,
            self.ID_VARS,
            context
        )
        value_fields = self.parameterAsFields(
            parameters,
            self.VALUE_VARS,
            context
        )
        var_name = self.parameterAsString(
            parameters,
            self.VARIABLE_NAME,
            context
        )
        val_name = self.parameterAsString(
            parameters,
            self.VALUE_NAME,
            context
        )

        # Load the whole attribute table into a DataFrame and reshape it.
        names = [f.name() for f in source.fields()]
        data = [i.attributes() for i in source.getFeatures()]
        df = pd.DataFrame(data, columns=names)
        melted = pd.melt(
            df,
            id_vars=id_fields,
            value_vars=value_fields,
            var_name=var_name,
            value_name=val_name
        )

        # Output fields follow the column order of the melted frame, which
        # is also the order of the values in each row written below.
        type_converted = self.convert_types(melted)
        fields = QgsFields()
        for k, v in type_converted.items():
            fields.append(QgsField(k, v))

        (sink, dest_id) = self.parameterAsSink(
            parameters,
            self.OUTPUT,
            context,
            fields,
            QgsWkbTypes.NoGeometry,
            QgsCoordinateReferenceSystem()
        )
        if sink is None:
            raise QgsProcessingException(
                self.invalidSinkError(parameters, self.OUTPUT)
            )

        row_count = len(melted)
        total = 100.0 / row_count if row_count else 0
        # itertuples() walks the frame column by column, so every value
        # keeps the type of its own column (no cross-column upcasting as
        # with to_numpy()) and features are streamed to the sink instead of
        # being collected in an intermediate list first.
        rows = melted.itertuples(index=False, name=None)
        for current, row in enumerate(rows):
            if feedback.isCanceled():
                break
            feature = QgsFeature()
            feature.setAttributes(list(row))
            sink.addFeature(feature, QgsFeatureSink.FastInsert)
            feedback.setProgress(int(current * total))

        return {self.OUTPUT: dest_id}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment