Created
July 7, 2020 12:51
-
-
Save ghtmtt/9541ab703dd2465b84a093518b56e889 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-

"""
***************************************************************************
*                                                                         *
*   This program is free software; you can redistribute it and/or modify  *
*   it under the terms of the GNU General Public License as published by  *
*   the Free Software Foundation; either version 2 of the License, or     *
*   (at your option) any later version.                                   *
*                                                                         *
***************************************************************************
"""
from qgis.PyQt.QtCore import QCoreApplication, QVariant | |
from qgis.core import (QgsProcessing, | |
QgsFeatureSink, | |
QgsCoordinateReferenceSystem, | |
QgsProcessingException, | |
QgsProcessingAlgorithm, | |
QgsFields, | |
QgsField, | |
QgsFeature, | |
QgsWkbTypes, | |
QgsProcessingParameterDefinition, | |
QgsProcessingParameterString, | |
QgsProcessingParameterField, | |
QgsProcessingParameterFeatureSource, | |
QgsProcessingParameterFeatureSink) | |
from qgis import processing | |
import pandas as pd | |
import numpy as np | |
class QPandasMelt(QgsProcessingAlgorithm):
    """Processing algorithm that reshapes a table from wide to long format.

    The attributes of the input layer are loaded into a pandas DataFrame,
    reshaped with ``pandas.melt`` and written to a geometry-less sink.
    """

    # Identifiers of the algorithm parameters.
    INPUT = 'INPUT'
    ID_VARS = 'ID_VARS'
    VALUE_VARS = 'VALUE_VARS'
    VARIABLE_NAME = 'VARIABLE_NAME'
    VALUE_NAME = 'VALUE_NAME'
    OUTPUT = 'OUTPUT'

    def tr(self, string):
        """Return *string* translated in the 'Processing' context."""
        return QCoreApplication.translate('Processing', string)

    def createInstance(self):
        """Return a new instance of this algorithm."""
        return QPandasMelt()

    def name(self):
        """Return the algorithm identifier."""
        return 'qpandas_melt'

    def displayName(self):
        """Return the translated, user-visible algorithm name."""
        return self.tr('Melt Table (From Wide to Long)')

    def group(self):
        """Return the translated name of the group of this algorithm."""
        return self.tr('Table Transformation')

    def groupId(self):
        """Return the identifier of the group of this algorithm."""
        return 'table'

    def shortHelpString(self):
        """Return the translated help text, including a worked example."""
        return self.tr(
            """
            Transforms a wide table to a long table. Choose one field as a category
            and a set of fields that you want to reshape

            Choosing "A" as the *id* field and "B" and "C" as the *value*
            fields, the structure will change from this:

            A B C
            a 1 2
            b 3 4
            c 5 6

            to this:

            A variable value
            a B 1
            b B 3
            c B 5
            a C 2
            b C 4
            c C 6
            """
        )

    def initAlgorithm(self, config=None):
        """Declare the input, field, naming and output parameters."""
        self.addParameter(
            QgsProcessingParameterFeatureSource(
                self.INPUT,
                self.tr('Input layer'),
                [QgsProcessing.TypeVectorAnyGeometry]
            )
        )

        self.addParameter(
            QgsProcessingParameterField(
                self.ID_VARS,
                self.tr("ID Variable"),
                parentLayerParameterName=self.INPUT,
                type=QgsProcessingParameterField.Any,
            )
        )

        self.addParameter(
            QgsProcessingParameterField(
                self.VALUE_VARS,
                # The original label was missing its closing parenthesis.
                self.tr("Value Variable(s) (at least one)"),
                parentLayerParameterName=self.INPUT,
                type=QgsProcessingParameterField.Any,
                allowMultiple=True
            )
        )

        # The two output column names are flagged as advanced parameters.
        variable_name = QgsProcessingParameterString(
            self.VARIABLE_NAME,
            self.tr("Name of the variable column"),
            'Variable'
        )
        variable_name.setFlags(
            variable_name.flags() | QgsProcessingParameterDefinition.FlagAdvanced
        )
        self.addParameter(variable_name)

        value_name = QgsProcessingParameterString(
            self.VALUE_NAME,
            self.tr("Name of the value column"),
            'Value'
        )
        value_name.setFlags(
            value_name.flags() | QgsProcessingParameterDefinition.FlagAdvanced
        )
        self.addParameter(value_name)

        self.addParameter(
            QgsProcessingParameterFeatureSink(
                self.OUTPUT,
                self.tr('Reshaped table')
            )
        )

    def convert_types(self, df):
        """Map every column of *df* to the QVariant type of its output field.

        Integer dtypes map to ``QVariant.Int`` (``QVariant.LongLong`` when the
        dtype is wider than 32 bits), floating dtypes map to
        ``QVariant.Double`` and every other dtype maps to ``QVariant.String``.

        :param df: pandas DataFrame whose columns are inspected.
        :return: dict of column name -> QVariant type, in column order.
        """
        field_types = {}
        for col, dtype in df.dtypes.items():
            if pd.api.types.is_integer_dtype(dtype):
                # int64 is the usual pandas integer dtype; a 32-bit field
                # could not hold every value of such a column.
                wide = getattr(dtype, 'itemsize', 8) > 4
                field_types[col] = QVariant.LongLong if wide else QVariant.Int
            elif pd.api.types.is_float_dtype(dtype):
                field_types[col] = QVariant.Double
            else:
                field_types[col] = QVariant.String
        return field_types

    def processAlgorithm(self, parameters, context, feedback):
        """Melt the attribute table of the input and write it to the sink.

        :return: dict mapping ``OUTPUT`` to the destination id of the sink.
        :raises QgsProcessingException: if the source or the sink cannot be
            created, or if pandas cannot reshape the table with the chosen
            fields and column names.
        """
        source = self.parameterAsSource(parameters, self.INPUT, context)
        if source is None:
            raise QgsProcessingException(
                self.invalidSourceError(parameters, self.INPUT)
            )

        id_fields = self.parameterAsFields(parameters, self.ID_VARS, context)
        value_fields = self.parameterAsFields(
            parameters, self.VALUE_VARS, context
        )
        var_name = self.parameterAsString(
            parameters, self.VARIABLE_NAME, context
        )
        val_name = self.parameterAsString(parameters, self.VALUE_NAME, context)

        # Load the whole attribute table into a DataFrame.
        names = [f.name() for f in source.fields()]
        data = [feat.attributes() for feat in source.getFeatures()]
        df = pd.DataFrame(data, columns=names)

        try:
            melted = pd.melt(
                df,
                id_vars=id_fields,
                value_vars=value_fields,
                var_name=var_name,
                value_name=val_name
            )
        except (KeyError, ValueError) as err:
            # Report pandas failures as a regular Processing error.
            raise QgsProcessingException(
                self.tr('Could not reshape the table: {}').format(err)
            ) from err

        # One output field per column of the reshaped table.
        fields = QgsFields()
        for column, variant_type in self.convert_types(melted).items():
            fields.append(QgsField(column, variant_type))

        (sink, dest_id) = self.parameterAsSink(
            parameters,
            self.OUTPUT,
            context,
            fields,
            QgsWkbTypes.NoGeometry,
            QgsCoordinateReferenceSystem()
        )
        if sink is None:
            raise QgsProcessingException(
                self.invalidSinkError(parameters, self.OUTPUT)
            )

        # Progress increment per row; 0 when there is nothing to write.
        row_count = len(melted)
        step = 100.0 / row_count if row_count else 0

        for current, row in enumerate(melted.to_numpy()):
            if feedback.isCanceled():
                break
            feature = QgsFeature()
            feature.setAttributes(list(row))
            sink.addFeature(feature, QgsFeatureSink.FastInsert)
            feedback.setProgress(int(current * step))

        return {self.OUTPUT: dest_id}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment