miguelvb/python_codes_from_GTP3.md

## python_codes_from_GTP3.md

      
    Raw
  

              python_codes_from_GTP3.md
            
          
    Python code from GPT-3

Generated with text-davinci-002:
https://beta.openai.com/playground/p/default-openai-api?model=text-davinci-002
histogram

"""
Create a Python function that calculates the histogram of the values of a DataFrame column and shows the plot on log-log axes using the matplotlib library.
arguments: "df" : dataframe; "col": column
x-axis label is  argument "col"
y-axis label is "Counts"
"""

import pandas as pd
import matplotlib.pyplot as plt

def get_loglog_hist(df, col, *args, **kwargs):
    """Plot the value counts of a DataFrame column on log-log axes.

    Each distinct value found in ``df[col]`` is drawn against the number of
    rows that hold it. The x-axis is labelled with ``col`` and the y-axis
    with "Counts"; the figure is displayed with ``plt.show()``.

    Args:
        df: DataFrame holding the data.
        col: Name of the column to count; also used as the x-axis label.
        *args: Extra positional arguments forwarded to ``plt.loglog``
            (for example a format string).
        **kwargs: Extra keyword arguments forwarded to ``plt.loglog``.
            They override the default style of unconnected circle markers.

    Note:
        Non-positive column values cannot be represented on a logarithmic
        x-axis.
    """
    # Sort by value so the points run left to right along the x-axis.
    counts = df[col].value_counts().sort_index()
    # Defaults reproduce the original appearance; caller-supplied keys win.
    # Previously *args/**kwargs were accepted but silently ignored.
    style = {"marker": "o", "linestyle": "none", **kwargs}
    # Pass x and y explicitly instead of relying on index inference.
    plt.loglog(counts.index, counts.values, *args, **style)
    plt.xlabel(col)
    plt.ylabel("Counts")
    plt.show()

# Example call. NOTE(review): nodesG is not defined in this snippet;
# presumably a DataFrame with a 'degree' column created elsewhere — confirm.
get_loglog_hist(nodesG, 'degree')
iris training set

prompt :

"""
# Load iris data from datasets and plot the training data.
"""
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Fetch the iris dataset that ships with scikit-learn.
from sklearn import datasets

iris = datasets.load_iris()

# One column per feature, plus the numeric class label as 'target'.
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_df['target'] = iris.target

# The frame has no 'species' column yet: derive it by translating each
# numeric label into the matching entry of iris.target_names.
species_by_code = {
    0: iris.target_names[0],
    1: iris.target_names[1],
    2: iris.target_names[2],
}
iris_df['species'] = iris_df['target'].map(species_by_code)

# Draw a pair plot of the frame, colored by species.
sns.set_style('whitegrid')
sns.pairplot(iris_df, hue='species', height=3.5)
plt.show()
subset igraph:

# python code
# igraph module
# make a subgraph of graph 'G' containing the vertices that have the top 30 values of attribute 'size':

# Collect the 30 largest 'size' values; equal values stay as separate entries.
size_values = list(G.vs['size'])
size_values.sort(reverse=True)
top_30_size = size_values[:30]

# Keep every vertex whose 'size' equals one of those values. Vertices tied
# with a selected value are all kept, so more than 30 may be selected.
top_30_size_vertex = []
for vertex in G.vs:
    if vertex['size'] in top_30_size:
        top_30_size_vertex.append(vertex)

# Build the subgraph of 'G' restricted to the selected vertices.
G_top_30_size = G.subgraph(top_30_size_vertex)

# Draw the subgraph in a 500x500 bounding box with a margin of 20.
plot(G_top_30_size, bbox=(0, 0, 500, 500), margin=20)

# NOTE(review): plot(), show() and close() are not defined in the visible
# snippet; presumably they come from an earlier import — confirm.
show()
close()

# Report the subgraph's vertex count, edge count and component count.
print(len(G_top_30_size.vs))
print(len(G_top_30_size.es))
print(len(G_top_30_size.components()))

# print the number of components of the subgraph
print(len(G_top_30_size.components()