Skip to content

Instantly share code, notes, and snippets.

@joao-parana
Created February 5, 2023 17:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joao-parana/92dec48a6ccb10bb4283e5f411cd48fa to your computer and use it in GitHub Desktop.
Save joao-parana/92dec48a6ccb10bb4283e5f411cd48fa to your computer and use it in GitHub Desktop.
How to Iterate over rows in pandas, and why you shouldn't
# %%
import httpx
import pandas as pd
# %% Load the websites table, using the first CSV column as the index
websites = pd.read_csv(
    "resources/popular_websites.csv",
    index_col=0,
)
print(websites)
# %% Define function to check connection
def check_connection(name, url):
    """Request ``url`` and print whether the site could be reached.

    Sends a GET request with ``httpx.get`` and reads the ``location``
    response header to decide whether to mention a redirect.

    Args:
        name: Label used for the site in the printed messages.
        url: Address to request.

    Returns:
        True when a response was received, False when
        ``httpx.ConnectError`` was raised.
    """
    try:
        response = httpx.get(url)
        location = response.headers.get("location")
        # No header, or a target that starts with the requested URL, is not
        # reported as a redirect.
        if location is None or location.startswith(url):
            print(f"{name} is online!")
        else:
            print(f"{name} is online! But redirects to {location}")
        return True
    except httpx.ConnectError:
        print(f"Failed to establish a connection with {url}")
        return False
# %% Use .itertuples() to iterate through all rows
for website in websites.itertuples():
    check_connection(website.name, website.url)
# %% You may use .iterrows() if you have dynamic column names
name_column = "name"
url_column = "url"
for _, website in websites.iterrows():
    check_connection(website[name_column], website[url_column])
# %% Use list comprehension to iterate through all rows
# Note that this creates a list that is thrown away again
[
    check_connection(website.name, website.url)
    for website in websites.itertuples()
]
# %% Use the index to iterate through rows
# The index yields labels, so rows are fetched with the label-based .loc;
# the positional .iloc only agrees when the labels happen to be 0..n-1.
for i in websites.index:
    print({**websites.loc[i]})
# %% Transpose and cast to dictionary to iterate through rows
for website in websites.T.to_dict().values():
    print(website)
# %% Use .agg() to aggregate over columns
websites.agg(
    total_views=("total_views", "sum"),
    average_views=("total_views", "mean"),
)
@joao-parana
Copy link
Author

#!/usr/bin/env python3

# %%
import httpx
import pandas as pd

# %% Define function to check connection
def check_connection(name, url) -> bool:
    """Probe ``url`` with a GET request and print the result.

    Args:
        name: Label used for the site in the printed messages.
        url: Address to request.

    Returns:
        True if ``httpx.get`` returned a response, False if it raised
        ``httpx.ConnectError``.
    """
    # Only the request itself can raise ConnectError, so keep the try narrow.
    try:
        reply = httpx.get(url)
    except httpx.ConnectError:
        print(f"Failed to establish a connection with {url}")
        return False

    location = reply.headers.get("location")
    redirects_elsewhere = location is not None and not location.startswith(url)
    if redirects_elsewhere:
        print(f"{name} is online! But redirects to {location}")
    else:
        print(f"{name} is online!")
    return True

def my_main():
    """Run every row-iteration demonstration against the websites CSV.

    Reads ``resources/popular_websites.csv`` with its first column as the
    index, then walks the rows with ``.itertuples()``, ``.iterrows()``, a
    list comprehension, index labels, and a transposed dictionary, and
    finally prints an aggregate over the ``total_views`` column.
    """
    print('\n**** Read CSV')
    # %% Read CSV, using the first column as the index
    websites = pd.read_csv("resources/popular_websites.csv", index_col=0)
    print(websites)
    # %% Use .itertuples() to iterate through all rows
    print('\n**** Use .itertuples() method to iterate through all rows')
    for website in websites.itertuples():
        check_connection(website.name, website.url)

    # %% You may use .iterrows() if you have dynamic column names
    print('\n**** You may use .iterrows() if you have dynamic columnnames')
    name_column = "name"
    url_column = "url"
    for _, website in websites.iterrows():
        check_connection(website[name_column], website[url_column])

    print('\n**** Use list comprehension to iterate through all rows')
    # %% Use list comprehension to iterate through all rows
    #    Note that this creates a list that is thrown away again
    _ = [
        check_connection(website.name, website.url)
        for website in websites.itertuples()
    ]

    print('\n**** Use the index to iterate through rows')
    # %% Use the index to iterate through rows
    #    The index yields labels, so rows are fetched with the label-based
    #    .loc; the positional .iloc only agrees when labels are 0..n-1.
    for i in websites.index:
        print({**websites.loc[i]})

    print('\n**** Transpose and cast to dictionary to iterate through rows')
    # %% Transpose and cast to dictionary to iterate through rows
    for website in websites.T.to_dict().values():
        print(website)

    print('\n**** Use .agg() to aggregate over columns')
    # %% Use .agg() to aggregate over columns
    #    .agg() returns a new object, so keep it and print that rather than
    #    the unaggregated frame.
    summary = websites.agg(
        total_views=("total_views", "sum"),
        average_views=("total_views", "mean"),
    )
    print(summary)

# Run the demonstrations only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    print('\n**** Begin test')
    my_main()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment