Skip to content

Instantly share code, notes, and snippets.

@carleen
Created April 30, 2023 20:49
Show Gist options
  • Save carleen/16e77934511c64d103d04a5e1eda3aa9 to your computer and use it in GitHub Desktop.
Save carleen/16e77934511c64d103d04a5e1eda3aa9 to your computer and use it in GitHub Desktop.
Query host response time from AirBnB data, and fill any '' and 'N/A' values with the most frequent values
import sqlite3
# Connect to the SQLite database
conn = sqlite3.connect(db_path)
# Create a cursor object
cur = conn.cursor()
# Execute the query
query = '''SELECT host_response_time, COUNT(*) AS cnt
FROM listings
WHERE host_response_time <> '' AND host_response_time <> 'N/A'
GROUP BY host_response_time
ORDER BY cnt DESC
LIMIT 1;
'''
cur.execute(query)
# Fetch the results
results = cur.fetchall()
most_common = results[0]
print(most_common)
# Beginning of cell 2
query = '''SELECT id, COALESCE(REPLACE(REPLACE(host_response_time, '', NULL), 'N/A', 'within an hour'), 'within an hour') AS updated_host_response_time
FROM listings;'''
cur.execute(query)
# Fetch the results
results = cur.fetchall()
# Close the cursor object
cur.close()
# Close the connection to the database
conn.close()
r_time = []
for v in results:
r_time.append(v[1])
df = pd.DataFrame({'response_time': r_time})
df.response_time.unique()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment