Created
April 30, 2023 20:49
-
-
Save carleen/16e77934511c64d103d04a5e1eda3aa9 to your computer and use it in GitHub Desktop.
Query host response time from AirBnB data, and fill any '' and 'N/A' values with the most frequent values
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sqlite3 | |
# Connect to the SQLite database | |
conn = sqlite3.connect(db_path) | |
# Create a cursor object | |
cur = conn.cursor() | |
# Execute the query | |
query = '''SELECT host_response_time, COUNT(*) AS cnt | |
FROM listings | |
WHERE host_response_time <> '' AND host_response_time <> 'N/A' | |
GROUP BY host_response_time | |
ORDER BY cnt DESC | |
LIMIT 1; | |
''' | |
cur.execute(query) | |
# Fetch the results | |
results = cur.fetchall() | |
most_common = results[0] | |
print(most_common) | |
# Beginning of cell 2 | |
query = '''SELECT id, COALESCE(REPLACE(REPLACE(host_response_time, '', NULL), 'N/A', 'within an hour'), 'within an hour') AS updated_host_response_time | |
FROM listings;''' | |
cur.execute(query) | |
# Fetch the results | |
results = cur.fetchall() | |
# Close the cursor object | |
cur.close() | |
# Close the connection to the database | |
conn.close() | |
r_time = [] | |
for v in results: | |
r_time.append(v[1]) | |
df = pd.DataFrame({'response_time': r_time}) | |
df.response_time.unique() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment