Last active: April 9, 2024 21:09
Scraping US median housing price data from Wikipedia
State rank | State or territory | Median home price in US$
---|---|---
1 | Hawaii | 839013
2 | California | 765197
— | District of Columbia | 610548
3 | Massachusetts | 596410
4 | Washington | 575894
5 | Colorado | 539151
6 | Utah | 509433
7 | New Jersey | 503432
8 | Oregon | 487244
9 | New Hampshire | 454948
10 | New York | 453138
11 | Montana | 448238
12 | Idaho | 443500
13 | Rhode Island | 438711
14 | Arizona | 426680
15 | Nevada | 426267
16 | Maryland | 406843
17 | Florida | 392306
18 | Connecticut | 384244
19 | Maine | 382580
20 | Virginia | 377699
21 | Delaware | 374252
22 | Vermont | 373001
23 | Alaska | 349502
— | United States | 347716
24 | Wyoming | 334782
25 | Minnesota | 323034
26 | North Carolina | 322527
27 | Georgia | 321821
28 | Tennessee | 311531
29 | Texas | 298624
30 | South Dakota | 292551
31 | New Mexico | 292280
32 | South Carolina | 287882
33 | Wisconsin | 286394
34 | Pennsylvania | 255570
35 | Nebraska | 251315
36 | Illinois | 251267
37 | North Dakota | 248022
38 | Missouri | 238125
39 | Michigan | 232511
40 | Indiana | 231533
41 | Alabama | 221490
42 | Ohio | 217698
43 | Kansas | 217315
44 | Iowa | 208755
45 | Oklahoma | 199378
46 | Arkansas | 198838
47 | Kentucky | 196550
48 | Louisiana | 194308
49 | Mississippi | 171613
50 | West Virginia | 155491
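In the raw Wikipedia markup, each price cell in the table above is a formatted string such as `$839,013`, and state-name cells can carry non-breaking spaces. The notebook below normalizes these with a small regex before writing the CSV; a standalone sketch of that cleaning step:

```python
import re

def clean_cell(text: str) -> str:
    """Normalize one scraped table cell: drop a leading "$",
    then remove commas, newlines, and non-breaking spaces."""
    return re.sub(r'[,\n\xa0]', '', text.lstrip('$'))

print(clean_cell('$839,013\n'))  # -> 839013
print(clean_cell('Hawaii\xa0'))  # -> Hawaii
```

This is why the CSV preview above shows bare integers like `839013` rather than the page's formatted dollar amounts.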
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "zbpNWGif3una"
},
"source": [
"# Scraping Wikipedia US Housing Price Data\n",
"This notebook simply pulls the [US median house price data](https://en.wikipedia.org/wiki/List_of_U.S._states_by_median_home_price) down from *Wikipedia* and saves it to a *CSV* file."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "nLqshbIu4ZUx"
},
"source": [
"## Setup\n",
"First we need to install the *scraping libraries* ..."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "950N0MZz1BXK"
},
"outputs": [],
"source": [
"# get the necessary scraping tools\n",
"!pip install beautifulsoup4 requests"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "rXa1NZY54i88"
},
"source": [
"## Scraping\n",
"Now we can scrape the *Wikipedia* page and save the data into the *CSV* file `housing_median_data.csv`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "MIqrdHP31JOQ"
},
"outputs": [],
"source": [
"import re\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"import csv\n",
"\n",
"# URL of the Wikipedia page containing the table\n",
"url = \"https://en.wikipedia.org/wiki/List_of_U.S._states_by_median_home_price\"\n",
"\n",
"# send a GET request to the URL and fail fast on HTTP errors\n",
"response = requests.get(url, timeout=30)\n",
"response.raise_for_status()\n",
"\n",
"# parse the HTML content\n",
"soup = BeautifulSoup(response.text, 'html.parser')\n",
"\n",
"# find the first wikitable, which contains the data\n",
"table = soup.find('table', class_='wikitable')\n",
"\n",
"# initialize a list to store the table data\n",
"data = []\n",
"\n",
"# iterate over the rows of the table\n",
"for row in table.find_all('tr'):\n",
"    # initialize a list to store the data for each row\n",
"    row_data = []\n",
"\n",
"    # iterate over the cells (columns) in the row\n",
"    for cell in row.find_all(['td', 'th']):\n",
"        # strip a leading \"$\", then drop commas, newlines, and non-breaking spaces\n",
"        row_data.append(re.sub(r'[,\\n\\xa0]', '', cell.get_text().lstrip(\"$\")))\n",
"\n",
"    # check if the row contains any non-empty cells\n",
"    if any(row_data):\n",
"        # append the row data to the main data list\n",
"        data.append(row_data)\n",
"\n",
"# define the path to save the CSV file\n",
"csv_file = \"housing_median_data.csv\"\n",
"\n",
"# write the data to a CSV file\n",
"with open(csv_file, 'w', newline='') as file:\n",
"    writer = csv.writer(file)\n",
"    writer.writerows(data)\n",
"\n",
"# notify when done\n",
"print(f\"Data has been scraped and saved to: {csv_file}\")"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
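One caveat with the scraping cell: `soup.find('table', class_='wikitable')` grabs the *first* wikitable on the page, so if Wikipedia ever reorders or adds tables, the scrape silently picks up the wrong one. A more defensive option is to select the table by its caption text. A sketch of that idea (the HTML below is a made-up stand-in for the real page, not Wikipedia's actual markup):

```python
from bs4 import BeautifulSoup

# minimal stand-in for a page with more than one wikitable
html = """
<table class="wikitable"><caption>Other table</caption></table>
<table class="wikitable">
  <caption>Median home price by state</caption>
  <tr><th>State rank</th><th>State or territory</th><th>Median home price in US$</th></tr>
  <tr><td>1</td><td>Hawaii</td><td>$839,013</td></tr>
</table>
"""

soup = BeautifulSoup(html, "html.parser")

# collect every wikitable, then keep the one whose caption mentions the data we want
tables = soup.find_all("table", class_="wikitable")
target = next(
    t for t in tables
    if t.caption and "home price" in t.caption.get_text().lower()
)

# extract the rows from the selected table only
rows = [
    [cell.get_text(strip=True) for cell in tr.find_all(["td", "th"])]
    for tr in target.find_all("tr")
]
print(rows[1])  # -> ['1', 'Hawaii', '$839,013']
```

If the caption changes or disappears, the `next(...)` call raises `StopIteration` instead of quietly writing the wrong table to disk, which makes breakage easy to notice.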