Skip to content

Instantly share code, notes, and snippets.

@Yonet
Created January 12, 2023 20:34
Show Gist options
  • Save Yonet/797ca3059294e5c82ebe15682135e965 to your computer and use it in GitHub Desktop.
Save Yonet/797ca3059294e5c82ebe15682135e965 to your computer and use it in GitHub Desktop.
Cleaning the Airbnb data
const { readFile } = require('node:fs/promises');
const { writeFile } = require('node:fs/promises');
const { resolve } = require('node:path');
/**
 * Keys stripped from every listing once the reshaped fields have been built.
 *
 * Each key appears exactly once. The misspelled "accomodates" is kept
 * alongside "accommodates" so that either key is removed if it is present.
 */
const deletePropertiesList = [
  "scrape_id",
  "source",
  "last_scraped",
  "name",
  "summary",
  "space",
  "description",
  "experiences_offered",
  "price",
  "accomodates",
  "picture_url",
  "host_id",
  "host_name",
  "host_since",
  "host_location",
  "host_about",
  "host_response_time",
  "host_response_rate",
  "host_acceptance_rate",
  "host_is_superhost",
  "host_thumbnail_url",
  "host_picture_url",
  "host_neighbourhood",
  "host_listings_count",
  "host_total_listings_count",
  "host_verifications",
  "host_has_profile_pic",
  "host_identity_verified",
  "street",
  "neighbourhood",
  "neighbourhood_cleansed",
  "neighbourhood_group_cleansed",
  "city",
  "state",
  "zipcode",
  "market",
  "smart_location",
  "country_code",
  "country",
  "is_location_exact",
  "room_type",
  "accommodates",
  "bathrooms",
  "bedrooms",
  "beds",
  "bed_type",
  "amenities",
  "square_feet",
  "weekly_price",
  "cleaning_fee",
  "guests_included",
  "extra_people",
  "minimum_nights",
  "maximum_nights",
  "calendar_updated",
  "has_availability",
  "availability_30",
  "availability_60",
  "availability_90",
  "availability_365",
  "calendar_last_scraped",
  "number_of_reviews",
  "first_review",
  "review_scores_accuracy",
  "review_scores_cleanliness",
  "review_scores_checkin",
  "review_scores_communication",
  "review_scores_location",
  "review_scores_value",
  "requires_license",
  "license",
  "jurisdiction_names",
  "instant_bookable",
  "cancellation_policy",
  "require_guest_profile_picture",
  "require_guest_phone_verification",
  "calculated_host_listings_count",
  "reviews_per_month",
  "minimum_minimum_nights",
  "maximum_minimum_nights",
  "minimum_maximum_nights",
  "maximum_maximum_nights",
  "minimum_nights_avg_ntm",
  "maximum_nights_avg_ntm",
  "number_of_reviews_ltm",
  "number_of_reviews_l30d",
  "host_url",
  "calculated_host_listings_count_entire_homes",
  "calculated_host_listings_count_private_rooms",
  "calculated_host_listings_count_shared_rooms",
];
/**
 * Parse a date in yyyy-mm-dd format into a Date at local midnight.
 *
 * @param {string} input - Date string such as "2019-07-04".
 * @returns {Date|null} The parsed date, or null when the input is missing,
 *   not a string, or does not consist of three integer parts. Null is what
 *   an invalid Date would serialize to in JSON anyway.
 */
function parseDate(input) {
  // Missing values (undefined, null, empty string) used to throw on .split().
  if (typeof input !== 'string' || input.trim() === '') {
    return null;
  }
  const [year, month, day] = input.trim().split('-').map(Number);
  if (![year, month, day].every(Number.isInteger)) {
    return null;
  }
  // new Date(year, month [, day [, hours[, minutes[, seconds[, ms]]]]])
  return new Date(year, month - 1, day); // Note: months are 0-based
}
/**
 * Turn a title into a URL-friendly slug.
 *
 * The title is lower-cased, every space becomes a hyphen, and any run of
 * characters other than word characters or hyphens is then removed.
 *
 * @param {string} title - Text to convert.
 * @returns {string} The slug.
 */
function createSlug(title) {
  const lowered = title.toLowerCase();
  const hyphenated = lowered.replace(/ /g, '-');
  const slug = hyphenated.replace(/[^\w-]+/g, '');
  return slug;
}
/**
 * Remove a set of keys from an object in place.
 *
 * @param {object} data - Object to mutate.
 * @param {Iterable<string>} deletePropertiesList - Keys to delete from
 *   `data`; keys that are not present are ignored.
 * @returns {void}
 */
function deleteProperties(data, deletePropertiesList) {
  [...deletePropertiesList].forEach((key) => {
    delete data[key];
  });
}
/**
 * Read ./listings.json, reshape every listing in place, strip the keys named
 * in deletePropertiesList, and write the result to ./listings01.json.
 *
 * Paths are resolved against the current working directory. Any failure is
 * reported on stderr via its message; the function itself never rejects.
 *
 * @returns {Promise<void>}
 */
async function logFile() {
  try {
    const filePath = resolve('./listings.json');
    const newFilePath = resolve('./listings01.json');
    const contents = JSON.parse(await readFile(filePath, { encoding: 'utf8' }));
    console.log(typeof (contents));
    if (!Array.isArray(contents)) {
      throw new TypeError('Expected listings.json to contain an array of listings');
    }

    // Listings may lack a date (e.g. no reviews yet); a single missing value
    // must not abort the whole run, so only parse values that are present.
    const toDate = (value) => (value ? parseDate(value) : null);

    // Plain loop: the listings are mutated in place, nothing is mapped.
    for (const data of contents) {
      // A missing name yields an empty slug instead of throwing.
      const slug = createSlug(String(data.name ?? ''));
      data.fees = { "rent": data.price };
      data.slug = slug;
      // Prefer the correctly spelled key; fall back to the misspelled one the
      // script originally read, in case the input actually uses it.
      data.capacity = data.accommodates ?? data.accomodates;
      data.createdAt = toDate(data.host_since);
      // NOTE(review): "street" and "zipCode" read data.address / data.zip,
      // while the delete list names "street" and "zipcode" -- confirm which
      // keys the input really carries; otherwise the placeholders always win.
      data.address = {
        "id": data.id,
        "latitude": data.latitude,
        "longitude": data.longitude,
        "slug": slug,
        "street": data.address || "1234 Main St",
        "city": data.city || "San Francisco",
        "state": data.state || "CA",
        "zipCode": data.zip || "94103",
        "country": "US",
        "neighbourhood": data.neighbourhood,
        // Random placeholder value; note it is a non-integer in [0, 10).
        "buildingNumber": Math.random() * 10,
        "neighbourhood_cleansed": data.neighbourhood_cleansed,
        "createdAt": toDate(data.first_review),
      };
      data.title = data.name;
      data.photos = [data.picture_url, data.picture_url, data.picture_url, data.picture_url];
      data.isFeatured = true;
      data.isRecommended = false;
      data.reviews = {
        "id": data.id,
        "slug": slug,
        "userId": data.host_id,
        "rating": data.review_scores_rating,
        "listingId": data.id,
        // Hard-coded placeholder comments.
        "comment": ["Great place to stay", "I would stay here again", "I would not stay here again"],
        "createdAt": toDate(data.last_review),
      };
      // Must run last: the source keys above are read before being removed.
      deleteProperties(data, deletePropertiesList);
    }

    // Report completion only after the write has actually finished.
    await writeFile(newFilePath, JSON.stringify(contents));
    console.log('done');
  } catch (err) {
    console.error(err.message);
  }
}
// Script entry point: start the conversion as soon as the file is executed.
// The returned promise is not awaited; logFile reports failures through its
// own try/catch.
logFile();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment