Skip to content

Instantly share code, notes, and snippets.

@taroyanaka
Created January 20, 2024 01:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save taroyanaka/df7f4ad17ff01e5c6be2a78b0ce6b7b4 to your computer and use it in GitHub Desktop.
Save taroyanaka/df7f4ad17ff01e5c6be2a78b0ce6b7b4 to your computer and use it in GitHub Desktop.
Node.js scraper that collects the Billboard year-end singles rankings for every year from Wikipedia
// Wikipedia pages holding the Billboard year-end singles chart, one URL per
// year from 1946 through 2023, in chronological order. The page title pattern
// changes over time (top singles -> top 30 -> top 50 -> Year-End Hot 100), so
// the URLs are listed literally rather than generated.
//
// Every URL must end in the four-digit chart year: the scraper derives the
// `year` field of each record from that suffix.
//
// The list can instead live in a sibling module and be loaded like this:
// const wiki_bil_file = require('./wiki_bil.js');
// const wiki_bil = wiki_bil_file['wiki_bil'];
// console.log(wiki_bil[0]);
const wiki_bil = [
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_singles_of_1946",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_singles_of_1947",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_singles_of_1948",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_30_singles_of_1949",
  // 1950 was missing from the original list, leaving a one-year gap in the data.
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_30_singles_of_1950",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_30_singles_of_1951",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_30_singles_of_1952",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_30_singles_of_1953",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_30_singles_of_1954",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_30_singles_of_1955",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_50_singles_of_1956",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_50_singles_of_1957",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_50_singles_of_1958",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1959",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1960",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1961",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1962",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1963",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1964",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1965",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1966",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1967",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1968",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1969",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1970",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1971",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1972",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1973",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1974",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1975",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1976",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1977",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1978",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1979",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1980",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1981",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1982",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1983",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1984",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1985",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1986",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1987",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1988",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1989",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1990",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1991",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1992",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1993",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1994",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1995",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1996",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1997",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1998",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1999",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2000",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2001",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2002",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2003",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2004",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2005",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2006",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2007",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2008",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2009",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2010",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2011",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2012",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2013",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2014",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2015",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2016",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2017",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2018",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2019",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2020",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2021",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2022",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2023",
];
// To share the list with other scripts, export it:
// module.exports = { wiki_bil };
const axios = require('axios');
const cheerio = require('cheerio');
const fs = require('fs/promises');
// Earlier single-page experiments, kept for reference:
// const url = 'https://en.wikipedia.org/wiki/List_of_Billboard_Year-End_number-one_singles_and_albums';
// const url = wiki_bil[50];
// The full set of pages to scrape. This is the same array object as wiki_bil
// (an alias, not a copy); main() iterates over it.
const all_url = wiki_bil;
/**
 * Returns the four-digit year that ends `url` (e.g. "..._singles_of_1946" -> 1946).
 *
 * @param {string} url - Page URL whose last four characters are the chart year.
 * @returns {number} The year as an integer.
 * @throws {Error} If `url` does not end in four digits.
 */
function yearFromUrl(url) {
  const match = /\d{4}$/.exec(String(url));
  if (match === null) {
    throw new Error(`Cannot determine chart year: URL does not end in four digits: ${url}`);
  }
  return Number.parseInt(match[0], 10);
}

/**
 * Reads one cell from a row's cell collection as clean text.
 *
 * Numeric citation markers such as "[12]" are removed BEFORE trimming, so a
 * marker separated from the text by a space does not leave trailing whitespace.
 * A position past the end of the row yields an empty string.
 *
 * @param {object} cells - Cheerio collection of the row's `td` elements.
 * @param {number} position - Zero-based cell index.
 * @returns {string} The cell text without citation markers or outer whitespace.
 */
function cellText(cells, position) {
  return cells.eq(position).text().replace(/\[\d+\]/g, '').trim();
}

/**
 * Scraping function: downloads one chart page and extracts a record for every
 * data row found in any `.wikitable` on that page.
 *
 * Cells are read positionally: cell 0 is treated as the rank, cell 1 as the
 * title and cell 2 as the artist. The rank printed in the table is used when
 * cell 0 is a plain integer; otherwise the row's position among all matched
 * rows is used as a fallback. Rows from unrelated tables may still be
 * returned (typically with an empty title or artist); the caller is expected
 * to filter those out.
 *
 * @param {string} url - Page URL; must end in the four-digit chart year.
 * @returns {Promise<Array<{year: number, rank: number, title: string, artist: string}>>}
 *   One record per data row, in document order.
 * @throws {Error} If the URL has no trailing year, or if fetching/parsing fails
 *   (fetch/parse errors are logged and then rethrown unchanged).
 */
async function scrapeData(url) {
  // The year is the same for every row, so derive it once and fail fast
  // (before any network traffic) when the URL cannot provide it.
  const year = yearFromUrl(url);
  try {
    const response = await axios.get(url);
    const $ = cheerio.load(response.data);
    const resultArray = [];
    $('.wikitable tbody tr').each((index, element) => {
      const columns = $(element).find('td');
      // Skip the first row (column captions: No. / Title / Artist(s)) and any
      // other header-only row, which contains no <td> cells at all.
      if (index === 0 || columns.length === 0) return;
      const printedRank = cellText(columns, 0);
      resultArray.push({
        year,
        rank: /^\d+$/.test(printedRank) ? Number.parseInt(printedRank, 10) : index,
        title: cellText(columns, 1),
        artist: cellText(columns, 2),
      });
    });
    return resultArray;
  } catch (error) {
    console.error('Error fetching data:', error);
    throw error;
  }
}
/**
 * Main routine: scrapes every page in `all_url`, drops incomplete rows, and
 * writes the combined result to ./billboard_100.json as pretty-printed JSON.
 *
 * Pages are fetched one at a time, in list order, and each URL is logged
 * after it has been scraped.
 *
 * Rows whose title or artist is empty are removed before writing. These come
 * from table rows that do not follow the rank/title/artist layout, e.g.
 *   { "year": 2023, "rank": 104, "title": "\"Rich Flex\"\"Spin Bout U\"...", "artist": "" }
 *   { "year": 2023, "rank": 105, "title": "", "artist": "" }
 *
 * @returns {Promise<void>} Resolves once the file has been written.
 * @throws Rethrows any scraping error, and any file-write error after logging it.
 */
async function main() {
  console.log('Scraping data from Wikipedia...');
  const data = [];
  for (const url of all_url) {
    const result = await scrapeData(url);
    console.log(url);
    data.push(...result);
  }
  // Keep only rows that have both a title and an artist.
  const data3 = data.filter((item) => item.title !== '' && item.artist !== '');
  // `fs` is the promise-based API ('fs/promises'): it takes no callback, so the
  // write must be awaited for the messages below to reflect the real outcome.
  try {
    await fs.writeFile('./billboard_100.json', JSON.stringify(data3, null, 2));
    console.log('Data successfully written to billboard_100.json');
  } catch (err) {
    console.error('Error writing file:', err);
    throw err;
  }
}
// Run the scraper. main() returns a promise, so a failure is reported here and
// turned into a non-zero exit code instead of being left as an unhandled rejection.
main().catch((error) => {
  console.error('Scraping failed:', error);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment