Skip to content

Instantly share code, notes, and snippets.

@noelyahan
Last active March 4, 2024 20:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save noelyahan/5f86d4af8df3979640332e77952d25d7 to your computer and use it in GitHub Desktop.
Save noelyahan/5f86d4af8df3979640332e77952d25d7 to your computer and use it in GitHub Desktop.
const fs = require('fs');
const readline = require('readline');
const axios = require('axios');
const createCsvWriter = require('csv-writer').createObjectCsvWriter;
// Captures the owner (group 1) and repository name (group 2) from a URL that
// ends in `github.com/<owner>/<repo>`. Because of the `$` anchor, URLs with a
// trailing slash or a deeper path do not match.
const regex = /github\.com\/([^\/]+)\/([^\/]+)$/;

// GitHub API token. Read it from the GITHUB_TOKEN environment variable so a
// real credential never has to be written into this file; 'xxx' is the
// original placeholder and is used only when the variable is unset or empty.
const token = process.env.GITHUB_TOKEN || 'xxx';

// Column layout of the CSV file: `id` is the key looked up on each record,
// `title` is the text written in the header row.
const csvHeader = [
  { id: 'repo', title: 'Repo' },
  { id: 'forks', title: 'Forks' },
  { id: 'stars', title: 'Stars' }
];

// Create a CSV writer object
const csvWriter = createCsvWriter({
  path: 'output.csv', // File path where the CSV will be saved
  header: csvHeader
});

// Request options passed to every axios call against the GitHub API.
const axiosConfig = {
  headers: {
    Authorization: `token ${token}`
  }
};
/**
 * Fetches the fork and star counts of one repository from the GitHub API.
 *
 * @param {string} owner - Owner segment of the repository URL.
 * @param {string} repo - Repository-name segment of the repository URL.
 * @returns {Promise<{forks: *, stars: *}|undefined>} `forks` and `stars` are
 *   taken from `forks_count` and `stargazers_count` of the response body.
 *   Resolves to `undefined` when the request fails; the failure is logged
 *   and never thrown.
 */
async function getGithubMetaData(owner, repo) {
  const url = `https://api.github.com/repos/${owner}/${repo}`;
  try {
    const response = await axios.get(url, axiosConfig);
    const { forks_count, stargazers_count } = response.data;
    console.log("completed: ", owner, repo);
    return { forks: forks_count, stars: stargazers_count };
  } catch (error) {
    // Log a summary only. The raw axios error carries the request config,
    // including the Authorization header, so dumping it would leak the token.
    const status = error && error.response ? error.response.status : undefined;
    const message = error && error.message ? error.message : String(error);
    const detail = status === undefined ? message : `HTTP ${status} - ${message}`;
    console.error(`Error fetching data: ${owner}/${repo}: ${detail}`);
    return undefined;
  }
}
/**
 * Collects every http, https or ftp URL found in a piece of text.
 *
 * @param {string} readmeContent - Text to scan.
 * @returns {string[]} The matched URLs in order of appearance, or an empty
 *   array when the text contains none.
 */
function extractLinksFromReadme(readmeContent) {
  const matches = readmeContent.match(/(https?|ftp):\/\/[^\s/$.?#].[^\s]*\b/g);
  if (matches === null) {
    return [];
  }
  return matches;
}
/**
 * Reads a file line by line and returns the URLs found in its text.
 *
 * @param {string} filePath - Path of the README file to read.
 * @returns {Promise<string[]>} Resolves with the result of
 *   `extractLinksFromReadme` applied to the file content. Rejects when the
 *   file cannot be opened or read.
 */
function extractLinksFromReadmeFile(filePath) {
  return new Promise((resolve, reject) => {
    const readStream = fs.createReadStream(filePath);
    const rl = readline.createInterface({
      input: readStream,
      crlfDelay: Infinity, // always treat \r\n as a single line break
      terminal: false
    });
    const lines = [];

    // A missing or unreadable file surfaces as an 'error' event on the
    // stream itself; without a listener it would crash the process instead
    // of rejecting this promise.
    readStream.on('error', (err) => {
      reject(err);
      rl.close();
    });
    rl.on('line', (line) => {
      lines.push(line);
    });
    rl.on('close', () => {
      // readline strips line terminators. Put them back so a URL at the end
      // of one line cannot fuse with the first word of the next line.
      resolve(extractLinksFromReadme(lines.join('\n')));
    });
    rl.on('error', (err) => {
      reject(err);
    });
  });
}
/**
 * Extracts GitHub repository links from a README file, looks up fork and
 * star counts for each one, and writes the results through `csvWriter`.
 *
 * Failures are logged rather than thrown. The returned promise settles only
 * after the CSV write has finished or failed.
 *
 * @param {string} readmeFilePath - Path of the README file to scan.
 * @returns {Promise<void>}
 */
async function main(readmeFilePath) {
  if (!readmeFilePath) {
    console.error('Error reading the README file: no file path was provided');
    return;
  }

  let links;
  try {
    links = await extractLinksFromReadmeFile(readmeFilePath);
  } catch (error) {
    console.error('Error reading the README file:', error);
    return;
  }

  // Keep the first occurrence of each owner/repo pair, so a repository that
  // is linked several times costs one API call and produces one CSV row.
  const seen = new Set();
  const repos = [];
  for (const link of links) {
    if (!link.includes(`https://github.com/`)) {
      continue;
    }
    const match = link.match(regex);
    if (!match || match.length !== 3) {
      continue;
    }
    const owner = match[1];
    const repo = match[2];
    const key = `${owner}/${repo}`;
    if (seen.has(key)) {
      continue;
    }
    seen.add(key);
    repos.push({ owner, repo });
  }

  try {
    const csvData = [];
    // Requests are issued one at a time, in README order.
    for (const { owner, repo } of repos) {
      const meta = await getGithubMetaData(owner, repo);
      if (!meta) {
        // The lookup failed; getGithubMetaData has already logged the reason.
        continue;
      }
      csvData.push({ repo: `https://github.com/${owner}/${repo}`, forks: meta.forks, stars: meta.stars });
    }
    // Await the write so callers can rely on the file existing once main()
    // resolves.
    await csvWriter.writeRecords(csvData);
    console.log('CSV file created successfully');
  } catch (err) {
    console.error('Error creating CSV file:', err);
  }
}
// Entry point: process.argv[2] (the first argument after the script path) is
// the README file to scan.
const [, , readmeFilePath] = process.argv;
main(readmeFilePath);
@noelyahan
Copy link
Author

node github_repo_stats_extractor.js "/path_to/awesome-go/README.md"

@noelyahan
Copy link
Author

{
  "name": "awesome-go",
  "version": "1.0.0",
  "description": "<a href=\"https://awesome-go.com/\"><img align=\"right\" src=\"https://github.com/avelino/awesome-go/raw/main/tmpl/assets/logo.png\" alt=\"awesome-go\" title=\"awesome-go\" /></a>",
  "main": "app.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "",
  "license": "ISC",
  "dependencies": {
    "axios": "^1.6.7",
    "csv-writer": "^1.6.0",
    "readline": "^1.3.0"
  }
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment