@ejfox
Created December 25, 2023 02:58
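A Node.js script that takes a CSV of YouTube URLs, visits each one with Puppeteer, and fills in the video titles, skipping any URLs already scraped on previous runs.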
const puppeteer = require('puppeteer');
const fs = require('fs');

(async () => {
  try {
    // Read the input CSV of videos to look up
    const inputCsv = fs.readFileSync('output.csv', 'utf-8');
    const records = inputCsv.split('\n').map((row, index) => {
      if (index === 0) return null; // Skip the header row
      const [Index, Title, URL] = row.split(',');
      const match = URL ? URL.match(/"(.*?)"/) : null; // Pull the quoted long-form URL out of the third field
      return { Index, Title, URL: match ? match[1] : URL };
    }).filter(record => record !== null && record.URL); // Drop the header marker and blank/malformed rows
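    // Assumed input shape (output.csv isn't included in the gist): rows like
    //   1,untitled,"https://www.youtube.com/watch?v=VIDEO_ID"
    // split cleanly on commas, and the regex extracts the quoted third field.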
    // Load any previously scraped records so they can be skipped
    const scrapedCsvPath = 'updated_youtube_videos.csv';
    let scrapedCsv = '';
    if (fs.existsSync(scrapedCsvPath)) {
      scrapedCsv = fs.readFileSync(scrapedCsvPath, 'utf-8');
    }
    const scrapedRecords = scrapedCsv.split('\n').map((row, index) => {
      if (index === 0) return null; // Skip the header row
      const [Index, Title, URL] = row.split(',');
      return { Index, Title, URL };
    }).filter(record => record !== null);

    // Keep only the records whose URL has not been scraped yet
    const unscrapedRecords = records.filter(record =>
      !scrapedRecords.some(scrapedRecord => scrapedRecord.URL === record.URL)
    );
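    // Note: some() rescans scrapedRecords for every input record (O(n*m)).
    // For large files, a Set lookup does the same check in O(n + m) (sketch only):
    //   const scrapedUrls = new Set(scrapedRecords.map(r => r.URL));
    //   const unscrapedRecords = records.filter(r => !scrapedUrls.has(r.URL));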
    // Launch Puppeteer with a visible browser window (headless: false);
    // set headless: true to run without one
    const browser = await puppeteer.launch({
      headless: false
    });
    const page = await browser.newPage();

    // Visit each URL and read the document title
    for (const record of unscrapedRecords) {
      try {
        await page.goto(record.URL, { waitUntil: 'networkidle2', timeout: 30000 });
        const title = await page.title();
        record.Title = title.replace(' - YouTube', ''); // Strip the trailing " - YouTube" suffix
        console.log(`Updated record ${record.Index} with title ${record.Title}`);
      } catch (error) {
        // Log and move on to the next record
        console.error(`Error processing record ${record.Index}: ${error.message}`);
      }
    }
    // Close the browser
    await browser.close();

    // Serialize the newly scraped records
    const newCsvRows = unscrapedRecords
      .map(record => `${record.Index},"${record.Title}",${record.URL}`)
      .join('\n');

    // Append rather than overwrite, so records from earlier runs are preserved;
    // write a header first so the skip-first-row parse above stays correct
    if (!fs.existsSync(scrapedCsvPath)) {
      fs.writeFileSync(scrapedCsvPath, 'Index,Title,URL\n');
    }
    if (newCsvRows) {
      fs.appendFileSync(scrapedCsvPath, newCsvRows + '\n');
    }
  } catch (error) {
    console.error(`An error occurred: ${error.message}`);
  }
})();
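To run it (the gist doesn't name the file; scrape-titles.js is assumed): npm install puppeteer, place output.csv next to the script, then node scrape-titles.js. Results accumulate in updated_youtube_videos.csv.

One caveat: splitting rows on bare commas misparses titles that themselves contain commas. A minimal sketch using the csv-parse package (an extra dependency, not part of the gist) handles quoting correctly:

const { parse } = require('csv-parse/sync');
const fs = require('fs');

// Sketch only: assumes output.csv starts with a header row "Index,Title,URL"
const rows = parse(fs.readFileSync('output.csv', 'utf-8'), {
  columns: true,          // use the header row for property names
  skip_empty_lines: true, // ignore any trailing blank line
});
// Each row is now { Index: '...', Title: '...', URL: '...' },
// with quotes and embedded commas handled by the parser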