Skip to content

Instantly share code, notes, and snippets.

@hoytzhang
Created November 18, 2025 03:24
Show Gist options
  • Select an option

  • Save hoytzhang/74f19c214a0d5a447a7057d11f6d01a9 to your computer and use it in GitHub Desktop.

Select an option

Save hoytzhang/74f19c214a0d5a447a7057d11f6d01a9 to your computer and use it in GitHub Desktop.
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Sitemap/Feed 最新URL提取工具</title>
<style>
/* Page background and base typography. */
body {
font-family: Arial, sans-serif;
margin: 40px;
background-color: #f5f5f5;
}
/* Centered white card that wraps the whole tool. */
.container {
max-width: 800px;
margin: 0 auto;
background: white;
padding: 30px;
border-radius: 8px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}
h1 {
color: #333;
text-align: center;
}
/* Vertical spacing around a label + input pair. */
.form-group {
margin-bottom: 20px;
}
label {
display: block;
margin-bottom: 5px;
font-weight: bold;
}
/* Full-width address field; border-box keeps padding inside the 100% width. */
input[type="url"] {
width: 100%;
padding: 12px;
border: 1px solid #ddd;
border-radius: 4px;
box-sizing: border-box;
}
/* Submit button, with hover and disabled variants. */
button {
background-color: #007cba;
color: white;
padding: 12px 24px;
border: none;
border-radius: 4px;
cursor: pointer;
font-size: 16px;
}
button:hover {
background-color: #005a87;
}
button:disabled {
background-color: #cccccc;
cursor: not-allowed;
}
/* Wrapper around the list of extracted URLs rendered by the script. */
.result {
margin-top: 30px;
}
/* One row per extracted URL. */
.url-item {
padding: 10px;
border-bottom: 1px solid #eee;
}
.url-link {
color: #007cba;
text-decoration: none;
}
.url-link:hover {
text-decoration: underline;
}
/* Date line shown under a URL (last-modified or publication date). */
.lastmod {
color: #666;
font-size: 14px;
margin-top: 5px;
}
/* Red message box used for validation and fetch/parse errors. */
.error {
color: #d63638;
background: #fcf0f1;
padding: 15px;
border-radius: 4px;
margin: 20px 0;
}
/* Green message box. NOTE(review): not referenced by the markup or script visible in this file. */
.success {
color: #008a20;
background: #edfaef;
padding: 15px;
border-radius: 4px;
margin: 20px 0;
}
/* Radio group for choosing Sitemap vs. RSS/Atom; labels are laid out inline here,
   overriding the block/bold defaults set on `label` above. */
.source-type {
margin-bottom: 15px;
}
.source-type label {
display: inline-block;
margin-right: 20px;
font-weight: normal;
}
.source-type input {
margin-right: 5px;
}
/* Short text excerpt shown under a feed entry. */
.description {
color: #666;
font-size: 14px;
margin-top: 5px;
}
/* Loading placeholder shown while a request is in flight. */
.loading {
text-align: center;
padding: 20px;
}
/* Circular spinner: a ring with one colored side, rotated continuously by the `spin` animation. */
.spinner {
border: 4px solid rgba(0, 0, 0, 0.1);
border-left-color: #007cba;
border-radius: 50%;
width: 30px;
height: 30px;
animation: spin 1s linear infinite;
margin: 0 auto;
}
@keyframes spin {
to { transform: rotate(360deg); }
}
</style>
</head>
<body>
<!-- Main card: title, input form, and the area the script renders results into. -->
<div class="container">
<h1>Sitemap/Feed 最新URL提取工具</h1>
<!-- The script attaches a submit listener to this form by its id. -->
<form id="urlForm">
<!-- Source kind selector; the script reads the checked radio's value ("sitemap" or "feed"). -->
<div class="source-type">
<label><input type="radio" name="source_type" value="sitemap" checked> Sitemap</label>
<label><input type="radio" name="source_type" value="feed"> RSS/Atom Feed</label>
</div>
<div class="form-group">
<label for="source_url">请输入地址:</label>
<!-- type="url" + required: the field is declared as a mandatory URL input. -->
<input type="url" id="source_url" name="source_url" placeholder="https://example.com/sitemap.xml 或 https://example.com/feed" required>
</div>
<button type="submit">获取最新URL</button>
</form>
<!-- Output area: loading indicator, error messages, and the result list are written here. -->
<div id="result"></div>
</div>
<script>
// Submit handler for the extraction form.
// Reads the selected source type and address, shows a loading indicator,
// delegates to fetchUrls() and renders the outcome with displayResults().
// The submit button is disabled while the request is in flight so that a
// second submission cannot race with the first one and overwrite its result.
document.getElementById('urlForm').addEventListener('submit', async function (e) {
  e.preventDefault();
  // Capture the form now: e.currentTarget is reset to null once dispatch finishes,
  // which happens before the awaited request below resolves.
  const form = e.currentTarget;
  const submitButton = form.querySelector('button[type="submit"]');
  const sourceType = document.querySelector('input[name="source_type"]:checked').value;
  const sourceUrl = document.getElementById('source_url').value.trim();
  const resultDiv = document.getElementById('result');
  if (!sourceUrl) {
    resultDiv.innerHTML = '<div class="error">请输入有效的地址</div>';
    return;
  }
  // Show the loading state.
  resultDiv.innerHTML = `
<div class="loading">
<div class="spinner"></div>
<p>正在获取数据...</p>
</div>
`;
  submitButton.disabled = true;
  try {
    const urls = await fetchUrls(sourceType, sourceUrl);
    displayResults(urls, sourceType, sourceUrl);
  } catch (error) {
    // Build the error box from DOM nodes so the message is inserted as text,
    // never parsed as HTML.
    const errorBox = document.createElement('div');
    errorBox.className = 'error';
    const heading = document.createElement('strong');
    heading.textContent = '处理过程中发生错误:';
    errorBox.append(heading, document.createElement('br'), String(error.message));
    resultDiv.replaceChildren(errorBox);
  } finally {
    submitButton.disabled = false;
  }
});
/**
 * Download the document at `sourceUrl` through the allorigins proxy and parse it.
 *
 * @param {string} sourceType - 'sitemap' selects parseSitemap; any other value selects parseFeed.
 * @param {string} sourceUrl - Address of the sitemap or feed to download.
 * @returns {Promise<Array<object>>} Whatever the selected parser returns for the downloaded text.
 * @throws {Error} When the proxy response status is not OK.
 */
async function fetchUrls(sourceType, sourceUrl) {
  // The request goes through a proxy to get around cross-origin restrictions.
  const proxiedAddress = `https://api.allorigins.win/raw?url=${encodeURIComponent(sourceUrl)}`;
  const reply = await fetch(proxiedAddress);
  if (!reply.ok) {
    throw new Error(`HTTP error! status: ${reply.status}`);
  }
  const body = await reply.text();
  const parse = sourceType === 'sitemap' ? parseSitemap : parseFeed;
  return parse(body);
}
/**
 * Parse a (non-index) sitemap and return its most recently modified entries.
 *
 * @param {string} content - Raw XML text of the sitemap.
 * @returns {Array<{loc: string, lastmod: string}>} At most 10 entries, newest `lastmod`
 *   first. `lastmod` is '' when the entry has none; entries with a missing or
 *   unparseable date sort last, keeping their document order.
 * @throws {Error} When the XML is malformed or the document is a sitemap index.
 */
function parseSitemap(content) {
  const xmlDoc = new DOMParser().parseFromString(content, 'text/xml');
  // DOMParser does not throw on malformed XML; it returns a document that
  // contains a <parsererror> element instead.
  if (xmlDoc.getElementsByTagName('parsererror').length > 0) {
    throw new Error('无法解析XML内容,请确认地址返回的是有效的sitemap');
  }
  // A sitemap index lists other sitemaps rather than pages.
  if (xmlDoc.getElementsByTagName('sitemap').length > 0) {
    throw new Error('此工具不支持索引sitemap,请提供具体的sitemap文件');
  }

  // Element text is trimmed because pretty-printed sitemaps often wrap values in whitespace.
  const readText = (parent, tagName) =>
    parent.getElementsByTagName(tagName)[0]?.textContent?.trim() ?? '';
  // Missing or unparseable dates become 0 so the comparator never yields NaN.
  const toTime = (value) => {
    const time = Date.parse(value);
    return Number.isNaN(time) ? 0 : time;
  };

  const dated = [];
  for (const urlElement of Array.from(xmlDoc.getElementsByTagName('url'))) {
    const loc = readText(urlElement, 'loc');
    if (loc) {
      const lastmod = readText(urlElement, 'lastmod');
      dated.push({ time: toTime(lastmod), entry: { loc, lastmod } });
    }
  }

  // Newest first; Array.prototype.sort is stable, so ties keep document order.
  dated.sort((a, b) => b.time - a.time);
  return dated.slice(0, 10).map(({ entry }) => entry);
}
/**
 * Parse an RSS or Atom feed and return its most recent entries.
 *
 * RSS `<item>` elements are used when present; otherwise Atom `<entry>` elements are read.
 *
 * @param {string} content - Raw XML text of the feed.
 * @returns {Array<{loc: string, title: string, timestamp: number, description: string, pubDate: string}>}
 *   At most 10 entries, newest first. `pubDate` is the raw date text ('' when absent);
 *   `timestamp` is its epoch-millisecond value, or the time of this call when the
 *   date is missing or unparseable.
 * @throws {Error} When the XML is malformed.
 */
function parseFeed(content) {
  const xmlDoc = new DOMParser().parseFromString(content, 'text/xml');
  // DOMParser does not throw on malformed XML; it returns a document that
  // contains a <parsererror> element instead.
  if (xmlDoc.getElementsByTagName('parsererror').length > 0) {
    throw new Error('无法解析XML内容,请确认地址返回的是有效的RSS/Atom Feed');
  }

  // One shared fallback so all undated entries tie and keep their feed order.
  const fallbackTime = Date.now();
  const readText = (parent, tagName) =>
    parent.getElementsByTagName(tagName)[0]?.textContent?.trim() ?? '';
  // Never returns NaN, so the sort comparator below stays consistent.
  const toTimestamp = (value) => {
    const time = Date.parse(value);
    return Number.isNaN(time) ? fallbackTime : time;
  };

  const urls = [];
  const rssItems = Array.from(xmlDoc.getElementsByTagName('item'));
  if (rssItems.length > 0) {
    // RSS: the link is the text of <link>.
    for (const item of rssItems) {
      const link = readText(item, 'link');
      if (link) {
        const pubDate = readText(item, 'pubDate');
        urls.push({
          loc: link,
          title: readText(item, 'title'),
          timestamp: toTimestamp(pubDate),
          description: readText(item, 'description'),
          pubDate,
        });
      }
    }
  } else {
    // Atom: the link is the href attribute of <link>. An entry may carry several
    // links (self, replies, enclosure, ...); prefer the one with rel="alternate"
    // or no rel at all, and only fall back to the first link otherwise.
    for (const entry of Array.from(xmlDoc.getElementsByTagName('entry'))) {
      const links = Array.from(entry.getElementsByTagName('link'));
      const preferred =
        links.find((candidate) => {
          const rel = candidate.getAttribute('rel');
          return rel === null || rel === '' || rel === 'alternate';
        }) ?? links[0];
      const link = preferred?.getAttribute('href')?.trim() ?? '';
      if (link) {
        const date = readText(entry, 'updated') || readText(entry, 'published');
        urls.push({
          loc: link,
          title: readText(entry, 'title'),
          timestamp: toTimestamp(date),
          description: readText(entry, 'summary'),
          pubDate: date,
        });
      }
    }
  }

  // Newest first; the sort is stable, so equal timestamps keep feed order.
  urls.sort((a, b) => b.timestamp - a.timestamp);
  return urls.slice(0, 10);
}
/**
 * Escape a value for interpolation into HTML text or a double-quoted attribute.
 *
 * @param {*} value - Value to escape; it is converted with String().
 * @returns {string} The text with &, <, >, " and ' replaced by entities.
 */
function escapeHtml(value) {
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  return String(value).replace(/[&<>"']/g, (ch) => entities[ch]);
}

/**
 * Validate a URL for use as a link target.
 *
 * @param {string} rawUrl - Candidate URL taken from the sitemap or feed.
 * @returns {string|null} The normalized absolute URL when it uses http: or https:,
 *   otherwise null (e.g. `javascript:` URLs, relative or unparseable values).
 */
function toSafeHref(rawUrl) {
  try {
    const parsed = new URL(rawUrl);
    return parsed.protocol === 'http:' || parsed.protocol === 'https:' ? parsed.href : null;
  } catch {
    return null;
  }
}

/**
 * Build the HTML for one result row. All feed/sitemap data is escaped.
 *
 * @param {{loc: string, title?: string, lastmod?: string, pubDate?: string, description?: string}} url
 * @returns {string} Markup for a single `.url-item` element.
 */
function renderUrlItem(url) {
  const label = escapeHtml(url.title || url.loc);
  const href = toSafeHref(url.loc);
  const parts = ['<div class="url-item">'];
  // Only http(s) targets become links; anything else is shown as plain text.
  parts.push(
    href === null
      ? `<div><span>${label}</span></div>`
      : `<div><a href="${escapeHtml(href)}" target="_blank" rel="noopener noreferrer" class="url-link">${label}</a></div>`,
  );
  const dateStr = url.lastmod || url.pubDate;
  if (dateStr) {
    const parsed = new Date(dateStr);
    // Show the raw text when the date cannot be parsed, instead of "Invalid Date".
    const shown = Number.isNaN(parsed.getTime()) ? dateStr : parsed.toLocaleString('zh-CN');
    parts.push(`<div class="lastmod">${url.lastmod ? '最后修改时间' : '发布时间'}: ${escapeHtml(shown)}</div>`);
  }
  if (url.description) {
    // Drop markup, then cut to 150 characters; the ellipsis marks an actual cut.
    const plain = url.description.replace(/<[^>]*>/g, '');
    const excerpt = plain.length > 150 ? `${plain.substring(0, 150)}...` : plain;
    if (excerpt) {
      parts.push(`<div class="description">${escapeHtml(excerpt)}</div>`);
    }
  }
  parts.push('</div>');
  return parts.join('\n');
}

/**
 * Render the extracted URLs into the #result element.
 *
 * @param {Array<object>} urls - Entries produced by parseSitemap or parseFeed.
 * @param {string} sourceType - 'sitemap' is labelled "Sitemap"; anything else "RSS/Atom Feed".
 * @param {string} sourceUrl - The address the user entered; shown escaped.
 * @returns {void}
 */
function displayResults(urls, sourceType, sourceUrl) {
  const resultDiv = document.getElementById('result');
  const sourceInfo = `
<div class="source-info">
来源类型: ${sourceType === 'sitemap' ? 'Sitemap' : 'RSS/Atom Feed'}<br>
来源地址: ${escapeHtml(sourceUrl)}
</div>
`;
  if (urls.length === 0) {
    resultDiv.innerHTML = `${sourceInfo}<div class="no-results">未找到任何URL</div>
`;
    return;
  }
  const rows = urls.map(renderUrlItem).join('\n');
  resultDiv.innerHTML = `${sourceInfo}<h1>最新更新的URL</h1>
<div class="result">
${rows}
</div>`;
}
</script>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment