Created
November 18, 2025 03:24
-
-
Save hoytzhang/74f19c214a0d5a447a7057d11f6d01a9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <!DOCTYPE html> | |
| <html lang="zh-CN"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>Sitemap/Feed 最新URL提取工具</title> | |
| <style> | |
| body { | |
| font-family: Arial, sans-serif; | |
| margin: 40px; | |
| background-color: #f5f5f5; | |
| } | |
| .container { | |
| max-width: 800px; | |
| margin: 0 auto; | |
| background: white; | |
| padding: 30px; | |
| border-radius: 8px; | |
| box-shadow: 0 2px 10px rgba(0,0,0,0.1); | |
| } | |
| h1 { | |
| color: #333; | |
| text-align: center; | |
| } | |
| .form-group { | |
| margin-bottom: 20px; | |
| } | |
| label { | |
| display: block; | |
| margin-bottom: 5px; | |
| font-weight: bold; | |
| } | |
| input[type="url"] { | |
| width: 100%; | |
| padding: 12px; | |
| border: 1px solid #ddd; | |
| border-radius: 4px; | |
| box-sizing: border-box; | |
| } | |
| button { | |
| background-color: #007cba; | |
| color: white; | |
| padding: 12px 24px; | |
| border: none; | |
| border-radius: 4px; | |
| cursor: pointer; | |
| font-size: 16px; | |
| } | |
| button:hover { | |
| background-color: #005a87; | |
| } | |
| button:disabled { | |
| background-color: #cccccc; | |
| cursor: not-allowed; | |
| } | |
| .result { | |
| margin-top: 30px; | |
| } | |
| .url-item { | |
| padding: 10px; | |
| border-bottom: 1px solid #eee; | |
| } | |
| .url-link { | |
| color: #007cba; | |
| text-decoration: none; | |
| } | |
| .url-link:hover { | |
| text-decoration: underline; | |
| } | |
| .lastmod { | |
| color: #666; | |
| font-size: 14px; | |
| margin-top: 5px; | |
| } | |
| .error { | |
| color: #d63638; | |
| background: #fcf0f1; | |
| padding: 15px; | |
| border-radius: 4px; | |
| margin: 20px 0; | |
| } | |
| .success { | |
| color: #008a20; | |
| background: #edfaef; | |
| padding: 15px; | |
| border-radius: 4px; | |
| margin: 20px 0; | |
| } | |
| .source-type { | |
| margin-bottom: 15px; | |
| } | |
| .source-type label { | |
| display: inline-block; | |
| margin-right: 20px; | |
| font-weight: normal; | |
| } | |
| .source-type input { | |
| margin-right: 5px; | |
| } | |
| .description { | |
| color: #666; | |
| font-size: 14px; | |
| margin-top: 5px; | |
| } | |
| .loading { | |
| text-align: center; | |
| padding: 20px; | |
| } | |
| .spinner { | |
| border: 4px solid rgba(0, 0, 0, 0.1); | |
| border-left-color: #007cba; | |
| border-radius: 50%; | |
| width: 30px; | |
| height: 30px; | |
| animation: spin 1s linear infinite; | |
| margin: 0 auto; | |
| } | |
| @keyframes spin { | |
| to { transform: rotate(360deg); } | |
| } | |
| </style> | |
| </head> | |
| <body> | |
| <div class="container"> | |
| <h1>Sitemap/Feed 最新URL提取工具</h1> | |
| <form id="urlForm"> | |
| <div class="source-type"> | |
| <label><input type="radio" name="source_type" value="sitemap" checked> Sitemap</label> | |
| <label><input type="radio" name="source_type" value="feed"> RSS/Atom Feed</label> | |
| </div> | |
| <div class="form-group"> | |
| <label for="source_url">请输入地址:</label> | |
| <input type="url" id="source_url" name="source_url" placeholder="https://example.com/sitemap.xml 或 https://example.com/feed" required> | |
| </div> | |
| <button type="submit">获取最新URL</button> | |
| </form> | |
| <div id="result"></div> | |
| </div> | |
| <script> | |
// Form submit handler: reads the selected source type and URL, shows a loading
// indicator, then fetches the source and renders the newest URLs or an error box.
document.getElementById('urlForm').addEventListener('submit', async function(e) {
    e.preventDefault();
    // Capture the form before the first await: e.currentTarget is reset to null
    // once event dispatch has finished.
    const form = e.currentTarget;
    const submitButton = form ? form.querySelector('button[type="submit"]') : null;
    const sourceType = document.querySelector('input[name="source_type"]:checked').value;
    const sourceUrl = document.getElementById('source_url').value;
    const resultDiv = document.getElementById('result');
    if (!sourceUrl) {
        resultDiv.innerHTML = '<div class="error">请输入有效的地址</div>';
        return;
    }
    // Show the loading state.
    resultDiv.innerHTML = `
<div class="loading">
<div class="spinner"></div>
<p>正在获取数据...</p>
</div>
`;
    // Block repeated submissions while a request is in flight so that two
    // overlapping responses cannot overwrite each other out of order.
    if (submitButton) {
        submitButton.disabled = true;
    }
    try {
        const urls = await fetchUrls(sourceType, sourceUrl);
        displayResults(urls, sourceType, sourceUrl);
    } catch (error) {
        // Build the error box from DOM nodes: the message may contain text that
        // originated from the remote document, so it must never be parsed as HTML.
        const errorBox = document.createElement('div');
        errorBox.className = 'error';
        const heading = document.createElement('strong');
        heading.textContent = '处理过程中发生错误:';
        const detail = error && error.message ? error.message : String(error);
        errorBox.appendChild(heading);
        errorBox.appendChild(document.createElement('br'));
        errorBox.appendChild(document.createTextNode(detail));
        resultDiv.innerHTML = '';
        resultDiv.appendChild(errorBox);
    } finally {
        if (submitButton) {
            submitButton.disabled = false;
        }
    }
});
/**
 * Download a sitemap or feed through the allorigins proxy and parse it.
 *
 * @param {string} sourceType - 'sitemap' selects the sitemap parser; any other
 *   value selects the feed parser.
 * @param {string} sourceUrl - Address of the document to download.
 * @returns {Promise<Array<object>>} Whatever the selected parser returns.
 * @throws {Error} When the proxy responds with a non-OK HTTP status.
 */
async function fetchUrls(sourceType, sourceUrl) {
    // Requests go through a proxy to work around cross-origin restrictions.
    const proxiedAddress = `https://api.allorigins.win/raw?url=${encodeURIComponent(sourceUrl)}`;
    const reply = await fetch(proxiedAddress);
    if (!reply.ok) {
        throw new Error(`HTTP error! status: ${reply.status}`);
    }
    const body = await reply.text();
    return sourceType === 'sitemap' ? parseSitemap(body) : parseFeed(body);
}
/**
 * Parse a regular (non-index) XML sitemap and return its most recently
 * modified entries.
 *
 * @param {string} content - Raw XML text of the sitemap.
 * @returns {Array<{loc: string, lastmod: string}>} At most 10 entries ordered by
 *   `lastmod`, newest first. Entries whose `lastmod` is missing or unparsable
 *   are ordered as the oldest and keep their document order among themselves.
 * @throws {Error} If the text is not well-formed XML, or if it is a sitemap index.
 */
function parseSitemap(content) {
    const parser = new DOMParser();
    const xmlDoc = parser.parseFromString(content, 'text/xml');
    // DOMParser does not throw on malformed XML; it returns a document that
    // contains a <parsererror> element instead.
    if (xmlDoc.getElementsByTagName('parsererror').length > 0) {
        throw new Error('内容不是有效的XML,无法解析');
    }
    // Sitemap index files list <sitemap> children rather than <url> entries.
    if (xmlDoc.getElementsByTagName('sitemap').length > 0) {
        throw new Error('此工具不支持索引sitemap,请提供具体的sitemap文件');
    }
    // Text of the first descendant with the given tag, trimmed because
    // pretty-printed sitemaps often pad element text with whitespace.
    const firstText = (node, tag) =>
        (node.getElementsByTagName(tag)[0]?.textContent || '').trim();
    // Missing or unparsable dates map to 0 so the comparator never sees NaN.
    const toTime = (value) => {
        const time = value ? new Date(value).getTime() : 0;
        return Number.isNaN(time) ? 0 : time;
    };
    // Collect the regular sitemap entries, skipping those without a location.
    const urls = [];
    for (const urlElement of Array.from(xmlDoc.getElementsByTagName('url'))) {
        const loc = firstText(urlElement, 'loc');
        if (loc) {
            urls.push({ loc, lastmod: firstText(urlElement, 'lastmod') });
        }
    }
    // Newest first.
    urls.sort((a, b) => toTime(b.lastmod) - toTime(a.lastmod));
    return urls.slice(0, 10);
}
/**
 * Parse an RSS or Atom feed and return its most recent entries.
 *
 * RSS `<item>` elements are used when any are present; otherwise Atom `<entry>`
 * elements are read. Entries without a link are skipped.
 *
 * @param {string} content - Raw XML text of the feed.
 * @returns {Array<{loc: string, title: string, timestamp: number,
 *   description: string, pubDate: string}>} At most 10 entries ordered by
 *   `timestamp`, newest first. Entries with a missing or unparsable date all
 *   share one "now" timestamp taken when parsing starts, so they sort ahead of
 *   dated entries and keep their document order among themselves.
 * @throws {Error} If the text is not well-formed XML.
 */
function parseFeed(content) {
    const parser = new DOMParser();
    const xmlDoc = parser.parseFromString(content, 'text/xml');
    // DOMParser does not throw on malformed XML; it returns a document that
    // contains a <parsererror> element instead.
    if (xmlDoc.getElementsByTagName('parsererror').length > 0) {
        throw new Error('内容不是有效的XML,无法解析');
    }
    // Evaluate the fallback once so that every undated entry compares equal.
    const fallbackTimestamp = Date.now();
    const toTimestamp = (dateText) => {
        const time = dateText ? new Date(dateText).getTime() : NaN;
        return Number.isNaN(time) ? fallbackTimestamp : time;
    };
    // Trimmed text of the first descendant with the given tag, or ''.
    const firstText = (node, tag) =>
        (node.getElementsByTagName(tag)[0]?.textContent || '').trim();
    // An Atom entry may carry several <link> elements (self, enclosure, ...).
    // Prefer the one pointing at the page itself: rel="alternate" or no rel.
    const atomHref = (entry) => {
        const links = Array.from(entry.getElementsByTagName('link'));
        const preferred = links.find((candidate) => {
            const rel = candidate.getAttribute('rel');
            return !rel || rel === 'alternate';
        }) || links[0];
        return (preferred?.getAttribute('href') || '').trim();
    };

    const urls = [];
    // Try RSS first.
    const rssItems = Array.from(xmlDoc.getElementsByTagName('item'));
    if (rssItems.length > 0) {
        for (const item of rssItems) {
            const link = firstText(item, 'link');
            if (!link) {
                continue;
            }
            const pubDate = firstText(item, 'pubDate');
            urls.push({
                loc: link,
                title: firstText(item, 'title'),
                timestamp: toTimestamp(pubDate),
                description: firstText(item, 'description'),
                pubDate: pubDate
            });
        }
    } else {
        // Otherwise fall back to Atom.
        for (const entry of Array.from(xmlDoc.getElementsByTagName('entry'))) {
            const link = atomHref(entry);
            if (!link) {
                continue;
            }
            const date = firstText(entry, 'updated') || firstText(entry, 'published');
            urls.push({
                loc: link,
                title: firstText(entry, 'title'),
                timestamp: toTimestamp(date),
                description: firstText(entry, 'summary'),
                pubDate: date
            });
        }
    }
    // Newest first.
    urls.sort((a, b) => b.timestamp - a.timestamp);
    return urls.slice(0, 10);
}
// Escape text for safe interpolation into HTML element content or a quoted attribute.
function escapeHtml(value) {
    return String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
}

// Resolve rawUrl (against baseUrl when it is relative) and return its absolute
// form only if it uses http/https; returns null for anything else.
function toSafeHref(rawUrl, baseUrl) {
    let resolved;
    try {
        resolved = new URL(rawUrl, baseUrl);
    } catch {
        // An unusable base also makes absolute URLs fail; retry without it.
        try {
            resolved = new URL(rawUrl);
        } catch {
            return null;
        }
    }
    return resolved.protocol === 'http:' || resolved.protocol === 'https:' ? resolved.href : null;
}

/**
 * Render the extracted URLs into the `#result` element.
 *
 * All values that come from the user or from the fetched document are
 * HTML-escaped, and links are emitted only for http/https targets.
 *
 * @param {Array<{loc: string, title?: string, lastmod?: string,
 *   pubDate?: string, description?: string}>} urls - Entries to display.
 * @param {string} sourceType - 'sitemap' or anything else (shown as a feed).
 * @param {string} sourceUrl - Address the entries were read from; also used as
 *   the base for resolving relative entry links.
 * @returns {void}
 */
function displayResults(urls, sourceType, sourceUrl) {
    const resultDiv = document.getElementById('result');
    const sourceInfo =
        '<div class="source-info">' +
        `来源类型: ${sourceType === 'sitemap' ? 'Sitemap' : 'RSS/Atom Feed'}<br>` +
        `来源地址: ${escapeHtml(sourceUrl)}` +
        '</div>';
    if (urls.length === 0) {
        resultDiv.innerHTML = `${sourceInfo}<div class="no-results">未找到任何URL</div>`;
        return;
    }
    const items = urls.map((url) => {
        const parts = [];
        const label = escapeHtml(url.title || url.loc);
        const href = toSafeHref(url.loc, sourceUrl);
        // Targets that are not http/https (e.g. javascript:) are shown as plain text.
        parts.push(href
            ? `<div><a href="${escapeHtml(href)}" target="_blank" rel="noopener noreferrer" class="url-link">${label}</a></div>`
            : `<div><span class="url-link">${label}</span></div>`);
        const dateStr = url.lastmod || url.pubDate;
        if (dateStr) {
            const parsed = new Date(dateStr);
            // Show the raw text instead of "Invalid Date" when it cannot be parsed.
            const shown = Number.isNaN(parsed.getTime()) ? dateStr : parsed.toLocaleString('zh-CN');
            const caption = url.lastmod ? '最后修改时间' : '发布时间';
            parts.push(`<div class="lastmod">${caption}: ${escapeHtml(shown)}</div>`);
        }
        if (url.description) {
            // Drop markup, then cut to 150 characters; the ellipsis is added
            // only when something was actually cut off.
            const plain = url.description.replace(/<[^>]*>/g, '').trim();
            const desc = plain.length > 150 ? `${plain.substring(0, 150)}...` : plain;
            if (desc) {
                parts.push(`<div class="description">${escapeHtml(desc)}</div>`);
            }
        }
        return `<div class="url-item">${parts.join('')}</div>`;
    });
    resultDiv.innerHTML =
        `${sourceInfo}<h1>最新更新的URL</h1><div class="result">${items.join('')}</div>`;
}
| </script> | |
| </body> | |
| </html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment