Skip to content

Instantly share code, notes, and snippets.

@Igouist
Created June 15, 2021 04:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Igouist/ebfc29be9e350bb7c289f05df694535b to your computer and use it in GitHub Desktop.
Save Igouist/ebfc29be9e350bb7c289f05df694535b to your computer and use it in GitHub Desktop.
稽查目標 PTT 用戶的留言,使用 AngleSharp + pttweb
/// <summary>
/// Script entry point: fetches one page of comments for the configured PTT user
/// and dumps the result.
/// </summary>
async Task Main()
{
    // Fill in the PTT user id to inspect; the original script leaves it blank.
    const string targetUser = "";
    const int pageNumber = 1;

    var crawler = new PttMessageCrawler();
    var messages = await crawler.GetMessages(targetUser, pageNumber);

    // Dump() is not defined in this file - presumably the LINQPad output extension.
    messages.Dump();
}
/// <summary>
/// Crawler service that reads a PTT user's comment listing from pttweb.cc via AngleSharp.
/// </summary>
public class PttMessageCrawler
{
    private readonly IBrowsingContext _browser;

    /// <summary>
    /// Initializes the crawler with a browsing context built from the default
    /// AngleSharp configuration plus the default loader.
    /// </summary>
    public PttMessageCrawler()
    {
        var config = Configuration.Default.WithDefaultLoader();
        this._browser = BrowsingContext.New(config);
    }

    /// <summary>
    /// Gets the comments listed on one page of the given user's pttweb.cc profile.
    /// </summary>
    /// <param name="username">
    /// PTT user id. It is URL-escaped before being placed in the request path;
    /// <c>null</c> is treated as an empty string.
    /// </param>
    /// <param name="page">Page number passed through as the <c>page</c> query parameter.</param>
    /// <returns>
    /// One <see cref="PttMessage"/> per <c>div.thread-item</c> element found on the page.
    /// The result is fully materialized, and is empty (never <c>null</c>) when no
    /// document could be opened or nothing matched.
    /// </returns>
    public async Task<IEnumerable<PttMessage>> GetMessages(
        string username,
        int page = 0)
    {
        const string listQuery = "div.thread-item";
        const string titleQuery = "span.thread-title";
        const string messageQuery = "span.yellow--text.text--darken-2";

        // Escape the id so characters such as '/', '?' or '#' cannot alter the URL structure.
        var escapedUser = System.Uri.EscapeDataString(username ?? string.Empty);
        var pttUrl = $"https://www.pttweb.cc/user/{escapedUser}?t=message&page={page}";

        // Dispose the document once all text has been extracted from it.
        using (var document = await this._browser.OpenAsync(pttUrl))
        {
            if (document is null)
            {
                return Enumerable.Empty<PttMessage>();
            }

            // Both levels are materialized with ToList() so nothing touches the DOM
            // after the document is disposed and callers can enumerate repeatedly
            // without re-running the selectors.
            return document
                .QuerySelectorAll(listQuery)
                .Select(content => new PttMessage
                {
                    // An item without a title element yields a null Title instead of throwing.
                    Title = content.QuerySelector(titleQuery)?.TextContent,
                    Content = content
                        .QuerySelectorAll(messageQuery)
                        .Select(x => x.TextContent.Replace(": ", ""))
                        // Drop spans whose text is exactly the user id, keeping only the comment text.
                        .Where(x => x != username)
                        .ToList()
                })
                .ToList();
        }
    }
}
/// <summary>
/// One entry of a user's comment listing: a thread title plus the comment
/// texts found under it.
/// </summary>
public class PttMessage
{
    /// <summary>
    /// Text of the thread title.
    /// </summary>
    public string Title
    {
        get;
        set;
    }

    /// <summary>
    /// Comment texts associated with the thread.
    /// </summary>
    public IEnumerable<string> Content
    {
        get;
        set;
    }
}
@wellss12
Copy link

wellss12 commented Jan 5, 2024

666

@Igouist
Copy link
Author

Igouist commented Jan 5, 2024

666

 > <"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment