Skip to content

Instantly share code, notes, and snippets.

@kosorin
Created February 27, 2021 00:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kosorin/115dc339add0e8953443020901454f5c to your computer and use it in GitHub Desktop.
Save kosorin/115dc339add0e8953443020901454f5c to your computer and use it in GitHub Desktop.
ČSFD exporter (to IMDb format)
<!-- Build definition for the ČSFD exporter program. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<!-- Produce an executable rather than a library. -->
<OutputType>Exe</OutputType>
<TargetFramework>net5.0</TargetFramework>
<!-- Enable nullable reference type annotations and warnings for the project. -->
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<!-- HTML parsing package; the program imports it via `using HtmlAgilityPack;`. -->
<PackageReference Include="HtmlAgilityPack" Version="1.11.30" />
</ItemGroup>
</Project>
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
// Entry point: exports the ČSFD ratings of one user to a CSV file with IMDb-style columns.
// Arguments: <UserId> <OutputPath>
// Exit codes: 0 = success, 1 = unhandled exception (dumped to stderr), 2 = missing argument.
try
{
    switch (args.Length)
    {
        case 0:
            Console.Error.WriteLine("Missing argument: UserId");
            return 2;
        case 1:
            Console.Error.WriteLine("Missing argument: OutputPath");
            return 2;
    }

    var (userId, outputPath) = (args[0], args[1]);

    // Declared inside the try block, so both are disposed (writer first, then the API)
    // before control leaves it, and disposal failures still reach the catch below.
    using var api = new CsfdApi("https://www.csfd.cz");
    using var csv = new StreamWriter(outputPath, append: false, Encoding.UTF8);

    csv.WriteLine("Const,Your Rating,Date Rated,Title,URL,Title Type,IMDb Rating,Runtime (mins),Year,Genres,Num Votes,Release Date,Directors");

    // Linear rescale of the source rating: 0 maps to 1 and 5 maps to 10.
    const double scale = 9d / 5d;

    foreach (var entry in api.GetUserRatings(userId))
    {
        if (entry.ImdbId == null)
        {
            continue;
        }

        var rating = (int)Math.Round(1 + (entry.Rating * scale));
        var date = entry.Date.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);
        // Every non-word character becomes '-', so the title can never break the CSV row.
        var title = Regex.Replace(entry.Title, @"\W", "-");

        csv.WriteLine($"tt{entry.ImdbId},{rating},{date},{title},,movie,,,,,,,");
    }

    return 0;
}
catch (Exception ex)
{
    Console.Error.WriteLine(ex);
    return 1;
}
/// <summary>
/// A single film rating scraped from a user's rating list, paired with the film's IMDb ID.
/// </summary>
/// <param name="Title">Film title as shown in the rating list.</param>
/// <param name="Rating">Rating value read from the rating list (0 when no rating image is present).</param>
/// <param name="Date">Date on which the rating was given.</param>
/// <param name="CsfdId">Film identifier taken from the film's ČSFD link.</param>
/// <param name="ImdbId">Numeric part of the IMDb title ID, without the <c>tt</c> prefix.</param>
internal record UserRating(
    string Title,
    int Rating,
    DateTime Date,
    string CsfdId,
    string ImdbId);
/// <summary>
/// Scrapes a user's film ratings from the ČSFD website and looks up the IMDb ID of each
/// rated film on that film's detail page.
/// </summary>
internal sealed class CsfdApi : IDisposable
{
    // Extracts the film ID from a relative film link such as "/film/<id>-<slug>/".
    private static readonly Regex CsfdIdRegex = new Regex(@"^/film/(?<Id>[^-]+)-");

    // Extracts the numeric part of an IMDb title ID from a link to imdb.com.
    private static readonly Regex ImdbIdRegex = new Regex(@"imdb\.com/title/tt(?<Id>[0-9]+)");

    private readonly UrlBuilder _urlBuilder;
    private readonly HttpsWebClient _webClient;
    private bool _disposed;

    /// <summary>Creates a client for the site rooted at <paramref name="rootUrl"/>.</summary>
    /// <param name="rootUrl">Site root without a trailing slash, e.g. <c>https://www.csfd.cz</c>.</param>
    public CsfdApi(string rootUrl)
    {
        _urlBuilder = new UrlBuilder(rootUrl);
        _webClient = new HttpsWebClient();
    }

    /// <summary>Releases the underlying web client. Safe to call more than once.</summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }
        _webClient.Dispose();
        _disposed = true;
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Lazily enumerates the ratings of <paramref name="userId"/>, page by page. For every
    /// rated film one extra request is made to its detail page to find the IMDb ID.
    /// Films without an IMDb link, and rows whose film link cannot be parsed, are reported
    /// on standard error and skipped. Each yielded rating is also echoed to standard output.
    /// </summary>
    /// <param name="userId">User identifier as it appears in the user's profile URL.</param>
    /// <returns>The ratings for which an IMDb ID could be resolved.</returns>
    public IEnumerable<UserRating> GetUserRatings(string userId)
    {
        var documentNode = DownloadHtml(_urlBuilder.UserRating(userId, 1));

        // SelectNodes returns null (not an empty collection) when nothing matches, which
        // happens when all ratings fit on a single page and there are no paginator links.
        var otherPageLinks = documentNode.SelectNodes("//div[@class='paginator text'][1]/a[not(@class)]");
        var pageCount = 1 + (otherPageLinks?.Count ?? 0);

        for (var page = 1u; page <= pageCount; page++)
        {
            var rows = documentNode.SelectNodes("//table[@class='ui-table-list']/tbody/tr");
            if (rows != null)
            {
                foreach (var row in rows)
                {
                    // The rating is the length of the rating image's alt text; no image means 0.
                    var rating = row.SelectSingleNode("td[2]/img")?.Attributes["alt"]?.ValueLength ?? 0;
                    var date = DateTime.ParseExact(
                        row.SelectSingleNode("td[3]").InnerText,
                        "dd.MM.yyyy",
                        CultureInfo.InvariantCulture,
                        DateTimeStyles.AllowWhiteSpaces);

                    var filmNode = row.SelectSingleNode("td[1]/a");
                    var filmHref = filmNode.Attributes["href"].Value;
                    // InnerText keeps HTML entities (e.g. "&amp;") verbatim, so decode them.
                    var title = HtmlEntity.DeEntitize(filmNode.InnerText).Trim();

                    var csfdIdMatch = CsfdIdRegex.Match(filmHref);
                    if (!csfdIdMatch.Success)
                    {
                        // Without an ID the detail-page URL would be malformed; skip the row.
                        Console.Error.WriteLine($"Cannot parse ČSFD ID from link '{filmHref}' ({title})");
                        continue;
                    }
                    var csfdId = csfdIdMatch.Groups["Id"].Value;

                    var imdbId = FindImdbId(csfdId);
                    if (imdbId == null)
                    {
                        Console.Error.WriteLine($"Missing IMDb ID for ČSFD ID {csfdId} ({title})");
                        continue;
                    }

                    var result = new UserRating(title, rating, date, csfdId, imdbId);
                    Console.WriteLine(result);
                    yield return result;
                }
            }

            if (page < pageCount)
            {
                documentNode = DownloadHtml(_urlBuilder.UserRating(userId, page + 1));
            }
        }
    }

    // Downloads the film's detail page and returns the numeric IMDb ID taken from the first
    // share link pointing at imdb.com, or null when the page has no such link.
    private string? FindImdbId(string csfdId)
    {
        var filmDocumentNode = DownloadHtml(_urlBuilder.Film(csfdId));
        var shareLinkNodes = filmDocumentNode.SelectNodes("//div[@id='share']/ul[@class='links']/li/a");
        if (shareLinkNodes == null)
        {
            return null;
        }

        foreach (var shareLinkNode in shareLinkNodes)
        {
            var shareLinkUrl = shareLinkNode.Attributes["href"]?.Value;
            if (shareLinkUrl == null)
            {
                continue;
            }
            if (ImdbIdRegex.Match(shareLinkUrl) is { Success: true, Groups: var groups })
            {
                return groups["Id"].Value;
            }
        }
        return null;
    }

    // Downloads the page at the given URL and returns the root node of the parsed document.
    private HtmlNode DownloadHtml(string url)
    {
        var html = _webClient.DownloadString(url);
        var document = new HtmlDocument();
        document.LoadHtml(html);
        return document.DocumentNode;
    }

    // WebClient that enables automatic gzip/deflate decompression on its HTTP requests.
    private class HttpsWebClient : WebClient
    {
        protected override WebRequest GetWebRequest(Uri address)
        {
            var request = base.GetWebRequest(address);
            if (request is HttpWebRequest httpRequest)
            {
                httpRequest.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;
            }
            return request;
        }
    }

    // Builds absolute URLs for the pages this class downloads.
    private class UrlBuilder
    {
        public UrlBuilder(string rootUrl)
        {
            RootUrl = rootUrl;
        }

        private string RootUrl { get; }

        // Overview page of a single film.
        public string Film(string filmId) => $"{RootUrl}/film/{filmId}/prehled/";

        // One page (1-based) of a user's rating list.
        public string UserRating(string userId, uint page) => $"{RootUrl}/uzivatel/{userId}/hodnoceni/strana-{page}/";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment