Skip to content

Instantly share code, notes, and snippets.

@maltegoetz
Created July 5, 2019 15:15
Show Gist options
  • Save maltegoetz/58045cf6562d56ff7c2309d62aa316dc to your computer and use it in GitHub Desktop.
Save maltegoetz/58045cf6562d56ff7c2309d62aa316dc to your computer and use it in GitHub Desktop.
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
namespace SemModAnabinParser
{
/// <summary>
/// Console tool that downloads an institution list from anabin.kmk.org and writes
/// two CSV files: one listing the institutions and one listing the degrees found
/// on each institution's detail page.
/// </summary>
class Program
{
    // Output locations. These are the placeholder paths of the original script;
    // adjust them to a real directory before running.
    private const string UniversitiesCsvPath = @"C:\Users\xxx\universities.csv";
    private const string DegreesCsvPath = @"C:\Users\xxx\degrees.csv";

    // Plain comma without a trailing space: a space between the separator and an
    // opening quote makes CSV readers treat the field as unquoted, which defeats
    // the quoting performed by EscapeString.
    private const string CsvSeparator = ",";

    // Characters that force a CSV field to be wrapped in double quotes.
    private static readonly char[] CharsRequiringQuotes = { ',', '"', '\r', '\n' };

    // Captures everything between the first <tbody> and the last </tbody>.
    // Singleline lets '.' match newlines, so no (.|\n) alternation is needed.
    private static readonly Regex DegreeTableRegex = new Regex("<tbody>(.*)</tbody>", RegexOptions.Singleline);

    // Captures the content of one attribute-less <td> cell (non-greedy, single line).
    private static readonly Regex DegreeCellRegex = new Regex("<td>(.*?)</td>");

    /// <summary>
    /// Entry point: runs the export for the hard-coded list URL, then waits for a
    /// line of console input so the window stays open.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        // you might wanna change the country here (presumably via the sSearch_6 and land query parameters)
        ParseUniversityList("https://anabin.kmk.org/index.php?eID=user_anabin_institutionen&conf=institutionsergebnisliste&sEcho=7&iColumns=13&iDisplayStart=0&iDisplayLength=99999&bRegex=false&sSearch_6=Finnland&iSortingCols=1&iSortCol_0=2&sSortDir_0=asc&land=15");
        Console.ReadLine();
    }

    /// <summary>
    /// Downloads the JSON institution list from <paramref name="url"/>, writes the
    /// selected institutions to the universities CSV, then downloads every selected
    /// institution's detail page and writes all degrees to the degrees CSV.
    /// </summary>
    /// <param name="url">URL returning a JSON object with an "aaData" array of rows.</param>
    /// <exception cref="InvalidDataException">The response has no "aaData" member.</exception>
    static void ParseUniversityList(string url)
    {
        Console.WriteLine("Generating universities.csv...");

        var universities = JObject.Parse(DownloadString(url))["aaData"];
        if (universities == null)
        {
            throw new InvalidDataException("The institution list response does not contain an \"aaData\" member.");
        }

        var ids = new List<string>();
        var universityCsv = new StringBuilder();
        universityCsv.AppendLine(CsvLine("Id", "Name", "Type", "City"));

        foreach (var uni in universities)
        {
            // Row layout used here: [1] = id, [2] = name, [3] = city, [4] = type.
            var type = uni[4].ToString();

            // Rows whose type column is exactly "H+" are left out.
            // NOTE(review): confirm this filter is intended and not inverted.
            if (type != "H+")
            {
                var id = uni[1].ToString();
                ids.Add(id);
                universityCsv.AppendLine(CsvLine(id, uni[2].ToString(), type, uni[3].ToString()));
            }
        }

        File.WriteAllText(UniversitiesCsvPath, universityCsv.ToString(), Encoding.Unicode);
        Console.WriteLine("Generated universities.csv");

        Console.WriteLine("Generating degrees.csv...");
        var degreeCsv = new StringBuilder();
        degreeCsv.AppendLine(CsvLine("Degree", "Type", "University Id"));

        foreach (var id in ids)
        {
            Console.WriteLine($"Parsing university (id:{id})");
            degreeCsv.Append(ParseUniversityDetails(id));
        }

        File.WriteAllText(DegreesCsvPath, degreeCsv.ToString(), Encoding.Unicode);
        Console.WriteLine("Generated degrees.csv");
    }

    /// <summary>
    /// Downloads the detail page of one institution and returns its degrees as CSV
    /// lines (degree name, degree type, institution id), one line per table row.
    /// </summary>
    /// <param name="id">Institution id as taken from the list response.</param>
    /// <returns>Zero or more newline-terminated CSV lines; empty when no degree table is found.</returns>
    static string ParseUniversityDetails(string id)
    {
        var url = $"https://anabin.kmk.org/index.php?eID=user_anabin_institutionen&conf=institutionen&uid={Uri.EscapeDataString(id)}";
        return ExtractDegreeRows(DownloadString(url), id);
    }

    /// <summary>
    /// Extracts degree rows from the HTML of a detail page: takes the text between
    /// &lt;tbody&gt; and &lt;/tbody&gt;, splits it at each "&lt;tr", and reads the
    /// first two &lt;td&gt; cells of every row.
    /// </summary>
    /// <param name="html">Raw HTML of the detail page.</param>
    /// <param name="id">Institution id appended as the last column of each line.</param>
    /// <returns>Newline-terminated CSV lines, or an empty string if nothing matched.</returns>
    private static string ExtractDegreeRows(string html, string id)
    {
        // A failed match yields an empty group value, which produces no rows below.
        var tableBody = DegreeTableRegex.Match(html).Groups[1].Value;
        var csv = new StringBuilder();

        // The first split segment is whatever precedes the first "<tr", so skip it.
        foreach (var row in tableBody.Split(new[] { "<tr" }, StringSplitOptions.None).Skip(1))
        {
            var cells = DegreeCellRegex.Matches(row);
            if (cells.Count < 2)
            {
                // Header rows or rows without two plain <td> cells carry no degree data.
                continue;
            }

            csv.AppendLine(CsvLine(cells[0].Groups[1].Value, cells[1].Groups[1].Value, id));
        }

        return csv.ToString();
    }

    /// <summary>
    /// Downloads <paramref name="url"/> as a string, disposing the client afterwards.
    /// </summary>
    private static string DownloadString(string url)
    {
        using (var client = new WebClient())
        {
            return client.DownloadString(new Uri(url));
        }
    }

    /// <summary>
    /// Builds one CSV record (without line terminator) from the given fields,
    /// escaping each field as needed.
    /// </summary>
    private static string CsvLine(params string[] fields) =>
        string.Join(CsvSeparator, fields.Select(EscapeString));

    /// <summary>
    /// Escapes a value for use as a CSV field: values containing a comma, a double
    /// quote, or a line break are wrapped in double quotes with embedded quotes
    /// doubled; all other values are returned unchanged.
    /// </summary>
    /// <param name="str">Raw field value; <c>null</c> is treated as an empty field.</param>
    /// <returns>The field text ready to be written between separators.</returns>
    private static string EscapeString(string str)
    {
        if (string.IsNullOrEmpty(str))
        {
            return string.Empty;
        }

        if (str.IndexOfAny(CharsRequiringQuotes) < 0)
        {
            return str;
        }

        return "\"" + str.Replace("\"", "\"\"") + "\"";
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment