-
-
Save benjarobin/d8beea937d8c9e112f20a26c5980c6bf to your computer and use it in GitHub Desktop.
wiki-arch-migration
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
import sys
import difflib
import re

# Suffix that marks a French page title, e.g. "Pacman (Français)".
FR_SUFFIX = '(Français)'

# French title or alias -> target title ('' when the map file gives none).
links_map_fr_to_org = {}
# Distinct target titles taken from the values of links_map_fr_to_org.
links_list_org = []
# casefold()ed target title -> target title, for case-insensitive lookup.
links_map_org_casefold = {}
# casefold()ed French title or alias -> that French title or alias.
links_map_fr_casefold = {}
def process_wiki_link(link):
    """Print the suggested replacement for one wiki link.

    Exactly one line is written to stdout:

    * ``<link> ------ OK`` when the link carries ``FR_SUFFIX``, contains no
      underscore, and its base title is a known target title;
    * ``<link> ---->`` / ``<link> --?-->`` followed by the proposed title and
      ``FR_SUFFIX`` when a replacement was found (``-`` for a table lookup,
      ``?`` for a fuzzy guess);
    * ``<link> -----> ???`` when nothing usable was found.

    Args:
        link: Link text, with or without the ``FR_SUFFIX`` suffix.
    """
    is_org_link = FR_SUFFIX in link
    link_base = link
    if is_org_link:
        # The suffix may be joined with either an underscore or a space.
        link_base = link.replace('_' + FR_SUFFIX, '')
        link_base = link_base.replace(' ' + FR_SUFFIX, '')

    if is_org_link and (link_base in links_list_org) and ('_' not in link):
        print('{} ------ OK'.format(link))
        return

    new_link = None
    guess_sep = '-'
    link_base = link_base.replace('_', ' ')
    folded = link_base.casefold()

    # Lookups run from most to least reliable. The exact match is part of the
    # chain so that the case-insensitive lookup can no longer overwrite it.
    if link_base in links_list_org:
        new_link = link_base
    elif folded in links_map_org_casefold:
        new_link = links_map_org_casefold[folded]
    elif link_base in links_map_fr_to_org:
        new_link = links_map_fr_to_org[link_base]
    elif folded in links_map_fr_casefold:
        new_link = links_map_fr_to_org[links_map_fr_casefold[folded]]
    else:
        # No table hit: fall back to the single closest title and flag the
        # result as a guess.
        guess_sep = '?'
        if is_org_link:
            close_match = difflib.get_close_matches(
                folded, links_map_org_casefold.keys(), n=1, cutoff=0.7)
            if close_match:
                new_link = links_map_org_casefold[close_match[0]]
        else:
            close_match = difflib.get_close_matches(
                folded, links_map_fr_casefold.keys(), n=1, cutoff=0.7)
            if close_match:
                new_link = links_map_fr_to_org[links_map_fr_casefold[close_match[0]]]

    # An empty string means the map knows the page but gives it no target.
    if new_link:
        print('{} --{}--> {} {}'.format(link, guess_sep, new_link, FR_SUFFIX))
    else:
        print('{} -----> ???'.format(link))
def process_wiki_links_from_stdin():
    """Run ``process_wiki_link`` on every relevant line read from stdin.

    Each line is stripped first. Skipped lines are: empty lines, lines
    containing ``Category:``, and lines that start with an optional ``:``
    followed by two lowercase letters and ``:`` (e.g. ``en:Foo``, ``:de:Bar``).
    """
    for line in sys.stdin:
        link = line.strip()
        if not link or 'Category:' in link:
            continue
        # Two-letter prefixes look like language codes -- presumably
        # interlanguage links; confirm against the wiki's link syntax.
        if re.match(r':?[a-z]{2}:', link) is not None:
            continue
        process_wiki_link(link)
def add_link_to_map(line):
    """Record one line of the map file in ``links_map_fr_to_org``.

    A line holds up to three tab-separated fields: the French title, an
    optional ``;``-separated list of aliases, and the target title. Missing
    fields are treated as empty. The French title and every non-empty alias
    are mapped to the (possibly empty) target title.

    Args:
        line: One raw line of the map file; surrounding whitespace is ignored.
    """
    # Pad with two empty fields so short lines still unpack into three names.
    fr_link, fr_alias, org_link, *_ = line.strip().split('\t') + ['', '']
    fr_link = fr_link.strip()
    org_link = org_link.strip()

    # A blank line would otherwise register an empty-string key.
    if not fr_link:
        return

    links_map_fr_to_org[fr_link] = org_link
    for alias in fr_alias.split(';'):
        alias = alias.strip()
        # Skip empty fragments such as the one after a trailing ';'.
        if alias:
            links_map_fr_to_org[alias] = org_link
def build_links_map_from_file(path_file):
    """Load the map file and fill the module-level lookup tables.

    Every line is passed to ``add_link_to_map``. Afterwards
    ``links_list_org`` is rebuilt from the distinct non-empty target titles,
    and the two casefold indexes are filled from the target titles and from
    the French titles/aliases.

    Args:
        path_file: Path of the UTF-8 encoded, tab-separated map file.

    Raises:
        OSError: If the file cannot be opened.
    """
    global links_list_org

    with open(path_file, mode="r", encoding="utf-8") as f:
        for line in f:
            add_link_to_map(line)

    # Sort so that the winner of a casefold collision is the same on every
    # run (set order depends on string hashing). Drop '' because it marks
    # "no target", not a real title.
    links_list_org = sorted({link for link in links_map_fr_to_org.values() if link})
    for link in links_list_org:
        links_map_org_casefold[link.casefold()] = link
    for link in links_map_fr_to_org:
        links_map_fr_casefold[link.casefold()] = link
if __name__ == '__main__':
    # Titles contain non-ASCII characters (e.g. the 'ç' of '(Français)'), so
    # force UTF-8 on both streams regardless of the locale.
    sys.stdin.reconfigure(encoding='utf-8')
    sys.stdout.reconfigure(encoding='utf-8')
    # An optional first argument overrides the default map file path.
    build_links_map_from_file(sys.argv[1] if len(sys.argv) > 1 else 'links-map.txt')
    process_wiki_links_from_stdin()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ABS Abs Arch Build System | |
AMSN AMSN | |
ATI ATI | |
AUR 4.0.0 AUR Arch User Repository | |
Accueil Par ou commencer Main page | |
Aider Getting involved | |
Alsa Advanced Linux Sound Architecture | |
Amarok Amarok | |
Ameliorer Pacman Pacman/Tips and tricks | |
Anjuta List of applications | |
Arch Linux Arch Linux | |
Arch Linux Archive Arch Rollback Machine Arch Linux Archive | |
Arch vs autres distributions Arch compared to other distributions | |
Archboot Archboot | |
Archinstall Archinstall | |
Archiso Archiso | |
ArcoLinux | |
Assistants AUR AUR helpers | |
Astuces Pacman Pacman/Tips and tricks | |
Asus m50vn | |
Asus x70af | |
AucTeX | |
Audacious Audacious | |
Autossh OpenSSH | |
Avahi Avahi | |
Awesome Awesome | |
Awesome3 Awesome | |
BIND BIND | |
BOINC BOINC | |
Bitdefender | |
Bluetooth Bluetooth | |
Bootchart Bootchart | |
Brother MFC-J5910DW CUPS/Printer-specific problems | |
Btrfs Btrfs | |
Bumblebee Bumblebee | |
Burg GRUB | |
CLAMAV ClamAV | |
Cacti Cacti | |
Capture d'écran Screen capture | |
Celestia | |
Changement d'adresse MAC MAC address spoofing | |
Chromium Chromium | |
Chroot Chroot | |
Clavier Keyboard | |
Codecs Codecs and containers | |
Communautes internationales International communities | |
Compiler un nouveau noyau Kernel/Traditional compilation | |
Compiz Compiz | |
Configuration de pare-feu simple Iptables | |
Conky Conky | |
Connexion automatique a la console Getty | |
Connexions reseau Network configuration | |
Connman ConnMan | |
Console serie Working with the serial console | |
Copier coller Clipboard | |
Cpupower Cpufreq CPU frequency scaling | |
Cron Cron | |
CUPS CUPS | |
Créer une clef USB avec l'ISO Arch Linux Support d'installation sur clef USB USB flash installation medium | |
DWM Dwm | |
Davmail | |
Debuter | |
Deepin Desktop Environment Deepin Desktop Environment | |
Deltas | |
Depot archlinuxfr Repos archlinuxfr Unofficial user repositories | |
Depots Depots officiels Official repositories | |
Desactiver IPv6 IPv6 - Desactiver le module IPv6 | |
DeveloperWiki:Building in a Clean Chroot | |
Discord Discord | |
Django Django | |
Dkms Dynamic Kernel Module Support | |
Dmenu Dmenu | |
Downgrade Downgrading packages | |
Dual Boot Arch et Windows Dual boot with Windows | |
Dvorak Dvorak | |
Dwb Dwb | |
Démon Daemons | |
Dépannage General troubleshooting | |
Dépôt local AUR compilé | |
EFISTUB EFISTUB | |
EFI Boot Stub EFISTUB EFISTUB | |
ELinks ELinks | |
ESP EFI system partition | |
Ejabberd | |
Emacs Emacs | |
Encryption avec eCryptfs ECryptfs | |
Enhancing Arch Linux Stability System maintenance | |
Enlightenment E17 Enlightenment | |
Environnement chroot | |
Ext4 Ext4 | |
Extraction de CD audio | |
FAQ Frequently asked questions | |
Fail2ban Fail2ban | |
Fbsplash Fbsplash | |
Firefox Firefox | |
Fluxbox Fluxbox | |
Folding@home Folding@home | |
Fond d'écran Feh + Nitrogen + ??? | |
Format PDF | |
Forum Forum rules | |
Fsck Fsck | |
Fstab Fstab | |
GDM GDM | |
GNOME Gnome3; Gnome GNOME | |
GRUB Burg; GRUB2; Grub; Reinstallation GRUB GRUB | |
GRUB/Trucs et Astuces GRUB/Tips and tricks | |
GRUB Legacy GRUB Legacy | |
Gedit GNOME/Gedit | |
Gestion de volumes avec udisks Udisks | |
Gestion des fichiers de configurations Pacman/Pacnew and Pacsave | |
Gestionnaire de connexions Display manager | |
Gestionnaire de fenêtres Window manager | |
Getty Connexion automatique a la console; Messages au demarrage Getty | |
Godot Godot Engine | |
Gummiboot | |
Hddtemp Hddtemp | |
Hdparm Hdparm | |
Horloge Time | |
I3 I3 | |
IPod IOS | |
IRC Irc Arch IRC channels | |
Install PXE Preboot Execution Environment | |
Install chroot Install Arch Linux from existing Linux | |
Installation Arch install scripts Installation guide | |
Intel Intel graphics | |
Iptables Iptables | |
Irssi Irssi | |
Jack JACK Audio Connection Kit | |
Java Java | |
JrEvans | |
K3b Optical disc drive | |
KDE Kde KDE | |
KMS Kernel mode setting | |
Kernel Panics General troubleshooting | |
Kernel modules Kernel module | |
Kiss Philosophie Arch Arch terminology | |
LAMP Lamp | |
LENOVO Miix510 | |
LUKS Dm-crypt | |
LVM LVM | |
LXDE Lxde LXDE | |
LXDM LXDM | |
LaTeX TeX Live | |
Laptop Mode Tools Laptop-mode Laptop Mode Tools | |
Laravel | |
Lecture DVD Optical disc drive | |
Let's Encrypt Certbot | |
LibreOffice Libreoffice; Openoffice LibreOffice | |
Libvirt Libvirt | |
LightDM LightDM | |
Liste des applications | |
LiveCD Livecd Arch-based distributions | |
Locale Locale | |
Lxqt LXQt | |
MATE MATE | |
MPD Music Player Daemon | |
MPlayer Mplayer MPlayer | |
MacBook Mac | |
Mailman Mailman | |
Maintenance Système System maintenance | |
Mainteneurs de paquets | |
Makepkg Makepkg | |
MariaDB MariaDB | |
Midnight Commander Midnight Commander | |
Midori Midori | |
Migration 64 bits Migrating between architectures | |
Miroirs Mirrors | |
Mkinitcpio Mkinitcpio | |
Moc MOC | |
Mod wsgi Apache HTTP Server/mod wsgi | |
Modem attache Android Modem attaché Android Android tethering | |
Moteur de jeu Godot | |
Mplayer MPlayer | |
MuPDF MuPDF | |
MySQL Mysql MySQL | |
NFS NFS | |
NIS NIS | |
NVIDIA Nouveau; Nvidia NVIDIA | |
Nano Nano | |
Nemo Nemo | |
Netbook | |
Netctl Netcfg Netctl | |
NetworkManager Networkmanager NetworkManager | |
Newsboat Newsbeuter Newsboat | |
Nommer un bloc de périphériques persistants Persistent block device naming | |
Noyaux Linux Kernel | |
Ntp System time | |
OSS Open Sound System | |
Oldtroffer | |
Openbox Openbox | |
Openldap OpenLDAP | |
Opera Opera | |
Optimus : rendu NVIDIA NVIDIA Optimus | |
Ordonnanceur d'E/S I/O Scheduling Improving performance | |
Owncloud Nextcloud | |
P7zip P7zip | |
PCI passthrough avec OMVF PCI passthrough avec OVMF | |
PCManFM PCManFM | |
PKGBUILD Pkgbuild PKGBUILD | |
Pacman Pacman | |
Pacman-key Pacman/Package signing | |
Pacman/Trucs et Astuces Pacman/Tips and tricks | |
Pacserve Pacserve | |
Partage de connexion Internet sharing | |
Pdnsd Pdnsd | |
Pekwm PekWM | |
Phpmyadmin PhpMyAdmin | |
Pidgin Pidgin | |
Plugins navigateur Installer flash 32 sur un systeme 64 Browser plugins | |
Polkit Policykit Polkit | |
Prboom PrBoom | |
Privoxy Privoxy | |
Processus de boot Chargeur de démarrage Arch boot process | |
PulseAudio Pulseaudio PulseAudio | |
Python/Environnement virtuel Python/Virtual environment | |
Périphérique de stockage USB | |
Qemu QEMU | |
Qtile Qtile | |
RAID RAID | |
REFInd | |
Ramfs | |
Ranger Ranger | |
RawTherapee | |
Recommandations Recommendations generale General recommendations | |
Reflector Reflector | |
Resolv.conf Domain name resolution | |
Ring Jami | |
Rofi Rofi | |
Rsync Sauvegarde système complète avec rsync Rsync | |
Récepteur Logitech Unifying Logitech Unifying Receiver | |
Récupération de fichier File recovery | |
S.M.A.R.T. S.M.A.R.T. | |
SDDM SDDM | |
SLiM Slim SLiM | |
SSD Solid state drive | |
Sakura | |
Samba Samba | |
Sane SANE | |
Sauvegarde des données avec BORG | |
Seafile Seafile | |
Services | |
Softether VPN client | |
Ssh Secure Shell | |
Sshfs SSHFS | |
Stand RMLL2011 | |
Stand RMLL2012 | |
Standard paquetage Arch package guidelines | |
Startx Xinit | |
Su Su | |
Subtle | |
Sudo Sudo | |
Suivi | |
Swap Swap | |
Synchroniser vos ordinateurs Synchronization and backup programs | |
Synergy Barrier | |
Syslinux Syslinux | |
Sysrq Keyboard shortcuts | |
Systemd Systemd | |
Systemd-networkd Systemd-networkd | |
Systemd-timesyncd Systemd-timesyncd | |
Systemd/cron Systemd/Timers | |
Systemd/logind | |
Systemd/utilisateur Systemd/User | |
TNT : regarder et enregistrer en ligne de commande avec v4l-utils et mpv | |
TU Trusted Users | |
Thinkpad x250 Lenovo ThinkPad X250 | |
Thunar Thunar | |
Thunderbird Thunderbird | |
Tor Tor | |
Toshiba NB550D | |
Touchpad Synaptics Touchpad Synaptics | |
Transmission Transmission | |
Udev Udev | |
Udevil | |
Urxvt Rxvt-unicode | |
Utilisateurs et Groupes Users and groups | |
Utilisation chroot Install environnement 32 sur un systeme 64 | |
Utiliser des applications 32bits avec Arch64 Official repositories | |
Vconsole | |
Vi Vi | |
Vifm Vifm | |
Vim Vim | |
VirtualBox VirtualBox | |
Vpnc Vpnc | |
WMFS2 WMFS2 | |
Wayland Wayland | |
Weston Weston | |
Wicd Wicd | |
Wifi Network configuration/Wireless | |
Wiki.archlinux.fr | |
Wine Wine | |
Wireguard WireGuard | |
Wireshark Wireshark | |
Xbindkeys Xbindkeys | |
Xfce Xfce | |
Xmodmap Xmodmap | |
Xmonad Xmonad | |
Xorg Xorg | |
Xterm Xterm | |
YaCy | |
Yaourt AUR helpers | |
Zram Improving performance | |
Zsh Zsh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Print the sorted, de-duplicated targets of the [[wiki links]] found in the
# source of the "(Français)" version of a page on wiki.archlinux.org.
#
# Usage: <script> PAGE_TITLE
#   PAGE_TITLE may use spaces or underscores, with or without "_(Français)".

if [[ $# -lt 1 || -z "$1" ]]; then
    echo "Usage: ${0##*/} PAGE_TITLE" >&2
    exit 1
fi

# URL form of the title: underscores instead of spaces, language suffix removed.
PAGE_L="${1// /_}"
PAGE_L="${PAGE_L/"_(Français)"/}"
# Display form of the title, used to drop links that end with the page's own name.
PAGE_N="${PAGE_L//_/ }"
PLINK="https://wiki.archlinux.org/index.php?title=${PAGE_L}_(Fran%C3%A7ais)&action=edit"

# Extract every [[...]] link, keep only the target (text before '|' or '#'),
# and drop links containing "Wikipedia:".
curl -s "$PLINK" \
    | grep -Po "\[\[[^ ][^\[\]]+\]\]" \
    | grep -vF "${PAGE_N}]]" \
    | sed -r 's|\[\[([^]|#]*).*$|\1|' \
    | grep -vFi 'Wikipedia:' \
    | sort -u
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment