Created
October 4, 2014 06:52
-
-
Save hollie/51d3ef3af0e41d43a291 to your computer and use it in GitHub Desktop.
Example web site parsing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Category=Weather

# Belgian coastal weather report ("kustweerbericht"): download the web page,
# extract the forecast text, render it to an audio file and play that file.

# Local copy of the downloaded web page.
my $kwb_file = "$config_parms{data_dir}/web/kwb.html";

# Audio file the spoken report is rendered to.
my $kwb_voice = "/Volumes/Media/speech/kustweer.wave";

my $last_fetched_kwb;    # To keep track of the last type of fetched weather report

# NOTE(review): not used at file level; the processing block declares its own
# lexical of the same name, which shadows this one.
my $weerbeeld;

# NOTE(review): the "$name = new Class ..." form is kept as-is on purpose; the
# MisterHouse code loader appears to recognise object declarations by this
# textual shape -- confirm before restyling to Class->new(...).

# Voice command with two states, "mini" and "prof", selecting the report type.
$v_get_kwb = new Voice_Cmd "Check kustweerbericht [mini,prof]";

# Voice command that (re)reads the report from the last downloaded page.
$v_process_kwb = new Voice_Cmd "Vertel het kustweerbericht";

# Background download; this initial command is replaced with the selected URL
# each time a fetch is requested.
$fetch_kwb = new Process_Item "get_url http://www.kustweerbericht.be/nl/home.asp $kwb_file";

# Background text-to-speech job; its command is set once the text is known.
$say_kwb = new Process_Item;
# When the "Check kustweerbericht [mini,prof]" command is given: remember which
# report type was requested, drop the previously downloaded page and start a
# background download of the matching URL.
if ($state = $v_get_kwb->said) {
    $last_fetched_kwb = $state;

    # Fetch new data: remove the old copy before downloading again.
    unlink $kwb_file;

    # Get the correct URL: the professional page for 'prof', the home page
    # for every other state.
    my $url = $state eq 'prof'
        ? 'http://www.kustweerbericht.be/nl/professionele.gebruikers.algemeen.asp'
        : 'http://www.kustweerbericht.be/nl/home.asp';

    $fetch_kwb->set("get_url $url $kwb_file");
    $fetch_kwb->start;
    print_log("Fetching kustweerbericht $state");
}
# Turn the downloaded page into a spoken report. Runs when the
# "Vertel het kustweerbericht" command is given or when the background
# download has just finished. The extracted text is cleaned up for
# text-to-speech and handed to the `say` command, which writes $kwb_voice.
if ($v_process_kwb->said or $fetch_kwb->done_now) {
    my $html = file_read($kwb_file);

    # Guard against a missing or unreadable download so the pattern matches
    # below simply fail and the "not found" message is spoken instead.
    $html = '' unless defined $html;

    # Still undefined if no "Check kustweerbericht" command has been given yet.
    my $report_type = defined $last_fetched_kwb ? $last_fetched_kwb : '';

    my $weerbeeld = '';

    # Regular coastal weather report
    if ($report_type eq 'mini' && $html =~ /Weerbeeld\s:\s<\/strong>\s+(.*?)<\/p>/s) {
        $weerbeeld = $1;

        # Spell out wind direction abbreviations (N, ZW, NNO, ...). Only
        # standalone groups of direction letters are expanded, so capitals
        # inside ordinary words ("Wind", "Noordzee") are left untouched.
        my %direction_for = (
            N => 'noord ',
            O => 'oost ',
            Z => 'zuid ',
            W => 'west ',
        );
        my $spell_out = sub {
            my ($abbreviation) = @_;
            return join '', map { $direction_for{$_} } split //, $abbreviation;
        };
        $weerbeeld =~ s/\b([NOZW]{1,3})\b/$spell_out->($1)/ge;

        $weerbeeld = "Hier volgt het korte kustweerbericht: " . $weerbeeld;
    }

    # Professional (marine) forecast: everything from the heading up to the
    # "(De in de tekst" footnote.
    if ($report_type eq 'prof' && $html =~ /(Mariene meteoverwachting.*?)\(De in de tekst/s) {
        $weerbeeld = $1;
    }
    print_log("Weerbeeld: $weerbeeld");

    # Parse if we found something
    if ($weerbeeld ne '') {
        $weerbeeld =~ s/<.*?>//sg;                # strip HTML code
        $weerbeeld =~ s/\s+/ /g;                  # collapse whitespace, including line breaks
        $weerbeeld =~ s/°C/graden Celcius/g;      # ensure temperature units can be TTSed
        $weerbeeld =~ s/Bft/Beaufort/g;           # and units

        # The text comes from an external web page and is placed inside a
        # double-quoted shell argument below: drop the characters that could
        # terminate the quoting or trigger shell expansion.
        $weerbeeld =~ s/["`\$\\]//g;
    } else {
        $weerbeeld = "Kon het weerbericht niet terugvinden, mijn parser moet waarschijnlijk aangepast worden";
    }
    print_log("Converted to '$weerbeeld'");

    # Remove the previous audio file, then render the new one in the background.
    unlink $kwb_voice;
    $say_kwb->set("say \"$weerbeeld\" -o $kwb_voice");
    $say_kwb->start;
    print_log("Started conversion");
}
# Once the text-to-speech job has finished, play the rendered audio file.
# The second argument (30) is passed through to play_notification unchanged;
# its meaning is defined by that method, which is not visible here.
if ($say_kwb->done_now) {
    if (-e $kwb_voice) {
        print_log("Conversion done, starting playback");
        $sb_wekker->play_notification($kwb_voice, 30);
    } else {
        # The old file is removed before each conversion, so a missing file
        # means the conversion did not produce any output.
        print_log("Conversion finished but $kwb_voice was not created, skipping playback");
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment