Skip to content

Instantly share code, notes, and snippets.

@eMerzh
Created December 28, 2013 23:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eMerzh/8165510 to your computer and use it in GitHub Desktop.
Save eMerzh/8165510 to your computer and use it in GitHub Desktop.
Patch for gcstar
package GCPlugins::GCfilms::GCAllocine;
###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################
use strict;
use utf8;
use GCPlugins::GCfilms::GCfilmsCommon;
{
package GCPlugins::GCfilms::GCPluginAllocine;
use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);
# Start-tag handler: updates the scraping state each time a tag opens.
# (Presumably called by the HTML parser of the base class -- confirm.)
#
# Parameters:
#   $self  - plugin instance holding the state flags
#   $tag   - name of the tag being opened
#   $attrs - hash reference with the attributes of the tag
#   Two further positional arguments are accepted but never used.
#
# When $self->{parsingList} is true the tag belongs to a search-results
# page: the isMovie / isInfo counters walk through one result row and the
# entry URL is stored in $self->{itemsList}. Otherwise the tag belongs to a
# movie page and the inside* flags tell text() which field comes next.
sub start
{
    my ($self, $tag, $attrs) = @_;

    # Per-tag counter of currently open elements.
    $self->{inside}->{$tag}++;

    if (!$self->{parsingList})
    {
        # ---- Movie page ----
        if ($tag eq "div" && $attrs->{class} eq "poster")
        {
            $self->{insidePicture} = 1;
        }
        elsif ($tag eq "img" && $self->{insidePicture} eq 1)
        {
            # Keep only the first picture seen; anything that is not the
            # 160x240 rendition is recorded as the "empty" placeholder.
            unless ($self->{curInfo}->{image})
            {
                my $src = $attrs->{src};
                $self->{curInfo}->{image} = ($src =~ /r_160_240/) ? $src : "empty";
            }
        }
        elsif ($tag eq "h1")
        {
            $self->{insideTitle} = 1;
        }
        elsif ($tag eq "span" && $self->{insideDate} eq 1)
        {
            $self->{insideDate} = 2;
        }
        elsif ($tag eq "span" && $attrs->{itemprop} eq "duration")
        {
            $self->{insideTime} = 1;
        }
        elsif ($tag eq "span" && $self->{insideDirector} eq 1)
        {
            $self->{insideDirector} = 2;
        }
        elsif ($tag eq "a" && $self->{insideActor} eq 1)
        {
            $self->{insideActor} = 2;
        }
        elsif ($tag eq "span" && $self->{insideGenre} eq 1)
        {
            $self->{insideGenre} = 2;
        }
        elsif ($tag eq "span" && $self->{insideCountry} eq 1)
        {
            $self->{insideCountry} = 2;
        }
        elsif ($tag eq "span" && $attrs->{class} eq "note" && $self->{insidePressRating} eq 1)
        {
            $self->{insidePressRating} = 2;
        }
        elsif ($tag eq "div" && $attrs->{class} eq "breaker")
        {
            # A "breaker" div closes the press-rating area.
            $self->{insidePressRating} = 0;
        }
        elsif ($tag eq "h2" && $attrs->{id} eq "synopsys_details")
        {
            # The synopsis paragraph is expected after this heading.
            $self->{synopsis_comming} = 1;
        }
        elsif ($self->{synopsis_comming} eq 1 && $tag eq "p" && $attrs->{itemprop} eq "description")
        {
            $self->{insideSynopsis}   = 1;
            $self->{synopsis_comming} = 0;
        }
        elsif ($tag eq "td" && $self->{insideOriginal} eq 1)
        {
            $self->{insideOriginal} = 2;
        }
    }
    elsif ($self->{insideResults} eq 1)
    {
        # ---- Search-results page, inside the results section ----
        # Only one branch below runs, so the state can be read once.
        my $stage = $self->{isMovie};
        my $info  = $self->{isInfo};

        if ($tag eq "a" && $attrs->{href} =~ /^\/film\/fichefilm_gen_cfilm=/ && $stage eq 0)
        {
            # Link to a movie page: open a new entry in the results list.
            $self->{isMovie} = 1;
            $self->{isInfo}  = 0;
            my $idx = ++$self->{itemIdx};
            $self->{itemsList}[$idx]->{url} = $attrs->{href};
        }
        elsif ($tag eq "td" && $stage eq 1)
        {
            $self->{isMovie} = 2;
        }
        elsif ($tag eq "a" && $stage eq 2)
        {
            # From here text() accumulates the title.
            $self->{isMovie} = 3;
        }
        elsif ($tag eq "br" && $stage eq 3)
        {
            # Title is complete: trim it and collapse inner whitespace.
            for ($self->{itemsList}[ $self->{itemIdx} ]->{title})
            {
                s/^\s*//;
                s/\s*$//;
                s/\s+/ /g;
            }
            $self->{isMovie} = 4;
        }
        elsif ($tag eq "span" && $attrs->{class} eq "fs11" && $stage eq 4)
        {
            # Info block of the row; each following <br> moves isInfo on
            # (1 = year, 2 = director, 3 = actors, as consumed by text()).
            $self->{isInfo}  = 1;
            $self->{isMovie} = 0;
        }
        elsif ($tag eq "br" && $info eq 1)
        {
            $self->{isInfo} = 2;
        }
        elsif ($tag eq "br" && $info eq 2)
        {
            $self->{isInfo} = 3;
        }
    }
}
# End-tag handler: resets the state flags that stop being valid once the
# given tag closes.
#
# Parameters:
#   $self - plugin instance holding the state flags
#   $tag  - name of the tag being closed
sub end
{
    my ($self, $tag) = @_;

    # Mirror of the increment done when the tag was opened.
    $self->{inside}->{$tag}--;

    # Closing tag => flags that are switched back to 0.
    my %flagsClearedBy = (
        td    => [qw(insideDirector insideActor insideGenre)],
        tr    => [qw(insideCountry)],
        div   => [qw(insideSynopsis insideActor)],
        th    => [qw(insideSynopsis)],
        table => [qw(insideResults)],
    );

    foreach my $flag (@{ $flagsClearedBy{$tag} || [] })
    {
        $self->{$flag} = 0;
    }
}
# Text handler: consumes each run of character data according to the state
# flags maintained by start() and end().
# (Presumably called by the HTML parser of the base class -- confirm.)
#
# Parameters:
#   $self     - plugin instance holding the state flags and collected data
#   $origtext - the text found between two tags
#
# List mode ($self->{parsingList} true): detects the results banner and
# fills title / date / director / actors of the current itemsList entry.
# Detail mode: trims the text, then either stores it in the field currently
# being read ($self->{curInfo}) or recognises a label ("Avec", "Genre", ...)
# announcing the next field.
sub text
{
    my ($self, $origtext) = @_;

    if ($self->{parsingList})
    {
        # "N résultats trouvés dans les titres de films": accented letters
        # are matched loosely ("..?"); a positive N opens the results area.
        if (($origtext =~ m/(\d+) r..?sultats? trouv..?s? dans les titres de films/) && ($1 > 0))
        {
            $self->{insideResults} = 1;
        }

        # Title text is accumulated here; start() tidies it on the next <br>.
        if ($self->{isMovie} eq 3)
        {
            $self->{itemsList}[ $self->{itemIdx} ]->{title} .= $origtext;
        }

        # isInfo: 1 = year line, 2 = "de <director>", 3 = "avec <actors>".
        if ($self->{isInfo} eq 1)
        {
            if ($origtext =~ /\s*([0-9]{4})/)
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1;
            }
        }
        elsif ($self->{isInfo} eq 2)
        {
            if ($origtext =~ /^\s*de (.*)/)
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{director} = $1;
            }
        }
        elsif ($self->{isInfo} eq 3)
        {
            if (   ($origtext =~ m/^\s*avec (.*)/)
                && (!$self->{itemsList}[ $self->{itemIdx} ]->{actors}))
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{actors} = $1;
            }
            $self->{isInfo} = 0;
        }
    }
    else
    {
        # Normalise the text: drop line breaks and surrounding whitespace.
        $origtext =~ s/[\r\n]//g;
        $origtext =~ s/^\s*//;
        $origtext =~ s/\s*$//;

        if ($self->{insideTitle} eq 1)
        {
            $self->{curInfo}->{title} = $origtext;
            $self->{insideTitle} = 0;
        }
        elsif ($self->{insideSynopsis} eq 1)
        {
            $self->{curInfo}->{synopsis} .= $origtext;
        }
        elsif (($self->{insideDate} eq 2) && (length($origtext) > 1))
        {
            # "inconnu" (unknown) is not stored as a date.
            $self->{curInfo}->{date} = $origtext
              if !($origtext =~ /inconnu/);
            $self->{insideDate} = 0;
        }
        elsif (($origtext =~ /^Date de sortie/)
            && (!$self->{curInfo}->{date}))
        {
            $self->{insideDate} = 1;
        }
        elsif (($origtext =~ /^Date de reprise/)
            && (!$self->{curInfo}->{date}))
        {
            $self->{insideDate} = 1;
        }
        elsif ($self->{insideTime} eq 1)
        {
            # Capture variables are only meaningful after a successful
            # match, so the duration is stored only when one was parsed.
            # Accepted forms: "1h 45min", "2h" and "45min".
            my $minutes;
            if ($origtext =~ /(\d+)\s*h\s*(\d+)?/)
            {
                $minutes = ($1 * 60) + (defined $2 ? $2 : 0);
            }
            elsif ($origtext =~ /(\d+)\s*m/)
            {
                $minutes = $1 + 0;
            }
            $self->{curInfo}->{time} = $minutes . " m." if defined $minutes;

            # Always leave this state: the branches below could never run
            # while the flag stays set.
            $self->{insideTime} = 0;
        }
        elsif ($self->{insideDirector} eq 2)
        {
            $origtext = ", " if $origtext =~ m/^,/;
            $self->{curInfo}->{director} .= $origtext;
        }
        elsif ($origtext =~ /^R..?alis..? par/)
        {
            $self->{insideDirector} = 1;
        }
        elsif ($self->{insideActor} eq 2)
        {
            # "plus" is removed from the text before it joins the cast list.
            $origtext =~ s/plus//;
            $origtext = "," if $origtext =~ m/^,/;
            $self->{curInfo}->{actors} .= $origtext;
        }
        elsif ($origtext =~ /^Avec/)
        {
            $self->{insideActor} = 1;
        }
        elsif ($self->{insideGenre} eq 2)
        {
            $origtext = "," if $origtext =~ m/^,/;
            $self->{curInfo}->{genre} .= $origtext;
        }
        elsif ($origtext =~ /^[\s\n]*Genre/)
        {
            $self->{insideGenre} = 1;
        }
        elsif ($self->{insideCountry} eq 2)
        {
            $origtext = "," if $origtext =~ m/^,/;
            $self->{curInfo}->{country} .= $origtext;
        }
        elsif ($origtext =~ /Nationalité/)
        {
            $self->{insideCountry} = 1;
        }
        elsif ($origtext =~ /^Presse$/)
        {
            $self->{insidePressRating} = 1;
        }
        elsif ($self->{insidePressRating} eq 2)
        {
            # The flag stays at 2 until start() sees the "breaker" div, so
            # blank or non-numeric text can arrive here too. Only text that
            # starts with a digit is a rating; anything else would evaluate
            # to 0 and append a stray "0" to the stored value.
            $origtext =~ s/,/./;    # decimal comma -> decimal point
            if ($origtext =~ /^\d/)
            {
                # The value is doubled before being stored.
                $self->{curInfo}->{ratingpress} .= $origtext * 2;
            }
        }
        elsif ($origtext =~ /^Interdit aux moins de (\d+) ans/)
        {
            $self->{curInfo}->{age} = $1;
        }
        elsif ($self->{insideOriginal} eq 2)
        {
            $self->{curInfo}->{original} = $origtext;
            $self->{insideOriginal} = 0;
        }
        elsif ($origtext =~ /^Titre original/)
        {
            $self->{insideOriginal} = 1;
        }
    }
}
# Constructor: builds the object through the parent class, then sets the
# initial values of the list-parsing state.
#
# Accepts either a class name or an existing instance as invocant.
# Returns the new object, blessed into the invocant's class.
sub new
{
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self  = $class->SUPER::new();

    # Fields flagged as available (presumably the columns of the search
    # results list -- confirm against the base class).
    $self->{hasField} = { map { $_ => 1 } qw(title date director actors) };

    # Counters and flags start at zero, the "current" slots start undefined.
    $self->{$_} = 0     foreach qw(isInfo isMovie insideResults actorsCounter);
    $self->{$_} = undef foreach qw(curName curUrl);

    return bless($self, $class);
}
# Hook that receives the downloaded HTML; this plugin hands the page back
# unchanged.
#
# Parameters:
#   $html - page content
# Returns: the same content.
sub preProcess
{
    my (undef, $html) = @_;

    return $html;
}
# Builds the URL of the search page for the given search term.
#
# Parameters:
#   $word - search term, inserted into the query string as received
# Returns: the search URL.
#
# An earlier form of the URL, kept for reference (the purpose of "f=3" was
# already an open question in the original comment):
#   http://www.allocine.fr/recherche/?q=$word&f=3&rub=1
sub getSearchUrl
{
    my ($self, $word) = @_;

    return "http://www.allocine.fr/recherche/1/?q=" . $word;
}
# Returns the name of the character set to use for the search term.
# Original note: URLs need to be double character encoded.
sub getSearchCharset
{
    return 'utf8';
}
# Turns the site-relative link of an entry into an absolute URL.
#
# Parameters:
#   $url - path of the entry, appended to the site address as received
# Returns: the absolute URL.
sub getItemUrl
{
    my ($self, $url) = @_;

    return "http://www.allocine.fr${url}";
}
# Returns the name of this plugin.
sub getName
{
    return 'Allocine.fr';
}
# Returns the name of the plugin's author.
sub getAuthor
{
    return "Tian";
}
# Returns the language code of the site handled by this plugin.
sub getLang
{
    return "FR";
}
# Returns the name of the character set reported for this plugin's pages.
#
# Notes kept from the original patch:
#   "UTF-8"      - alternative, noted "For 1.5.0 Win32"
#   "ISO-8859-1" - value in use, noted "For 1.5.0 Win32 with
#                  /lib/gcstar/GCPlugins/ ver.1.5.9svn"
sub getCharset
{
    return 'ISO-8859-1';
}
}
1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment