GCstar fonctionne de nouveau une fois le script Amazon modifié, voir
ici :
package GCPlugins::GCbooks::GCAmazon;
###################################################
#
# Copyright 2005-2009 Tian
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################
use strict;
use utf8;
use GCPlugins::GCbooks::GCbooksCommon;
{
package GCPlugins::GCbooks::GCPluginAmazon;
use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
use XML::Simple;
use LWP::Simple qw($ua);
use Encode;
use HTML::Entities;
use GCUtils;
# Opening-tag handler.
#
# Parameters: $tagname is the tag name and $attr a hash reference of its
# attributes; $attrseq and $origtext are accepted but not used here.
# (Presumably invoked by the HTML parser of the base class for every
# opening tag -- confirm against GCbooksPluginsBase.)
#
# While $self->{parsingList} is true the handler fills $self->{itemsList}
# (search results); otherwise it sets the state flags and
# $self->{curInfo} fields for a single book page. Most of the "vark*"
# tags tested here are the markers inserted by preProcess.
sub start
{
    my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

    # Nesting counter of currently open tags, per tag name
    $self->{inside}->{$tagname}++;

    if ($self->{parsingList})
    {
        # Identify beginning of comments. No return here: the remaining
        # tests all require isComment == 0 and are therefore skipped.
        if (($self->{isComment} == 0) && ($tagname eq 'varkcomment'))
        {
            $self->{isComment} = 1;
        }

        # Capture URL of book: first link after the start of a result
        if (($self->{isComment} == 0) && ($self->{isUrl} == 1) && ($tagname eq 'a'))
        {
            $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
            $self->{isUrl}   = 0;
            $self->{isTitle} = 1;
            return;
        }

        # Identify beginning of new book (next text is title)
        if (   ($self->{isComment} == 0)
            && ($tagname eq 'li')
            && defined($attr->{id})
            && ($attr->{id} =~ /result_[0-9]+/))
        {
            # Create new entry
            $self->{itemIdx}++;
            $self->{isUrl}    = 1;
            $self->{isAuthor} = 0;
            return;
        }

        # Identify end of authors list
        if (($self->{isComment} == 0) && ($tagname eq 'varkendauthors') && ($self->{isAuthor} != 0))
        {
            $self->{isAuthor} = 0;
            return;
        }
    }
    else
    {
        # Detection of book themes
        if (($self->{isTheme} == 0) && ($tagname eq 'varkgenre'))
        {
            $self->{isTheme} = 1;
            return;
        }

        # Detection of book page count
        if (($self->{isPage} == 0) && ($tagname eq 'varkdata'))
        {
            $self->{isPage} = 1;
            return;
        }

        # Detection of authors
        if ($tagname eq 'varkauthor')
        {
            $self->{isAuthor} = 1;
            return;
        }

        # Capture of image
        if ($tagname eq 'varkimage')
        {
            my $cover = $attr->{adress};
            if (defined $cover)
            {
                # Keep only the part running from "http" to the first
                # ".jpg"; the previous code evaluated this match but threw
                # the result away, so surrounding text stayed in the URL.
                # An address without such a part is left untouched.
                if ($cover =~ /(http.*?\.jpg)/)
                {
                    $cover = $1;
                }
                # Swap the HTTPS image host for the plain HTTP one
                $cover =~ s|https://images-na.ssl-images-amazon.com/images/I/|http://z2-ec2.images-amazon.com/images/I/|;
            }
            $self->{curInfo}->{cover} = $cover;
            return;
        }

        # Detection of book description: the marker arms the flag (1) and
        # the next <div> actually opens the description text (2)
        if (($self->{isDescription} == 0) && ($tagname eq 'varkdescription'))
        {
            $self->{isDescription} = 1;
            return;
        }
        if (($self->{isDescription} == 1) && ($tagname eq 'div'))
        {
            $self->{isDescription} = 2;
            return;
        }

        # Detection title (2 = title expected in item mode)
        if (($self->{isTitle} == 0) && ($tagname eq 'varktitle'))
        {
            $self->{isTitle} = 2;
            return;
        }
    }
}
# Closing-tag handler: updates the open-tag counter and resets the state
# flags that are bounded by a closing tag.
sub end
{
    my ($self, $tagname) = @_;

    $self->{inside}->{$tagname}--;

    if ($self->{parsingList})
    {
        # Closing of a rewritten HTML comment: analysis may resume
        $self->{isComment} = 0
            if $tagname eq 'varkcomment' && $self->{isComment} == 1;
    }
    elsif ($tagname eq 'li' && $self->{isTheme} != 0)
    {
        # A closing </li> finishes the themes analysis
        $self->{isTheme} = 0;
    }
    elsif ($tagname eq 'div' && $self->{isDescription} != 0)
    {
        # A closing </div> finishes the description analysis
        $self->{isDescription} = 0;
    }
}
# Text handler: receives one chunk of text and, depending on the state
# flags set by start/end, stores it in the current search result
# ($self->{itemsList}) or in the current book ($self->{curInfo}).
# The tests are tried in a fixed order and the first one that applies
# consumes the text.
sub text
{
    my ($self, $origtext) = @_;

    # Remove blanks before and after string (needed in both modes)
    $origtext =~ s/^\s+//;
    $origtext =~ s/\s+$//g;
    my $hasText = ($origtext ne '');

    if ($self->{parsingList})
    {
        # Every capture below needs some text outside of a comment
        return unless $hasText && ($self->{isComment} == 0);

        my $idx = $self->{itemIdx};

        # Capture of book title
        if ($self->{isTitle} == 1)
        {
            $self->{itemsList}[$idx]->{title} = $origtext;
            $self->{isPublication} = 1;
            $self->{isTitle}       = 0;
            return;
        }

        # Capture of book publication date
        if ($self->{isPublication} == 1)
        {
            $self->{itemsList}[$idx]->{publication} = $origtext;
            $self->{isPublication} = 0;
            $self->{isAuthor}      = 1;
            return;
        }

        # Avoid a text area before the first author
        if ($self->{isAuthor} == 1)
        {
            $self->{isAuthor} = 2;
            return;
        }

        # Capture of authors, separated by a single blank
        if ($self->{isAuthor} == 2)
        {
            my $known = $self->{itemsList}[$idx]->{authors};
            $self->{itemsList}[$idx]->{authors} =
                ($known eq '') ? $origtext : $known . " " . $origtext;
            return;
        }

        return;
    }

    # ---- Item mode: details of a single book ----

    # Capture of title
    if (($self->{isTitle} == 2) && $hasText)
    {
        $self->{curInfo}->{title} = $origtext;
        $self->{isTitle} = 0;
        return;
    }

    # Capture of page number (any text starting with a digit)
    if (($self->{isPage} == 1) && ($origtext =~ /^[0-9]+/))
    {
        $self->{curInfo}->{pages} = $origtext;
        $self->{isPage} = 0;
        return;
    }

    # Fields announced by a translated label: the label itself raises the
    # flag, the next non-empty text is the value.
    # Each entry: state flag, getTranslation index, code storing the value.
    my @labelled = (
        # Editor and publication date, e.g. "Publisher; (date)"
        [
            'isEditor', 1,
            sub {
                my @pieces = split('\(', $_[0]);
                my ($publisher, $date) = @pieces;
                $date      =~ s/\)//g;
                $publisher =~ s/^\s+//;
                $publisher =~ s/\s+$//g;
                $publisher =~ s/\;//g;
                $date      =~ s/^\s+//;
                $date      =~ s/\s+$//g;
                $self->{curInfo}->{publisher}   = $publisher;
                $self->{curInfo}->{publication} = $date;
            }
        ],
        # Language
        [ 'isLanguage', 2, sub { $self->{curInfo}->{language} = $_[0]; } ],
        # ISBN, stored without dashes
        [
            'isISBN', 3,
            sub {
                my $isbn = $_[0];
                $isbn =~ s|-||gi;
                $self->{curInfo}->{isbn} = $isbn;
            }
        ],
        # Book dimensions
        [ 'isSize', 4, sub { $self->{curInfo}->{format} = $_[0]; } ],
    );
    foreach my $field (@labelled)
    {
        my ($flag, $labelId, $store) = @{$field};
        if (($self->{$flag} == 0) && ($origtext eq $self->getTranslation($labelId)))
        {
            $self->{$flag} = 1;
            return;
        }
        if (($self->{$flag} == 1) && $hasText)
        {
            $store->($origtext);
            $self->{$flag} = 0;
            return;
        }
    }

    # Detection of themes: a '>' separator announces the next theme
    if (($origtext eq '>') && ($self->{isTheme} == 1))
    {
        $self->{isTheme} = 2;
        return;
    }

    # Capture of themes, comma separated
    if (($self->{isTheme} == 2) && $hasText)
    {
        my $genre = $self->{curInfo}->{genre};
        $self->{curInfo}->{genre} =
            ($genre eq '') ? $origtext : $genre . ", " . $origtext;
        $self->{isTheme} = 1;
        return;
    }

    # Capture of authors (texts containing "Ajax" are skipped)
    if (($self->{isAuthor} == 1) && $hasText && ($origtext =~ /^(?:(?!Ajax).)*$/))
    {
        # Lower case for author names, except for first letters
        $origtext =~ s/([[:alpha:]]+)/ucfirst(lc $1)/egi;
        my $authors = $self->{curInfo}->{authors};
        $self->{curInfo}->{authors} =
            ($authors eq '') ? $origtext : $authors . ", " . $origtext;
        $self->{isAuthor} = 0;
        return;
    }

    # Capture of description: chunks are concatenated as they come
    if (($self->{isDescription} == 2) && $hasText)
    {
        my $soFar = $self->{curInfo}->{description};
        $self->{curInfo}->{description} =
            ($soFar eq '') ? $origtext : $soFar . $origtext;
        return;
    }
}
# Constructor: builds the object through the parent class, re-blesses it
# into the requested class and initialises the plugin configuration and
# every parser state flag.
sub new
{
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self  = $class->SUPER::new();
    bless($self, $class);

    # Per-field switches read by the application (1 = on, 0 = off);
    # presumably the fields available in the search results list --
    # TODO confirm against the plugin base class.
    $self->{hasField} = {
        title       => 1,
        authors     => 1,
        publication => 1,
        format      => 0,
        edition     => 0,
    };

    # State flags driving start/end/text. isSize is read by text() but was
    # missing from the initialisation; it is now set like the others.
    $self->{$_} = 0
        for qw(
            isComment isUrl isTitle isPublication isAuthor isPage
            isEditor isISBN isSize isDescription isLanguage isTheme
        );

    return $self;
}
# Returns the URL of an item page: the URL captured in the results list is
# handed back unchanged.
sub getItemUrl
{
    my (undef, $itemUrl) = @_;

    return $itemUrl;
}
# Rewrites the raw HTML before it is parsed: well-known fragments of the
# page are replaced by private "vark*" marker tags that start/end/text
# recognise.
#
# Parameter: $html, the page as a string.
# Returns:   the rewritten page.
sub preProcess
{
    my ($self, $html) = @_;

    if ($self->{parsingList})
    {
        # Analysis of results must be disabled during comments
        $html =~ s|<!--|<varkcomment>|gi;
        $html =~ s|-->|</varkcomment>|gi;

        # Remove other commercial offers (everything from this marker on)
        $html =~ s|END SPONSORED LINKS SCRIPT.*||s;

        # End of authors listing detection
        $html =~ s|</span></div></div><div class="a-row"><div class="a-column a-span7"><div class="a-row a-spacing-none">|<varkendauthors>|gi;
        $html =~ s|<h3 class="a-size-small a-color-null s-inline a-text-normal">|<varkendauthors>|gi;
        $html =~ s|<div class="a-row a-spacing-mini">|<varkendauthors>|gi;
    }
    else
    {
        # Beginning of book data : pages, editor, publication date, ISBN, dimensions
        $html =~ s|<td class="bucket">|<varkdata>|gi;

        # Beginning of book description
        $html =~ s|<script id="bookDesc_override_CSS" type="text/undefined">|<varkdescription>|;
        #$html =~ s|<div id="bookDesc_outer_postBodyPS" style="overflow: hidden; z-index: 1; height: 0px; display: block;">|</varkdescription>|;

        # Beginning of book title
        $html =~ s|<div id="booksTitle" class="feature" data-feature-name="booksTitle">|<varktitle>|gi;

        # Beginning of book themes
        $html =~ s|<ul class="zg_hrsr">|<varkgenre>|gi;

        # Beginning of authors
        $html =~ s|<span class="author notFaded" data-width="">|<varkauthor>|gi;

        # Beginning of image. The literal "{" is escaped: an unescaped left
        # brace in a pattern triggers a warning or an error on recent Perl
        # versions; the escaped form matches exactly the same text.
        $html =~ s|class="a-dynamic-image image-stretch-vertical frontImage" id="imgBlkFront" data-a-dynamic-image="\{"|><varkimage adress="|;

        # Drop inline formatting tags and normalise a few characters
        $html =~ s|<BR>||gi;
        $html =~ s|<I>||gi;
        $html =~ s|</I>||gi;
        $html =~ s|\x{8C}|OE|gi;
        $html =~ s|\x{9C}|oe|gi;
        $html =~ s|’|'|gi;
    }

    return $html;
}
# Builds the URL of the search page for $word on the site returned by
# baseWWWamazonUrl (book search alias "stripbooks").
sub getSearchUrl
{
    my ($self, $word) = @_;

    my $host = $self->baseWWWamazonUrl;

    return join('',
        'http://',
        $host,
        '/s/ref=nb_sb_noss_1?url=search-alias=stripbooks&field-keywords=',
        "$word");
}
# Host name of the Amazon site queried by this plugin.
sub baseWWWamazonUrl
{
    my $host = "www.amazon.com";

    return $host;
}
# Display name of the plugin.
sub getName
{
    my $pluginName = "Amazon (US)";

    return $pluginName;
}
# Name of the plugin's author.
sub getAuthor
{
    my $pluginAuthor = 'Varkolak';

    return $pluginAuthor;
}
# Language code of the site handled by this plugin.
sub getLang
{
    my $langCode = 'EN';

    return $langCode;
}
# Character set name reported for the pages of this site.
sub getCharset
{
    my ($self) = @_;

    my $charset = "ISO-8859-15";

    return $charset;
}
# Returns a fresh array reference naming the fields a search can be made on.
sub getSearchFieldsArray
{
    my @searchFields = ('title', 'authors', 'isbn');

    return [@searchFields];
}
# Returns the label used by the site for a given piece of book data:
# 1 = publisher, 2 = language, 3 = ISBN, 4 = product dimensions, 5 = series.
# The code is compared numerically; any other value yields a false value.
sub getTranslation
{
    my $param = $_[1];

    my %labelFor = (
        1 => 'Publisher:',
        2 => 'Language:',
        3 => 'ISBN-13:',
        4 => 'Product Dimensions:',
        5 => 'Series:',
    );

    foreach my $code (1 .. 5)
    {
        return $labelFor{$code} if $param == $code;
    }

    # No label for this code: hand back the plain false value that a failed
    # numeric comparison produces
    return !1;
}
}
1;
À placer dans :
/usr/share/gcstar/lib/GCPlugins/GCbooks/GCAmazon.pm
/usr/share/gcstar/lib/GCPlugins/GCfilms/GCAmazon.pm
/usr/share/gcstar/lib/GCPlugins/GCgames/GCAmazon.pm
Dernière modification par kawer (07-01-2017 18:19:28)
Songez à ce que vous avez été, à ce que vous êtes, et à ce que vous serez un jour, et vous deviendrez humbles. Saint Vincent de Paul
La maladie la plus constante et la plus mortelle, mais aussi la plus méconnue de toute société, est l’indifférence. Abbé Pierre
Carpe diem, quam minimum credula postero. Horace