GCstar fonctionne à nouveau en modifiant le script Amazon, voir ici :
package GCPlugins::GCbooks::GCAmazon;
###################################################
#
# Copyright 2005-2009 Tian
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################
use strict;
use utf8;
use GCPlugins::GCbooks::GCbooksCommon;
{
package GCPlugins::GCbooks::GCPluginAmazon;
use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
use XML::Simple;
use LWP::Simple qw($ua);
use Encode;
use HTML::Entities;
use GCUtils;
# Start-tag handler. The signature matches the HTML::Parser "start"
# callback; it is presumably invoked by the parser driven from the base
# class (not visible here).
#
# Parameters:
#   $self     - plugin object carrying the scan state flags
#   $tagname  - name of the tag that was just opened
#   $attr     - hash reference with the attributes of that tag
#   $attrseq  - accepted but unused
#   $origtext - accepted but unused
#
# While a result list is parsed ($self->{parsingList} true) it tracks the
# <varkcomment> sections, opens a new entry on every <li id="result_N">
# and stores the href of the next <a> as that entry's URL.
# Otherwise (item page) it reacts to the vark* marker tags that preProcess
# substitutes into the HTML and arms the matching state flag for text().
sub start
{
    my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

    # Nesting counter per tag name
    $self->{inside}->{$tagname}++;

    if ($self->{parsingList})
    {
        # Identify beginning of comments
        if (($self->{isComment} == 0) && ($tagname eq 'varkcomment'))
        {
            $self->{isComment} = 1;
        }
        # Capture URL of book
        if (($self->{isComment} == 0) && ($self->{isUrl} == 1) && ($tagname eq 'a'))
        {
            $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
            $self->{isUrl}   = 0;
            # The next non-empty text is the title
            $self->{isTitle} = 1;
            return;
        }
        # Identify beginning of new book (next text is title)
        if (($self->{isComment} == 0) && ($tagname eq 'li') && ($attr->{id} =~ /result_[0-9]+/))
        {
            # Create new entry
            $self->{itemIdx}++;
            $self->{isUrl}    = 1;
            $self->{isAuthor} = 0;
            return;
        }
        # Identify end of authors list
        if (($self->{isComment} == 0) && ($tagname eq 'varkendauthors') && ($self->{isAuthor} != 0))
        {
            $self->{isAuthor} = 0;
            return;
        }
    }
    else
    {
        # Detection of book themes
        if (($self->{isTheme} == 0) && ($tagname eq 'varkgenre'))
        {
            $self->{isTheme} = 1;
            return;
        }
        # Detection of book page count
        if (($self->{isPage} == 0) && ($tagname eq 'varkdata'))
        {
            $self->{isPage} = 1;
            return;
        }
        # Detection of authors
        if ($tagname eq 'varkauthor')
        {
            $self->{isAuthor} = 1;
            return;
        }
        # Capture of image
        if ($tagname eq 'varkimage')
        {
            my $cover = $attr->{adress};
            if (defined $cover)
            {
                # Keep only the first .jpg URL of the attribute. The match
                # used to be evaluated in void context, so anything that
                # followed the URL ended up in the cover field.
                if ($cover =~ /(http.*?\.jpg)/)
                {
                    $cover = $1;
                }
                # Swap the https image host for the plain http mirror
                $cover =~ s|https://images-na.ssl-images-amazon.com/images/I/|http://z2-ec2.images-amazon.com/images/I/|;
            }
            $self->{curInfo}->{cover} = $cover;
            return;
        }
        # Detection of book description
        if (($self->{isDescription} == 0) && ($tagname eq 'varkdescription'))
        {
            $self->{isDescription} = 1;
            return;
        }
        # First <div> after the description marker: its text is the description
        if (($self->{isDescription} == 1) && ($tagname eq 'div'))
        {
            $self->{isDescription} = 2;
            return;
        }
        # Detection title (2 = item-page title, 1 is used by the list mode)
        if (($self->{isTitle} == 0) && ($tagname eq 'varktitle'))
        {
            $self->{isTitle} = 2;
            return;
        }
    }
}
# End-tag handler: decrements the nesting counter of the closed tag and
# resets the state flag whose section ends with that tag.
#
# Parameters:
#   $self    - plugin object carrying the scan state flags
#   $tagname - name of the tag that was just closed
sub end
{
    my ($self, $tagname) = @_;

    $self->{inside}->{$tagname}--;

    if ($self->{parsingList})
    {
        # Leaving a comment section re-enables the analysis of results
        $self->{isComment} = 0
            if ($tagname eq 'varkcomment') && ($self->{isComment} == 1);
        return;
    }

    # Item page: a closing </li> finishes the themes analysis ...
    if (($tagname eq 'li') && ($self->{isTheme} != 0))
    {
        $self->{isTheme} = 0;
        return;
    }

    # ... and a closing </div> finishes the description analysis
    if (($tagname eq 'div') && ($self->{isDescription} != 0))
    {
        $self->{isDescription} = 0;
    }
    return;
}
# Text handler: receives every text chunk found between tags and, depending
# on the state flags armed by start()/end() or by a previously seen label,
# stores the chunk into the current result entry (list mode) or into
# $self->{curInfo} (item page).
#
# Parameters:
#   $self     - plugin object carrying the scan state flags
#   $origtext - text chunk; leading and trailing blanks are ignored
#
# Every branch consumes at most one piece of information and returns, so
# the order of the tests below is significant.
sub text
{
    my ($self, $origtext) = @_;

    # Remove blanks before and after string
    $origtext =~ s/^\s+//;
    $origtext =~ s/\s+$//g;

    if ($self->{parsingList})
    {
        # Capture of book title
        if (($self->{isComment} == 0) && ($self->{isTitle} == 1) && ($origtext ne ''))
        {
            $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
            $self->{isTitle}       = 0;
            $self->{isPublication} = 1;
            return;
        }
        # Capture of book publication date
        if (($self->{isComment} == 0) && ($self->{isPublication} == 1) && ($origtext ne ''))
        {
            $self->{itemsList}[$self->{itemIdx}]->{publication} = $origtext;
            $self->{isAuthor}      = 1;
            $self->{isPublication} = 0;
            return;
        }
        # Avoid a text area before the first author
        if (($self->{isComment} == 0) && ($self->{isAuthor} == 1) && ($origtext ne ''))
        {
            $self->{isAuthor} = 2;
            return;
        }
        # Capture of authors (chunks are joined with a blank)
        if (($self->{isComment} == 0) && ($self->{isAuthor} == 2) && ($origtext ne ''))
        {
            if ($self->{itemsList}[$self->{itemIdx}]->{authors} eq '')
            {
                $self->{itemsList}[$self->{itemIdx}]->{authors} = $origtext;
            }
            else
            {
                $self->{itemsList}[$self->{itemIdx}]->{authors} .= " " . $origtext;
            }
            return;
        }
    }
    else
    {
        # Capture of title
        if (($self->{isTitle} == 2) && ($origtext ne ''))
        {
            $self->{isTitle} = 0;
            $self->{curInfo}->{title} = $origtext;
            return;
        }
        # Capture of page number
        if (($self->{isPage} == 1) && ($origtext =~ /^[0-9]+/))
        {
            $self->{curInfo}->{pages} = $origtext;
            $self->{isPage} = 0;
            return;
        }
        # Capture of editor and publication date
        if (($self->{isEditor} == 0) && ($origtext eq $self->getTranslation(1)))
        {
            $self->{isEditor} = 1;
            return;
        }
        if (($self->{isEditor} == 1) && ($origtext ne ''))
        {
            # Expected shape: "Publisher; edition (date)". The date is the
            # LAST parenthesised group, so the text is cut at the last '('.
            # Cutting at the first one returned a wrong date whenever the
            # publisher name itself contained parentheses.
            my $publisher   = $origtext;
            my $publication = undef;
            my $open        = rindex($origtext, '(');
            if ($open >= 0)
            {
                $publisher   = substr($origtext, 0, $open);
                $publication = substr($origtext, $open + 1);
                $publication =~ s/\)//g;
                $publication =~ s/^\s+//;
                $publication =~ s/\s+$//g;
            }
            $publisher =~ s/^\s+//;
            $publisher =~ s/\s+$//g;
            $publisher =~ s/\;//g;
            $self->{curInfo}->{publisher}   = $publisher;
            $self->{curInfo}->{publication} = $publication;
            $self->{isEditor} = 0;
            return;
        }
        # Capture of language
        if (($self->{isLanguage} == 0) && ($origtext eq $self->getTranslation(2)))
        {
            $self->{isLanguage} = 1;
            return;
        }
        if (($self->{isLanguage} == 1) && ($origtext ne ''))
        {
            $self->{curInfo}->{language} = $origtext;
            $self->{isLanguage} = 0;
            return;
        }
        # Capture of ISBN (stored without dashes)
        if (($self->{isISBN} == 0) && ($origtext eq $self->getTranslation(3)))
        {
            $self->{isISBN} = 1;
            return;
        }
        if (($self->{isISBN} == 1) && ($origtext ne ''))
        {
            $origtext =~ s|-||gi;
            $self->{curInfo}->{isbn} = $origtext;
            $self->{isISBN} = 0;
            return;
        }
        # Capture of book dimensions
        if (($self->{isSize} == 0) && ($origtext eq $self->getTranslation(4)))
        {
            $self->{isSize} = 1;
            return;
        }
        if (($self->{isSize} == 1) && ($origtext ne ''))
        {
            $self->{curInfo}->{format} = $origtext;
            $self->{isSize} = 0;
            return;
        }
        # Detection of themes: a '>' separator announces the next theme
        if (($origtext eq '>') && ($self->{isTheme} == 1))
        {
            $self->{isTheme} = 2;
            return;
        }
        # Capture of themes (comma separated list)
        if (($self->{isTheme} == 2) && ($origtext ne ''))
        {
            if ($self->{curInfo}->{genre} eq '')
            {
                $self->{curInfo}->{genre} = $origtext;
            }
            else
            {
                $self->{curInfo}->{genre} .= ", " . $origtext;
            }
            $self->{isTheme} = 1;
            return;
        }
        # Capture of authors (chunks containing "Ajax" are skipped)
        if (($self->{isAuthor} == 1) && ($origtext ne '') && ($origtext =~ /^(?:(?!Ajax).)*$/))
        {
            # Lower case for author names, except for first letters
            $origtext =~ s/([[:alpha:]]+)/ucfirst(lc $1)/egi;
            if ($self->{curInfo}->{authors} eq '')
            {
                $self->{curInfo}->{authors} = $origtext;
            }
            else
            {
                $self->{curInfo}->{authors} .= ", " . $origtext;
            }
            $self->{isAuthor} = 0;
            return;
        }
        # Capture of description (chunks are concatenated as they come)
        if (($self->{isDescription} == 2) && ($origtext ne ''))
        {
            if ($self->{curInfo}->{description} eq '')
            {
                $self->{curInfo}->{description} = $origtext;
            }
            else
            {
                $self->{curInfo}->{description} .= $origtext;
            }
            return;
        }
    }
}
# Constructor: builds the object through the parent class, declares the
# fields handled for the result list and resets every scan state flag.
#
# Returns the blessed plugin object.
sub new
{
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self  = $class->SUPER::new();
    bless($self, $class);

    # NOTE(review): presumably tells the application which columns the
    # result list provides (1) or not (0) - confirm against the base class.
    $self->{hasField} = {
        title       => 1,
        authors     => 1,
        publication => 1,
        format      => 0,
        edition     => 0,
    };

    # State flags driven by start()/end()/text(). isSize is read by text()
    # as well, so it is reset here together with the others instead of
    # being left undefined.
    $self->{$_} = 0
        for qw(isComment isUrl isTitle isPublication isAuthor isPage
               isEditor isISBN isDescription isLanguage isTheme isSize);

    return $self;
}
# Returns the URL of an item page; the URL captured from the result list
# is handed back unchanged.
sub getItemUrl
{
    my (undef, $url) = @_;
    return $url;
}
# Rewrites the raw HTML before it is parsed: the fragments that delimit
# interesting sections are replaced by vark* pseudo-tags, which start(),
# end() and text() then react to.
#
# Parameters:
#   $self - plugin object; $self->{parsingList} selects the rule set
#   $html - HTML of a result list or of an item page
#
# Returns the rewritten HTML.
sub preProcess
{
    my ($self, $html) = @_;

    if ($self->{parsingList})
    {
        # Analysis of results must be disabled during comments
        $html =~ s|<!--|<varkcomment>|gi;
        $html =~ s|-->|</varkcomment>|gi;
        # Remove other commercial offers (everything from the marker on)
        $html =~ s|END SPONSORED LINKS SCRIPT.*||s;
        # End of authors listing detection
        $html =~ s|</span></div></div><div class="a-row"><div class="a-column a-span7"><div class="a-row a-spacing-none">|<varkendauthors>|gi;
        $html =~ s|<h3 class="a-size-small a-color-null s-inline a-text-normal">|<varkendauthors>|gi;
        $html =~ s|<div class="a-row a-spacing-mini">|<varkendauthors>|gi;
    }
    else
    {
        # Beginning of book data : pages, editor, publication date, ISBN, dimensions
        $html =~ s|<td class="bucket">|<varkdata>|gi;
        # Beginning and end of book description
        $html =~ s|<script id="bookDesc_override_CSS" type="text/undefined">|<varkdescription>|;
        #$html =~ s|<div id="bookDesc_outer_postBodyPS" style="overflow: hidden; z-index: 1; height: 0px; display: block;">|</varkdescription>|;
        # Beginning of book title
        $html =~ s|<div id="booksTitle" class="feature" data-feature-name="booksTitle">|<varktitle>|gi;
        # Beginning of book themes
        $html =~ s|<ul class="zg_hrsr">|<varkgenre>|gi;
        # Beginning of authors
        $html =~ s|<span class="author notFaded" data-width="">|<varkauthor>|gi;
        # Beginning of image. The literal brace is escaped: an unescaped
        # "{" inside a pattern is deprecated or rejected by recent perls.
        $html =~ s|class="a-dynamic-image image-stretch-vertical frontImage" id="imgBlkFront" data-a-dynamic-image="\{"|><varkimage adress="|;
        # Drop inline markup that would split text chunks
        $html =~ s|<BR>||gi;
        $html =~ s|<I>||gi;
        $html =~ s|</I>||gi;
        # Replace the OE/oe ligature codes and the typographic apostrophe
        $html =~ s|\x{8C}|OE|gi;
        $html =~ s|\x{9C}|oe|gi;
        $html =~ s|’|'|gi;
    }
    return $html;
}
# Builds the URL of the book search page for the given keywords.
#
# Parameters:
#   $self - plugin object (provides the host through baseWWWamazonUrl)
#   $word - search keywords, appended as they are
sub getSearchUrl
{
    my ($self, $word) = @_;

    my $host = $self->baseWWWamazonUrl;
    return join('',
        'http://',
        $host,
        '/s/ref=nb_sb_noss_1?url=search-alias=stripbooks&field-keywords=',
        "$word");
}
# Host name of the Amazon site used by getSearchUrl.
sub baseWWWamazonUrl
{
    my $host = 'www.amazon.com';
    return $host;
}
# Name string of this plugin.
sub getName
{
    my $name = 'Amazon (US)';
    return $name;
}
# Name of the plugin's author.
sub getAuthor
{
    my $author = "Varkolak";
    return $author;
}
# Language code reported by this plugin.
sub getLang
{
    my $lang = "EN";
    return $lang;
}
# Character set name reported by this plugin.
# NOTE(review): presumably used to decode the fetched pages - confirm
# against the base class.
sub getCharset
{
    my ($self) = @_;
    my $charset = 'ISO-8859-15';
    return $charset;
}
# Returns a reference to a new array naming the fields a search can be
# made on.
sub getSearchFieldsArray
{
    my @fields = qw(title authors isbn);
    return [@fields];
}
# Gives the label, as printed on this site, that introduces a detail of
# the book page. The code is the second argument:
#   1 = publisher, 2 = language, 3 = ISBN, 4 = product dimensions,
#   5 = series
# text() compares incoming chunks with codes 1 to 4.
sub getTranslation
{
    my $param = $_[1];

    my @labels = (
        'Publisher:',
        'Language:',
        'ISBN-13:',
        'Product Dimensions:',
        'Series:',
    );

    # Codes start at 1, list positions at 0
    foreach my $code (1 .. scalar @labels)
    {
        return $labels[$code - 1] if $param == $code;
    }

    # No label is known for this code
    return '';
}
}
1;
À placer dans :
/usr/share/gcstar/lib/GCPlugins/GCbooks/GCAmazon.pm
/usr/share/gcstar/lib/GCPlugins/GCfilms/GCAmazon.pm
/usr/share/gcstar/lib/GCPlugins/GCgames/GCAmazon.pm
Dernière modification par kawer (07-01-2017 18:19:28)
ThinkPad T530 - Debian - CoreBoot