From d1c29717fcff974e51d8a6bd83851a1607f3ffd9 Mon Sep 17 00:00:00 2001
From: kdx
Date: Thu, 12 Jan 2023 18:00:44 +0100
Subject: scrap allocine to generate citebd screenings

---
 .gitignore |   3 ++
 build.sh   |   2 +
 cite.c     | 262 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 scrap.sh   |   5 ++
 4 files changed, 272 insertions(+)
 create mode 100644 .gitignore
 create mode 100755 build.sh
 create mode 100644 cite.c
 create mode 100755 scrap.sh

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..85b937c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+seances.xml
+cite
+cite.html
diff --git a/build.sh b/build.sh
new file mode 100755
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+gcc -Wall -Wextra -Wno-pointer-sign $(xml2-config --cflags) -o cite cite.c $(xml2-config --libs)
diff --git a/cite.c b/cite.c
new file mode 100644
--- /dev/null
+++ b/cite.c
@@ -0,0 +1,262 @@
+/*
+ * cite - read the screenings XML export named on the command line and
+ * write an HTML page to standard output that lists every film once.
+ */
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+
+enum { MAX_MOVIES = 256 };
+
+typedef struct {
+	int id; /* "id" attribute of the film node; 0 marks a free slot */
+	char title[256];
+	char og_title[256];
+	char directors[1024];
+	char actors[1024];
+	char prod_year[32];
+	char release_date[32];
+	char duration[32];
+	char main_genres[256];
+	char nationality[256];
+	char pitch[2048];
+	char poster[256];
+} Movie;
+
+/* Static storage is zero-initialised, so every slot starts out free. */
+static Movie movies[MAX_MOVIES];
+
+/*
+ * Emit the page style.
+ * NOTE(review): this literal is empty in the copy of the patch this revision
+ * was prepared from; whatever markup it held appears to have been stripped.
+ * Restore it from the original commit.
+ */
+static void print_style(void)
+{
+	puts("");
+}
+
+/* Write s to stdout, escaping the characters that are special in HTML. */
+static void put_html(const char *s)
+{
+	for (; *s != '\0'; s++) {
+		switch (*s) {
+		case '&':
+			fputs("&amp;", stdout);
+			break;
+		case '<':
+			fputs("&lt;", stdout);
+			break;
+		case '>':
+			fputs("&gt;", stdout);
+			break;
+		case '"':
+			fputs("&quot;", stdout);
+			break;
+		default:
+			putchar((unsigned char)*s);
+			break;
+		}
+	}
+}
+
+/* Print one "label : value" list item; only value is escaped. */
+static void put_item(const char *label, const char *value)
+{
+	printf("<li>%s : ", label);
+	put_html(value);
+	puts("</li>");
+}
+
+/*
+ * Print the HTML for one film: its title (the original title when there is
+ * one), its poster, then the list of details. The values come from a
+ * downloaded file, so they are escaped before being written.
+ * NOTE(review): the element names (h2, a/img, ul/li) are reconstructed; only
+ * the visible text, the "affiche" alt text and the argument order survive in
+ * the copy this revision was prepared from. Confirm against the original.
+ */
+static void print_movie(const Movie *movie)
+{
+	const char *const title =
+		movie->og_title[0] != '\0' ? movie->og_title : movie->title;
+
+	fputs("<h2>", stdout);
+	put_html(title);
+	puts("</h2>");
+	fputs("<a href=\"", stdout);
+	put_html(movie->poster);
+	fputs("\"><img src=\"", stdout);
+	put_html(movie->poster);
+	puts("\" alt=\"affiche\"></a>");
+	puts("<ul>");
+	put_item("réalisateurs", movie->directors);
+	put_item("acteurs", movie->actors);
+	put_item("année de production", movie->prod_year);
+	put_item("date de sortie", movie->release_date);
+	put_item("durée", movie->duration);
+	put_item("genres", movie->main_genres);
+	put_item("nationalité", movie->nationality);
+	/* The synopsis (movie->pitch) is stored but not printed. */
+	puts("</ul>");
+	putchar('\n');
+}
+
+/* Free a string returned by libxml2; a NULL pointer is ignored. */
+static void xfree(const void *ptr)
+{
+	if (ptr != NULL)
+		xmlFree((void *)ptr);
+}
+
+/*
+ * Return the "id" attribute of node as an int, or 0 when the attribute is
+ * absent, does not start with a number, or does not fit in an int.
+ */
+static int get_id(const xmlNode *node)
+{
+	xmlChar *const id = xmlGetProp(node, BAD_CAST "id");
+	if (id == NULL)
+		return 0;
+	errno = 0;
+	const long v = strtol((const char *)id, NULL, 10);
+	const int fits = errno != ERANGE && v >= INT_MIN && v <= INT_MAX;
+	xfree(id);
+	return fits ? (int)v : 0;
+}
+
+/*
+ * Follow first-child links from node until a node named "film" is found.
+ * Returns that node, or NULL when the chain ends without one.
+ */
+static const xmlNode *get_movie_node(const xmlNode *node)
+{
+	while (node != NULL) {
+		const char *const name = (const char *)node->name;
+		if (name != NULL && strcmp(name, "film") == 0)
+			break;
+		node = node->children;
+	}
+	return node;
+}
+
+/*
+ * Return the slot already holding id, else the first free slot, or NULL
+ * when the table is full and id is not in it.
+ */
+static Movie *get_movie(int id)
+{
+	for (size_t i = 0; i < MAX_MOVIES; i++) {
+		if (movies[i].id == id || movies[i].id == 0)
+			return &movies[i];
+	}
+	return NULL;
+}
+
+/*
+ * Copy attribute name of node into dst (size bytes), truncating byte-wise
+ * if needed and always terminating the result. Returns 0 on success, or 1
+ * when the attribute is missing, in which case dst is left untouched.
+ */
+static int copy_prop(char *dst, size_t size, const xmlNode *node,
+		const char *name)
+{
+	xmlChar *const value = xmlGetProp(node, (const xmlChar *)name);
+	if (value == NULL)
+		return 1;
+	snprintf(dst, size, "%s", (const char *)value);
+	xfree(value);
+	return 0;
+}
+
+/* Copy one attribute into the Movie field of matching size. */
+#define COPY_FIELD(field, attr) \
+	copy_prop(movie->field, sizeof(movie->field), node, attr)
+
+/*
+ * Fill movie from the attributes of node. Attributes that are present are
+ * copied even when others are missing. Returns 0 when every attribute was
+ * found, 1 otherwise.
+ */
+static int set_movie_base_fields(Movie *movie, const xmlNode *node)
+{
+	int err = 0;
+
+	err |= COPY_FIELD(title, "titre");
+	err |= COPY_FIELD(og_title, "titreoriginal");
+	err |= COPY_FIELD(directors, "realisateurs");
+	err |= COPY_FIELD(actors, "acteurs");
+	err |= COPY_FIELD(prod_year, "anneeproduction");
+	err |= COPY_FIELD(release_date, "datesortie");
+	err |= COPY_FIELD(duration, "duree");
+	err |= COPY_FIELD(main_genres, "genreprincipal");
+	err |= COPY_FIELD(nationality, "nationalite");
+	err |= COPY_FIELD(pitch, "synopsis");
+	err |= COPY_FIELD(poster, "affichette");
+	return err;
+}
+
+#undef COPY_FIELD
+
+/* Record every film found under root; the first occurrence of an id wins. */
+static void collect_movies(const xmlNode *root)
+{
+	for (const xmlNode *week = root->children; week != NULL; week = week->next) {
+		for (const xmlNode *mov = get_movie_node(week); mov != NULL; mov = mov->next) {
+			const int id = get_id(mov);
+			if (id == 0)
+				continue;
+			Movie *const movie = get_movie(id);
+			if (movie == NULL) {
+				fprintf(stderr, "cite: table full, film %d dropped\n", id);
+				continue;
+			}
+			if (movie->id != 0)
+				continue;
+			movie->id = id;
+			if (set_movie_base_fields(movie, mov) != 0)
+				fprintf(stderr, "cite: film %d lacks attributes\n", id);
+		}
+	}
+}
+
+/* Print the whole page: style, heading, then every recorded film. */
+static void print_page(void)
+{
+	print_style();
+	/* NOTE(review): heading element reconstructed, see print_movie(). */
+	puts("<h1>cinéma de la cité</h1>");
+	for (size_t i = 0; i < MAX_MOVIES && movies[i].id != 0; i++)
+		print_movie(&movies[i]);
+}
+
+int main(int argc, char **argv)
+{
+	if (argc != 2) {
+		fprintf(stderr, "usage: cite seances.xml\n");
+		return 1;
+	}
+
+	int status = 1;
+	xmlDoc *const document = xmlReadFile(argv[1], NULL, 0);
+	if (document != NULL) {
+		const xmlNode *const root = xmlDocGetRootElement(document);
+		if (root != NULL) {
+			collect_movies(root);
+			print_page();
+			status = 0;
+		}
+		/* Free the document before tearing down the parser. */
+		xmlFreeDoc(document);
+	}
+	if (status != 0)
+		fprintf(stderr, "cite: no films could be read from %s\n", argv[1]);
+	xmlCleanupParser();
+	return status;
+}
diff --git a/scrap.sh b/scrap.sh
new file mode 100755
--- /dev/null
+++ b/scrap.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+./build.sh || exit 1
+curl -f -o seances.xml 'http://www.citebd.org/IMG/xml/allocineseances-4.xml' || exit 1
+./cite seances.xml >cite.html || exit 1
+firefox cite.html
-- 
cgit v1.2.3