summary refs log tree commit diff
diff options
context:
space:
mode:
author kdx <kikoodx@paranoici.org> 2023-01-12 18:00:44 +0100
committer kdx <kikoodx@paranoici.org> 2023-01-12 18:00:44 +0100
commit d1c29717fcff974e51d8a6bd83851a1607f3ffd9 (patch)
tree 0b635ef237bebb17c081a03c7eca14b54fe91307
download cite-scrapper-d1c29717fcff974e51d8a6bd83851a1607f3ffd9.tar.gz
scrap allocine to generate citebd screenings
-rw-r--r-- .gitignore 3
-rwxr-xr-x build.sh 2
-rw-r--r-- cite.c 159
-rwxr-xr-x scrap.sh 5
4 files changed, 169 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..85b937c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+seances.xml
+cite
+cite.html
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..d1fd2d4
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+# Build the `cite` binary from cite.c against libxml2.
+# The libxml2 libraries are listed after the source file: linkers resolve
+# symbols left to right, so a library named before the code that needs it
+# can be dropped (e.g. when --as-needed is the default).
+gcc -Wall -Wextra -Wno-pointer-sign $(xml2-config --cflags) -o cite cite.c $(xml2-config --libs)
diff --git a/cite.c b/cite.c
new file mode 100644
index 0000000..7db3749
--- /dev/null
+++ b/cite.c
@@ -0,0 +1,159 @@
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <libxml/parser.h>
+
+/* One film, filled from the attributes of a "film" node by
+ * set_movie_base_fields(). Every text field is a fixed-size buffer. */
+typedef struct {
+ /* value of the "id" attribute; 0 marks an unused slot in `movies` */
+ int id;
+ /* "titre" attribute */
+ char title[256];
+ /* "titreoriginal" attribute; print_movie() prefers it when non-empty */
+ char og_title[256];
+ /* "realisateurs" attribute */
+ char directors[1024];
+ /* "acteurs" attribute */
+ char actors[1024];
+ /* "anneeproduction" attribute, kept as text */
+ char prod_year[32];
+ /* "datesortie" attribute, kept as text */
+ char release_date[32];
+ /* "duree" attribute, kept as text */
+ char duration[32];
+ /* "genreprincipal" attribute */
+ char main_genres[256];
+ /* "nationalite" attribute */
+ char nationality[256];
+ /* "synopsis" attribute; stored, but the line printing it in
+  * print_movie() is commented out */
+ char pitch[2048];
+ /* "affichette" attribute; used as both link target and image source */
+ char poster[256];
+} Movie;
+
+/* Table of the films collected so far. get_movie() hands out the first entry
+ * whose id is 0, so entries are filled front to back; the first entry with
+ * id 0 also ends the print loop in main(). */
+Movie movies[256] = {0};
+
+/* Write the <link> element referencing the external stylesheet to stdout,
+ * followed by a newline. */
+static void print_style(void)
+{
+    static const char stylesheet_link[] =
+        "<link rel='stylesheet' type='text/css' href='https://kdx.re/theme.css'/>";
+
+    printf("%s\n", stylesheet_link);
+}
+
+/* Write `text` to stdout, replacing the characters that are special in HTML
+ * text and attribute values (& < > " ') with character references. */
+static void print_html_escaped(const char *text)
+{
+    for (; *text != '\0'; text++) {
+        switch (*text) {
+        case '&':
+            fputs("&amp;", stdout);
+            break;
+        case '<':
+            fputs("&lt;", stdout);
+            break;
+        case '>':
+            fputs("&gt;", stdout);
+            break;
+        case '"':
+            fputs("&quot;", stdout);
+            break;
+        case '\'':
+            fputs("&#39;", stdout);
+            break;
+        default:
+            putchar((unsigned char)*text);
+            break;
+        }
+    }
+}
+
+/* Write one "<li>label : value</li>" line; only `value` is escaped. */
+static void print_item(const char *label, const char *value)
+{
+    printf("<li>%s : ", label);
+    print_html_escaped(value);
+    puts("</li>");
+}
+
+/* Write the HTML fragment for one film to stdout: a heading (the original
+ * title when it is non-empty, the title otherwise), the poster as a linked
+ * image, and a list of the film's details.
+ *
+ * The field values come from the parsed XML, where entities are already
+ * decoded, so they are escaped again here before being placed in HTML text
+ * or attribute values. The synopsis (`pitch`) is not printed. */
+static void print_movie(const Movie *movie)
+{
+    const char *heading =
+        movie->og_title[0] != '\0' ? movie->og_title : movie->title;
+
+    fputs("<h2>", stdout);
+    print_html_escaped(heading);
+    puts("</h2>");
+
+    fputs("<a href=\"", stdout);
+    print_html_escaped(movie->poster);
+    fputs("\"><img src=\"", stdout);
+    print_html_escaped(movie->poster);
+    puts("\" alt=\"affiche\" /></a>");
+
+    puts("<ul>");
+    print_item("réalisateurs", movie->directors);
+    print_item("acteurs", movie->actors);
+    print_item("année de production", movie->prod_year);
+    print_item("date de sortie", movie->release_date);
+    print_item("durée", movie->duration);
+    print_item("genres", movie->main_genres);
+    print_item("nationalité", movie->nationality);
+    puts("</ul>");
+    putchar('\n');
+}
+
+/* Release a libxml2-allocated buffer through xmlFree(). A NULL pointer is
+ * ignored. Takes a const pointer so callers can hold their strings as
+ * `const xmlChar *`; the qualifier is cast away only for the call. */
+static void xfree(const void *ptr)
+{
+    if (ptr == NULL)
+        return;
+    xmlFree((void *)ptr);
+}
+
+/* Read the "id" attribute of `node` as a decimal integer.
+ *
+ * Returns 0 when the attribute is missing or its value does not fit in an
+ * int; callers treat 0 as "no usable id". Like atoi(), leading whitespace is
+ * skipped, parsing stops at the first non-digit, and text with no leading
+ * number yields 0. Unlike atoi(), out-of-range input is detected instead of
+ * being undefined behavior. */
+static int get_id(const xmlNode *node)
+{
+    const xmlChar *id = xmlGetProp(node, "id");
+    if (id == NULL)
+        return 0;
+    errno = 0;
+    const long v = strtol((const char *)id, NULL, 10);
+    const int out_of_range = errno == ERANGE || v < INT_MIN || v > INT_MAX;
+    xfree(id);
+    return out_of_range ? 0 : (int)v;
+}
+
+/* Starting at `node`, follow the chain of first children until an element
+ * named "film" is reached. Returns that node, or NULL when the chain ends
+ * without finding one (including when `node` itself is NULL). Siblings are
+ * not examined. */
+static const xmlNode *get_movie_node(const xmlNode *node)
+{
+    for (const xmlNode *cur = node; cur != NULL; cur = cur->children) {
+        if (strcmp((const char *)cur->name, "film") == 0)
+            return cur;
+    }
+    return NULL;
+}
+
+/* Find the table entry for film `id`.
+ *
+ * Scans `movies` from the start and returns the first entry whose id equals
+ * `id`, or the first unused entry (id 0) when the film is not in the table
+ * yet. The scan never goes past the last element: if the table holds neither
+ * a match nor an unused entry, the last element is returned, so the caller
+ * must check the returned entry's id before using it. Never returns NULL. */
+static Movie *get_movie(int id)
+{
+    Movie *const last = movies + sizeof(movies) / sizeof(movies[0]) - 1;
+    Movie *movie = movies;
+    while (movie < last && movie->id != id && movie->id != 0)
+        movie += 1;
+    return movie;
+}
+
+/* Copy `src` into the `size`-byte buffer `dst`, truncating if necessary and
+ * always leaving `dst` NUL-terminated (strncpy does not guarantee that). */
+static void copy_field(char *dst, size_t size, const xmlChar *src)
+{
+    snprintf(dst, size, "%s", (const char *)src);
+}
+
+/* Fill the text fields of `movie` from the attributes of `node`.
+ *
+ * All eleven attributes must be present. If any is missing, nothing is
+ * copied and 1 is returned; otherwise every field is copied (truncated to
+ * its buffer size) and 0 is returned. `movie->id` is not touched. The
+ * attribute strings obtained from libxml2 are released on both paths. */
+static int set_movie_base_fields(Movie *movie, const xmlNode *node)
+{
+    int err = 0;
+    const xmlChar *title = xmlGetProp(node, "titre");
+    const xmlChar *og_title = xmlGetProp(node, "titreoriginal");
+    const xmlChar *directors = xmlGetProp(node, "realisateurs");
+    const xmlChar *actors = xmlGetProp(node, "acteurs");
+    const xmlChar *prod_year = xmlGetProp(node, "anneeproduction");
+    const xmlChar *release_date = xmlGetProp(node, "datesortie");
+    const xmlChar *duration = xmlGetProp(node, "duree");
+    const xmlChar *main_genres = xmlGetProp(node, "genreprincipal");
+    const xmlChar *nationality = xmlGetProp(node, "nationalite");
+    const xmlChar *pitch = xmlGetProp(node, "synopsis");
+    const xmlChar *poster = xmlGetProp(node, "affichette");
+    if (title == NULL || og_title == NULL || directors == NULL ||
+        actors == NULL || prod_year == NULL || release_date == NULL ||
+        duration == NULL || main_genres == NULL || nationality == NULL ||
+        pitch == NULL || poster == NULL) {
+        err = 1;
+        goto set_fields_panic;
+    }
+    copy_field(movie->title, sizeof(movie->title), title);
+    copy_field(movie->og_title, sizeof(movie->og_title), og_title);
+    copy_field(movie->directors, sizeof(movie->directors), directors);
+    copy_field(movie->actors, sizeof(movie->actors), actors);
+    copy_field(movie->prod_year, sizeof(movie->prod_year), prod_year);
+    copy_field(movie->release_date, sizeof(movie->release_date), release_date);
+    copy_field(movie->duration, sizeof(movie->duration), duration);
+    copy_field(movie->main_genres, sizeof(movie->main_genres), main_genres);
+    copy_field(movie->nationality, sizeof(movie->nationality), nationality);
+    copy_field(movie->pitch, sizeof(movie->pitch), pitch);
+    copy_field(movie->poster, sizeof(movie->poster), poster);
+set_fields_panic:
+    /* reached on success too: the attribute copies are always released */
+    xfree(title);
+    xfree(og_title);
+    xfree(directors);
+    xfree(actors);
+    xfree(prod_year);
+    xfree(release_date);
+    xfree(duration);
+    xfree(main_genres);
+    xfree(nationality);
+    xfree(pitch);
+    xfree(poster);
+    return err;
+}
+
+/* Entry point: parse the XML file named by the single argument, collect each
+ * distinct film once, and write an HTML page listing the films to stdout.
+ *
+ * Returns 0 on success and 1 on a usage error, when the file cannot be
+ * parsed, or when the document has no root element. Diagnostics go to
+ * stderr so that stdout contains only the page. */
+int main(int argc, char **argv)
+{
+    if (argc != 2) {
+        fprintf(stderr, "usage: %s <seances.xml>\n",
+                argc > 0 ? argv[0] : "cite");
+        return 1;
+    }
+    xmlDoc *const document = xmlReadFile(argv[1], NULL, 0);
+    if (document == NULL) {
+        xmlCleanupParser();
+        return 1;
+    }
+    xmlNode *const root = xmlDocGetRootElement(document);
+    if (root == NULL) {
+        /* free the document before tearing down the library state */
+        xmlFreeDoc(document);
+        xmlCleanupParser();
+        return 1;
+    }
+    /* The last table entry is never filled: it stays at id 0 so that every
+     * scan that stops at the first id-0 entry ends inside the array. */
+    Movie *const last_slot = movies + sizeof(movies) / sizeof(movies[0]) - 1;
+    for (const xmlNode *week = root->children; week != NULL; week = week->next) {
+        for (const xmlNode *mov = get_movie_node(week); mov != NULL; mov = mov->next) {
+            const int id = get_id(mov);
+            if (id == 0)
+                continue;
+            Movie *movie = get_movie(id);
+            if (movie->id != 0)
+                continue; /* slot already taken: film seen before */
+            if (movie == last_slot) {
+                fprintf(stderr, "film %d: table full, skipped\n", id);
+                continue;
+            }
+            movie->id = id;
+            if (set_movie_base_fields(movie, mov) != 0) {
+                /* Nothing was copied; release the slot instead of printing
+                 * a blank entry. A later node with the same id can still
+                 * fill it. */
+                fprintf(stderr, "film %d: missing attribute, skipped\n", id);
+                movie->id = 0;
+            }
+        }
+    }
+    print_style();
+    printf("<h1>cinéma de la cité</h1>\n");
+    for (const Movie *movie = movies; movie <= last_slot && movie->id != 0; movie++)
+        print_movie(movie);
+    xmlFreeDoc(document);
+    xmlCleanupParser();
+    return 0;
+}
diff --git a/scrap.sh b/scrap.sh
new file mode 100755
index 0000000..7a2943d
--- /dev/null
+++ b/scrap.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+# Build the scraper, download the screenings feed, render it to cite.html and
+# open the result. Stops at the first step that fails.
+./build.sh || exit 1
+# --fail: exit non-zero on an HTTP error status instead of saving the
+# server's error page as seances.xml
+curl --fail -o seances.xml 'http://www.citebd.org/IMG/xml/allocineseances-4.xml' || exit 1
+./cite seances.xml >cite.html || exit 1
+firefox cite.html