From 1684f585b61073f5fb13126868c4a6b2298c18d0 Mon Sep 17 00:00:00 2001 From: kdx Date: Fri, 13 Jan 2023 04:03:40 +0100 Subject: scrap movie posters --- .gitignore | 1 + build.sh | 4 +++- cite.c | 34 +++++++++++++++++++++++++++++++++- scrap.sh | 8 ++++---- 4 files changed, 41 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index b15486f..cba224e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ seances.xml +scrap cite cite.html citenofr.html diff --git a/build.sh b/build.sh index 983c5fd..772a236 100755 --- a/build.sh +++ b/build.sh @@ -1,2 +1,4 @@ #!/bin/sh -gcc -g -Wall -Wextra -Wno-pointer-sign $(xml2-config --cflags --libs) -o cite cite.c +gcc -Wall -Wextra -Wno-pointer-sign \ + -lcurl $(xml2-config --cflags --libs) \ + -o scrap cite.c diff --git a/cite.c b/cite.c index 8feb067..12545c5 100644 --- a/cite.c +++ b/cite.c @@ -1,3 +1,4 @@ +#include <curl/curl.h> #include #include @@ -18,6 +19,7 @@ typedef struct { } Movie; Movie movies[64] = {0}; +CURL *curl = NULL; static void print_style(void) { @@ -58,6 +60,27 @@ static void print_movie(Movie *movie, int hide_fr) printf("\n"); } +static void download_poster(Movie *movie) +{ + char out_path[512]; + char *last_slash = movie->poster; + while (strchr(last_slash, '/') != NULL) + last_slash = strchr(last_slash, '/') + 1; + strcpy(out_path, "cite/"); + strcat(out_path, last_slash); + FILE *const fp = fopen(out_path, "wb"); + if (fp == NULL) + return; + curl_easy_setopt(curl, CURLOPT_URL, movie->poster); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp); + fprintf(stderr, "getting %s", out_path); + fprintf(stderr, "\rgot %s \n", out_path); + const CURLcode res = curl_easy_perform(curl); + if (res == CURLE_OK) + strcpy(movie->poster, last_slash); +} + static void xfree(const void *ptr) { if (ptr != NULL) @@ -170,6 +193,12 @@ int main(int argc, char **argv) xmlFreeDoc(document); return 1; } + curl = curl_easy_init(); + if (curl == NULL) { + 
xmlCleanupParser(); + xmlFreeDoc(document); + return 1; + } for (const xmlNode *week = root->children; week != NULL; week = week->next) { for (const xmlNode *mov = get_movie_node(week); mov != NULL; mov = mov->next) { const int id = get_id(mov); @@ -185,8 +214,11 @@ int main(int argc, char **argv) } print_style(); printf("

cinéma de la cité

\n"); - for (Movie *movie = movies; movie->id != 0; movie++) + for (Movie *movie = movies; movie->id != 0; movie++) { + download_poster(movie); print_movie(movie, atoi(argv[2])); + } + curl_easy_cleanup(curl); xmlFreeDoc(document); xmlCleanupParser(); return 0; diff --git a/scrap.sh b/scrap.sh index bb4862a..9228443 100755 --- a/scrap.sh +++ b/scrap.sh @@ -1,7 +1,7 @@ #!/bin/sh ./build.sh || exit 1 curl -o seances.xml 'http://www.citebd.org/IMG/xml/allocineseances-4.xml' || exit 1 -./cite seances.xml 0 >cite.html || exit 1 -scp cite.html root@kdx.re:/var/www/html -./cite seances.xml 1 >citenofr.html || exit 1 -scp citenofr.html root@kdx.re:/var/www/html +mkdir -p cite +./scrap seances.xml 0 >cite/index.html || exit 1 +./scrap seances.xml 1 >cite/nofr.html || exit 1 +rsync -rvu --delete cite root@kdx.re:/var/www/html -- cgit v1.2.3