diff options
author | kdx <kikoodx@paranoici.org> | 2023-01-13 04:03:40 +0100 |
---|---|---|
committer | kdx <kikoodx@paranoici.org> | 2023-01-13 04:03:40 +0100 |
commit | 1684f585b61073f5fb13126868c4a6b2298c18d0 (patch) | |
tree | 1674877bcaea29f444aee3697ee43f6e23e8cd45 | |
parent | f9ca781f1222c95272b7f380460cf093821b5bee (diff) | |
download | cite-scrapper-1684f585b61073f5fb13126868c4a6b2298c18d0.tar.gz |
scrap movie posters
-rw-r--r-- | .gitignore | 1 | ||||
-rwxr-xr-x | build.sh | 4 | ||||
-rw-r--r-- | cite.c | 34 | ||||
-rwxr-xr-x | scrap.sh | 8 |
4 files changed, 41 insertions, 6 deletions
@@ -1,4 +1,5 @@
 seances.xml
+scrap
 cite
 cite.html
 citenofr.html
@@ -1,2 +1,4 @@
 #!/bin/sh
-gcc -g -Wall -Wextra -Wno-pointer-sign $(xml2-config --cflags --libs) -o cite cite.c
+gcc -Wall -Wextra -Wno-pointer-sign \
+	-lcurl $(xml2-config --cflags --libs) \
+	-o scrap cite.c
@@ -1,3 +1,4 @@
+#include <curl/curl.h>
 #include <libxml/parser.h>
 #include <string.h>
 
@@ -18,6 +19,7 @@ typedef struct {
 } Movie;
 
 Movie movies[64] = {0};
+CURL *curl = NULL;
 
 static void print_style(void)
 {
@@ -58,6 +60,27 @@ static void print_movie(Movie *movie, int hide_fr)
 	printf("\n");
 }
 
+static void download_poster(Movie *movie)
+{
+	char out_path[512];
+	char *last_slash = movie->poster;
+	while (strchr(last_slash, '/') != NULL)
+		last_slash = strchr(last_slash, '/') + 1;
+	strcpy(out_path, "cite/");
+	strcat(out_path, last_slash);
+	FILE *const fp = fopen(out_path, "wb");
+	if (fp == NULL)
+		return;
+	curl_easy_setopt(curl, CURLOPT_URL, movie->poster);
+	curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL);
+	curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
+	fprintf(stderr, "getting %s", out_path);
+	fprintf(stderr, "\rgot %s \n", out_path);
+	const CURLcode res = curl_easy_perform(curl);
+	if (res == CURLE_OK)
+		strcpy(movie->poster, last_slash);
+}
+
 static void xfree(const void *ptr)
 {
 	if (ptr != NULL)
@@ -170,6 +193,12 @@ int main(int argc, char **argv)
 		xmlFreeDoc(document);
 		return 1;
 	}
+	curl = curl_easy_init();
+	if (curl == NULL) {
+		xmlCleanupParser();
+		xmlFreeDoc(document);
+		return 1;
+	}
 	for (const xmlNode *week = root->children; week != NULL; week = week->next) {
 		for (const xmlNode *mov = get_movie_node(week); mov != NULL; mov = mov->next) {
 			const int id = get_id(mov);
@@ -185,8 +214,11 @@ int main(int argc, char **argv)
 	}
 	print_style();
 	printf("<h1><a href=\"http://www.citebd.org/spip.php?film2912\">cinéma de la cité</a></h1>\n");
-	for (Movie *movie = movies; movie->id != 0; movie++)
+	for (Movie *movie = movies; movie->id != 0; movie++) {
+		download_poster(movie);
 		print_movie(movie, atoi(argv[2]));
+	}
+	curl_easy_cleanup(curl);
 	xmlFreeDoc(document);
 	xmlCleanupParser();
 	return 0;
@@ -1,7 +1,7 @@
 #!/bin/sh
 ./build.sh || exit 1
 curl -o seances.xml 'http://www.citebd.org/IMG/xml/allocineseances-4.xml' || exit 1
-./cite seances.xml 0 >cite.html || exit 1
-scp cite.html root@kdx.re:/var/www/html
-./cite seances.xml 1 >citenofr.html || exit 1
-scp citenofr.html root@kdx.re:/var/www/html
+mkdir -p cite
+./scrap seances.xml 0 >cite/index.html || exit 1
+./scrap seances.xml 1 >cite/nofr.html || exit 1
+rsync -rvu --delete cite root@kdx.re:/var/www/html