commit 2ab1b868fbac189f3c7b8e2d4af14838aad1227f
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sun, 11 Nov 2018 13:34:20 +0100
initial repo
Diffstat:
A | .gitignore | | | 2 | ++ |
A | LICENSE | | | 15 | +++++++++++++++ |
A | Makefile | | | 5 | +++++ |
A | README | | | 56 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | TODO | | | 5 | +++++ |
A | arg.h | | | 37 | +++++++++++++++++++++++++++++++++++++ |
A | bget.c | | | 524 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
7 files changed, 644 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+bget
+*.o
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,15 @@
+ISC License
+
+Copyright (c) 2018 Hiltjo Posthuma <hiltjo@codemadness.org>
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
diff --git a/Makefile b/Makefile
@@ -0,0 +1,5 @@
+# build depends on clean, so every build removes the old binary and
+# object files first and then compiles bget.c in one step, linking libtls.
+# CFLAGS and LDFLAGS are taken from the environment / command line.
+build: clean
+ cc -o bget bget.c -ltls ${CFLAGS} ${LDFLAGS}
+
+# remove the binary and any object files
+clean:
+ rm -f bget *.o
diff --git a/README b/README
@@ -0,0 +1,56 @@
+bget
+====
+
+Relatively simple HTTP, HTTPS and Gopher client/file grabber.
+
+
+Why?
+----
+
+curl is a nice Swiss Army knife, but it's not a very sharp tool. Every week
+there is a new curl CVE. Other tools like OpenBSD ftp are much better, but
+still do too much.
+
+Sometimes (or most of the time?) you just want to fetch a file via the HTTP,
+HTTPS or Gopher protocol.
+
+The focus of this tool is on security and simplicity.
+
+
+Dependencies
+------------
+
+- LibreSSL
+- libtls
+
+
+Features
+--------
+
+- Uses OpenBSD pledge(2) and unveil(2). Allow no filesystem access (write to
+ stdout).
+- Impose timeout and maximum size limits.
+- Use well-defined exitcodes for reliable scripting (curl sucks at this).
+- Send as little information as possible (no User-Agent etc by default).
+- Allow to send custom headers / header string if wanted.
+
+
+Anti-features
+-------------
+
+Not by default, but possible with custom headers:
+- No HTTP byte range support.
+- No HTTP User-Agent.
+- No HTTP If-Modified-Since/If-* support.
+- No HTTP auth support.
+
+Other:
+- No HTTP keep-alive.
+- No HTTP chunked-encoding support.
+- No HTTP redirect support.
+- No (GZIP) compression support.
+- No cookie-jar or cookie parsing support.
+
+- No Gopher text handling (".\r\n").
+
+- ... etc...
diff --git a/TODO b/TODO
@@ -0,0 +1,5 @@
+- man page documentation.
+- add compat functions, for example strlcpy.
+- separate program errors from other errors.
+? HTTP proxy support?
+
diff --git a/arg.h b/arg.h
@@ -0,0 +1,37 @@
+/*
+ * Minimal command-line option parsing macros.
+ *
+ * Usage, inside a function that has `argc` and `argv` in scope:
+ *
+ *	ARGBEGIN {
+ *	case 'x': ...; break;
+ *	default: usage();
+ *	} ARGEND
+ *
+ * This header includes nothing itself: EARGF() calls abort(), so the
+ * including file has to provide <stdlib.h>.
+ */
+#ifndef ARG_H
+#define ARG_H
+
+/* evaluate x and discard the result, to silence "unused" warnings */
+#define USED(x) ((void)(x))
+
+/* assigned the original argv[0] by ARGBEGIN; only declared here, so the
+ * program using these macros has to define it */
+extern char *argv0;
+
+/*
+ * ARGBEGIN stores argv[0] in argv0, skips it, and then loops over the
+ * following arguments for as long as they start with '-' and have at least
+ * one more character (a lone "-" therefore ends option parsing).
+ * An argument that is exactly "--" is consumed and ends option parsing.
+ * Every character after the '-' is handed to the switch statement that the
+ * caller writes after ARGBEGIN, so flags can be grouped ("-ab").
+ * The inner loop stops scanning the current argument as soon as argv was
+ * advanced by the switch body (which is what EARGF() does).
+ * argc and argv are modified in place; after ARGEND they describe the
+ * remaining, non-option arguments.
+ */
+#define ARGBEGIN for(argv0 = *argv, argv++, argc--;\
+ argv[0] && argv[0][0] == '-'\
+ && argv[0][1];\
+ argc--, argv++) {\
+ char _argc;\
+ char **_argv;\
+ if(argv[0][1] == '-' && argv[0][2] == '\0') {\
+ argv++;\
+ argc--;\
+ break;\
+ }\
+ int i_;\
+ for(i_ = 1, _argv = argv; argv[0][i_];\
+ i_++) {\
+ if(_argv != argv)\
+ break;\
+ _argc = argv[0][i_];\
+ switch(_argc)
+
+/* closes the two loops opened by ARGBEGIN */
+#define ARGEND }\
+ USED(_argc);\
+ }\
+ USED(argv);\
+ USED(argc);
+
+/*
+ * Yield the argument of the current option, which is always the NEXT
+ * element of argv (text attached to the flag itself, as in "-ovalue", is
+ * not used as the argument). argv and argc are advanced past it.
+ * If there is no next element, x is evaluated (typically a usage function
+ * that does not return) and then abort() is called.
+ */
+#define EARGF(x) ((argv[1] == NULL)? ((x), abort(), (char *)0) :\
+ (argc--, argv++, argv[0]))
+
+#endif
+
diff --git a/bget.c b/bget.c
@@ -0,0 +1,524 @@
+#include <sys/socket.h>
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <netdb.h>
+#include <locale.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <tls.h>
+
+#include "arg.h"
+
+/* size in bytes of the buffer used to build the request and to read the
+ * response; the HTTP response header has to fit in it */
+#define READ_BUF_SIZ 16384
+
+/* outside OpenBSD, pledge() and unveil() calls are replaced by the constant
+ * 0, so the "== -1" failure checks at the call sites never trigger */
+#ifndef __OpenBSD__
+#define pledge(p1,p2) 0
+#define unveil(p1,p2) 0
+#endif
+
+/* CA certificate file that is unveil()ed for HTTPS requests; can be
+ * overridden at compile time */
+#ifndef TLS_CA_CERT_FILE
+#define TLS_CA_CERT_FILE "/etc/ssl/cert.pem"
+#endif
+
+/* components of a parsed URI, each stored as a NUL-terminated string
+ * (filled in by parseuri()) */
+struct uri {
+ char proto[48]; /* scheme, the part before "://" */
+ char host[256]; /* host name or address, IPv6 literal without brackets */
+ char path[2048]; /* path, starts with '/' after a successful parse */
+ char port[6]; /* numeric port */
+};
+
+/* program name (original argv[0]), assigned by ARGBEGIN from arg.h */
+char *argv0;
+
+/* max response size in bytes, 0 is unlimited */
+static size_t config_maxresponsesiz = 0;
+/* time-out in seconds, applied as socket send and receive timeout */
+static time_t config_timeout = 10;
+/* custom HTTP header, NULL when not given; inserted as-is into the request */
+static char *config_custom;
+/* parsed uri */
+static struct uri u;
+/* raw command-line argument */
+static char *url;
+
+/*
+ * Print a printf(3)-style formatted message to stderr and terminate the
+ * process with exit status 1. The message is printed exactly as given: no
+ * prefix and no trailing newline are added.
+ */
+void
+die(const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	(void)vfprintf(stderr, fmt, args);
+	va_end(args);
+	exit(1);
+}
+
+/*
+ * Split the URI string s of the form "proto://host[:port][/path]" into its
+ * components and store them in u.
+ *
+ * - proto may consist of letters, digits, '+', '-' and '.'.
+ * - host is either a bracketed IPv6 literal ("[::1]", stored without the
+ *   brackets) or everything up to the next ':' or '/'. It must not be empty.
+ * - port is optional; when present it must be a decimal number in the range
+ *   1 - 65535 written with digits only.
+ * - all leading slashes of the path are collapsed, so u->path always starts
+ *   with exactly one '/' ("/" when no path was given).
+ *
+ * Returns 0 on success and -1 when the URI is malformed or a component does
+ * not fit in its field. An empty string s is accepted: 0 is returned with
+ * all fields empty. The contents of u are unspecified after a -1 return.
+ */
+int
+parseuri(const char *s, struct uri *u)
+{
+	const char *p = s, *b;
+	char *endptr = NULL;
+	size_t i, n;
+	unsigned long l;
+
+	u->proto[0] = u->host[0] = u->path[0] = u->port[0] = '\0';
+	if (!*p)
+		return 0;
+
+	/* protocol part */
+	for (p = s; isalnum((unsigned char)*p) ||
+	    *p == '+' || *p == '-' || *p == '.'; p++)
+		;
+	if (strncmp(p, "://", 3))
+		return -1; /* no protocol specified */
+	n = (size_t)(p - s);
+	if (n >= sizeof(u->proto))
+		return -1; /* protocol too long */
+	memcpy(u->proto, s, n);
+	u->proto[n] = '\0';
+	p += 3; /* skip "://" */
+
+	if (*p == '[') {
+		/* IPv6 address: copy what is between the brackets */
+		if (!(b = strchr(p, ']')))
+			return -1; /* bracket not found */
+		n = (size_t)(b - p) - 1;
+		if (n >= sizeof(u->host))
+			return -1; /* host too long */
+		memcpy(u->host, p + 1, n);
+		u->host[n] = '\0';
+		p = b + 1;
+	} else {
+		/* domain / host part, skip until port, path or end. */
+		n = strcspn(p, ":/");
+		if (n >= sizeof(u->host))
+			return -1; /* host too long */
+		memcpy(u->host, p, n);
+		u->host[n] = '\0';
+		p += n;
+	}
+
+	/* port */
+	if (*p == ':') {
+		p++;
+		n = strcspn(p, "/");
+		if (n >= sizeof(u->port))
+			return -1; /* port too long */
+		/* digits only: strtoul() on its own would also accept
+		 * leading whitespace and a sign */
+		for (i = 0; i < n; i++) {
+			if (!isdigit((unsigned char)p[i]))
+				return -1;
+		}
+		memcpy(u->port, p, n);
+		u->port[n] = '\0';
+		/* check for valid port: range 1 - 65535 */
+		errno = 0;
+		l = strtoul(u->port, &endptr, 10);
+		if (errno || endptr == u->port || *endptr ||
+		    !l || l > 65535)
+			return -1;
+		p += n;
+	}
+
+	if (!u->host[0])
+		return -1; /* a host is mandatory */
+
+	/* path: "/" followed by the rest without its leading slashes;
+	 * treat truncation as an error */
+	p += strspn(p, "/");
+	n = strlen(p);
+	if (n + 1 >= sizeof(u->path))
+		return -1;
+	u->path[0] = '/';
+	memcpy(u->path + 1, p, n + 1); /* includes the terminating NUL */
+
+	return 0;
+}
+
+/*
+ * Resolve host and the numeric port and connect a stream socket to the
+ * first address that accepts the connection (IPv4 or IPv6).
+ * config_timeout is installed as both the send and the receive timeout of
+ * the socket before connecting.
+ *
+ * Returns the connected socket descriptor. This function does not return on
+ * failure: resolve, setsockopt and connect errors are reported through
+ * die(), which exits the process.
+ */
+int
+edial(const char *host, const char *port)
+{
+	struct addrinfo hints, *res, *res0;
+	struct timeval timeout;
+	const char *cause = "connect";
+	int error, save_errno = 0, s = -1;
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = AF_UNSPEC;
+	hints.ai_socktype = SOCK_STREAM;
+	hints.ai_flags = AI_NUMERICSERV; /* numeric port only */
+	if ((error = getaddrinfo(host, port, &hints, &res0)))
+		die("%s: %s: %s:%s\n", __func__, gai_strerror(error), host, port);
+
+	/* the same value is used for the send and the receive timeout */
+	timeout.tv_sec = config_timeout;
+	timeout.tv_usec = 0;
+
+	for (res = res0; res; res = res->ai_next) {
+		s = socket(res->ai_family, res->ai_socktype,
+		    res->ai_protocol);
+		if (s == -1) {
+			cause = "socket";
+			save_errno = errno;
+			continue;
+		}
+
+		if (setsockopt(s, SOL_SOCKET, SO_SNDTIMEO, &timeout, sizeof(timeout)) == -1)
+			die("%s: setsockopt: %s\n", __func__, strerror(errno));
+		if (setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout)) == -1)
+			die("%s: setsockopt: %s\n", __func__, strerror(errno));
+
+		if (connect(s, res->ai_addr, res->ai_addrlen) == -1) {
+			cause = "connect";
+			/* close() may change errno, keep the connect error */
+			save_errno = errno;
+			close(s);
+			s = -1;
+			continue;
+		}
+		break;
+	}
+	freeaddrinfo(res0);
+
+	/* report the error of the last address that was tried */
+	if (s == -1)
+		die("%s: %s: %s:%s: %s\n", __func__, cause, host, port,
+		    strerror(save_errno));
+
+	return s;
+}
+
+/*
+ * Fetch the global uri `u` with an HTTP/1.0 GET request over TLS (libtls)
+ * and write the response body to stdout.
+ *
+ * The body is only written when the response starts with "HTTP/1.0 200 "
+ * or "HTTP/1.1 200 ". For any other status the response header is printed
+ * to stderr and the rest of the response is read and discarded.
+ * config_custom, when set, is inserted as-is between the fixed request
+ * headers and the empty line that ends the request.
+ * The complete response header has to fit in the read buffer.
+ *
+ * Returns 0 on success, 1 when an error occurred after a 200 status was
+ * seen (read error, write error on stdout, response reached
+ * config_maxresponsesiz) and 2 whenever no 200 status was seen, which
+ * includes failures that happen before the status line is read.
+ * pledge/unveil, connect, TLS setup and request-send failures do not
+ * return: they terminate the process with exit status 1.
+ */
+int
+https_request(void)
+{
+	struct tls *t = NULL;
+	char buf[READ_BUF_SIZ], *p = NULL;
+	size_t n, off, buflen = 0, len = 0;
+	ssize_t r;
+	int fd = -1, httpok = 0, ret = 1, w;
+
+	if (pledge("stdio dns inet rpath unveil", NULL) == -1)
+		err(1, "pledge");
+
+	if (unveil(TLS_CA_CERT_FILE, "r") == -1)
+		err(1, "unveil: %s", TLS_CA_CERT_FILE);
+	if (unveil(NULL, NULL) == -1)
+		err(1, "unveil");
+
+	if (!(t = tls_client())) {
+		/* there is no context yet that tls_error() could be asked */
+		fprintf(stderr, "tls_client: cannot create TLS context\n");
+		goto err;
+	}
+
+	fd = edial(u.host, u.port);
+	if (tls_connect_socket(t, fd, u.host) == -1)
+		die("tls_connect: %s\n", tls_error(t));
+
+	if (pledge("stdio", NULL) == -1)
+		err(1, "pledge");
+
+	/* create and send HTTP header */
+	w = snprintf(buf, sizeof(buf),
+	    "GET %s HTTP/1.0\r\n"
+	    "Host: %s\r\n"
+	    "Connection: close\r\n"
+	    "%s"
+	    "\r\n", u.path, u.host, config_custom ? config_custom : "");
+	if (w < 0 || (size_t)w >= sizeof(buf))
+		die("request does not fit in buffer\n");
+	n = (size_t)w;
+	/* tls_write() may write less than requested: send the remainder.
+	 * Besides -1 it can return TLS_WANT_POLLIN / TLS_WANT_POLLOUT, which
+	 * are negative too; these are treated as failure instead of being
+	 * retried (presumably this is what an expired socket timeout looks
+	 * like - retrying would defeat the timeout). */
+	for (off = 0; off < n; off += (size_t)r) {
+		if ((r = tls_write(t, buf + off, n - off)) <= 0)
+			die("tls_write: %s\n", r == -1 ? tls_error(t) :
+			    "no progress (timeout?)");
+	}
+
+	/* NOTE: HTTP header must fit in the buffer.
+	 * Keep reading until the end of the header was seen, the buffer is
+	 * full or the connection is closed. One byte is reserved for the NUL
+	 * terminator that strncmp() and strstr() below depend on. */
+	while (buflen < sizeof(buf) - 1) {
+		r = tls_read(t, buf + buflen, sizeof(buf) - 1 - buflen);
+		if (r < 0) {
+			fprintf(stderr, "tls_read: %s\n", r == -1 ?
+			    tls_error(t) : "no data available (timeout?)");
+			goto err;
+		}
+		if (r == 0)
+			break;
+		buflen += (size_t)r;
+		buf[buflen] = '\0';
+		if ((p = strstr(buf, "\r\n\r\n")))
+			break;
+	}
+	if (!buflen) {
+		fprintf(stderr, "nothing read\n");
+		goto err;
+	}
+	len += buflen;
+
+	if (!strncmp(buf, "HTTP/1.0 200 ", sizeof("HTTP/1.0 200 ") - 1) ||
+	    !strncmp(buf, "HTTP/1.1 200 ", sizeof("HTTP/1.1 200 ") - 1))
+		httpok = 1;
+
+	if (!p) {
+		fprintf(stderr, "no HTTP header found or header too big\n");
+		goto err;
+	}
+	*p = '\0'; /* NUL terminate header part */
+	p += strlen("\r\n\r\n");
+
+	if (httpok) {
+		/* the part of the body that was read along with the header */
+		n = buflen - (size_t)(p - buf);
+		fwrite(p, 1, n, stdout);
+		if (ferror(stdout)) {
+			fprintf(stderr, "fwrite: stdout: %s\n", strerror(errno));
+			goto err;
+		}
+	} else {
+		/* if not 200 OK print header */
+		fputs(buf, stderr);
+		/* TODO: exit or continue reading, probably nicer to continue read */
+	}
+
+	while (1) {
+		r = tls_read(t, buf, sizeof(buf));
+		if (r == 0)
+			break;
+		if (r < 0) {
+			/* never use a negative return value as a length */
+			fprintf(stderr, "tls_read: %s\n", r == -1 ?
+			    tls_error(t) : "no data available (timeout?)");
+			goto err;
+		}
+		len += (size_t)r;
+
+		if (httpok) {
+			fwrite(buf, 1, (size_t)r, stdout);
+			if (ferror(stdout)) {
+				fprintf(stderr, "fwrite: stdout: %s\n", strerror(errno));
+				goto err;
+			}
+		}
+
+		if (config_maxresponsesiz && len >= config_maxresponsesiz)
+			break;
+	}
+	if (config_maxresponsesiz && len >= config_maxresponsesiz) {
+		fprintf(stderr, "tls_read: response too big: %zu >= %zu\n",
+		    len, config_maxresponsesiz);
+		goto err;
+	}
+	ret = 0;
+
+err:
+	if (t) {
+		tls_close(t);
+		tls_free(t);
+	}
+	/* the socket was handed to libtls with tls_connect_socket(), so
+	 * closing it is left to us */
+	if (fd != -1)
+		close(fd);
+
+	return httpok ? ret : 2;
+}
+
+/*
+ * Fetch the global uri `u` with a plain HTTP/1.0 GET request and write the
+ * response body to stdout.
+ *
+ * The body is only written when the response starts with "HTTP/1.0 200 "
+ * or "HTTP/1.1 200 ". For any other status the response header is printed
+ * to stderr and the rest of the response is read and discarded.
+ * config_custom, when set, is inserted as-is between the fixed request
+ * headers and the empty line that ends the request.
+ * The complete response header has to fit in the read buffer.
+ *
+ * Returns 0 on success, 1 when an error occurred after a 200 status was
+ * seen (read error, write error on stdout, response reached
+ * config_maxresponsesiz) and 2 whenever no 200 status was seen, which
+ * includes failures that happen before the status line is read.
+ * pledge and connect failures do not return: they terminate the process
+ * with exit status 1.
+ */
+int
+http_request(void)
+{
+	char buf[READ_BUF_SIZ], *p = NULL;
+	size_t n, off, buflen = 0, len = 0;
+	ssize_t r;
+	int fd = -1, httpok = 0, ret = 1, w;
+
+	if (pledge("stdio dns inet", NULL) == -1)
+		err(1, "pledge");
+
+	fd = edial(u.host, u.port);
+
+	if (pledge("stdio", NULL) == -1)
+		err(1, "pledge");
+
+	/* create and send HTTP header */
+	w = snprintf(buf, sizeof(buf),
+	    "GET %s HTTP/1.0\r\n"
+	    "Host: %s\r\n"
+	    "Connection: close\r\n"
+	    "%s"
+	    "\r\n", u.path, u.host, config_custom ? config_custom : "");
+	if (w < 0 || (size_t)w >= sizeof(buf)) {
+		fprintf(stderr, "request does not fit in buffer\n");
+		goto err;
+	}
+	n = (size_t)w;
+	/* write() may write less than requested: send the remainder */
+	for (off = 0; off < n; off += (size_t)r) {
+		if ((r = write(fd, buf + off, n - off)) <= 0) {
+			fprintf(stderr, "write: %s\n", strerror(errno));
+			goto err;
+		}
+	}
+
+	/* NOTE: HTTP header must fit in the buffer.
+	 * Keep reading until the end of the header was seen, the buffer is
+	 * full or the connection is closed. One byte is reserved for the NUL
+	 * terminator that strncmp() and strstr() below depend on. */
+	while (buflen < sizeof(buf) - 1) {
+		r = read(fd, buf + buflen, sizeof(buf) - 1 - buflen);
+		if (r == -1) {
+			fprintf(stderr, "read: %s\n", strerror(errno));
+			goto err;
+		}
+		if (r == 0)
+			break;
+		buflen += (size_t)r;
+		buf[buflen] = '\0';
+		if ((p = strstr(buf, "\r\n\r\n")))
+			break;
+	}
+	if (!buflen) {
+		fprintf(stderr, "nothing read\n");
+		goto err;
+	}
+	len += buflen;
+
+	if (!strncmp(buf, "HTTP/1.0 200 ", sizeof("HTTP/1.0 200 ") - 1) ||
+	    !strncmp(buf, "HTTP/1.1 200 ", sizeof("HTTP/1.1 200 ") - 1))
+		httpok = 1;
+
+	if (!p) {
+		fprintf(stderr, "no HTTP header found or header too big\n");
+		goto err;
+	}
+	*p = '\0'; /* NUL terminate header part */
+	p += strlen("\r\n\r\n");
+
+	if (httpok) {
+		/* the part of the body that was read along with the header */
+		n = buflen - (size_t)(p - buf);
+		fwrite(p, 1, n, stdout);
+		if (ferror(stdout)) {
+			fprintf(stderr, "fwrite: stdout: %s\n", strerror(errno));
+			goto err;
+		}
+	} else {
+		/* if not 200 OK print header */
+		fputs(buf, stderr);
+		/* TODO: exit or continue reading, probably nicer to continue read */
+	}
+
+	while (1) {
+		r = read(fd, buf, sizeof(buf));
+		if (r == 0)
+			break;
+		if (r == -1) {
+			fprintf(stderr, "read: %s\n", strerror(errno));
+			goto err;
+		}
+		len += (size_t)r;
+
+		if (httpok) {
+			fwrite(buf, 1, (size_t)r, stdout);
+			if (ferror(stdout)) {
+				fprintf(stderr, "fwrite: stdout: %s\n", strerror(errno));
+				goto err;
+			}
+		}
+
+		if (config_maxresponsesiz && len >= config_maxresponsesiz)
+			break;
+	}
+	if (config_maxresponsesiz && len >= config_maxresponsesiz) {
+		fprintf(stderr, "read: response too big: %zu >= %zu\n",
+		    len, config_maxresponsesiz);
+		goto err;
+	}
+	ret = 0;
+
+err:
+	if (fd != -1)
+		close(fd);
+	return httpok ? ret : 2;
+}
+
+/*
+ * Fetch the global uri `u` over Gopher and write the raw response to
+ * stdout.
+ *
+ * The first two characters of u.path (the leading '/' and the item type
+ * character) are skipped; the remainder followed by "\r\n" is sent as the
+ * selector. The caller must make sure u.path has at least these two
+ * characters.
+ *
+ * Returns 0 on success and 1 on a read/write error or when the response
+ * reached config_maxresponsesiz. pledge and connect failures do not return:
+ * they terminate the process with exit status 1.
+ */
+int
+gopher_request(void)
+{
+	char buf[READ_BUF_SIZ];
+	size_t n, off, len = 0;
+	ssize_t r;
+	int fd = -1, ret = 1;
+
+	if (pledge("stdio dns inet", NULL) == -1)
+		err(1, "pledge");
+
+	fd = edial(u.host, u.port);
+
+	if (pledge("stdio", NULL) == -1)
+		err(1, "pledge");
+
+	/* create and send path, skip type part */
+	snprintf(buf, sizeof(buf), "%s\r\n", u.path + 2);
+	n = strlen(buf);
+	/* write() may write less than requested: send the remainder */
+	for (off = 0; off < n; off += (size_t)r) {
+		if ((r = write(fd, buf + off, n - off)) <= 0) {
+			fprintf(stderr, "write: %s\n", strerror(errno));
+			goto err;
+		}
+	}
+
+	while (1) {
+		r = read(fd, buf, sizeof(buf));
+		if (r == 0)
+			break;
+		if (r == -1) {
+			fprintf(stderr, "read: %s\n", strerror(errno));
+			goto err;
+		}
+		len += (size_t)r;
+
+		fwrite(buf, 1, (size_t)r, stdout);
+		if (ferror(stdout)) {
+			fprintf(stderr, "fwrite: stdout: %s\n", strerror(errno));
+			goto err;
+		}
+
+		if (config_maxresponsesiz && len >= config_maxresponsesiz)
+			break;
+	}
+	if (config_maxresponsesiz && len >= config_maxresponsesiz) {
+		/* this is a plain read(), no TLS is involved here */
+		fprintf(stderr, "read: response too big: %zu >= %zu\n",
+		    len, config_maxresponsesiz);
+		goto err;
+	}
+	ret = 0;
+
+err:
+	if (fd != -1)
+		close(fd);
+	return ret;
+}
+
+/*
+ * Print the command-line synopsis to stderr and exit with status 1.
+ * The options are independent of each other and exactly one url operand
+ * is required.
+ */
+void
+usage(void)
+{
+	fprintf(stderr, "usage: %s [-H header] [-m maxresponse] [-t timeout] url\n",
+	    argv0);
+	exit(1);
+}
+
+/*
+ * Parse s as a non-negative decimal integer written with digits only
+ * (no sign, no leading whitespace, no trailing characters).
+ * Returns 0 and stores the value in *out on success, -1 on invalid input
+ * or when the value does not fit in an unsigned long long.
+ */
+static int
+parseunsigned(const char *s, unsigned long long *out)
+{
+	char *end = NULL;
+
+	/* strtoull() would accept whitespace and a sign, even '-' */
+	if (!isdigit((unsigned char)*s))
+		return -1;
+	errno = 0;
+	*out = strtoull(s, &end, 10);
+	if (errno || *end)
+		return -1;
+	return 0;
+}
+
+/*
+ * Options:
+ *   -H header       custom header string, passed on as-is to the HTTP(S)
+ *                   request; not supported for gopher
+ *   -m maxresponse  max response size in bytes, 0 is unlimited
+ *   -t timeout      time-out in seconds
+ * followed by exactly one url with the protocol http, https or gopher.
+ * When the url has no port the default is 80 (http), 443 (https) or
+ * 70 (gopher).
+ *
+ * The exit status is the return value of the protocol specific request
+ * function, or 1 for usage errors, an invalid url or an unsupported
+ * protocol.
+ */
+int
+main(int argc, char **argv)
+{
+	unsigned long long num;
+	int statuscode;
+
+	ARGBEGIN {
+	case 'H': /* custom HTTP headers */
+		config_custom = EARGF(usage());
+		break;
+	case 'm': /* max filesize */
+		if (parseunsigned(EARGF(usage()), &num) == -1 ||
+		    num > SIZE_MAX)
+			usage();
+		config_maxresponsesiz = (size_t)num;
+		break;
+	case 't': /* timeout */
+		/* the value has to survive the conversion to time_t */
+		if (parseunsigned(EARGF(usage()), &num) == -1 ||
+		    (time_t)num < 0 ||
+		    (unsigned long long)(time_t)num != num)
+			usage();
+		config_timeout = (time_t)num;
+		break;
+	default:
+		usage();
+	} ARGEND
+
+	if (argc != 1)
+		usage();
+
+	url = argv[0];
+	if (parseuri(url, &u) == -1) {
+		fprintf(stderr, "invalid url: %s\n", url);
+		exit(1);
+	}
+
+	/* parseuri() accepts an empty url and an empty protocol name */
+	if (!u.proto[0]) {
+		fprintf(stderr, "no protocol specified\n");
+		exit(1);
+	}
+
+	if (!strcmp(u.proto, "https")) {
+		/* only fall back to the default port when the url has none */
+		if (!u.port[0])
+			memcpy(u.port, "443", 4);
+		statuscode = https_request();
+	} else if (!strcmp(u.proto, "http")) {
+		if (!u.port[0])
+			memcpy(u.port, "80", 3);
+		statuscode = http_request();
+	} else if (!strcmp(u.proto, "gopher")) {
+		if (config_custom) {
+			fprintf(stderr, "no custom header supported with gopher protocol\n");
+			exit(1);
+		}
+
+		if (!u.port[0])
+			memcpy(u.port, "70", 3);
+
+		/* the path has to be "/" followed by at least the item type */
+		if (u.path[0] != '/' || u.path[1] == '\0') {
+			fprintf(stderr, "must specify type\n");
+			exit(1);
+		}
+
+		statuscode = gopher_request();
+	} else {
+		fprintf(stderr, "unsupported protocol specified: %s\n", u.proto);
+		exit(1);
+	}
+
+	return statuscode;
+}