commit e8e1e1a7d09c614b57fac5070eb5c28822c948ba
parent 5a9951db80a5e9b9f2d5ad7ca1c6efebbd00e11f
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Fri, 12 Mar 2021 22:22:13 +0100
rework URI handling
- Parse the URI more correctly, following RFC 4266 (The gopher URI Scheme) and
RFC 3986 (Uniform Resource Identifier (URI): Generic Syntax).
- A URI fragment is no longer sent to the server.
- A gopher type is now optional for an empty path, for example:
"gopher://codemadness.org".
Also, the use of strlcat() is removed, so the code should now be more portable.
Diffstat:
M | hurl.c | | | 175 | ++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------- |
1 file changed, 116 insertions(+), 59 deletions(-)
diff --git a/hurl.c b/hurl.c
@@ -28,12 +28,15 @@
#define TLS_CA_CERT_FILE "/etc/ssl/cert.pem"
#endif
-/* uri */
+/* URI */
struct uri {
- char proto[48];
+ char proto[48]; /* scheme including ":" or "://" */
+ char userinfo[256]; /* username [:password] */
char host[256];
- char path[2048];
- char port[6]; /* numeric port */
+ char port[6]; /* numeric port */
+ char path[1024];
+ char query[1024];
+ char fragment[1024];
};
char *argv0;
@@ -61,70 +64,115 @@ sighandler(int signo)
}
int
-parseuri(const char *s, struct uri *u)
+uri_parse(const char *s, struct uri *u)
{
- const char *p = s, *b;
- char *endptr = NULL;
+ const char *p = s;
+ char *endptr;
size_t i;
- unsigned long l;
+ long l;
- u->proto[0] = u->host[0] = u->path[0] = u->port[0] = '\0';
- if (!*p)
- return 0;
+ u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
+ u->path[0] = u->query[0] = u->fragment[0] = '\0';
- /* protocol part */
- for (p = s; *p && (isalpha((unsigned char)*p) || isdigit((unsigned char)*p) ||
- *p == '+' || *p == '-' || *p == '.'); p++)
+ /* protocol-relative */
+ if (*p == '/' && *(p + 1) == '/') {
+ p += 2; /* skip "//" */
+ goto parseauth;
+ }
+
+ /* scheme / protocol part */
+ for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) ||
+ *p == '+' || *p == '-' || *p == '.'; p++)
;
- if (!strncmp(p, "://", 3)) {
+ /* scheme, except if empty and starts with ":" then it is a path */
+ if (*p == ':' && p != s) {
+ if (*(p + 1) == '/' && *(p + 2) == '/')
+ p += 3; /* skip "://" */
+ else
+ p++; /* skip ":" */
+
if ((size_t)(p - s) >= sizeof(u->proto))
return -1; /* protocol too long */
memcpy(u->proto, s, p - s);
u->proto[p - s] = '\0';
- p += 3; /* skip "://" */
+
+ if (*(p - 1) != '/')
+ goto parsepath;
} else {
- return -1; /* no protocol specified */
+ p = s; /* no scheme format, reset to start */
+ goto parsepath;
+ }
+
+parseauth:
+ /* userinfo (username:password) */
+ i = strcspn(p, "@/?#");
+ if (p[i] == '@') {
+ if (i >= sizeof(u->userinfo))
+ return -1; /* userinfo too long */
+ memcpy(u->userinfo, p, i);
+ u->userinfo[i] = '\0';
+ p += i + 1;
}
/* IPv6 address */
if (*p == '[') {
- /* bracket not found or host too long */
- if (!(b = strchr(p, ']')) || (size_t)(b - p) >= (ssize_t)sizeof(u->host))
+ /* bracket not found, host too short or too long */
+ i = strcspn(p, "]");
+ if (p[i] != ']' || i < 3)
return -1;
- memcpy(u->host, p + 1, b - p - 1);
- u->host[b - p - 1] = '\0';
- p = b + 1;
+ i++; /* including "]" */
} else {
/* domain / host part, skip until port, path or end. */
- if ((i = strcspn(p, ":/")) >= sizeof(u->host))
- return -1; /* host too long */
- memcpy(u->host, p, i);
- u->host[i] = '\0';
- p = &p[i];
+ i = strcspn(p, ":/?#");
}
+ if (i >= sizeof(u->host))
+ return -1; /* host too long */
+ memcpy(u->host, p, i);
+ u->host[i] = '\0';
+ p += i;
+
/* port */
if (*p == ':') {
- if ((i = strcspn(++p, "/")) >= sizeof(u->port))
+ p++;
+ if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
return -1; /* port too long */
memcpy(u->port, p, i);
u->port[i] = '\0';
- /* check for valid port: range 1 - 65535 */
+ /* check for valid port: range 1 - 65535, may be empty */
errno = 0;
- l = strtoul(u->port, &endptr, 10);
- if (errno || u->port[0] == '\0' || *endptr ||
- !l || l > 65535)
+ l = strtol(u->port, &endptr, 10);
+ if (i && (errno || *endptr || l <= 0 || l > 65535))
return -1;
- p = &p[i];
+ p += i;
}
- if (u->host[0]) {
- p = &p[strspn(p, "/")];
- memcpy(u->path, "/", 2);
- } else {
- return -1;
+
+parsepath:
+ /* path */
+ if ((i = strcspn(p, "?#")) >= sizeof(u->path))
+ return -1; /* path too long */
+ memcpy(u->path, p, i);
+ u->path[i] = '\0';
+ p += i;
+
+ /* query */
+ if (*p == '?') {
+ p++;
+ if ((i = strcspn(p, "#")) >= sizeof(u->query))
+ return -1; /* query too long */
+ memcpy(u->query, p, i);
+ u->query[i] = '\0';
+ p += i;
}
- /* treat truncation as an error */
- if (strlcat(u->path, p, sizeof(u->path)) >= sizeof(u->path))
- return -1;
+
+ /* fragment */
+ if (*p == '#') {
+ p++;
+ if ((i = strlen(p)) >= sizeof(u->fragment))
+ return -1; /* fragment too long */
+ memcpy(u->fragment, p, i);
+ u->fragment[i] = '\0';
+ }
+
return 0;
}
@@ -206,11 +254,14 @@ https_request(void)
/* create and send HTTP header */
r = snprintf(buf, sizeof(buf),
- "GET %s HTTP/1.0\r\n"
+ "GET %s%s%s HTTP/1.0\r\n"
"Host: %s%s%s\r\n"
"Connection: close\r\n"
"%s%s"
- "\r\n", u.path, u.host,
+ "\r\n",
+ u.path[0] ? u.path : "/",
+ u.query[0] ? "?" : "", u.query,
+ u.host,
stdport ? "" : ":",
stdport ? "" : u.port,
config_headers, config_headers[0] ? "\r\n" : "");
@@ -334,11 +385,14 @@ http_request(void)
/* create and send HTTP header */
r = snprintf(buf, sizeof(buf),
- "GET %s HTTP/1.0\r\n"
+ "GET %s%s%s HTTP/1.0\r\n"
"Host: %s%s%s\r\n"
"Connection: close\r\n"
"%s%s"
- "\r\n", u.path, u.host,
+ "\r\n",
+ u.path[0] ? u.path : "/",
+ u.query[0] ? "?" : "", u.query,
+ u.host,
stdport ? "" : ":",
stdport ? "" : u.port,
config_headers, config_headers[0] ? "\r\n" : "");
@@ -427,7 +481,7 @@ int
gopher_request(void)
{
char buf[READ_BUF_SIZ], *p;
- const char *errstr;
+ const char *errstr, *path;
size_t len = 0;
ssize_t r;
int fd = -1, ret = 1;
@@ -440,8 +494,13 @@ gopher_request(void)
if (pledge("stdio", NULL) == -1)
err(1, "pledge");
- /* create and send path, skip type part */
- r = snprintf(buf, sizeof(buf), "%s\r\n", u.path + 2);
+ /* create and send path, skip type part, empty path is allowed,
+ see RFC 4266 The gopher URI Scheme - section 2.1 */
+ path = u.path;
+ if (*path == '/' && *path++)
+ path++;
+ r = snprintf(buf, sizeof(buf), "%s%s%s\r\n",
+ path, u.query[0] ? "?" : "", u.query);
if (r < 0 || (size_t)r >= sizeof(buf)) {
fprintf(stderr, "not writing header because it is truncated");
goto err;
@@ -623,8 +682,10 @@ main(int argc, char **argv)
usage();
url = argv[0];
- if (parseuri(url, &u) == -1)
- errx(1, "invalid url: %s", url);
+ if (uri_parse(url, &u) == -1)
+ errx(1, "invalid URL: %s", url);
+ if (u.userinfo[0])
+ errx(1, "userinfo field not supported in the URL: %s", url);
if (config_timeout > 0) {
signal(SIGALRM, sighandler);
@@ -632,7 +693,7 @@ main(int argc, char **argv)
err(1, "alarm");
}
- if (!strcmp(u.proto, "https")) {
+ if (!strcmp(u.proto, "https://")) {
if (tls_init())
errx(1, "tls_init failed");
if (!(tls_config = tls_config_new()))
@@ -643,22 +704,18 @@ main(int argc, char **argv)
errx(1, "tls set ciphers failed: %s",
tls_config_error(tls_config));
}
- if (!u.port[0] && !strcmp(u.proto, "https"))
+ if (!u.port[0])
memcpy(u.port, "443", 4);
statuscode = https_request();
- } else if (!strcmp(u.proto, "http")) {
+ } else if (!strcmp(u.proto, "http://")) {
if (!u.port[0])
memcpy(u.port, "80", 3);
statuscode = http_request();
- } else if (!strcmp(u.proto, "gopher")) {
+ } else if (!strcmp(u.proto, "gopher://")) {
if (!u.port[0])
memcpy(u.port, "70", 3);
-
- if (u.path[0] != '/' || u.path[1] == '\0')
- errx(1, "must specify type");
-
statuscode = gopher_request();
- } else if (!strcmp(u.proto, "gophers")) {
+ } else if (!strcmp(u.proto, "gophers://")) {
if (tls_init())
errx(1, "tls_init failed");
if (!(tls_config = tls_config_new()))