/* ------------------------------------------------------------------- */
/* htget : Fetch a file using HTTP protocol */
/* */
/* Author : Ole Husby, BIBSYS */
/* Updated : 1998-09-30 */
/* */
/* ------------------------------------------------------------------- */
/* */
/* htget(url, type, timeout_seconds, outfile, content_type, location) */
/* */
/* Returns: HTTP statuscode, with additional private: */
/* 0 : OK ( = 200) */
/* 900 : Error, possible timeout */
/* 901 : Syntax error in url */
/* 902 : Unknown host */
/* 903 : No response from server (no connection) */
/* 904 : File is not text/html */
/* 905 : Statusline > 255 bytes */
/* 906 : Statusline < 4 bytes */
/* 907 : Statusline not starting with "HTTP" */
/* 908 : Statuscode not numeric */
/* 909 : Size of header > BUFSIZE */
/* 910 : Unable to open output file */
/* 999 : Unspecified TCP/IP error */
/* */
/* Writes to outfile, depending on type, if statuscode = 0 | 200 : */
/* */
/* type = 0 : Nothing */
/* type = 1 : HTTP header */
/* type = 2 : HTTP header + entitybody */
/* type = 3 : HTTP entitybody */
/* type = 4 : HTTP entitybody if text/html */
/* type = 5 : HTTP <HEAD> part of entitybody if text/html */
/* type = 6 : HTTP <HEAD> part of entitybody if text/html */
/* HTTP entitybody if application/marc */
/* */
/* ------------------------------------------------------------------- */
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <ctype.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#define FALSE 0
#define TRUE 1
#define BUFSIZE 10000
#define TYPE_NONE 0
#define TYPE_HTTPHEAD 1
#define TYPE_HTTPALL 2
#define TYPE_HTTPBODY 3
#define TYPE_HTMLALL 4
#define TYPE_HTMLHEAD 5
#define TYPE_MARC 6
#define TRACE 0
#define AGENT "BIBSYS_htget v1.1"
char conType[128];
int htmlonly, marcrecord;
/* ------------------------------------------------------------------- */
/* thandler : Handler that htget() installs for SIGALRM                */
/*                                                                     */
/* Deliberately empty: the handler only has to exist, it performs no   */
/* work of its own.                                                    */
/* ------------------------------------------------------------------- */
void thandler(int i)
{
    (void) i;    /* the signal number is of no interest */
}
/* ------------------------------------------------------------------- */
/* geteoHEAD: Look for or or */
/* immediately before ");
if (p)
{
i = p - buf + 7;
buf[i] = '\n';
buf[i+1] = '\0';
return 1;
}
return 0;
}
/* ------------------------------------------------------------------- */
/* writeRequest: send request to server                                */
/*                                                                     */
/* req    : NUL-terminated request text                                */
/* server : descriptor the request is written to                       */
/*                                                                     */
/* write() is allowed to accept fewer bytes than it was given, so it   */
/* is called repeatedly until the whole request has been handed over.  */
/*                                                                     */
/* returns: Number of bytes written ( = strlen(req) ) if ok            */
/*          -1 if write() failed or made no progress                   */
/* ------------------------------------------------------------------- */
int writeRequest(char *req, int server)
{
    size_t total = strlen(req);
    size_t sent = 0;
    ssize_t n;

    if (TRACE)
        printf("*** send(): %s\n", req);

    while (sent < total)
    {
        n = write(server, req + sent, total - sent);
        if (n <= 0)
            return -1;          /* error, or nothing accepted */
        sent += (size_t) n;
    }
    return (int) sent;
}
/* ------------------------------------------------------------------- */
/* getBody : Copy the Entity-body from the socket to an open file      */
/*                                                                     */
/* server   : socket, positioned at the start of the entitybody        */
/* timeout  : seconds a single read() may block (enforced by alarm())  */
/* fd       : descriptor of the already opened output file             */
/* filename : not used by this function                                */
/*                                                                     */
/* Data is read in chunks of at most BUFSIZE bytes until the server    */
/* closes the connection, or until a chunk ends with '\0', which is    */
/* taken as an end mark and not written to the file.                   */
/* When the global htmlonly is set, each chunk is handed to            */
/* geteoHEAD(); what it leaves in the buffer is written, and copying   */
/* stops as soon as it returns non-zero.                               */
/*                                                                     */
/* Returns   0 if ok                                                   */
/*         900 if read() fails (including an expired timeout) or if    */
/*             the data cannot be written to fd                        */
/*                                                                     */
/* ------------------------------------------------------------------- */
int getBody(int server, int timeout, int fd, char *filename)
{
    unsigned char *bf, buf[BUFSIZE + 1];
    int bytecount, written, n;
    int ef;
    int found_end = FALSE;

    (void) filename;

    if (TRACE)
        printf("*** Read entitybody\n");

    /* Loop until the endmark is found */
    while (!found_end)
    {
        alarm (timeout);
        bytecount = read (server, buf, BUFSIZE);
        alarm (0);

        if (bytecount < 0)
            return 900;                 /* error in read() */
        if (bytecount == 0)
            break;                      /* server closed socket */

        if (buf[bytecount-1] == '\0')
        {
            bytecount--;                /* do not write the '\0' to file */
            found_end = TRUE;           /* terminate the loop */
        }
        if (bytecount == 0)
            continue;                   /* the chunk held only the endmark */

        /* buf has BUFSIZE + 1 bytes, so there is always room for this */
        bf = (unsigned char *) buf;
        bf[bytecount] = '\0';

        ef = 0;
        if ( htmlonly )
        {
            ef = geteoHEAD(bf);
            bytecount = (int) strlen((char *) bf);
        }

        /* Write to file; write() may take less than it is offered */
        for (written = 0; written < bytecount; written += n)
        {
            n = (int) write (fd, bf + written, (size_t) (bytecount - written));
            if (n <= 0)
                return 900;             /* output could not be stored */
        }

        if (ef)
            break;
    }
    return 0;
}
/* ------------------------------------------------------------------- */
/* readHeaderByte : Read one byte from server under the alarm timeout  */
/*                                                                     */
/* Returns the result of read(): 1 if a byte was stored in *c,         */
/*                               0 at end of file, negative on error   */
/* ------------------------------------------------------------------- */
static int readHeaderByte(int server, int timeout, char *c)
{
    int rc;

    alarm (timeout);
    rc = (int) read ( server, c, 1 );
    alarm (0);
    return rc;
}
/* ------------------------------------------------------------------- */
/* writeHeaderText : Write all of the string text to fd                */
/*                                                                     */
/* Returns 0 if ok, -1 if write() failed or made no progress           */
/* ------------------------------------------------------------------- */
static int writeHeaderText(int fd, const char *text)
{
    size_t left = strlen(text);
    ssize_t n;

    while (left > 0)
    {
        n = write(fd, text, left);
        if (n <= 0)
            return -1;
        text += n;
        left -= (size_t) n;
    }
    return 0;
}
/* ------------------------------------------------------------------- */
/* getHeader : Access file and read HTTP header                        */
/*                                                                     */
/* server  : connected socket, positioned at the start of the response */
/* timeout : seconds a single read() may block                         */
/* type    : one of the TYPE_ values; 0 stops after the statusline     */
/* fd      : output file, written for TYPE_HTTPHEAD and TYPE_HTTPALL   */
/* reason  : not used; the reason phrase is not handed back            */
/* loc     : receives the value of a Location: header, if one is seen  */
/*                                                                     */
/* CR characters are dropped while reading. The Content-Type value is  */
/* stored (truncated if necessary) in the global conType, and the      */
/* globals htmlonly / marcrecord are set from Content-Type and type.   */
/*                                                                     */
/* Returns 0 if ok                                                     */
/*         900 if read() fails, the server closes the connection       */
/*             before the header is complete, or fd cannot be written  */
/*         904 if Content-type not "text/html"                         */
/*             (nor "application/marc" when type is TYPE_MARC)         */
/*         905 if Statusline > 255 bytes                               */
/*         906 if Statusline < 4 bytes                                 */
/*         907 if Statusline not starting with "HTTP"                  */
/*         908 if Statuscode not numeric                               */
/*         909 if size of header > BUFSIZE                             */
/*         HTTP statuscode if <> 200 (takes precedence over 904)       */
/*                                                                     */
/* ------------------------------------------------------------------- */
int getHeader(int server, int timeout, int type, int fd, char *reason, char *loc)
{
    char buf[BUFSIZE+1];
    char *p, *cp, c;
    int i, rc, statuscode, plf, rlen;
    int wantHeader = ( type == TYPE_HTTPHEAD ) || ( type == TYPE_HTTPALL );

    (void) reason;
    *conType = 0;

    if (TRACE)
        printf("*** Read statusline\n");

    /* Read HTTP statusline (until LF or 0, max 256 byte) */
    rlen = 0;
    cp = buf;
    for (;;)
    {
        rc = readHeaderByte(server, timeout, &c);
        if ( rc <= 0 )
            return 900;             /* error, timeout or premature EOF */
        if ( c == '\r' )
            continue;
        rlen++;
        if (TRACE)
            printf("%c", c);
        if ( c == 0 || rlen > 255 )
            return 905;
        if ( c == '\n' )
            break;
        *cp++ = c;
    }
    *cp = 0;

    /* Write statusline */
    if ( wantHeader )
    {
        if (writeHeaderText(fd, buf) < 0 || writeHeaderText(fd, "\n\n") < 0)
            return 900;
    }

    if (TRACE)
        printf("*** read() : (%d) %s\n", rlen, buf);

    /* Parse statusline */
    if (rlen < 4)
        return 906;
    if (strncmp(buf, "HTTP", 4) != 0)
        return 907;

    (void) strtok(buf, " ");        /* skip the HTTP-version token */
    p = strtok(NULL, " ");

    /* A statusline without a code is taken as 200 */
    statuscode = 200;
    if (p)
    {
        statuscode = 0;
        for (i = 0; p[i]; i++)
        {
            if (!isdigit((unsigned char) p[i]))
                return 908;
            if (statuscode > 9999)  /* far beyond any status; avoids overflow */
                return 908;
            statuscode = statuscode * 10 + (p[i] - '0');
        }
    }

    if (statuscode == 200)
        statuscode = 0;
    if (!type)
        return statuscode;

    /* Read HTTP response header (until 0 or empty line, max BUFSIZE bytes) */
    if (TRACE)
        printf("*** Read responseheader\n");

    rlen = 0;
    plf = TRUE;                     /* the LF ending the statusline was just read */
    cp = buf;
    for (;;)
    {
        rc = readHeaderByte(server, timeout, &c);
        if ( rc <= 0 )
            return 900;             /* error, timeout or premature EOF */
        if ( c == '\r' )
            continue;
        rlen++;
        if ( c == 0 || rlen > BUFSIZE )
            return 909;
        if ( c == '\n' )
        {
            if (plf)
                break;              /* empty line: end of header */
            plf = TRUE;
        }
        else
            plf = FALSE;
        *cp++ = c;
    }
    *cp = 0;

    /* Write rest of HTTP header */
    if ( wantHeader )
    {
        if (writeHeaderText(fd, buf) < 0 || writeHeaderText(fd, "\n") < 0)
            return 900;
    }

    if (TRACE)
        printf("*** read() : (%d) %s\n", rlen, buf);

    /* Parse header for Content-Type and Location */
    rc = 904;
    for (p = strtok(buf, "\n"); p; p = strtok(NULL, "\n"))
    {
        if (strncasecmp(p, "Content-Type:", 13) == 0)
        {
            p += 13;
            while (*p == ' ')
                p++;

            /* conType is a fixed-size global; never copy more than fits */
            snprintf(conType, sizeof conType, "%s", p);

            if (strncasecmp(p, "text/html", 9) == 0)
            {
                if ( ( type == TYPE_HTMLHEAD ) || ( type == TYPE_MARC ) )
                    htmlonly = 1;
                rc = 0;
            }
            else if (strncasecmp(p, "application/marc", 16) == 0)
            {
                if ( type == TYPE_MARC )
                {
                    marcrecord = 1;
                    rc = 0;
                }
            }
        }
        else if (strncasecmp(p, "Location:", 9) == 0)
        {
            p += 9;
            while (*p == ' ')
                p++;
            /* NOTE(review): the size of loc is not known here, so this */
            /* copy cannot be bounded; the value can be close to        */
            /* BUFSIZE bytes. Confirm what the callers provide.         */
            if (loc)
                strcpy(loc, p);
        }
    }

    /* Socket is positioned at start of HTTP Entity-Body.             */
    /* A non-200 status is reported as such, so that the caller does  */
    /* not mistake it for a mere Content-Type mismatch.               */
    if (statuscode)
        return statuscode;
    return rc;
}
/* ------------------------------------------------------------------- */
/* htget : Fetch a URL                                                 */
/*                                                                     */
/* iurl       : URL, must start with "http://", at most 1023 bytes     */
/* type       : one of the TYPE_ values; selects what is written       */
/* timeout    : seconds a single read() from the server may block      */
/* outfile    : output file; "/tmp/geturl.tmp" is used if empty        */
/* h_contype  : receives the Content-Type of the response              */
/* h_location : receives the Location header of the response           */
/*                                                                     */
/* h_contype and h_location are filled with strcpy(); the caller must  */
/* supply buffers that are large enough, this cannot be checked here.  */
/*                                                                     */
/* Returns 0 if ok, otherwise the HTTP statuscode or one of            */
/*         901 : Syntax error in url (also: host or request too long,  */
/*               port above 65535)                                     */
/*         902 : Unknown host (or no usable IPv4 address)              */
/*         903 : No connection                                         */
/*         910 : Unable to open output file                            */
/*         999 : socket() failed or the request could not be sent      */
/*         and whatever getHeader() / getBody() return                 */
/* ------------------------------------------------------------------- */
int htget(char *iurl, int type, int timeout, char *outfile, char *h_contype, char *h_location)
{
    int rc, soc, port;
    int fd = -1;
    size_t i, hostlen;
    struct sockaddr_in addr;
    struct hostent *hp;
    char uurl[1024], hostname[256], req[1200];
    char *hostport, *path, *portstr, *slash, *colon;
    char tfile[256], blank[2];

    *blank = *h_contype = *h_location = 0;
    htmlonly = marcrecord = 0;

    strcpy(tfile, "/tmp/geturl.tmp");
    if (!outfile || !*outfile)
        outfile = tfile;

    if (!iurl || !*iurl || ( strlen(iurl) > 1023 ) )
        return 901;
    strcpy(uurl, iurl);

    /* Parse and validate URL: http://host[:port][/path] */
    if (strncmp(uurl, "http://", 7) != 0)
        return 901;
    hostport = uurl + 7;

    path = blank;
    slash = strchr(hostport, '/');
    if (slash)
    {
        *slash = 0;
        path = slash + 1;
    }

    portstr = NULL;
    colon = strchr(hostport, ':');
    if (colon)
    {
        *colon = 0;
        portstr = colon + 1;
    }

    /* The host must be present and must fit in hostname[] */
    hostlen = strlen(hostport);
    if (hostlen == 0 || hostlen >= sizeof hostname)
        return 901;
    strcpy(hostname, hostport);

    port = 80;
    if (portstr && *portstr)
    {
        port = 0;
        for (i = 0; portstr[i]; i++)
        {
            if (!isdigit((unsigned char) portstr[i]))
                return 901;
            port = port * 10 + (portstr[i] - '0');
            if (port > 65535)       /* would be silently truncated by htons() */
                return 901;
        }
    }

    /* Build the request; header name and CRLF line ends as in HTTP/1.0 */
    rc = snprintf(req, sizeof req, "GET /%s HTTP/1.0\r\nUser-Agent: %s\r\n\r\n", path, AGENT);
    if (rc < 0 || (size_t) rc >= sizeof req)
        return 901;

    /* Establish handler for the alarm-signal */
    /* NOTE(review): where signal() installs handlers with restart      */
    /* semantics, a read() interrupted by SIGALRM is resumed instead of */
    /* failing, and the timeout never takes effect. Confirm on the      */
    /* target platform; sigaction() without SA_RESTART would avoid it.  */
    signal (SIGALRM, thandler);

    /* Get IP address; only an IPv4 address fits in addr.sin_addr */
    hp = gethostbyname(hostname);
    if (!hp || hp->h_addrtype != AF_INET || !hp->h_addr_list[0]
            || hp->h_length != (int) sizeof addr.sin_addr)
        return 902;

    /* Get socket and connect */
    soc = socket(AF_INET, SOCK_STREAM, 0);
    if (soc < 0)
        return 999;

    memset(&addr, 0, sizeof addr);
    addr.sin_family = AF_INET;
    memcpy(&addr.sin_addr, hp->h_addr_list[0], sizeof addr.sin_addr);
    addr.sin_port = htons((unsigned short) port);

    if (connect(soc, (struct sockaddr *) &addr, sizeof(struct sockaddr_in)) < 0)
    {
        close(soc);
        return 903;
    }

    /* Open the destination file */
    if (type)
    {
        fd = open(outfile, O_WRONLY | O_CREAT | O_TRUNC , 0666);
        if (fd < 0)
        {
            close(soc);
            return 910;
        }
    }

    /* Write HTTP-request; anything but the full length is a failure */
    if (writeRequest(req, soc) != (int) strlen(req))
    {
        rc = 999;
        goto done;
    }

    /* Read header part of response */
    rc = getHeader(soc, timeout, type, fd, path, h_location);

    /* The Content-Type only matters for the text/html and marc types */
    if ( ( rc == 904 ) && ( type < 4 ) )
        rc = 0;

    if ( ( rc == 0 ) && ( type > 1 ) )
        rc = getBody(soc, timeout, fd, outfile);

    strcpy(h_contype, conType);

done:
    close(soc);
    if (fd >= 0)
        close(fd);
    return rc;
}