// view usr/src/cmd/man/src/util/nsgmls.src/lib/URLStorage.cxx @ 4:1a15d5aaf794
//
// synchronized with onnv_86 (6202) in onnv-gate
// author Koji Uno <koji.uno@sun.com>
// date Mon, 31 Aug 2009 14:38:03 +0900
// parents c9caec207d52
// children
// line wrap: on
// line source

// Copyright (c) 1995 James Clark
// See the file COPYING for copying permission.
#pragma ident	"%Z%%M%	%I%	%E% SMI"

#ifdef __GNUG__
#pragma implementation
#endif

// FIXME This implementation won't work on an EBCDIC machine.

#include "splib.h"
#ifdef WINSOCK
#include <winsock.h>
#define readsocket(s, p, n) ::recv(s, p, n, 0)
#define writesocket(s, p, n) ::send(s, p, n, 0)
#define errnosocket (WSAGetLastError())
#define SocketMessageArg(n) WinsockMessageArg(n)
#define SOCKET_EINTR (WSAEINTR)
#define SP_HAVE_SOCKET
#else
#ifdef SP_HAVE_SOCKET
#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#ifdef SP_INCLUDE_UNISTD_H
#include <unistd.h>
#endif

#ifdef SP_INCLUDE_OSFCN_H
#include <osfcn.h>
#endif

#ifdef SP_DECLARE_H_ERRNO
extern int h_errno;
#endif

typedef int SOCKET;
#define SOCKET_ERROR (-1)
#define INVALID_SOCKET (-1)
#define SOCKET_EINTR (EINTR)
#define closesocket(s) close(s)
#define writesocket(fd, p, n) ::write(fd, p, n)
#define readsocket(s, p, n) ::read(s, p, n)
#define errnosocket (errno)
#define SocketMessageArg(n) ErrnoMessageArg(n)
#include "ErrnoMessageArg.h"

#endif /* SP_HAVE_SOCKET */

#endif /* not WINSOCK */

#include "URLStorage.h"
#include "URLStorageMessages.h"
#include "RewindStorageObject.h"
#include "UnivCharsetDesc.h"
#include "MessageArg.h"
#include "MessageBuilder.h"
#include "macros.h"

#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stddef.h>
#include <ctype.h>
#include <stdio.h>

#ifdef SP_NAMESPACE
namespace SP_NAMESPACE {
#endif

// Single character-set range used to build iso646Charset below.
// NOTE(review): the three fields are presumably {descMin, count, univMin},
// i.e. 128 code points starting at 0 mapped to themselves -- confirm against
// UnivCharsetDesc::Range.
static UnivCharsetDesc::Range range = { 0, 128, 0 };
// Charset built from that one range; the URLStorageManager constructor
// passes its address to IdStorageManager.
static CharsetInfo iso646Charset(UnivCharsetDesc(&range, 1));

#ifdef SP_HAVE_SOCKET

// Storage object that fetches one document over an already connected
// HTTP socket.  Bytes are obtained with readsocket(); rewinding support
// comes from the RewindStorageObject base class.
class HttpSocketStorageObject : public RewindStorageObject {
public:
  // fd is a connected socket (see openHttp); hostStr is the host name
  // used in diagnostics; mayRewind is forwarded to RewindStorageObject.
  HttpSocketStorageObject(SOCKET fd, Boolean mayRewind, const StringC &hostStr);
  // Closes the socket if it is still open.
  ~HttpSocketStorageObject();
  // Sends an HTTP/1.0 GET request for path and consumes the response
  // header.  Returns 0 (after reporting a message) on failure.
  Boolean open(const String<char> &path, Messenger &);
  // Reads up to bufSize bytes of the response body into buf, storing the
  // count in nread.  Returns 0 at end of stream or on error.
  Boolean read(char *buf, size_t bufSize, Messenger &mgr, size_t &nread);
  // Not expected to be called; the implementation is CANNOT_HAPPEN().
  Boolean seekToStart(Messenger &);
  // Resolves host, creates a TCP socket and connects it to port.
  // host must be NUL-terminated; hostStr is the same name for messages.
  // Returns INVALID_SOCKET (after reporting a message) on failure.
  static SOCKET openHttp(const String<char> &host,
			 unsigned short port,
			 const StringC &hostStr,
			 Messenger &mgr);
private:
  HttpSocketStorageObject(const HttpSocketStorageObject &); // undefined
  void operator=(const HttpSocketStorageObject &); // undefined
  // Reads the status line and skips the header lines of the response.
  Boolean readHeader(Messenger &);
  // Reads one line (terminator included) into line; a byte read past the
  // end of the line is kept in leftOver for the next call.
  Boolean readLine(Messenger &mgr, String<char> &line, String<char> &leftOver);
  // Parses "HTTP/<digits>.<digits> <3 digits> " at ptr, storing the
  // three-digit code in val and advancing ptr.
  static Boolean parseStatus(const char *&ptr, int &val);
  StringC hostStr_;		// host name, used in diagnostics
  String<char> path_;		// path requested by open()
  Boolean eof_;			// set once the socket has reported end of stream
  SOCKET fd_;			// the socket, or INVALID_SOCKET once closed
};

#ifdef WINSOCK

// Message argument that carries a Winsock error number; see append()
// for how it is rendered.
class WinsockMessageArg : public MessageArg {
public:
  // n is the Winsock error number to report.
  WinsockMessageArg(int n) : n_(n) { }
  // Returns a newly allocated copy of this argument.
  MessageArg *copy() const { return new WinsockMessageArg(*this); }
  // Appends the textual form of the error to the builder.
  void append(MessageBuilder &) const;
private:
  int n_;			// error number given to the constructor
};

// Renders the argument as the winsockErrorNumber message fragment
// followed by the numeric error code.
void WinsockMessageArg::append(MessageBuilder &builder) const
{
  // I can't figure out how to get a string associated
  // with this error number.  FormatMessage() doesn't seem
  // to work.
  builder.appendFragment(URLStorageMessages::winsockErrorNumber);
  builder.appendNumber(n_);
}

// Performs one-time Winsock start-up on first use and the matching
// WSACleanup() when the object is destroyed.
class WinsockIniter {
public:
  WinsockIniter();
  // Calls WSACleanup() if init() ran and succeeded.
  ~WinsockIniter();
  // Starts Winsock on the first call; later calls return the result of
  // that first attempt.  Failures are reported through mgr.
  Boolean init(Messenger &mgr);
private:
  Boolean inited_;		// set once init() has made its attempt
  Boolean initSuccess_;		// outcome of that attempt
};

// The single instance; openHttp() calls its init() before using sockets.
static WinsockIniter winsockIniter;

// Starts in the "not yet attempted" state.  Both flags are initialized
// so that initSuccess_ never holds an indeterminate value, even before
// init() has been called.
WinsockIniter::WinsockIniter()
: inited_(0), initSuccess_(0)
{
}

// Releases Winsock, but only when init() was attempted and succeeded.
WinsockIniter::~WinsockIniter()
{
  if (!inited_ || !initSuccess_)
    return;
  (void)WSACleanup();
}

// Starts Winsock 1.1 on the first call and remembers the outcome; every
// later call simply returns that remembered outcome.  A start-up failure
// or a version other than 1.1 is reported through mgr.
Boolean WinsockIniter::init(Messenger &mgr)
{
  if (inited_)
    return initSuccess_;

  // Mark the attempt as made before doing anything that can fail.
  inited_ = 1;
  initSuccess_ = 0;

  WSADATA startupInfo;
  const int rc = WSAStartup(MAKEWORD(1, 1), &startupInfo);
  if (rc) {
    mgr.message(URLStorageMessages::winsockInitialize,
                WinsockMessageArg(rc));
    return initSuccess_;
  }
  if (LOBYTE(startupInfo.wVersion) == 1
      && HIBYTE(startupInfo.wVersion) == 1)
    initSuccess_ = 1;
  else {
    // Started, but not the version we asked for: undo the start-up.
    mgr.message(URLStorageMessages::winsockVersion);
    WSACleanup();
  }
  return initSuccess_;
}

#endif /* WINSOCK */

#endif /* SP_HAVE_SOCKET */

// Creates a manager whose identifiers use the file-local iso646Charset.
// type is kept as given and returned unchanged by type().
URLStorageManager::URLStorageManager(const char *type)
: IdStorageManager(&iso646Charset),
  type_(type)
{
}

// Returns the type string that was passed to the constructor.
const char *URLStorageManager::type() const
{
  return type_;
}

// Heuristic: returns 1 when id is longer than "http://" and starts with
// that prefix, letters compared case-insensitively.  Each expected
// character is translated with charset.execToDesc() before comparison.
Boolean URLStorageManager::guessIsId(const StringC &id,
                                     const CharsetInfo &charset) const
{
  static const char prefix[] = "http://";
  const size_t prefixLen = sizeof(prefix) - 1;

  // Need the whole prefix plus at least one more character.
  if (id.size() <= prefixLen)
    return 0;

  for (size_t pos = 0; pos < prefixLen; pos++) {
    const char expected = prefix[pos];
    if (id[pos] == charset.execToDesc(expected))
      continue;
    // Letters may also appear in upper case.
    if (islower(expected)
        && id[pos] == charset.execToDesc(toupper(expected)))
      continue;
    return 0;
  }
  return 1;
}

// Opens the document named by specId (resolved against baseId) over HTTP.
//
// On return id holds the resolved URL.  The URL must have the form
// http://host[:port][/path][#fragment]; the port defaults to 80, an
// empty path becomes "/", and everything from '#' on is not sent.
// Returns the new storage object, or 0 after reporting a message through
// mgr.  Without SP_HAVE_SOCKET the call always fails with notSupported.
StorageObject *URLStorageManager::makeStorageObject(const StringC &specId,
                                                    const StringC &baseId,
                                                    Boolean,
                                                    Boolean mayRewind,
                                                    Messenger &mgr,
                                                    StringC &id)
{
#ifdef SP_HAVE_SOCKET
  id = specId;
  resolveRelative(baseId, id, 0);

  // Scheme: "http:" in either case.
  static const char schemeLower[] = "http";
  static const char schemeUpper[] = "HTTP";
  Boolean isHttp = id.size() >= 5;
  for (size_t k = 0; isHttp && k < 4; k++)
    isHttp = (id[k] == schemeLower[k] || id[k] == schemeUpper[k]);
  if (!isHttp || id[4] != ':') {
    mgr.message(URLStorageMessages::onlyHTTP);
    return 0;
  }

  // The scheme must be followed by "//".
  if (!(id.size() >= 7 && id[5] == '/' && id[6] == '/')) {
    mgr.message(URLStorageMessages::badRelative,
                StringMessageArg(id));
    return 0;
  }

  // Host: everything up to the first '/' or ':'.
  size_t pos = 7;
  String<char> host;
  for (; pos < id.size() && id[pos] != '/' && id[pos] != ':'; pos++)
    host += char(id[pos]);
  if (host.size() == 0) {
    mgr.message(URLStorageMessages::emptyHost,
                StringMessageArg(id));
    return 0;
  }

  // Optional ":port", running up to the next '/'.
  unsigned short port = 80;
  if (pos < id.size() && id[pos] == ':') {
    String<char> portText;
    for (pos++; pos < id.size() && id[pos] != '/'; pos++)
      portText += char(id[pos]);
    if (portText.size() == 0) {
      mgr.message(URLStorageMessages::emptyPort,
                  StringMessageArg(id));
      return 0;
    }
    portText += '\0';
    char *stop;
    const long value = strtol(portText.data(), &stop, 10);
    // The conversion must consume everything before the added NUL.
    const Boolean consumedAll
      = (stop == portText.data() + portText.size() - 1);
    if (!consumedAll || value < 0 || value > 65535L) {
      mgr.message(URLStorageMessages::invalidPort,
                  StringMessageArg(id));
      return 0;
    }
    port = (unsigned short)value;
  }

  // Path: the remainder up to (not including) any '#'.
  String<char> path;
  for (; pos < id.size() && id[pos] != '#'; pos++)
    path += char(id[pos]);
  if (path.size() == 0)
    path += '/';

  // Keep a StringC copy of the host for messages, then NUL-terminate the
  // narrow copy for the resolver calls.
  StringC hostStr;
  for (size_t h = 0; h < host.size(); h++)
    hostStr += host[h];
  host += '\0';

  const SOCKET fd = HttpSocketStorageObject::openHttp(host, port, hostStr, mgr);
  if (fd == INVALID_SOCKET)
    return 0;
  HttpSocketStorageObject *obj
    = new HttpSocketStorageObject(fd, mayRewind, hostStr);
  if (p_openFailed: 0, !obj->open(path, mgr)) {
    delete obj;
    return 0;
  }
  return obj;
#else /* not SP_HAVE_SOCKET */
  ParentLocationMessenger(mgr).message(URLStorageMessages::notSupported);
  return 0;
#endif /* not SP_HAVE_SOCKET */
}

// Resolves id against baseId in place.  Always returns 1.
//
// - If id starts with a scheme (one or more scheme characters followed by
//   ':'), it is absolute and left untouched.
// - If id starts with N slashes, it replaces the part of baseId that
//   begins at the first run of exactly N slashes found after the last
//   longer run (so "/x" against "http://host/a/b" gives "http://host/x",
//   and "//h2/x" gives "http://h2/x").  If there is no such run, id is
//   left unchanged.
// - Otherwise id replaces whatever follows the last '/' in baseId; if
//   baseId has no '/', id is left unchanged.
Boolean URLStorageManager::resolveRelative(const StringC &baseId,
					   StringC &id,
					   Boolean) const
{
  static const char schemeChars[] =
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789"
    "+-.";
  size_t i;
  // If it has a scheme, it is absolute.
  for (i = 0; i < id.size(); i++) {
    const Char c = id[i];
    if (c == ':') {
      if (i == 0)
	break;
      else
	return 1;
    }
    // strchr() takes an int narrowed to char and also matches the
    // terminating NUL, so reject NUL and anything outside ASCII before
    // consulting the table.
    else if (c == 0 || c > 127 || !strchr(schemeChars, char(c)))
      break;
  }
  size_t slashCount = 0;
  while (slashCount < id.size() && id[slashCount] == '/')
    slashCount++;
  if (slashCount > 0) {
    Boolean foundSameSlash = 0;
    size_t sameSlashPos = 0;
    size_t j = 0;
    while (j < baseId.size()) {
      if (baseId[j] != '/') {
	j++;
	continue;
      }
      // Measure the whole run of slashes once, then step over it.  Looking
      // again from inside the run would see a shorter run and could match
      // the tail of "//" when a single '/' is wanted.
      size_t runEnd = j;
      while (runEnd < baseId.size() && baseId[runEnd] == '/')
	runEnd++;
      const size_t runLength = runEnd - j;
      if (runLength == slashCount && !foundSameSlash) {
	foundSameSlash = 1;
	sameSlashPos = j;
      }
      else if (runLength > slashCount)
	foundSameSlash = 0;
      j = runEnd;
    }
    if (foundSameSlash) {
      StringC tem(baseId.data(), sameSlashPos);
      tem += id;
      tem.swap(id);
    }
  }
  else {
    // Keep baseId up to and including its last '/'.
    size_t j;
    for (j = baseId.size(); j > 0; j--)
      if (baseId[j - 1] == '/')
	break;
    if (j > 0) {
      StringC tem(baseId.data(), j);
      tem += id;
      tem.swap(id);
    }
  }
  // FIXME remove xxx/../, and /.
  return 1;
}

// When fold is set, lower-cases in place every character of str whose
// code fits in an unsigned char; larger codes are left alone.
// Always returns 1.
Boolean URLStorageManager::transformNeutral(StringC &str, Boolean fold,
                                            Messenger &) const
{
  if (!fold)
    return 1;
  const size_t length = str.size();
  for (size_t pos = 0; pos < length; pos++) {
    const Char ch = str[pos];
    if (ch > (unsigned char)-1)
      continue;			// outside the range tolower() accepts
    str[pos] = tolower(ch);
  }
  return 1;
}

#ifdef SP_HAVE_SOCKET

#ifndef INADDR_NONE
// Failure value of inet_addr(); some older socket headers do not define it.
#define INADDR_NONE ((unsigned long)0xffffffff)
#endif

// Creates a TCP socket connected to host:port.
//
// host      NUL-terminated host name or dotted-decimal address.
// port      port number in host byte order.
// hostStr   the same host name, used only in diagnostics.
// mgr       receives a message describing any failure.
//
// A host that starts with a digit is parsed with inet_addr(); any other
// host is looked up with gethostbyname().  Returns the connected socket,
// or INVALID_SOCKET after reporting the reason.
SOCKET HttpSocketStorageObject::openHttp(const String<char> &host,
					unsigned short port,
					const StringC &hostStr,
					Messenger &mgr)
{
#ifdef WINSOCK
  if (!winsockIniter.init(mgr))
    return INVALID_SOCKET;
#endif
  struct sockaddr_in sock;
  // Clear padding such as sin_zero; some stacks reject garbage there.
  memset(&sock, 0, sizeof(sock));
  sock.sin_family = AF_INET;
  sock.sin_port = htons(port);
  if (isdigit((unsigned char)host[0])) {
    // Compare in the address type itself: widening the 32-bit result to
    // unsigned long and testing against (unsigned long)-1 never matches
    // where long is 64 bits.
    sock.sin_addr.s_addr = inet_addr(host.data());
    if (sock.sin_addr.s_addr == INADDR_NONE) {
      ParentLocationMessenger(mgr).message(URLStorageMessages::invalidHostNumber,
					   StringMessageArg(hostStr));
      return INVALID_SOCKET;
    }
  }
  else {
    struct hostent *hp = gethostbyname(host.data());
    if (!hp) {
      const MessageType1 *message;
      switch (h_errno) {
      case HOST_NOT_FOUND:
	message = &URLStorageMessages::hostNotFound;
	break;
      case TRY_AGAIN:
	message = &URLStorageMessages::hostTryAgain;
	break;
      case NO_RECOVERY:
	message = &URLStorageMessages::hostNoRecovery;
	break;
      case NO_DATA:
#ifdef NO_ADDRESS
#if NO_ADDRESS != NO_DATA
      case NO_ADDRESS:
#endif
#endif
	message = &URLStorageMessages::hostNoData;
	break;
      default:
#ifdef WINSOCK
	ParentLocationMessenger(mgr).message(URLStorageMessages::hostOtherError,
					     StringMessageArg(hostStr),
					     WinsockMessageArg(h_errno));
	return INVALID_SOCKET;
#else
	message = &URLStorageMessages::hostUnknownError;
	break;
#endif
      }
      ParentLocationMessenger(mgr).message(*message,
					   StringMessageArg(hostStr));
      return INVALID_SOCKET;
    }
    // Never copy more than sin_addr can hold, whatever h_length says.
    size_t addrLen = size_t(hp->h_length);
    if (addrLen > sizeof(sock.sin_addr))
      addrLen = sizeof(sock.sin_addr);
    memcpy(&sock.sin_addr, hp->h_addr, addrLen);
  }
  SOCKET fd = socket(PF_INET, SOCK_STREAM, 0);
  if (fd == INVALID_SOCKET) {
    ParentLocationMessenger(mgr).message(URLStorageMessages::cannotCreateSocket,
					 SocketMessageArg(errnosocket));
    return INVALID_SOCKET;
  }
  if (connect(fd, (struct sockaddr *)&sock, sizeof(sock)) == SOCKET_ERROR) {
    // Report first so that closing the socket cannot disturb the error code.
    ParentLocationMessenger(mgr).message(URLStorageMessages::cannotConnect,
					 StringMessageArg(hostStr),
					 SocketMessageArg(errnosocket));
    (void)closesocket(fd);
    return INVALID_SOCKET;
  }
  return fd;
}

// Wraps the already connected socket fd.  hostStr is kept for use in
// diagnostics; mayRewind is passed on to RewindStorageObject.
HttpSocketStorageObject::HttpSocketStorageObject(SOCKET fd,
                                                 Boolean mayRewind,
                                                 const StringC &hostStr)
: RewindStorageObject(mayRewind, 0),
  hostStr_(hostStr),
  eof_(0),
  fd_(fd)
{
}

// Closes the socket unless it has already been marked as closed.
HttpSocketStorageObject::~HttpSocketStorageObject()
{
  if (fd_ == INVALID_SOCKET)
    return;
  (void)closesocket(fd_);
}

// Sends "GET <path> HTTP/1.0" with an "Accept: */*" header on the socket
// and then consumes the response header with readHeader().
//
// Returns 1 when the body is ready to be read.  On a write failure or a
// header failure a message is reported (by this function or by
// readHeader), the socket is closed, and 0 is returned.
Boolean HttpSocketStorageObject::open(const String<char> &path, Messenger &mgr)
{
  path_ = path;
  String<char> request;
  request.append("GET ", 4);
  request += path_;
  request += ' ';
  request.append("HTTP/1.0\r\n", 10);
  request.append("Accept: */*\r\n", 13);
  request.append("\r\n", 2);

  // A single write may accept only part of the request or be interrupted
  // by a signal, so keep going until every byte has been sent.
  const char *next = request.data();
  size_t remaining = request.size();
  while (remaining > 0) {
    long n = writesocket(fd_, next, remaining);
    if (n < 0 && errnosocket == SOCKET_EINTR)
      continue;
    if (n <= 0) {
      // n == 0 would mean no progress; treat it as a failure rather than
      // spinning forever.
      ParentLocationMessenger(mgr).message(URLStorageMessages::writeError,
					   StringMessageArg(hostStr_),
					   SocketMessageArg(errnosocket));
      (void)closesocket(fd_);
      fd_ = INVALID_SOCKET;
      return 0;
    }
    next += n;
    remaining -= size_t(n);
  }
  if (!readHeader(mgr)) {
    // The socket is already closed if reading hit an error (fd_ was
    // reset) or end of stream (eof_ was set); do not close it twice.
    if (fd_ != INVALID_SOCKET && !eof_)
      (void)closesocket(fd_);
    fd_ = INVALID_SOCKET;
    return 0;
  }
  return 1;
}

// Consumes the response header.
//
// If the first line is not an HTTP status line, it is pushed back with
// unread() so that it is delivered as part of the body, and 1 is
// returned.  A status outside 200..299 reports getFailed (with the text
// that follows the status code) and returns 0.  Otherwise lines are
// skipped up to and including the first empty line, any byte read past
// it is pushed back, and 1 is returned.  Returns 0 if readLine() fails.
Boolean HttpSocketStorageObject::readHeader(Messenger &mgr)
{
  String<char> statusLine;
  String<char> pending;		// byte(s) readLine() read past a line end
  if (!readLine(mgr, statusLine, pending))
    return 0;

  // NUL-terminate so the line can be scanned as a C string.
  statusLine += '\0';
  const char *cursor = &statusLine[0];
  int status;
  if (!parseStatus(cursor, status)) {
    // Not a status line: give the line back, minus the NUL added above.
    // NOTE(review): a byte held in `pending` is not pushed back on this
    // path -- confirm whether that is intended.
    if (statusLine.size() > 0)
      unread(statusLine.data(), statusLine.size() - 1);
    return 1;
  }

  const Boolean succeeded = (status >= 200 && status < 300);
  if (!succeeded) {
    // The reason phrase is the rest of the status line up to its end.
    StringC reason;
    for (; *cursor != '\0' && *cursor != '\n' && *cursor != '\r'; cursor++)
      reason += Char(*cursor);
    StringC pathStr;
    for (size_t k = 0; k < path_.size(); k++)
      pathStr += path_[k];
    ParentLocationMessenger(mgr).message(URLStorageMessages::getFailed,
                                         StringMessageArg(hostStr_),
                                         StringMessageArg(pathStr),
                                         StringMessageArg(reason));
    return 0;
  }

  // Skip header lines; an empty line (or end of input) ends the header.
  String<char> headerLine;
  do {
    if (!readLine(mgr, headerLine, pending))
      return 0;
  } while (headerLine.size() != 0
           && headerLine[0] != '\r'
           && headerLine[0] != '\n');

  if (pending.size())
    unread(pending.data(), pending.size());
  return 1;
}

// Status line must start with: "HTTP/" 1*DIGIT "." 1*DIGIT SP 3DIGIT SP

// Parses the start of a status line at ptr (a NUL-terminated string):
//   "HTTP/" 1*DIGIT "." 1*DIGIT SP 3DIGIT SP
// On success returns 1 with val set to the three-digit code and ptr just
// past the second space.  On failure returns 0; ptr is left at the first
// character that did not match, and val may hold a partial value.
Boolean HttpSocketStorageObject::parseStatus(const char *&ptr, int &val)
{
  const char *expected = "HTTP/";
  while (*expected != '\0') {
    if (*ptr != *expected)
      return 0;
    ptr++;
    expected++;
  }

  // Major version then '.', minor version then ' '.
  for (int part = 0; part < 2; part++) {
    if (!isdigit((unsigned char)*ptr))
      return 0;
    for (ptr++; isdigit((unsigned char)*ptr); ptr++)
      ;
    const char separator = (part == 0) ? '.' : ' ';
    if (*ptr != separator)
      return 0;
    ptr++;
  }

  // Exactly three status digits.
  val = 0;
  int digitsLeft = 3;
  while (digitsLeft-- > 0) {
    if (!isdigit((unsigned char)*ptr))
      return 0;
    val = val * 10 + (*ptr - '0');
    ptr++;
  }

  if (*ptr != ' ')
    return 0;
  ptr++;
  return 1;
}

// True will be returned for an empty line.

// Reads one line into `line`, terminator included.
//
// A line ends at "\n", at "\r\n", or at a lone "\r" (detected when the
// byte after it is not "\n").  Bytes already held in leftOver are used
// first; a byte that had to be read to detect a lone "\r" is stored in
// leftOver for the next call.
//
// Returns 1 when a line, possibly empty or partial, was produced -- this
// includes reaching end of stream, after which eof_ is set and the socket
// is closed.  Returns 0 after a read error, which is reported through mgr
// and also closes the socket.
Boolean HttpSocketStorageObject::readLine(Messenger &mgr,
					  String<char> &line,
					  String<char> &leftOver)
{
  line.resize(0);
  Boolean hadCr = 0;
  Boolean gotLine = 0;
  size_t li;
  // First use up whatever an earlier call left behind.
  for (li = 0; li < leftOver.size(); li++) {
    if (leftOver[li] == '\r') {
      if (hadCr) {
	gotLine = 1;
	break;
      }
      line += '\r';
      hadCr = 1;
    }
    else if (leftOver[li] == '\n') {
      line += '\n';
      li++;
      gotLine = 1;
      break;
    }
    else if (hadCr) {
      gotLine = 1;
      break;
    }
    else
      line += leftOver[li];
  }
  if (gotLine) {
    // Drop the consumed prefix, keeping the rest for the next call.
    for (size_t i = li; i < leftOver.size(); i++)
      leftOver[i - li] = leftOver[i];
    leftOver.resize(leftOver.size() - li);
    return 1;
  }
  leftOver.resize(0);
  if (eof_)
    return 1;
  for (;;) {
    char c;
    long n;
    do {
      n = readsocket(fd_, &c, 1);
    } while (n < 0 && errnosocket == SOCKET_EINTR);
    if (n == 0) {
      (void)closesocket(fd_);
      // Forget the descriptor so nothing, the destructor included, closes
      // it a second time.
      fd_ = INVALID_SOCKET;
      eof_ = 1;
      return 1;
    }
    if (n < 0) {
      ParentLocationMessenger(mgr).message(URLStorageMessages::readError,
					   StringMessageArg(hostStr_),
					   SocketMessageArg(errnosocket));
      (void)closesocket(fd_);
      fd_ = INVALID_SOCKET;
      return 0;
    }
    switch (c) {
    case '\r':
      if (hadCr) {
	// Second CR: the previous one ended the line by itself.
	leftOver += c;
	return 1;
      }
      hadCr = 1;
      line += c;
      break;
    case '\n':
      line += c;
      return 1;
    default:
      if (hadCr) {
	// Lone CR ended the line; this byte belongs to the next one.
	leftOver += c;
	return 1;
      }
      line += c;
      break;
    }
  }
  return 0;			// not reached
}

// Reads up to bufSize bytes of the body into buf.
//
// Bytes held by the base class (readSaved) are returned first.  Otherwise
// the socket is read, retrying when interrupted by a signal, and the
// bytes obtained are passed to saveBytes().  Returns 1 with nread set
// when data was delivered.  Returns 0 at end of stream or on a read
// error (reported through mgr); in both cases the socket is closed.
Boolean HttpSocketStorageObject::read(char *buf, size_t bufSize, Messenger &mgr,
				     size_t &nread)
{
  if (readSaved(buf, bufSize, nread))
    return 1;
  if (fd_ == INVALID_SOCKET || eof_)
    return 0;
  long n;
  do {
    n = readsocket(fd_, buf, bufSize);
  } while (n < 0 && errnosocket == SOCKET_EINTR);
  if (n > 0) {
    nread = size_t(n);
    saveBytes(buf, nread);
    return 1;
  }
  if (n < 0) {
    // Report before closing so the error code is still the read's.
    ParentLocationMessenger(mgr).message(URLStorageMessages::readError,
					 StringMessageArg(hostStr_),
					 SocketMessageArg(errnosocket));
    // Release the descriptor; merely forgetting it would leak the socket.
    (void)closesocket(fd_);
    fd_ = INVALID_SOCKET;
  }
  else {
    eof_ = 1;
    if (closesocket(fd_) == SOCKET_ERROR)
      ParentLocationMessenger(mgr).message(URLStorageMessages::closeError,
					   StringMessageArg(hostStr_),
					   SocketMessageArg(errnosocket));
    fd_ = INVALID_SOCKET;
  }
  return 0;
}

// Seeking back to the start of the socket stream is not implemented.
// NOTE(review): CANNOT_HAPPEN() presumably flags an internal error --
// confirm in macros.h.  Returns 0 if it falls through.
Boolean HttpSocketStorageObject::seekToStart(Messenger &)
{
  CANNOT_HAPPEN();
  return 0;
}

#endif /* SP_HAVE_SOCKET */

#ifdef SP_NAMESPACE
}
#endif