view usr/src/cmd/man/src/util/nsgmls.src/lib/ExtendEntityManager.cxx @ 4:1a15d5aaf794

synchronized with onnv_86 (6202) in onnv-gate
author Koji Uno <koji.uno@sun.com>
date Mon, 31 Aug 2009 14:38:03 +0900
parents c9caec207d52
children
line wrap: on
line source

// Copyright (c) 1994, 1995, 1996 James Clark
// See the file COPYING for copying permission.
#pragma ident	"%Z%%M%	%I%	%E% SMI"

#ifdef __GNUG__
#pragma implementation
#endif

#include "splib.h"
#include "ExtendEntityManager.h"
#include "Message.h"
#include "MessageArg.h"
#include "OffsetOrderedList.h"
#include "rtti.h"
#include "StorageManager.h"
#include "Vector.h"
#include "NCVector.h"
#include "Owner.h"
#include "constant.h"
#include "EntityManagerMessages.h"
#include "StorageObjectPosition.h"
#include "Owner.h"
#include "CodingSystem.h"
#include "CodingSystemKit.h"
#include "InputSource.h"
#include "Mutex.h"
#include "macros.h"
#include "EntityCatalog.h"
#include "CharMap.h"

#include <stddef.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <stdio.h>

#ifdef DECLARE_MEMMOVE
extern "C" {
  void *memmove(void *, const void *, size_t);
}
#endif

#ifdef SP_NAMESPACE
namespace SP_NAMESPACE {
#endif

// Byte that fill() drops from the end of a freshly read chunk when the
// current storage object's zapEof flag is set.
const char EOFCHAR = '\032';	// Control-Z

class ExternalInputSource;

// Concrete ExtendEntityManager; instances are created by
// ExtendEntityManager::make().  It holds a default storage manager plus
// any number of registered ones, a coding-system kit and an optional
// catalog manager, and turns system identifiers into input sources.
class EntityManagerImpl : public ExtendEntityManager {
public:
  EntityManagerImpl(StorageManager *defaultStorageManager,
		    const InputCodingSystem *defaultCodingSystem,
		    const ConstPtr<InputCodingSystemKit> &,
		    Boolean internalCharsetIsDocCharset);
  void setCatalogManager(CatalogManager *catalogManager);
  void registerStorageManager(StorageManager *);
  // Parse SYSID, map it through the catalog manager and return a new
  // input source, or 0 on failure.
  InputSource *open(const StringC &sysid,
		    const CharsetInfo &,
		    InputSourceOrigin *,
		    unsigned flags,
		    Messenger &);
  // The system charset of the coding-system kit.
  const CharsetInfo &charset() const;
  Boolean internalCharsetIsDocCharset() const;
  ConstPtr<EntityCatalog> makeCatalog(StringC &systemId,
				      const CharsetInfo &charset,
				      Messenger &mgr);
  Boolean expandSystemId(const StringC &,
			 const Location &,
			 Boolean isNdata,
			 const CharsetInfo &,
			 const StringC *,
			 Messenger &,
			 StringC &);
  Boolean mergeSystemIds(const Vector<StringC> &,
			 Boolean mapCatalogDocument,
			 const CharsetInfo &,
			 Messenger &mgr,
			 StringC &) const;
  // Find a storage manager whose type name matches (case-insensitively).
  StorageManager *lookupStorageType(const StringC &, const CharsetInfo &) const;
  // Find a storage manager whose type() pointer is identical to the argument.
  StorageManager *lookupStorageType(const char *) const;
  // Ask each storage manager in turn whether the string looks like one
  // of its identifiers.
  StorageManager *guessStorageType(const StringC &, const CharsetInfo &) const;
  const InputCodingSystem *lookupCodingSystem(const StringC &,
					      const CharsetInfo &,
					      Boolean isBctf,
					      const char *&) const;
  Boolean resolveSystemId(const StringC &str,
			  const CharsetInfo &idCharset,
			  Messenger &mgr,
			  const Location &defLocation,
			  Boolean isNdata,
			  ParsedSystemId &parsedSysid) const;
  Boolean parseSystemId(const StringC &str,
			const CharsetInfo &idCharset,
			Boolean isNdata,
			const StorageObjectLocation *def,
			Messenger &mgr,
			ParsedSystemId &parsedSysid) const;
  // The charset used internally: the document charset when
  // internalCharsetIsDocCharset_ is set, otherwise the system charset.
  const CharsetInfo &internalCharset(const CharsetInfo &docCharset) const {
    if (internalCharsetIsDocCharset_)
      return docCharset;
    else
      return charset();
  }
private:
  EntityManagerImpl(const EntityManagerImpl &); // undefined
  void operator=(const EntityManagerImpl &); // undefined
  // Walk up from a Location to the nearest origin that has external info
  // and convert it to a storage object location.
  static Boolean defLocation(const Location &, StorageObjectLocation &);
  static Boolean matchKey(const StringC &type, const char *s,
			  const CharsetInfo &internalCharset);
  // Storage managers added with registerStorageManager().
  NCVector<Owner<StorageManager> > storageManagers_;
  Owner<StorageManager> defaultStorageManager_;
  const InputCodingSystem *defaultCodingSystem_;
  // Set by setCatalogManager(); dereferenced by open() and makeCatalog().
  Owner<CatalogManager> catalogManager_;
  Boolean internalCharsetIsDocCharset_;
  ConstPtr<InputCodingSystemKit> codingSystemKit_;
  friend class FSIParser;
};

// ExternalInfo implementation that remembers, for each storage object
// of a parsed system identifier, where it ends in the input stream and
// where record starts were seen, so that an Offset can later be turned
// into a StorageObjectLocation (see convertOffset()).
class ExternalInfoImpl : public ExternalInfo {
  RTTI_CLASS
public:
  // Takes over the contents of parsedSysid by swapping them into this object.
  ExternalInfoImpl(ParsedSystemId &parsedSysid);
  const StorageObjectSpec &spec(size_t i) const;
  size_t nSpecs() const;
  const ParsedSystemId &parsedSystemId() const;
  // Record that a record start occurs at the given offset.
  void noteRS(Offset);
  // Record the end offset of the current storage object and move to the next.
  void noteStorageObjectEnd(Offset);
  // Flag the current storage object as having had RSs inserted.
  void noteInsertedRSs();
  void setDecoder(size_t i, Decoder *);
  // Swaps the given string into the id of storage object i.
  void setId(size_t i, StringC &);
  void getId(size_t i, StringC &) const;
  Boolean convertOffset(Offset, StorageObjectLocation &) const;
private:
  ParsedSystemId parsedSysid_;
  // One entry per storage object in parsedSysid_.
  NCVector<StorageObjectPosition> position_;
  // Index of the storage object currently being read.
  size_t currentIndex_;
  // list of inserted RSs
  OffsetOrderedList rsList_;
  // When set, noteRS() does not append to rsList_.
  Boolean notrack_;
  Mutex mutex_;
};

// InputSource that reads the storage objects of a parsed system
// identifier one after another, decodes their bytes into Chars and
// normalizes record boundaries (see fill()).
class ExternalInputSource : public InputSource {
public:
  ExternalInputSource(ParsedSystemId &parsedSysid,
		      const CharsetInfo &internalCharset,
		      const CharsetInfo &docCharset,
		      Boolean internalCharsetIsDocCharset,
		      Char replacementChar,
		      InputSourceOrigin *origin,
		      unsigned flags);
  void pushCharRef(Char, const NamedCharRef &);
  ~ExternalInputSource();
private:
  Xchar fill(Messenger &);
  Boolean rewind(Messenger &);
  void willNotRewind();
  void setDocCharset(const CharsetInfo &, const CharsetInfo &);
  void willNotSetDocCharset();

  // Reset the buffer and read-position bookkeeping to the initial state.
  void init();
  // Note a record start at the current position / at position p.
  void noteRS();
  void noteRSAt(const Char *);
  void reallocateBuffer(size_t size);
  // Insert a character at the current position, making room if needed.
  void insertChar(Char);
  // (Re)build map_ for translating between the two charsets.
  void buildMap(const CharsetInfo &internalCharset,
		const CharsetInfo &docCharset);
  void buildMap1(const CharsetInfo &, const CharsetInfo &);
  // Each returns a pointer to the first match in [start, end), or 0.
  static const Char *findNextCr(const Char *start, const Char *end);
  static const Char *findNextLf(const Char *start, const Char *end);
  static const Char *findNextCrOrLf(const Char *start, const Char *end);

  ExternalInfoImpl *info_;
  // Buffer holding decoded Chars at the front and raw/left-over bytes
  // at the back.
  Char *buf_;
  // One past the last decoded Char in buf_.
  const Char *bufLim_;
  // Offset corresponding to bufLim_.
  Offset bufLimOffset_;
  // Size of buf_ in Chars.
  size_t bufSize_;
  // Number of bytes requested from the storage object per read.
  size_t readSize_;
  // Storage objects, created lazily by fill(); one slot per spec.
  NCVector<Owner<StorageObject> > sov_;
  // Storage object currently being read, or 0.
  StorageObject *so_;
  // Index of the next storage object to open.
  size_t soIndex_;
  // True when an RS must be inserted before the next character.
  Boolean insertRS_;
  Decoder *decoder_;
  // Bytes the decoder could not consume yet, and their count.
  const char *leftOver_;
  size_t nLeftOver_;
  Boolean mayRewind_;
  Boolean maySetDocCharset_;
  Boolean mayNotExist_;
  // Record terminator convention of the current storage object.
  enum RecordType {
    unknown,
    crUnknown,
    crlf,
    lf,
    cr,
    asis
    };
  RecordType recordType_;
  Boolean zapEof_;
  Boolean internalCharsetIsDocCharset_;
  Char replacementChar_;
  // Character translation table used by MappingDecoder; null when no
  // storage object needs translation.
  Ptr<CharMapResource<Unsigned32> > map_;
};

// Parser that EntityManagerImpl::parseSystemId() constructs over a
// system identifier string in order to fill in a ParsedSystemId.
// NOTE(review): the member definitions are not in this part of the
// file; the per-member behaviour is not documented here.
class FSIParser {
public:
  FSIParser(const StringC &, const CharsetInfo &idCharset,
	    Boolean isNdata,
	    const StorageObjectLocation *defLoc,
	    const EntityManagerImpl *em,
	    Messenger &mgr);
  Boolean parse(ParsedSystemId &parsedSysid);
  static const char *recordsName(StorageObjectSpec::Records records);
  // Pairs a records keyword with its StorageObjectSpec::Records value.
  struct RecordType {
    const char *name;
    StorageObjectSpec::Records value;
  };
private:
  Boolean handleInformal(size_t startIndex, ParsedSystemId &parsedSysid);
  Boolean convertId(StringC &, Xchar smcrd, const StorageManager *);
  Xchar get();
  void unget();
  StorageManager *lookupStorageType(const StringC &key, Boolean &neutral);
  Boolean matchKey(const StringC &, const char *);
  Boolean matchChar(Xchar, char);
  Boolean isS(Xchar);
  Boolean convertDigit(Xchar c, int &weight);
  void uncharref(StringC &);
  Boolean setAttributes(StorageObjectSpec &sos, Boolean neutral,
			Xchar &smcrd, Boolean &fold);
  Boolean setCatalogAttributes(ParsedSystemId &parsedSysid);
  void setDefaults(StorageObjectSpec &sos);
  Boolean parseAttribute(StringC &token, Boolean &gotValue, StringC &value);
  Boolean lookupRecords(const StringC &token, StorageObjectSpec::Records &);
  void convertMinimumLiteral(const StringC &from, StringC &to);

  // The string being parsed and the current index into it.
  const StringC &str_;
  size_t strIndex_;
  Messenger &mgr_;
  const EntityManagerImpl *em_;
  const StorageObjectSpec *defSpec_;
  const StringC *defId_;
  const CharsetInfo &idCharset_;
  Boolean isNdata_;
  static RecordType recordTypeTable[];
};

// Record boundary characters as they appear in the decoded input:
// ExternalInputSource::fill() inserts RS in front of records and
// rewrites a line-feed record terminator to RE.
const Char RS = '\n';
const Char RE = '\r';

// Out-of-line destructor for the CatalogManager interface; nothing to release.
ExtendEntityManager::CatalogManager::~CatalogManager()
{
}

// Factory for the concrete entity manager.  All four arguments are
// forwarded unchanged to the EntityManagerImpl constructor; the caller
// receives a newly allocated object.
ExtendEntityManager *ExtendEntityManager::make(StorageManager *sm,
					       const InputCodingSystem *cs,
					       const ConstPtr<InputCodingSystemKit> &csKit,
					       Boolean internalCharsetIsDocCharset)
{
  EntityManagerImpl *impl
    = new EntityManagerImpl(sm, cs, csKit, internalCharsetIsDocCharset);
  return impl;
}

// Convert an offset within INFO to a storage object location.
// Returns false when INFO is null or is not an ExternalInfoImpl;
// otherwise returns whatever ExternalInfoImpl::convertOffset() returns.
Boolean ExtendEntityManager::externalize(const ExternalInfo *info,
					 Offset off,
					 StorageObjectLocation &loc)
{
  const ExternalInfoImpl *impl = 0;
  if (info)
    impl = DYNAMIC_CAST_CONST_PTR(ExternalInfoImpl, info);
  if (impl)
    return impl->convertOffset(off, loc);
  return false;
}

// Return the parsed system identifier stored in INFO, or 0 when INFO is
// null or is not an ExternalInfoImpl.
const ParsedSystemId *
ExtendEntityManager::externalInfoParsedSystemId(const ExternalInfo *info)
{
  const ExternalInfoImpl *impl = 0;
  if (info)
    impl = DYNAMIC_CAST_CONST_PTR(ExternalInfoImpl, info);
  if (impl)
    return &impl->parsedSystemId();
  return 0;
}

// Store the default storage manager, default coding system, coding
// system kit and the internal-charset flag.  The initializers are
// listed in member declaration order, which is the order in which they
// run.
EntityManagerImpl::EntityManagerImpl(StorageManager *defaultStorageManager,
				     const InputCodingSystem *defaultCodingSystem,
				     const ConstPtr<InputCodingSystemKit> &codingSystemKit,
				     Boolean internalCharsetIsDocCharset)
  : defaultStorageManager_(defaultStorageManager),
    defaultCodingSystem_(defaultCodingSystem),
    internalCharsetIsDocCharset_(internalCharsetIsDocCharset),
    codingSystemKit_(codingSystemKit)
{
}

// Accessor for the flag passed to the constructor.
Boolean EntityManagerImpl::internalCharsetIsDocCharset() const
{
  return internalCharsetIsDocCharset_;
}

// The system charset as reported by the coding system kit.
const CharsetInfo &EntityManagerImpl::charset() const
{
  const InputCodingSystemKit &kit = *codingSystemKit_;
  return kit.systemCharset();
}

// Open the entity named by SYSID.
// The identifier is parsed, then mapped through the catalog manager;
// if either step fails 0 is returned.  On success a newly allocated
// ExternalInputSource is returned.
InputSource *EntityManagerImpl::open(const StringC &sysid,
				     const CharsetInfo &docCharset,
				     InputSourceOrigin *origin,
				     unsigned flags,
				     Messenger &mgr)
{
  ParsedSystemId parsedSysid;
  const Boolean ndata = (flags & ExtendEntityManager::isNdata) != 0;
  if (!parseSystemId(sysid, docCharset, ndata, 0, mgr, parsedSysid))
    return 0;
  if (!catalogManager_->mapCatalog(parsedSysid, this, mgr))
    return 0;
  InputSource *source
    = new ExternalInputSource(parsedSysid,
			      charset(),
			      docCharset,
			      internalCharsetIsDocCharset_,
			      codingSystemKit_->replacementChar(),
			      origin, flags);
  return source;
}


// Delegate catalog creation to the catalog manager, passing this entity
// manager along.
ConstPtr<EntityCatalog>
EntityManagerImpl::makeCatalog(StringC &systemId,
			       const CharsetInfo &docCharset,
			       Messenger &mgr)
{
  return catalogManager_->makeCatalog(systemId, docCharset, this, mgr);
}

// Parse every system identifier in SYSIDS into one ParsedSystemId and
// write its unparsed form to RESULT.
// When mapCatalogDocument is set, a catalogDocument map entry is added
// before any identifier is parsed.  Returns 0 as soon as one identifier
// fails to parse (RESULT is then left untouched), 1 otherwise.
Boolean
EntityManagerImpl::mergeSystemIds(const Vector<StringC> &sysids,
				  Boolean mapCatalogDocument,
				  const CharsetInfo &docCharset,
				  Messenger &mgr,
				  StringC &result) const
{
  ParsedSystemId merged;
  if (mapCatalogDocument) {
    const size_t oldCount = merged.maps.size();
    merged.maps.resize(oldCount + 1);
    merged.maps.back().type = ParsedSystemId::Map::catalogDocument;
  }
  for (size_t k = 0; k < sysids.size(); k++) {
    const Boolean parsed
      = parseSystemId(sysids[k], docCharset, 0, 0, mgr, merged);
    if (!parsed)
      return 0;
  }
  merged.unparse(internalCharset(docCharset), 0, result);
  return 1;
}

// Parse STR relative to the storage object location derived from
// defLoc (if one can be derived) and write the unparsed, expanded form
// to RESULT.
// When mapCatalogPublic is non-null a catalogPublic map carrying that
// public id is inserted at the front of the maps.  Returns 0 if parsing
// fails, 1 otherwise.
Boolean
EntityManagerImpl::expandSystemId(const StringC &str,
				  const Location &defLoc,
				  Boolean LOCALisNdata,
				  const CharsetInfo &docCharset,
				  const StringC *mapCatalogPublic,
				  Messenger &mgr,
				  StringC &result)
{
  ParsedSystemId parsedSysid;
  StorageObjectLocation defSoLoc;
  // Stays null unless a default location could be determined.
  const StorageObjectLocation *defSoLocP = 0;
  if (defLocation(defLoc, defSoLoc))
    defSoLocP = &defSoLoc;
  const Boolean parsed = parseSystemId(str, docCharset, LOCALisNdata,
				       defSoLocP, mgr, parsedSysid);
  if (!parsed)
    return 0;
  if (mapCatalogPublic) {
    ParsedSystemId::Map publicMap;
    publicMap.type = ParsedSystemId::Map::catalogPublic;
    publicMap.publicId = *mapCatalogPublic;
    parsedSysid.maps.insert(parsedSysid.maps.begin(), 1, publicMap);
  }
  parsedSysid.unparse(internalCharset(docCharset), LOCALisNdata, result);
  return 1;
}

// Run an FSIParser over STR, using the internal charset that
// corresponds to docCharset, and return the parser's result.
Boolean EntityManagerImpl::parseSystemId(const StringC &str,
					 const CharsetInfo &docCharset,
					 Boolean LOCALisNdata,
					 const StorageObjectLocation *defLoc,
					 Messenger &mgr,
					 ParsedSystemId &parsedSysid) const
{
  const CharsetInfo &idCharset = internalCharset(docCharset);
  FSIParser parser(str, idCharset, LOCALisNdata, defLoc, this, mgr);
  const Boolean ok = parser.parse(parsedSysid);
  return ok;
}

// Return the first storage manager that claims, via guessIsId(), that
// TYPE looks like one of its identifiers.  Registered managers are
// asked in registration order, then the default manager.  Returns 0 if
// none accepts.
StorageManager *
EntityManagerImpl::guessStorageType(const StringC &type,
				    const CharsetInfo &internalCharset) const
{
  for (size_t k = 0; k < storageManagers_.size(); k++) {
    StorageManager *candidate = storageManagers_[k].pointer();
    if (candidate->guessIsId(type, internalCharset))
      return candidate;
  }
  StorageManager *fallback = defaultStorageManager_.pointer();
  if (!fallback->guessIsId(type, internalCharset))
    return 0;
  return fallback;
}

// Find the storage manager whose type name matches TYPE according to
// matchKey().  The default manager is checked first, then the
// registered ones in order.  An empty TYPE never matches; 0 is
// returned when nothing matches.
StorageManager *
EntityManagerImpl::lookupStorageType(const StringC &type,
				     const CharsetInfo &internalCharset) const
{
  if (type.size() == 0)
    return 0;
  StorageManager *dflt = defaultStorageManager_.pointer();
  if (matchKey(type, dflt->type(), internalCharset))
    return dflt;
  for (size_t k = 0; k < storageManagers_.size(); k++) {
    StorageManager *candidate = storageManagers_[k].pointer();
    if (matchKey(type, candidate->type(), internalCharset))
      return candidate;
  }
  return 0;
}

// Find the storage manager whose type() returns the very same pointer
// as TYPE.  Note that this compares pointers, not string contents.
// The default manager is checked first; 0 is returned when none matches.
StorageManager *
EntityManagerImpl::lookupStorageType(const char *type) const
{
  StorageManager *dflt = defaultStorageManager_.pointer();
  if (dflt->type() == type)
    return dflt;
  for (size_t k = 0; k < storageManagers_.size(); k++) {
    StorageManager *candidate = storageManagers_[k].pointer();
    if (candidate->type() == type)
      return candidate;
  }
  return 0;
}

// Delegate to the coding system kit's makeInputCodingSystem() with the
// same arguments.
const InputCodingSystem *
EntityManagerImpl::lookupCodingSystem(const StringC &type,
				      const CharsetInfo &internalCharset,
				      Boolean isBctf,
				      const char *&name) const
{
  return codingSystemKit_->makeInputCodingSystem(type, internalCharset, isBctf, name);
}

// Case-insensitive comparison of TYPE against the C string S.
// Returns true only when both have the same length and every character
// of TYPE equals either the upper-case or the lower-case form of the
// corresponding character of S, after that form has been converted
// with internalCharset.execToDesc().
Boolean
EntityManagerImpl::matchKey(const StringC &type,
			    const char *s,
			    const CharsetInfo &internalCharset)
{
  if (strlen(s) != type.size())
    return false;
  for (size_t i = 0; i < type.size(); i++) {
    // The <ctype.h> functions are only defined for values representable
    // as unsigned char (or EOF); a plain char with the high bit set
    // could be negative, so convert first.
    const unsigned char c = (unsigned char)s[i];
    if (internalCharset.execToDesc(toupper(c)) != type[i]
	&& internalCharset.execToDesc(tolower(c)) != type[i])
      return false;
  }
  return true;
}

// Append SM to the list of registered storage managers.  The new slot
// is an Owner, so the list holds the pointer from now on.
void EntityManagerImpl::registerStorageManager(StorageManager *sm)
{
  const size_t slot = storageManagers_.size();
  storageManagers_.resize(slot + 1);
  storageManagers_[slot] = sm;
}

// Install the catalog manager used by open() and makeCatalog().
void EntityManagerImpl::setCatalogManager(CatalogManager *catalogManager)
{
  catalogManager_ = catalogManager;
}

// Determine the storage object location that corresponds to defLocation.
// Starting at the location's origin, the chain of origins is followed
// until an input source origin carrying external info is found; that
// info and the start offset are then handed to externalize().
// Returns 0 if the chain ends (null origin) or an input source origin
// without external info cannot supply a further location.
Boolean
EntityManagerImpl::defLocation(const Location &defLocation,
			       StorageObjectLocation &soLoc)
{
  const Origin *origin = defLocation.origin().pointer();
  Index index = defLocation.index();
  while (origin) {
    const InputSourceOrigin *inputSourceOrigin = origin->asInputSourceOrigin();
    if (!inputSourceOrigin) {
      // Not an input source: continue with the parent location.
      const Location &parentLoc = origin->parent();
      origin = parentLoc.origin().pointer();
      index = parentLoc.index();
      continue;
    }
    Offset off = inputSourceOrigin->startOffset(index);
    const ExternalInfo *info = inputSourceOrigin->externalInfo();
    if (info)
      return ExtendEntityManager::externalize(info, off, soLoc);
    // No external info here; let the origin update origin/index for the
    // next iteration.
    if (!inputSourceOrigin->defLocation(off, origin, index))
      return 0;
  }
  return 0;
}

// StorageObject wrapper that, while *unbuffer is true, hands data to
// the caller one byte per read() call.  Data is fetched from the
// wrapped object in blocks into a private buffer and doled out from
// there.  Once *unbuffer becomes false and the private buffer has been
// drained, reads are passed straight through to the wrapped object.
// The wrapped object is held in an Owner.
class UnbufferingStorageObject : public StorageObject {
public:
  // sub: the storage object to wrap.
  // unbuffer: flag consulted on every refill; must outlive this object.
  UnbufferingStorageObject(StorageObject *sub,
			   const Boolean *unbuffer)
    : sub_(sub), bufSize_(0), bufAvail_(0), bufNext_(0), buf_(0),
      unbuffer_(unbuffer) { }
  ~UnbufferingStorageObject() { delete [] buf_; }
  // Returns the wrapped object's result when passing through; otherwise
  // returns 1 with nread == 1 and one byte stored in *buf, or 0 when
  // the wrapped object's read fails.
  Boolean read(char *buf, size_t bufSize, Messenger &mgr,
               size_t &nread) {
    if (bufNext_ >= bufAvail_) {
      bufAvail_ = bufNext_ = 0;
      if (!*unbuffer_)
	return sub_->read(buf, bufSize, mgr, nread);
      // The private buffer is sized by the first buffered request.
      if (buf_ == 0)
	buf_ = new char[bufSize_ = bufSize];
      if (!sub_->read(buf_, bufSize_, mgr, bufAvail_))
	return 0;
      // A successful read that produced no bytes must not hand out a
      // byte from the (unfilled) buffer; report it as-is.
      if (bufAvail_ == 0) {
	nread = 0;
	return 1;
      }
    }
    *buf = buf_[bufNext_++];
    nread = 1;
    return 1;
  }
  // Discard any buffered bytes and rewind the wrapped object.
  Boolean rewind(Messenger &mgr) {
    bufAvail_ = bufNext_ = 0;
    return sub_->rewind(mgr);
  }
  void willNotRewind() { sub_->willNotRewind(); }
  size_t getBlockSize() const { return sub_->getBlockSize(); }
private:
  Owner<StorageObject> sub_;
  // Capacity of buf_ (valid once buf_ has been allocated).
  size_t bufSize_;
  // Number of valid bytes in buf_ and index of the next one to hand out.
  size_t bufAvail_;
  size_t bufNext_;
  char *buf_;
  const Boolean *unbuffer_;
};

// Decoder that runs another decoder and then translates every decoded
// character through a CharMap (see decode()).
class MappingDecoder : public Decoder {
public:
  MappingDecoder(Decoder *,
		 const ConstPtr<CharMapResource<Unsigned32> > &);
  Boolean convertOffset(unsigned long &offset) const;
  size_t decode(Char *, const char *, size_t, const char **);
private:
  // The wrapped decoder.
  Owner<Decoder> sub_;
  // Translation table applied to each decoded character.
  ConstPtr<CharMapResource<Unsigned32> > map_;
};

// Wrap SUB, reporting the same minimum bytes-per-character as SUB, and
// remember the translation map.
MappingDecoder::MappingDecoder(Decoder *sub,
			       const ConstPtr<CharMapResource<Unsigned32> > &map)
: Decoder(sub->minBytesPerChar()), sub_(sub), map_(map)
{
}

// Decode with the wrapped decoder, then translate each produced
// character in place.  A map entry with bit 31 set supplies the
// replacement character in its low 31 bits; any other entry is a delta
// that is added to the character.  Returns the number of characters
// produced by the wrapped decoder.
size_t MappingDecoder::decode(Char *to, const char *s,
			      size_t slen, const char **rest)
{
  const size_t nDecoded = sub_->decode(to, s, slen, rest);
  const CharMap<Unsigned32> &table = *map_;
  const unsigned absoluteFlag = unsigned(1) << 31;
  Char *const lim = to + nDecoded;
  for (Char *p = to; p != lim; ++p) {
    const Unsigned32 entry = table[*p];
    if (entry & absoluteFlag)
      *p = (entry & ~absoluteFlag);
    else
      *p += entry;
  }
  return nDecoded;
}

// Offsets are unaffected by the character translation, so this simply
// forwards to the wrapped decoder.
Boolean MappingDecoder::convertOffset(unsigned long &offset) const
{
  return sub_->convertOffset(offset);
}
  
// Set up an input source over the storage objects described by
// parsedSysid.
// A translation map is built if at least one storage object has a
// coding system type that is neither `special` nor the type that needs
// no translation (bctf when the internal charset is the document
// charset, encoding otherwise).  The storage objects themselves are
// not created here; fill() creates them on demand.  The contents of
// parsedSysid are handed to a new ExternalInfoImpl, which is attached
// to ORIGIN.
ExternalInputSource::ExternalInputSource(ParsedSystemId &parsedSysid,
					 const CharsetInfo &systemCharset,
					 const CharsetInfo &docCharset,
					 Boolean internalCharsetIsDocCharset,
					 Char replacementChar,
					 InputSourceOrigin *origin,
					 unsigned flags)
: InputSource(origin, 0, 0),
  mayRewind_((flags & EntityManager::mayRewind) != 0),
  mayNotExist_((flags & ExtendEntityManager::mayNotExist) != 0),
  sov_(parsedSysid.size()),
  internalCharsetIsDocCharset_(internalCharsetIsDocCharset),
  // hack
  maySetDocCharset_((flags & EntityManager::maySetDocCharset) != 0),
  replacementChar_(replacementChar)
{
  // One map serves all storage objects, so stop at the first one that
  // needs it.
  for (size_t i = 0; i < parsedSysid.size(); i++) {
    if (parsedSysid[i].codingSystemType
        != (internalCharsetIsDocCharset
	    ? StorageObjectSpec::bctf
	    : StorageObjectSpec::encoding)
	&& parsedSysid[i].codingSystemType != StorageObjectSpec::special) {
      map_ = new CharMapResource<Unsigned32>;
      buildMap(systemCharset, docCharset);
      break;
    }
  }
  for (size_t i = 0; i < sov_.size(); i++)
    sov_[i] = 0;
  init();
  // The ExternalInfoImpl constructor swaps the contents out of
  // parsedSysid, so parsedSysid must not be used after this point.
  info_ = new ExternalInfoImpl(parsedSysid);
  origin->setExternalInfo(info_);
}

void ExternalInputSource::setDocCharset(const CharsetInfo &docCharset,
					const CharsetInfo &systemCharset)
{
  if (!map_.isNull())
    buildMap(systemCharset, docCharset);
  willNotSetDocCharset();
}

// Clear maySetDocCharset_.  An UnbufferingStorageObject created in
// fill() holds a pointer to this flag, so clearing it also ends the
// byte-at-a-time reading.
void ExternalInputSource::willNotSetDocCharset()
{
  maySetDocCharset_ = 0;
}

void ExternalInputSource::buildMap(const CharsetInfo &systemCharset,
				   const CharsetInfo &docCharset)
{
  CharMap<Unsigned32> &map = *map_;
  // FIXME How should invalidChar be chosen when internalCharsetIsDocCharset_?
  Char invalidChar
    = internalCharsetIsDocCharset_ ? 0 : replacementChar_;
  map.setAll((Unsigned32(1) << 31) | invalidChar);
  if (internalCharsetIsDocCharset_)
    buildMap1(systemCharset, docCharset);
  else
    buildMap1(docCharset, systemCharset);
}

// Fill map_ with deltas that translate character numbers of
// fromCharset into character numbers of toCharset.
// Each range of fromCharset is walked in pieces; for every piece that
// has a counterpart in toCharset the map entry is set to the
// difference (toMin - descMin), which MappingDecoder::decode() adds to
// the character.  Pieces without a counterpart keep whatever entry
// buildMap() stored beforehand.
void ExternalInputSource::buildMap1(const CharsetInfo &fromCharset,
				    const CharsetInfo &toCharset)
{
  UnivCharsetDescIter iter(fromCharset.desc());
  for (;;) {
    WideChar descMin, descMax;
    UnivChar univMin;
    if (!iter.next(descMin, descMax, univMin))
      break;
    // Only characters up to charMax are of interest.
    if (descMin > charMax)
      break;
    if (descMax > charMax)
      descMax = charMax;
    WideChar totalCount = 1 + (descMax - descMin);
    do {
      WideChar count;
      WideChar toMin;
      ISet<WideChar> set;
      int nMap = toCharset.univToDesc(univMin, toMin, set, count);
      // Never step beyond the end of the current source range.
      if (count > totalCount)
	count = totalCount;
      if (nMap && toMin <= charMax) {
	// Clip the target range at charMax.
	Char toMax;
	if (count - 1 > charMax - toMin)
	  toMax = charMax;
	else
	  toMax = toMin + (count - 1);
	map_->setRange(descMin, descMin + (toMax - toMin), Char(toMin - descMin));
      }
      descMin += count;
      univMin += count;
      totalCount -= count;
    } while (totalCount > 0);
  }
}

// Put the reader into its initial state: no buffer, no current storage
// object, no left-over bytes, and an RS pending before the first
// character.
void ExternalInputSource::init()
{
  // Buffer bookkeeping.
  buf_ = 0;
  bufSize_ = 0;
  bufLim_ = 0;
  bufLimOffset_ = 0;
  // Undecoded bytes carried over between reads.
  leftOver_ = 0;
  nLeftOver_ = 0;
  // Storage object cursor.
  so_ = 0;
  soIndex_ = 0;
  insertRS_ = true;
}

// Release the character buffer.  delete[] on a null pointer is a no-op,
// so no explicit check is needed.
ExternalInputSource::~ExternalInputSource()
{
  delete [] buf_;
}

// Restart reading from the beginning.
// The buffer is discarded, a fresh ExternalInfoImpl is created from a
// copy of the current parsed system id, every storage object opened so
// far is rewound, and the ids recorded for them are carried over to
// the new info object.  Returns 0 as soon as one storage object fails
// to rewind, 1 otherwise.
Boolean ExternalInputSource::rewind(Messenger &mgr)
{
  reset(0, 0);
  // Free the buffer and immediately clear every member that refers to
  // it.  Without this, the early return below would leave buf_
  // dangling and the destructor would delete it a second time.
  delete [] buf_;
  buf_ = 0;
  bufSize_ = 0;
  bufLim_ = 0;
  leftOver_ = 0;
  nLeftOver_ = 0;
  // reset makes a new EntityOrigin
  ParsedSystemId parsedSysid(info_->parsedSystemId());
  ExternalInfoImpl *oldInfo = info_;
  info_ = new ExternalInfoImpl(parsedSysid);
  so_ = 0;
  // soIndex_ still holds the number of storage objects visited so far;
  // init() resets it, so init() must come after this loop.
  for (size_t i = 0; i < soIndex_; i++) {
    // NOTE(review): on this failure path the new info_ has not been
    // attached to the origin yet - confirm who is expected to own it.
    if (sov_[i] && !sov_[i]->rewind(mgr))
      return 0;
    StringC tem;
    oldInfo->getId(i, tem);
    info_->setId(i, tem);
  }
  inputSourceOrigin()->setExternalInfo(info_);
  init();
  return 1;
}

// Tell every storage object created so far that it will not be rewound
// and remember that for storage objects created later.
void ExternalInputSource::willNotRewind()
{
  const size_t count = sov_.size();
  for (size_t k = 0; k != count; ++k) {
    if (!sov_[k])
      continue;
    sov_[k]->willNotRewind();
  }
  mayRewind_ = 0;
}


// Round N up to the nearest multiple of TO.
// TO must be a power of 2.

inline
size_t roundUp(size_t n, size_t to)
{
  const size_t mask = to - 1;
  return (n + mask) & ~mask;
}

inline
void ExternalInputSource::noteRSAt(const Char *p)
{
  info_->noteRS(bufLimOffset_ - (bufLim_ - p));
}

// Report a record start at the current read position.
inline
void ExternalInputSource::noteRS()
{
  noteRSAt(cur());
}

// Make more characters available and return the next one.
// Must be called with cur() == end().  The function
//   1. if all decoded characters have been handed out, opens the next
//      storage object when necessary, reads a block of bytes from it and
//      decodes them into the buffer;
//   2. inserts a pending RS;
//   3. advances end() over the next stretch of decoded characters,
//      handling record terminators according to recordType_.
// Returns eE when every storage object has been exhausted, otherwise the
// result of nextChar().
Xchar ExternalInputSource::fill(Messenger &mgr)
{
  ASSERT(cur() == end());
  // Loop until there is at least one decoded character beyond end().
  while (end() >= bufLim_) {
    // need more data
    // Open storage objects until one succeeds or none are left.
    while (so_ == 0) {
      if (soIndex_ >= sov_.size())
	return eE;
      if (soIndex_ > 0)
	info_->noteStorageObjectEnd(bufLimOffset_ - (bufLim_ - end()));
      const StorageObjectSpec &spec = info_->spec(soIndex_);
      // After a rewind the storage object may already exist.
      if (!sov_[soIndex_]) {
	StringC id;
	if (mayNotExist_) {
	  // The object is allowed to be missing, so creation is given a
	  // NullMessenger instead of mgr.
	  NullMessenger nullMgr;
	  sov_[soIndex_]
	    = spec.storageManager->makeStorageObject(spec.specId, spec.baseId,
						     spec.search,
						     mayRewind_, nullMgr, id);
	}
	else
	  sov_[soIndex_]
	    = spec.storageManager->makeStorageObject(spec.specId, spec.baseId,
						     spec.search,
						     mayRewind_, mgr, id);
	info_->setId(soIndex_, id);
      }
      so_ = sov_[soIndex_].pointer();
      if (so_) {
	decoder_ = spec.codingSystem->makeDecoder();
	// Same condition as in the constructor: this object's characters
	// must be translated through map_.
	if (spec.codingSystemType != StorageObjectSpec::special
	    && spec.codingSystemType != (internalCharsetIsDocCharset_ 
					 ? StorageObjectSpec::bctf
					 : StorageObjectSpec::encoding)) {
	  decoder_ = new MappingDecoder(decoder_, map_);
	  // While the document charset may still change, read byte by
	  // byte through an UnbufferingStorageObject.
	  if (maySetDocCharset_) {
	    sov_[soIndex_] = new UnbufferingStorageObject(sov_[soIndex_].extract(), &maySetDocCharset_);
	    so_ = sov_[soIndex_].pointer();
	  }
	}
	info_->setDecoder(soIndex_, decoder_);
	zapEof_ = spec.zapEof;
	// Translate the records attribute into the internal record type.
	switch (spec.records) {
	case StorageObjectSpec::asis:
	  recordType_ = asis;
	  insertRS_ = false;
	  break;
	case StorageObjectSpec::cr:
	  recordType_ = cr;
	  break;
	case StorageObjectSpec::lf:
	  recordType_ = lf;
	  break;
	case StorageObjectSpec::crlf:
	  recordType_ = crlf;
	  break;
	case StorageObjectSpec::find:
	  recordType_ = unknown;
	  break;
	default:
	  CANNOT_HAPPEN();
	}
	soIndex_++;
	readSize_ = so_->getBlockSize();
	nLeftOver_ = 0;
	break;
      }
      else
	setAccessError();
      // Creation failed: try the next storage object.
      soIndex_++;
    }

    // Characters between start() and end() must survive the refill.
    size_t keepSize = end() - start();
    const size_t align = sizeof(int)/sizeof(Char);
    size_t readSizeChars = (readSize_ + (sizeof(Char) - 1))/sizeof(Char);
    readSizeChars = roundUp(readSizeChars, align);
    size_t neededSize;		// in Chars
    size_t startOffset;
    // compute neededSize and readSize
    unsigned minBytesPerChar = decoder_->minBytesPerChar();
    if (nLeftOver_ == 0 && minBytesPerChar >= sizeof(Char)) {
      // In this case we want to do decoding in place.
      // FIXME It might be a win on some systems (Irix?) to arrange that the
      // read buffer is on a page boundary.

      if (keepSize >= size_t(-1)/sizeof(Char) - (align - 1) - insertRS_)
	abort();			// FIXME throw an exception
      
      // Now size_t(-1)/sizeof(Char) - (align - 1) - insertRS_ - keepSize > 0
      if (readSizeChars
	  > size_t(-1)/sizeof(Char) - (align - 1) - insertRS_ - keepSize)
	abort();
      neededSize = roundUp(readSizeChars + keepSize + insertRS_, align);
      // Place the kept characters directly in front of the read area at
      // the end of the buffer.
      startOffset = ((neededSize > bufSize_ ? neededSize : bufSize_)
		     - readSizeChars - insertRS_ - keepSize);
    }
    else {
      // Needs to be room for everything before decoding.
      neededSize = (keepSize + insertRS_ + readSizeChars
		    + (nLeftOver_ + sizeof(Char) - 1)/sizeof(Char));
      // Also must be room for everything after decoding.
      size_t neededSize2
	= (keepSize + insertRS_
	   // all the converted characters
	   + (nLeftOver_ + readSize_)/minBytesPerChar
	   // enough Chars to contain left over bytes
	   + ((readSize_ % minBytesPerChar + sizeof(Char) - 1)
	      / sizeof(Char)));
      if (neededSize2 > neededSize)
	neededSize = neededSize2;
      neededSize = roundUp(neededSize, align);
      if (neededSize > size_t(-1)/sizeof(Char))
	abort();
      startOffset = 0;
    }
    if (bufSize_ < neededSize)
      reallocateBuffer(neededSize);
    // Move the kept characters to their new place.
    Char *newStart = buf_ + startOffset;
    if (newStart != start() && keepSize > 0)
      memmove(newStart, start(), keepSize*sizeof(Char));
    // Left-over bytes go immediately in front of the read area so that
    // they and the new bytes form one contiguous run.
    char *bytesStart = (char *)(buf_ + bufSize_ - readSizeChars) - nLeftOver_;
    if (nLeftOver_ > 0 && leftOver_ != bytesStart)
      memmove(bytesStart, leftOver_, nLeftOver_);
    moveStart(newStart);
    bufLim_ = end();

    size_t nread;
    if (so_->read((char *)(buf_ + bufSize_ - readSizeChars), readSize_,
		  mgr, nread)) {
      if (nread > 0) {
	const char *bytesEnd = bytesStart + nLeftOver_ + nread;
	// Decode behind end(), leaving one slot free for a pending RS.
	// With zapEof_ a trailing EOFCHAR is not passed to the decoder.
	size_t nChars = decoder_->decode((Char *)end() + insertRS_,
					 bytesStart,
					 nLeftOver_ + nread
					 - (zapEof_ && bytesEnd[-1] == EOFCHAR),
					 &leftOver_);
	nLeftOver_ = bytesEnd - leftOver_;
	if (nChars > 0) {
	  if (insertRS_) {
	    noteRS();
	    *(Char *)end() = RS;
	    advanceEnd(end() + 1);
	    insertRS_ = false;
	    bufLim_ += 1;
	    bufLimOffset_ += 1;
	  }
	  bufLim_ += nChars;
	  bufLimOffset_ += nChars;
	  break;
	}
      }
    }
    else
      // Read failed: go on to the next storage object.
      so_ = 0;
  }
  ASSERT(end() < bufLim_);
  // An RS that became pending while decoded characters were still
  // available is inserted here.
  if (insertRS_) {
    noteRS();
    insertChar(RS);
    insertRS_ = false;
    bufLimOffset_ += 1;
  }
  // Advance end() up to and including the next record terminator.
  switch (recordType_) {
  case unknown:
    {
      // The convention is detected from the first terminator seen.
      const Char *e = findNextCrOrLf(end(), bufLim_);
      if (e) {
	if (*e == '\n') {
	  recordType_ = lf;
	  info_->noteInsertedRSs();
	  *(Char *)e = RE;
	  advanceEnd(e + 1);
	  insertRS_ = true;
	}
	else {
	  if (e + 1 < bufLim_) {
	    if (e[1] == '\n') {
	      recordType_ = crlf;
	      advanceEnd(e + 1);
	      if (e + 2 == bufLim_) {
		bufLim_--;
		bufLimOffset_--;
		insertRS_ = true;
	      }
	    }
	    else {
	      advanceEnd(e + 1);
	      recordType_ = cr;
	      info_->noteInsertedRSs();
	      insertRS_ = true;
	    }
	  }
	  else {
	    // The CR is the last decoded character; what follows decides
	    // between cr and crlf on the next call.
	    recordType_ = crUnknown;
	    advanceEnd(e + 1);
	  }
	}
      }
      else
	advanceEnd(bufLim_);
    }
    break;
  case crUnknown:
    {
      if (*cur() == '\n') {
	noteRS();
	advanceEnd(cur() + 1);
	recordType_ = crlf;
      }
      else {
	advanceEnd(cur() + 1);
	insertRS_ = true;
	recordType_ = cr;
	info_->noteInsertedRSs();
      }
    }
    break;
  case lf:
    {
      // The LF becomes the RE; an RS is inserted before the next record.
      Char *e = (Char *)findNextLf(end(), bufLim_);
      if (e) {
	advanceEnd(e + 1);
	*e = RE;
	insertRS_ = true;
      }
      else
	advanceEnd(bufLim_);
    }
    break;
  case cr:
    {
      const Char *e = findNextCr(end(), bufLim_);
      if (e) {
	advanceEnd(e + 1);
	insertRS_ = true;
      }
      else
	advanceEnd(bufLim_);
    }
    break;
  case crlf:
    {
      // CR and LF already are RE and RS; only note where the RSs are.
      const Char *e = end();
      for (;;) {
	e = findNextLf(e, bufLim_);
	if (!e) {
	  advanceEnd(bufLim_);
	  break;
	}
	// Need to delete final RS if not followed by anything.
	if (e + 1 == bufLim_) {
	  bufLim_--;
	  bufLimOffset_--;
	  advanceEnd(e);
	  insertRS_ = true;
	  if (cur() == end())
	    return fill(mgr);
	  break;
	}
	noteRSAt(e);
	e++;
      }
    }
    break;
  case asis:
    advanceEnd(bufLim_);
    break;
  default:
    CANNOT_HAPPEN();
  }
  ASSERT(cur() < end());
  return nextChar();
}

// Return a pointer to the first '\r' in [start, end), or 0 if there is none.
const Char *ExternalInputSource::findNextCr(const Char *start,
					    const Char *end)
{
  while (start < end) {
    if (*start == '\r')
      return start;
    ++start;
  }
  return 0;
}

// Return a pointer to the first '\n' in [start, end), or 0 if there is none.
const Char *ExternalInputSource::findNextLf(const Char *start,
					    const Char *end)
{
  while (start < end) {
    if (*start == '\n')
      return start;
    ++start;
  }
  return 0;
}

// Return a pointer to the first '\n' or '\r' in [start, end), or 0 if
// there is none.
const Char *ExternalInputSource::findNextCrOrLf(const Char *start,
						const Char *end)
{
  while (start < end) {
    const Char c = *start;
    if (c == '\n' || c == '\r')
      return start;
    ++start;
  }
  return 0;
}

// Record a named character reference at the current position and insert
// the referenced character CH there.  Requires cur() == start().
void ExternalInputSource::pushCharRef(Char ch, const NamedCharRef &ref)
{
  ASSERT(cur() == start());
  noteCharRef(startIndex() + (cur() - start()), ref);
  insertChar(ch);
}

// Insert CH at the current position.
// If there is free space in front of start(), the characters between
// start() and cur() are shifted one slot to the left and CH is stored
// in the freed slot.  Otherwise the characters from cur() up to
// bufLim_ are shifted one slot to the right, after making sure that
// this neither overruns the buffer nor overwrites left-over bytes.
void ExternalInputSource::insertChar(Char ch)
{
  if (start() > buf_) {
    if (cur() > start())
      memmove((Char *)start() - 1, start(), (cur() - start())*sizeof(Char));
    moveLeft();
    *(Char *)cur() = ch;
  }
  else {
    // must have start == buf
    // The decoded characters extend right up to the area occupied by
    // the left-over bytes (or to the end of the buffer): grow by one.
    if (buf_ + (bufSize_ - (nLeftOver_ + sizeof(Char) - 1)/sizeof(Char))
	== bufLim_) {
      if (bufSize_ == size_t(-1))
	abort();		// FIXME throw an exception
      reallocateBuffer(bufSize_ + 1);
    }
    // The slot about to be used overlaps the left-over bytes: move them
    // to the very end of the buffer.
    else if (nLeftOver_ > 0 && ((char *)(bufLim_ + 1) > leftOver_)) {
      char *s = (char *)(buf_ + bufSize_) - nLeftOver_;
      memmove(s, leftOver_, nLeftOver_);
      leftOver_ = s;
    }
    if (cur() < bufLim_)
      memmove((Char *)cur() + 1, cur(), (bufLim_ - cur())*sizeof(Char));
    *(Char *)cur() = ch;
    advanceEnd(end() + 1);
    bufLim_ += 1;
  }
}

// Replace the buffer by a new one of NEWSIZE Chars.
// The old contents are copied, the base class is told about the new
// buffer via changeBuffer(), bufLim_ is rebased, and any left-over
// bytes are moved to the very end of the new buffer.  The callers in
// this file only ever ask for a size larger than the current one.
void ExternalInputSource::reallocateBuffer(size_t newSize)
{
  Char *newBuf = new Char[newSize];
  
  // On the first allocation buf_ is null and bufSize_ is 0; memcpy must
  // not be given a null pointer even for a zero length.
  if (bufSize_ > 0)
    memcpy(newBuf, buf_, bufSize_*sizeof(Char));
  bufSize_ = newSize;
  changeBuffer(newBuf, buf_);
  bufLim_ = newBuf + (bufLim_ - buf_);
  if (nLeftOver_ > 0) {
    // The bytes were copied along with everything else; find them at
    // the same byte distance from the buffer start and move them to the
    // tail of the new buffer.
    char *s = (char *)(newBuf + bufSize_) - nLeftOver_;
    memmove(s,
	    (char *)newBuf + (leftOver_ - (char *)buf_),
	    nLeftOver_);
    leftOver_ = s;
  }
  delete [] buf_;
  buf_ = newBuf;
}

RTTI_DEF1(ExternalInfoImpl, ExternalInfo)

// Take over the contents of parsedSysid (by swapping; the argument is
// left holding what this object's member held before, i.e. an empty
// value) and allocate one position entry per storage object.
// notrack_ starts out as the notrack setting of the first storage
// object; it is explicitly false when there are no storage objects so
// that it never holds an indeterminate value.
ExternalInfoImpl::ExternalInfoImpl(ParsedSystemId &parsedSysid)
: position_(parsedSysid.size()), currentIndex_(0), notrack_(false)
{
  parsedSysid.swap(parsedSysid_);
  if (parsedSysid_.size() > 0)
    notrack_ = parsedSysid_[0].notrack;
}

// Store the id of storage object i.  The string is swapped in, so on
// return ID holds the previously stored value.  Done under the mutex.
void ExternalInfoImpl::setId(size_t i, StringC &id)
{
  Mutex::Lock lock(&mutex_);
  id.swap(position_[i].id);
}

// Copy the id of storage object i into ID.  Done under the mutex; the
// cast removes const so that the mutex can be locked from a const
// member function.
void ExternalInfoImpl::getId(size_t i, StringC &id) const
{
  Mutex::Lock lock(&((ExternalInfoImpl *)this)->mutex_);
  id = position_[i].id;
}

// Record the decoder used for storage object i.  Done under the mutex.
void ExternalInfoImpl::setDecoder(size_t i, Decoder *decoder)
{
  Mutex::Lock lock(&mutex_);
  position_[i].decoder = decoder;
}

// Flag the current storage object as one into which RSs are inserted.
// convertOffset() uses the flag when computing byte indexes.
void ExternalInfoImpl::noteInsertedRSs()
{
  position_[currentIndex_].insertedRSs = 1;
}

// Record a record start at OFFSET.
// Unless tracking is disabled the offset is appended to rsList_.  If
// the offset coincides with the start of the current storage object
// (0 for the first one, otherwise the end offset of its predecessor),
// the object is flagged as starting with an RS.
void ExternalInfoImpl::noteRS(Offset offset)
{
  // We do the locking in OffsetOrderedList.
  if (!notrack_)
    rsList_.append(offset);
  Offset objectStart = 0;
  if (currentIndex_ != 0)
    objectStart = position_[currentIndex_ - 1].endOffset;
  if (offset == objectStart)
    position_[currentIndex_].startsWithRS = 1;
}

// Record that the current storage object ends at offset and make the next
// storage object (if there is one) the current one.
void ExternalInfoImpl::noteStorageObjectEnd(Offset offset)
{
  Mutex::Lock guard(&mutex_);
  ASSERT(currentIndex_ < position_.size());
  // The final entry must keep its endOffset of -1, so there is nothing
  // to do unless another storage object follows.
  if (currentIndex_ >= position_.size() - 1)
    return;
  position_[currentIndex_].endOffset = offset;
  currentIndex_ += 1;
  position_[currentIndex_].line1RS = rsList_.size();
  notrack_ = parsedSysid_[currentIndex_].notrack;
}

// Translate an offset within the entity into a location within one of its
// storage objects: the storage object spec and actual id, the offset
// within that storage object, and (where available) line number, column
// number and byte index.  Values that cannot be determined are set to
// (unsigned long)-1.  Returns false if off is Offset(-1), if there are no
// storage objects, or if no storage object at or before the one containing
// off has a non-empty id.
Boolean ExternalInfoImpl::convertOffset(Offset off,
					StorageObjectLocation &ret) const
{
  Mutex::Lock lock(&((ExternalInfoImpl *)this)->mutex_);
  if (off == Offset(-1) || position_.size() == 0)
    return false;
  // the last endOffset_ is Offset(-1), so this will
  // terminate
  int i;
  for (i = 0; off >= position_[i].endOffset; i++)
    ;
  // Fall back to the nearest earlier storage object that has an id.
  for (; position_[i].id.size() == 0; i--)
    if (i == 0)
      return false;
  ret.storageObjectSpec = &parsedSysid_[i];
  ret.actualStorageId = position_[i].id;
  Offset startOffset = i == 0 ? 0 : position_[i - 1].endOffset;
  ret.storageObjectOffset = off - startOffset;
  ret.byteIndex = ret.storageObjectOffset;
  if (parsedSysid_[i].notrack
      || parsedSysid_[i].records == StorageObjectSpec::asis) {
    // No line information is available; note that this branch returns
    // without consulting the decoder.
    ret.lineNumber = (unsigned long)-1;
    if (parsedSysid_[i].records != StorageObjectSpec::asis) {
      if (position_[i].insertedRSs)
	ret.byteIndex = (unsigned long)-1;
      else if (ret.byteIndex > 0 && position_[i].startsWithRS)
	ret.byteIndex--;	// first RS is inserted
    }
    ret.columnNumber = (unsigned long)-1;
    return true;
  }
  else {
    size_t line1RS = position_[i].line1RS;
    // line1RS is the number of RSs that had been recorded when storage
    // object i became current (see noteStorageObjectEnd).
    size_t j;
    Offset colStart;
    if (rsList_.findPreceding(off, j, colStart)) {
      if (position_[i].insertedRSs)
	ret.byteIndex -= j + 1 - line1RS;
      else if (ret.byteIndex > 0 && position_[i].startsWithRS)
	ret.byteIndex--;	// first RS is inserted
      j++;
      colStart++;
    }
    else {
      j = 0;
      colStart = 0;
    }
    // j is now the number of RSs that are before or on the current line
    // colStart is the offset of the first column
    ret.lineNumber = j - line1RS + 1 - position_[i].startsWithRS;
    // the offset of the first column
    if (colStart < startOffset)
      colStart = startOffset;
    // the RS that starts a line will be in column 0;
    // the first real character of a line will be column 1
    ret.columnNumber = 1 + off - colStart;
  }
  // Without a decoder, or if the decoder cannot convert it, the byte
  // index is unknown.
  if (!position_[i].decoder
      || !position_[i].decoder->convertOffset(ret.byteIndex))
    ret.byteIndex = (unsigned long)-1;
  return true;
}

// Return the i-th storage object specification (no bounds check here).
const StorageObjectSpec &ExternalInfoImpl::spec(size_t i) const
{
  return parsedSysid_[i];
}

// Return the number of storage object specifications.
size_t ExternalInfoImpl::nSpecs() const
{
  return parsedSysid_.size();
}

// Return the parsed system identifier taken over by the constructor.
const ParsedSystemId &ExternalInfoImpl::parsedSystemId() const
{
  return parsedSysid_;
}

// Defaults: no storage manager or coding system, tracking enabled,
// records=find, zapEof and search on.
// NOTE(review): codingSystemType is not initialized here; within this file
// FSIParser::setDefaults assigns it on every path.
StorageObjectSpec::StorageObjectSpec()
: storageManager(0), codingSystem(0), codingSystemName(0), notrack(0),
  records(find), zapEof(1), search(1)
{
}

// A new position has an endOffset of Offset(-1); convertOffset() relies on
// the last entry keeping this value to terminate its search.
StorageObjectPosition::StorageObjectPosition()
: endOffset(Offset(-1)), line1RS(0), startsWithRS(0), insertedRSs(0)
{
}

// Set up a parser over the system identifier str, whose characters are in
// idCharset.  defLoc, when non-null, supplies the storage object spec and
// actual storage id that are used as defaults (see setDefaults); em gives
// access to the storage managers and coding systems; messages are reported
// through mgr.
FSIParser::FSIParser(const StringC &str,
		     const CharsetInfo &idCharset,
		     Boolean isNdata,
		     const StorageObjectLocation *defLoc,
		     const EntityManagerImpl *em,
		     Messenger &mgr)
: str_(str),
  strIndex_(0),
  idCharset_(idCharset),
  isNdata_(isNdata),
  defSpec_(defLoc ? defLoc->storageObjectSpec : 0),
  defId_(defLoc ? &defLoc->actualStorageId : 0),
  em_(em),
  mgr_(mgr)
{
}

// Return the next character of the system identifier and advance past it,
// or -1 once the end of the string has been reached.
Xchar FSIParser::get()
{
  if (strIndex_ >= str_.size())
    return -1;
  Xchar next = str_[strIndex_];
  strIndex_ += 1;
  return next;
}

// Step back one position in the system identifier; does nothing when
// already at the start.
void FSIParser::unget()
{
  if (strIndex_ <= 0)
    return;
  strIndex_ -= 1;
}

// Case-insensitively compare str (in the identifier character set) with
// the keyword s (in the execution character set).
Boolean FSIParser::matchKey(const StringC &str, const char *s)
{
  if (strlen(s) != str.size())
    return false;
  for (size_t pos = 0; pos < str.size(); pos++) {
    // Each character must equal either the upper or the lower case form.
    if (idCharset_.execToDesc(toupper(s[pos])) == str[pos])
      continue;
    if (idCharset_.execToDesc(tolower(s[pos])) == str[pos])
      continue;
    return false;
  }
  return true;
}

// True if ch equals the identifier character set's code for the execution
// character execC.
Boolean FSIParser::matchChar(Xchar ch, char execC)
{
  return ch == idCharset_.execToDesc(execC);
}

// True if c is a separator character: space, RE (carriage return),
// RS (line feed) or horizontal tab.
Boolean FSIParser::isS(Xchar c)
{
  // The tab is written as an escape so that it cannot be mistaken for
  // (or silently turned into) a second test for space.
  return (matchChar(c, ' ')
	  || matchChar(c, '\r')
	  || matchChar(c, '\n')
	  || matchChar(c, '\t'));
}

// If c is a decimal digit, store its numeric value in weight and return 1;
// otherwise return 0 and leave weight untouched.
Boolean FSIParser::convertDigit(Xchar c, int &weight)
{
  static const char digits[] = "0123456789";
  for (int value = 0; value < 10; value++) {
    if (!matchChar(c, digits[value]))
      continue;
    weight = value;
    return 1;
  }
  return 0;
}

// Parse the system identifier from the current position, appending one
// StorageObjectSpec to parsedSysid for each storage object specification.
// If the text does not start with "<" followed by a recognized storage
// manager name (or NEUTRAL or CATALOG), it is treated as an informal
// system identifier.  Returns 0 if attribute parsing, identifier
// conversion or neutral transformation fails.
Boolean FSIParser::parse(ParsedSystemId &parsedSysid)
{
  size_t startIndex = strIndex_;
  if (!matchChar(get(), '<'))
    return handleInformal(startIndex, parsedSysid);
  // Collect the name following "<" up to a separator or ">".
  StringC key;
  for (;;) {
    Xchar c = get();
    if (c == -1)
      return handleInformal(startIndex, parsedSysid);
    if (isS(c) || matchChar(c, '>'))
      break;
    key += Char(c);
  }
  unget();
  if (matchKey(key, "CATALOG")) {
    // A catalog marker is followed by further identifier text, which is
    // parsed recursively.
    if (!setCatalogAttributes(parsedSysid))
      return 0;
    return parse(parsedSysid);
  }
  Boolean neutral;
  StorageManager *sm = lookupStorageType(key, neutral);
  if (!sm)
    return handleInformal(startIndex, parsedSysid);
  // One iteration per storage object specification; sm is the storage
  // manager named by the tag that introduced it.
  for (;;) {
    parsedSysid.resize(parsedSysid.size() + 1);
    StorageObjectSpec &sos = parsedSysid.back();
    sos.storageManager = sm;
    Xchar smcrd;
    Boolean fold;
    if (!setAttributes(sos, neutral, smcrd, fold))
      return 0;
    sm = 0;
    StringC id;
    Boolean hadData = 0;
    // Accumulate the storage object identifier until the end of the
    // string or the start of the next storage object specification.
    for (;;) {
      Xchar c = get();
      if (c == -1)
	break;
      if (matchChar(c, '<')) {
	hadData = 1;
	Char stago = c;
	key.resize(0);
	for (;;) {
	  c = get();
	  if (c == -1) {
	    id += stago;
	    id += key;
	    break;
	  }
	  if (isS(c) || matchChar(c, '>')) {
	    unget();
	    sm = lookupStorageType(key, neutral);
	    if (!sm) {
	      // Not a storage manager name: the text is part of the id.
	      id += stago;
	      id += key;
	    }
	    break;
	  }
	  key += c;
	}
	if (sm)
	  break;
      }
      else if (!((!hadData && matchChar(c, '\r')) // ignored RE
		 || matchChar(c, '\n') )) {	  // ignored RS
	hadData = 1;
	id += c;
      }
    }
    // Drop a trailing RE.
    if (id.size() > 0 && matchChar(id[id.size() - 1], '\r'))
      id.resize(id.size() - 1);
    uncharref(id);
    id.swap(sos.specId);
    if (!convertId(sos.specId, smcrd, sos.storageManager))
      return 0;
    if (neutral) {
      if (!sos.storageManager->transformNeutral(sos.specId, fold, mgr_))
	return 0;
    }
    if (sos.storageManager->resolveRelative(sos.baseId, sos.specId,
					    sos.search))
      sos.baseId.resize(0);
    // sm is non-null only if another storage object specification follows.
    if (!sm)
      break;
  }
  return 1;
}

// Treat the text from position index to the end of the string as an
// informal system identifier: append a single StorageObjectSpec for it,
// choosing the storage manager by guessing, then by inheritance from the
// default spec, then falling back to the default storage manager.
// Returns 0 if the identifier cannot be converted.
Boolean FSIParser::handleInformal(size_t index, ParsedSystemId &parsedSysid)
{
  parsedSysid.resize(parsedSysid.size() + 1);
  StorageObjectSpec &spec = parsedSysid.back();
  spec.specId.assign(str_.data() + index, str_.size() - index);
  spec.storageManager = em_->guessStorageType(spec.specId, idCharset_);
  if (!spec.storageManager)
    spec.storageManager
      = (defSpec_ && defSpec_->storageManager->inheritable()
         ? defSpec_->storageManager
         : em_->defaultStorageManager_.pointer());
  setDefaults(spec);
  if (!convertId(spec.specId, -1, spec.storageManager))
    return 0;
  // resolveRelative's result decides whether the base id is discarded.
  if (spec.storageManager->resolveRelative(spec.baseId,
                                           spec.specId,
                                           spec.search))
    spec.baseId.resize(0);
  return 1;
}

// Map a storage manager name to a storage manager.  NEUTRAL selects the
// default spec's storage manager when that is inheritable, otherwise the
// default storage manager, and sets neutral to 1.  For any other name the
// entity manager is consulted; neutral is set to 0 only if the lookup
// succeeds.  Returns 0 if the name is not recognized.
StorageManager *FSIParser::lookupStorageType(const StringC &key,
					     Boolean &neutral)
{
  if (!matchKey(key, "NEUTRAL")) {
    StorageManager *found = em_->lookupStorageType(key, idCharset_);
    if (found)
      neutral = 0;
    return found;
  }
  neutral = 1;
  if (defSpec_ && defSpec_->storageManager->inheritable())
    return defSpec_->storageManager;
  return em_->defaultStorageManager_.pointer();
}

// Parse the attributes of a CATALOG marker and append a corresponding Map
// to parsedSysid.maps.  The map type is catalogDocument unless a PUBLIC
// attribute with a value is given, in which case the type becomes
// catalogPublic and the (normalized) value is stored as the public id.
// Any other attribute is reported as unsupported.  Returns 0 on a syntax
// error, 1 otherwise.
Boolean FSIParser::setCatalogAttributes(ParsedSystemId &parsedSysid)
{
  Boolean hadPublic = 0;
  parsedSysid.maps.resize(parsedSysid.maps.size() + 1);
  parsedSysid.maps.back().type = ParsedSystemId::Map::catalogDocument;
  for (;;) {
    StringC token, value;
    Boolean gotValue;
    if (!parseAttribute(token, gotValue, value)) {
      mgr_.message(EntityManagerMessages::fsiSyntax, StringMessageArg(str_));
      return 0;
    }
    // An empty token means the closing ">" was reached.
    if (token.size() == 0)
      break;
    if (matchKey(token, "PUBLIC")) {
      if (hadPublic)
	mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
		     StringMessageArg(idCharset_.execToDesc("PUBLIC")));
      else if (gotValue) {
	convertMinimumLiteral(value, parsedSysid.maps.back().publicId);
	parsedSysid.maps.back().type = ParsedSystemId::Map::catalogPublic;
      }
      else
	mgr_.message(EntityManagerMessages::fsiMissingValue,
		     StringMessageArg(token));
      hadPublic = 1;
    }
    else
      mgr_.message(gotValue
		   ? EntityManagerMessages::fsiUnsupportedAttribute
		   : EntityManagerMessages::fsiUnsupportedAttributeToken,
		   StringMessageArg(token));
  }
  return 1;
}

void FSIParser::convertMinimumLiteral(const StringC &from, StringC &to)
{
  // Do just enough to ensure it can be reparsed.
  to.resize(0);
  for (size_t i = 0; i < from.size(); i++) {
    Char c = from[i];
    if (matchChar(c, '"') || matchChar(c, '#'))
      mgr_.message(EntityManagerMessages::fsiLookupChar, NumberMessageArg(c));
    else if (matchChar(c, ' ')) {
      if (to.size() && to[to.size() - 1] != c)
	to += c;
    }
    else
      to += c;
  }
  if (to.size() && matchChar(to[to.size() - 1], ' '))
    to.resize(to.size() - 1);
}

// FIXME This should be table driven.

// Parse the attributes of a storage object specification tag into sos.
// sos.storageManager must already be set; the remaining fields are first
// given their defaults by setDefaults().  neutral says whether the tag was
// NEUTRAL (FOLD/NOFOLD are only accepted then).  On return smcrd holds the
// storage manager character reference delimiter (-1 if none) and fold the
// folding choice (default 1).  Problems with individual attributes are
// reported through mgr_ and the attribute is otherwise ignored; only a
// syntax error makes the function return 0.
Boolean FSIParser::setAttributes(StorageObjectSpec &sos,
				 Boolean neutral,
				 Xchar &smcrd,
				 Boolean &fold)
{
  // The had* flags detect duplicate (or conflicting) attributes.
  Boolean hadBctf = 0;
  Boolean hadEncoding = 0;
  Boolean hadTracking = 0;
  Boolean hadSmcrd = 0;
  smcrd = -1;
  fold = 1;
  Boolean hadRecords = 0;
  Boolean hadBase = 0;
  Boolean hadZapeof = 0;
  Boolean hadSearch = 0;
  Boolean hadFold = 0;
  StorageObjectSpec::Records records;
  setDefaults(sos);
  for (;;) {
    StringC token, value;
    Boolean gotValue;
    if (!parseAttribute(token, gotValue, value)) {
      mgr_.message(EntityManagerMessages::fsiSyntax, StringMessageArg(str_));
      return 0;
    }
    // An empty token means the closing ">" was reached.
    if (token.size() == 0)
      break;
    if (matchKey(token, "BCTF")) {
      if (sos.storageManager->requiredCodingSystem())
	mgr_.message(EntityManagerMessages::fsiBctfEncodingNotApplicable);
      else if (hadBctf)
	mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
		     StringMessageArg(token));
      else if (hadEncoding)
	mgr_.message(EntityManagerMessages::fsiBctfAndEncoding);
      else if (gotValue) {
	const char *codingSystemName;
	const InputCodingSystem *codingSystem
	  = em_->lookupCodingSystem(value, idCharset_, 1, codingSystemName);
	if (codingSystem) {
	  sos.codingSystem = codingSystem;
	  sos.codingSystemName = codingSystemName;
	  sos.codingSystemType = StorageObjectSpec::bctf;
	}
	else if (matchKey(value, "SAME")) {
	  // SAME: use the coding system of the default spec, or the entity
	  // manager's default when there is none.  Ignored for NDATA.
	  if (!isNdata_) {
	    if (defSpec_) {
	      sos.codingSystem = defSpec_->codingSystem;
	      sos.codingSystemName = defSpec_->codingSystemName;
	      sos.codingSystemType = defSpec_->codingSystemType;
	    }
	    else {
	      sos.codingSystem = em_->defaultCodingSystem_;
	      sos.codingSystemName = 0;
	      sos.codingSystemType = (em_->internalCharsetIsDocCharset_
				      ? StorageObjectSpec::bctf
				      : StorageObjectSpec::encoding);
	    }
	  }
	}
	else
	  mgr_.message(EntityManagerMessages::fsiUnknownBctf,
		       StringMessageArg(value));
      }
      else
	mgr_.message(EntityManagerMessages::fsiMissingValue,
		     StringMessageArg(token));
      hadBctf = 1;
    }
    else if (matchKey(token, "ENCODING")) {
      // Same handling as BCTF, except for the lookup flag and the
      // resulting coding system type.
      if (sos.storageManager->requiredCodingSystem())
	mgr_.message(EntityManagerMessages::fsiBctfEncodingNotApplicable);
      else if (hadEncoding)
	mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
		     StringMessageArg(token));
      else if (hadBctf)
	mgr_.message(EntityManagerMessages::fsiBctfAndEncoding);
      else if (gotValue) {
	const char *codingSystemName;
	const InputCodingSystem *codingSystem
	  = em_->lookupCodingSystem(value, idCharset_, 0, codingSystemName);
	if (codingSystem) {
	  sos.codingSystem = codingSystem;
	  sos.codingSystemName = codingSystemName;
	  sos.codingSystemType = StorageObjectSpec::encoding;
	}
	else if (matchKey(value, "SAME")) {
	  if (!isNdata_) {
	    if (defSpec_) {
	      sos.codingSystem = defSpec_->codingSystem;
	      sos.codingSystemName = defSpec_->codingSystemName;
	      sos.codingSystemType = defSpec_->codingSystemType;
	    }
	    else {
	      sos.codingSystem = em_->defaultCodingSystem_;
	      sos.codingSystemName = 0;
	      sos.codingSystemType = (em_->internalCharsetIsDocCharset_
				      ? StorageObjectSpec::bctf
				      : StorageObjectSpec::encoding);
	    }
	  }
	}
	else
	  mgr_.message(EntityManagerMessages::fsiUnknownEncoding,
		       StringMessageArg(value));
      }
      else
	mgr_.message(EntityManagerMessages::fsiMissingValue,
		     StringMessageArg(token));
      hadEncoding = 1;
    }
    else if (matchKey(token, "TRACKING")) {
      if (hadTracking)
	mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
		     StringMessageArg(token));
      else if (gotValue) {
	if (matchKey(value, "NOTRACK"))
	  sos.notrack = 1;
	else if (!matchKey(value, "TRACK"))
	  mgr_.message(EntityManagerMessages::fsiBadTracking,
		       StringMessageArg(value));
      }
      else
	mgr_.message(EntityManagerMessages::fsiMissingValue,
		     StringMessageArg(token));
      hadTracking = 1;
    }
    else if (matchKey(token, "ZAPEOF")) {
      // ZAPEOF may be an attribute name (with a value) or a bare token.
      if (sos.storageManager->requiredCodingSystem())
	mgr_.message(EntityManagerMessages::fsiZapeofNotApplicable);
      else if (hadZapeof)
	mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
		     StringMessageArg(token));
      else if (gotValue) {
	if (matchKey(value, "ZAPEOF"))
	  sos.zapEof = 1;
	else if (matchKey(value, "NOZAPEOF"))
	  sos.zapEof = 0;
	else
	  mgr_.message(EntityManagerMessages::fsiBadZapeof,
		       StringMessageArg(value));
      }
      else
	sos.zapEof = 1;
      hadZapeof = 1;
    }
    else if (matchKey(token, "NOZAPEOF")) {
      if (sos.storageManager->requiredCodingSystem())
	mgr_.message(EntityManagerMessages::fsiZapeofNotApplicable);
      else if (hadZapeof)
	mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
		     StringMessageArg(idCharset_.execToDesc("ZAPEOF")));
      else if (gotValue)
	mgr_.message(EntityManagerMessages::fsiValueAsName,
		     StringMessageArg(token));
      else
	sos.zapEof = 0;
      hadZapeof = 1;
    }
    else if (matchKey(token, "SEARCH")) {
      if (hadSearch)
	mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
		     StringMessageArg(token));
      else if (gotValue) {
	if (matchKey(value, "SEARCH"))
	  sos.search = 1;
	else if (matchKey(value, "NOSEARCH"))
	  sos.search = 0;
	else
	  mgr_.message(EntityManagerMessages::fsiBadSearch,
		       StringMessageArg(value));
      }
      else
	sos.search = 1;
      hadSearch = 1;
    }
    else if (matchKey(token, "NOSEARCH")) {
      if (hadSearch)
	mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
		     StringMessageArg(idCharset_.execToDesc("SEARCH")));
      else if (gotValue)
	mgr_.message(EntityManagerMessages::fsiValueAsName,
		     StringMessageArg(token));
      else
	sos.search = 0;
      hadSearch = 1;
    }
    else if (matchKey(token, "FOLD")) {
      if (!neutral)
	mgr_.message(EntityManagerMessages::fsiFoldNotNeutral);
      else if (hadFold)
	mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
		     StringMessageArg(token));
      else if (gotValue) {
	if (matchKey(value, "FOLD"))
	  fold = 1;
	else if (matchKey(value, "NOFOLD"))
	  fold = 0;
	else
	  mgr_.message(EntityManagerMessages::fsiBadFold,
		       StringMessageArg(value));
      }
      else
	fold = 1;
      hadFold = 1;
    }
    else if (matchKey(token, "NOFOLD")) {
      if (!neutral)
	mgr_.message(EntityManagerMessages::fsiFoldNotNeutral);
      else if (hadFold)
	mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
		     StringMessageArg(idCharset_.execToDesc("FOLD")));
      else if (gotValue)
	mgr_.message(EntityManagerMessages::fsiValueAsName,
		     StringMessageArg(token));
      else
	fold = 0;
      hadFold = 1;
    }
    else if (matchKey(token, "SMCRD")) {
      // The value must be empty (no delimiter) or a single character.
      if (hadSmcrd)
	mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
		     StringMessageArg(token));
      else if (gotValue) {
	if (value.size() == 0)
	  smcrd = -1;
	else if (value.size() == 1)
	  smcrd = value[0];
	else
	  mgr_.message(EntityManagerMessages::fsiBadSmcrd,
		       StringMessageArg(value));
      }
      else
	mgr_.message(EntityManagerMessages::fsiMissingValue,
		     StringMessageArg(token));
      hadSmcrd = 1;
    }
    else if (matchKey(token, "RECORDS")) {
      if (sos.storageManager->requiresCr())
	mgr_.message(EntityManagerMessages::fsiRecordsNotApplicable);
      else if (hadRecords)
	mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
		     StringMessageArg(token));
      else if (gotValue) {
	if (!lookupRecords(value, sos.records))
	  mgr_.message(EntityManagerMessages::fsiUnsupportedRecords,
		       StringMessageArg(value));
      }
      else
	mgr_.message(EntityManagerMessages::fsiMissingValue,
		     StringMessageArg(token));
      hadRecords = 1;
    }
    else if (matchKey(token, "SOIBASE")) {
      if (hadBase)
	mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
		     StringMessageArg(token));
      else if (gotValue)
	value.swap(sos.baseId);
      else {
	mgr_.message(EntityManagerMessages::fsiMissingValue,
		     StringMessageArg(token));
	sos.baseId.resize(0);
      }
      hadBase = 1;
    }
    else if (lookupRecords(token, records)) {
      // A record type name used as a bare token (FIND, ASIS, CR, LF, CRLF).
      if (sos.storageManager->requiresCr())
	mgr_.message(EntityManagerMessages::fsiRecordsNotApplicable);
      else if (hadRecords)
	mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
		     StringMessageArg(idCharset_.execToDesc("RECORDS")));
      else if (!gotValue)
	sos.records = records;
      else
	mgr_.message(EntityManagerMessages::fsiValueAsName,
		     StringMessageArg(token));
      hadRecords = 1;
    }
    else if (matchKey(token, "NOTRACK")) {
      if (hadTracking)
	mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
		     StringMessageArg(idCharset_.execToDesc("TRACKING")));
      else if (!gotValue)
	sos.notrack = 1;
      else
	mgr_.message(EntityManagerMessages::fsiValueAsName,
		     StringMessageArg(token));
      hadTracking = 1;
    }
    else if (matchKey(token, "TRACK")) {
      if (hadTracking)
	mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
		     StringMessageArg(idCharset_.execToDesc("TRACKING")));
      else if (gotValue)
	mgr_.message(EntityManagerMessages::fsiValueAsName,
		     StringMessageArg(token));
      hadTracking = 1;
    }
    else
      mgr_.message(gotValue
		   ? EntityManagerMessages::fsiUnsupportedAttribute
		   : EntityManagerMessages::fsiUnsupportedAttributeToken,
		   StringMessageArg(token));
  }
  if (hadBase && sos.baseId.size() > 0) {
    // NOTE(review): the result of convertId is not checked here; on
    // failure convertId returns before modifying the string.
    convertId(sos.baseId, smcrd, sos.storageManager);
    if (neutral) {
      if (!sos.storageManager->transformNeutral(sos.baseId, fold, mgr_))
	sos.baseId.resize(0);
    }
  }
  // RECORDS=ASIS turns ZAPEOF off unless ZAPEOF was given explicitly.
  if (!hadZapeof && hadRecords && sos.records == StorageObjectSpec::asis)
    sos.zapEof = 0;
  return 1;
}

// Names of the record types and the corresponding enumeration values;
// searched by recordsName() and lookupRecords().
FSIParser::RecordType FSIParser::recordTypeTable[] = {
  { "FIND", StorageObjectSpec::find },
  { "ASIS", StorageObjectSpec::asis },
  { "CR", StorageObjectSpec::cr },
  { "LF", StorageObjectSpec::lf },
  { "CRLF", StorageObjectSpec::crlf }
};

// Return the name that recordTypeTable gives for records, or 0 if the
// value does not appear in the table.
const char *FSIParser::recordsName(StorageObjectSpec::Records records)
{
  size_t idx = 0;
  while (idx < SIZEOF(recordTypeTable)
         && recordTypeTable[idx].value != records)
    idx++;
  if (idx < SIZEOF(recordTypeTable))
    return recordTypeTable[idx].name;
  return 0;
}

// If token names a record type (compared case-insensitively), store the
// corresponding value in result and return 1; otherwise return 0 and leave
// result untouched.
Boolean FSIParser::lookupRecords(const StringC &token,
				 StorageObjectSpec::Records &result)
{
  for (size_t idx = 0; idx < SIZEOF(recordTypeTable); idx++) {
    const RecordType &entry = recordTypeTable[idx];
    if (!matchKey(token, entry.name))
      continue;
    result = entry.value;
    return 1;
  }
  return 0;
}

// Fill in the default attribute values of sos, whose storageManager must
// already be set.  Defaults depend on the storage manager, on whether the
// entity is NDATA, and on the default storage object spec (defSpec_), if
// there is one.
void FSIParser::setDefaults(StorageObjectSpec &sos)
{
  // Record handling: forced to cr by the storage manager, otherwise asis
  // for NDATA or when the default spec is asis; else left unchanged.
  if (sos.storageManager->requiresCr())
    sos.records = StorageObjectSpec::cr;
  else if (isNdata_
	   || (defSpec_ && defSpec_->records == StorageObjectSpec::asis))
    sos.records = StorageObjectSpec::asis;
  if (isNdata_ || (defSpec_ && !defSpec_->zapEof))
    sos.zapEof = 0;
  // A base id is only inherited when the storage manager is the same.
  if (defSpec_ && defSpec_->storageManager == sos.storageManager) {
    if (defId_)
      sos.baseId = *defId_;
    else {
      sos.baseId = defSpec_->specId;
      sos.storageManager->resolveRelative(defSpec_->baseId,
					  sos.baseId,
					  0);
    }
  }
  sos.codingSystem = sos.storageManager->requiredCodingSystem();
  if (sos.codingSystem) {
    sos.zapEof = 0;		// hack
    sos.codingSystemType = StorageObjectSpec::special;
  }
  else {
    // Start from the entity manager's default coding system, then
    // override it for NDATA or from the default spec.
    sos.codingSystem = em_->defaultCodingSystem_;
    sos.codingSystemType
      = (em_->internalCharsetIsDocCharset_
         ? StorageObjectSpec::bctf
	 : StorageObjectSpec::encoding);
    if (isNdata_) {
      sos.codingSystem = em_->codingSystemKit_->identityInputCodingSystem();
      sos.codingSystemType = StorageObjectSpec::special;
    }
    else if (defSpec_) {
      sos.codingSystem = defSpec_->codingSystem;
      sos.codingSystemName = defSpec_->codingSystemName;
      sos.codingSystemType = defSpec_->codingSystemType;
    }
  }
}

// Parse one attribute of the form "token" or "token=value" from the
// current position.  Returns 1 with an empty token when the closing ">"
// is reached (gotValue is not set in that case).  Otherwise token receives
// the attribute name or bare token, gotValue says whether "=" and a value
// followed, and value receives the value, which may be a quoted literal.
// Returns 0 on a syntax error or if the string ends prematurely.
Boolean FSIParser::parseAttribute(StringC &token, Boolean &gotValue,
				  StringC &value)
{
  Xchar c = get();
  while (isS(c))
    c = get();
  if (c == -1) {
    return 0;
  }
  token.resize(0);
  if (matchChar(c, '>'))
    return 1;
  if (matchChar(c, '"') || matchChar(c, '\'') || matchChar(c, '='))
    return 0;
  // Collect the token up to a separator, ">" or "=".
  for (;;) {
    token += c;
    c = get();
    if (c == -1)
      return 0;
    if (isS(c))
      break;
    if (matchChar(c, '>') || matchChar(c, '='))
      break;
  }
  while (isS(c))
    c = get();
  if (c == -1)
    return 0;
  if (!matchChar(c, '=')) {
    // No value: push back the character that follows the token.
    unget();
    gotValue = 0;
    return 1;
  }
  gotValue = 1;
  value.resize(0);

  c = get();
  while (isS(c))
    c = get();
  if (matchChar(c, '>') || matchChar(c, '='))
    return 0;
  if (matchChar(c, '"') || matchChar(c, '\'')) {
    // Quoted literal: read up to the matching delimiter, dropping RS and
    // replacing RE and tab by a space, then resolve numeric character
    // references.
    Char lit = c;
    for (;;) {
      Xchar c = get();
      if (c == lit)
	break;
      if (c == -1)
	return 0;
      if (matchChar(c, '\n'))
	;
      else if (matchChar(c, '\r') || matchChar(c, '\t'))
	value += idCharset_.execToDesc(' ');
      else
	value += c;
    }
    uncharref(value);
  }
  else {
    // Unquoted value: ends at a separator (consumed) or at ">" or "="
    // (pushed back).
    for (;;) {
      value += c;
      c = get();
      if (c == -1)
	return 0;
      if (isS(c))
	break;
      if (matchChar(c, '>') || matchChar(c, '=')) {
	unget();
	break;
      }
    }
  }
  return 1;
}

// Replace, in place, each numeric character reference "&#" digits
// (optionally followed by ";") in str by the character with that number.
// All other characters are kept as they are.
void FSIParser::uncharref(StringC &str)
{
  size_t out = 0;		// next position to write
  size_t in = 0;		// next position to read
  while (in < str.size()) {
    int digit;
    const Boolean isReference = (matchChar(str[in], '&')
                                 && in + 2 < str.size()
                                 && matchChar(str[in + 1], '#')
                                 && convertDigit(str[in + 2], digit));
    if (!isReference) {
      str[out++] = str[in++];
      continue;
    }
    // digit already holds the value of the first digit.
    unsigned long number = digit;
    for (in += 3; in < str.size() && convertDigit(str[in], digit); in++)
      number = number*10 + digit;
    str[out++] = number;
    // The terminating ";" is optional.
    if (in < str.size() && matchChar(str[in], ';'))
      in++;
  }
  str.resize(out);
}

// Convert id, in place, from the identifier character set to the character
// set of the storage manager sm.  A character equal to smcrd followed by
// decimal digits (and an optional ";") is replaced by the character with
// that number, taken as is.  If sm has no character set, the remaining
// characters are copied unchanged.  Returns 0, leaving id unmodified, if a
// character cannot be converted.
Boolean FSIParser::convertId(StringC &id, Xchar smcrd,
			     const StorageManager *sm)
{
  const CharsetInfo *smCharset = sm->idCharset();
  StringC newId;
  size_t i = 0;
  while (i < id.size()) {
    UnivChar univ;
    WideChar wide;
    ISet<WideChar> wideSet;
    int digit;
    if (Xchar(id[i]) == smcrd
	&& i + 1 < id.size()
	&& convertDigit(id[i + 1], digit)) {
      // Storage manager character reference.
      i += 2;
      Char val = digit;
      while (i < id.size() && convertDigit(id[i], digit)) {
	val = val*10 + digit;
	i++;
      }
      newId += val;
      if (i < id.size() && matchChar(id[i], ';'))
	i++;
    }
    else if (smCharset) {
      if (!idCharset_.descToUniv(id[i++], univ))
	return 0;
      // RS is dropped; RE becomes the storage manager's RE string if it
      // has one.
      if (univ == UnivCharsetDesc::rs)
	;
      else if (univ == UnivCharsetDesc::re && sm->reString())
	newId += *sm->reString();
      else if (smCharset->univToDesc(univ, wide, wideSet) != 1
	       || wide > charMax)
	return 0;			// FIXME give error
      else
	newId += Char(wide);
    }
    else
      newId += id[i++];
  }
  newId.swap(id);
  return 1;
}

// Construct an empty parsed system identifier.
ParsedSystemId:: ParsedSystemId()
{
}

// Forward declaration; the definition follows ParsedSystemId::unparse.
static
void unparseSoi(const StringC &soi,
		const CharsetInfo *idCharset,
		const CharsetInfo &resultCharset,
		StringC &result,
		Boolean &needSmcrd);

// Build in result (expressed in resultCharset) a formal system identifier
// equivalent to this parsed system identifier: first a CATALOG marker for
// each catalogDocument or catalogPublic map, then for each storage object
// spec a tag with its non-default attributes followed by the storage
// object identifier.  isNdata selects the default record type (asis rather
// than find) that need not be written out.
void ParsedSystemId::unparse(const CharsetInfo &resultCharset,
			     Boolean isNdata,
			     StringC &result) const
{
  size_t len = size();
  result.resize(0);
  size_t i;
  for (i = 0; i < maps.size(); i++) {
    if (maps[i].type == Map::catalogDocument)
      result += resultCharset.execToDesc("<CATALOG>");
    else if (maps[i].type == Map::catalogPublic) {
      result += resultCharset.execToDesc("<CATALOG PUBLIC=\"");
      result += maps[i].publicId;
      result += resultCharset.execToDesc("\">");
    }
  }
  for (i = 0; i < len; i++) {
    const StorageObjectSpec &sos = (*this)[i];
    result += resultCharset.execToDesc('<');
    result += resultCharset.execToDesc(sos.storageManager->type());
    if (sos.notrack)
      result += resultCharset.execToDesc(" NOTRACK");
    if (!sos.search)
      result += resultCharset.execToDesc(" NOSEARCH");
    if (!sos.storageManager->requiresCr()
        && sos.records != (isNdata ? StorageObjectSpec::asis : StorageObjectSpec::find)) {
      result += resultCharset.execToDesc(' ');
      result += resultCharset.execToDesc(FSIParser::recordsName(sos.records));
    }
    // NOTE(review): NOZAPEOF is only written when a coding system name is
    // present -- confirm that this is intended.
    if (sos.codingSystemName && sos.codingSystemType != StorageObjectSpec::special) {
      if (!sos.zapEof)
	result += resultCharset.execToDesc(" NOZAPEOF");
      result += resultCharset.execToDesc(sos.codingSystemType == StorageObjectSpec::bctf
					 ? " BCTF="
					 : " ENCODING=");
      result += resultCharset.execToDesc(sos.codingSystemName);
    }
    Boolean needSmcrd = 0;
    if (sos.baseId.size() != 0) {
      result += resultCharset.execToDesc(" SOIBASE='");
      unparseSoi(sos.baseId,
		 sos.storageManager->idCharset(),
		 resultCharset,
		 result,
		 needSmcrd);
      result += resultCharset.execToDesc('\'');
    }
    // The identifier is unparsed into a temporary first, because whether
    // an SMCRD attribute is needed must be known before the tag is closed.
    StringC tem;
    unparseSoi(sos.specId,
	       sos.storageManager->idCharset(),
	       resultCharset,
	       tem,
	       needSmcrd);
    if (needSmcrd)
      result += resultCharset.execToDesc(" SMCRD='^'");
    result += resultCharset.execToDesc('>');
    result += tem;
  }
}

// Append to result a representation of the storage object identifier soi
// in resultCharset.  With no idCharset every character is written as a
// numeric character reference "&#N;".  Otherwise a character is written
// as "^N;" (and needSmcrd is set to 1) if it cannot be converted, lies
// outside the range 32..126 of the universal character set, or is one of
// "$", "`", "^" (and backslash unless MSDOS_FILENAMES is defined); the
// characters double quote, "#", apostrophe and "<" are written as "&#N;";
// all remaining characters are written directly.  needSmcrd is never
// reset to 0 here.
void unparseSoi(const StringC &soi,
		const CharsetInfo *idCharset,
		const CharsetInfo &resultCharset,
		StringC &result,
		Boolean &needSmcrd)
{
  if (!idCharset) {
    for (size_t i = 0; i < soi.size(); i++) {
      char buf[32];
      sprintf(buf, "&#%lu;", (unsigned long)soi[i]);
      result += resultCharset.execToDesc(buf);
    }
    return;
  }
  for (size_t i = 0; i < soi.size(); i++) {
    UnivChar univ;
    WideChar to;
    ISet<WideChar> toSet;
    if (!idCharset->descToUniv(soi[i], univ)
	|| univ >= 127
	|| univ < 32
	|| univ == 36		// $
	|| univ == 96		// `
#ifndef MSDOS_FILENAMES
	|| univ == 92		// backslash
#endif
	|| univ == 94		// ^
	|| resultCharset.univToDesc(univ, to, toSet) != 1) {
      // The number written is the character's code in the storage
      // manager's character set (soi[i]), not in resultCharset.
      needSmcrd = 1;
      char buf[32];
      sprintf(buf, "^%lu;", (unsigned long)soi[i]);
      result += resultCharset.execToDesc(buf);
    }
    else {
      switch (univ) {
      case 34:		// double quote
      case 35:		// #
      case 39:		// apostrophe
      case 60:		// <
	{
	  // Here the number is the character's code in resultCharset.
	  char buf[32];
	  sprintf(buf, "&#%lu;", (unsigned long)to);
	  result += resultCharset.execToDesc(buf);
	}
	break;
      default:
	result += Char(to);
	break;
      }
    }
  }
}

#ifdef SP_NAMESPACE
}
#endif