// See www.openfst.org for extensive documentation on this weighted // finite-state transducer library. // // Finite-State Transducer (FST) archive classes. #ifndef FST_EXTENSIONS_FAR_FAR_H_ #define FST_EXTENSIONS_FAR_FAR_H_ #include #include #include #include #include #include #include #include namespace fst { enum FarEntryType { FET_LINE, FET_FILE }; enum FarTokenType { FTT_SYMBOL, FTT_BYTE, FTT_UTF8 }; inline bool IsFst(const string &filename) { std::ifstream strm(filename, std::ios_base::in | std::ios_base::binary); if (!strm) return false; return IsFstHeader(strm, filename); } // FST archive header class class FarHeader { public: const string &ArcType() const { return arctype_; } const string &FarType() const { return fartype_; } bool Read(const string &filename) { FstHeader fsthdr; if (filename.empty()) { // Header reading unsupported on stdin. Assumes STList and StdArc. fartype_ = "stlist"; arctype_ = "standard"; return true; } else if (IsSTTable(filename)) { // Checks if STTable. ReadSTTableHeader(filename, &fsthdr); fartype_ = "sttable"; arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); return true; } else if (IsSTList(filename)) { // Checks if STList. ReadSTListHeader(filename, &fsthdr); fartype_ = "stlist"; arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); return true; } else if (IsFst(filename)) { // Checks if FST. std::ifstream istrm(filename, std::ios_base::in | std::ios_base::binary); fsthdr.Read(istrm, filename); fartype_ = "fst"; arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); return true; } return false; } private: string fartype_; string arctype_; }; enum FarType { FAR_DEFAULT = 0, FAR_STTABLE = 1, FAR_STLIST = 2, FAR_FST = 3, }; // This class creates an archive of FSTs. template class FarWriter { public: using Arc = A; // Creates a new (empty) FST archive; returns null on error. static FarWriter *Create(const string &filename, FarType type = FAR_DEFAULT); // Adds an FST to the end of an archive. Keys must be non-empty and // in lexicographic order. FSTs must have a suitable write method. virtual void Add(const string &key, const Fst &fst) = 0; virtual FarType Type() const = 0; virtual bool Error() const = 0; virtual ~FarWriter() {} protected: FarWriter() {} }; // This class iterates through an existing archive of FSTs. template class FarReader { public: using Arc = A; // Opens an existing FST archive in a single file; returns null on error. // Sets current position to the beginning of the achive. static FarReader *Open(const string &filename); // Opens an existing FST archive in multiple files; returns null on error. // Sets current position to the beginning of the achive. static FarReader *Open(const std::vector &filenames); // Resets current position to beginning of archive. virtual void Reset() = 0; // Sets current position to first entry >= key. Returns true if a match. virtual bool Find(const string &key) = 0; // Current position at end of archive? virtual bool Done() const = 0; // Move current position to next FST. virtual void Next() = 0; // Returns key at the current position. This reference is invalidated if // the current position in the archive is changed. virtual const string &GetKey() const = 0; // Returns pointer to FST at the current position. This is invalidated if // the current position in the archive is changed. virtual const Fst *GetFst() const = 0; virtual FarType Type() const = 0; virtual bool Error() const = 0; virtual ~FarReader() {} protected: FarReader() {} }; template class FstWriter { public: void operator()(std::ostream &strm, const Fst &fst) const { fst.Write(strm, FstWriteOptions()); } }; template class STTableFarWriter : public FarWriter { public: using Arc = A; static STTableFarWriter *Create(const string &filename) { auto *writer = STTableWriter, FstWriter>::Create(filename); return new STTableFarWriter(writer); } void Add(const string &key, const Fst &fst) final { writer_->Add(key, fst); } FarType Type() const final { return FAR_STTABLE; } bool Error() const final { return writer_->Error(); } private: explicit STTableFarWriter(STTableWriter, FstWriter> *writer) : writer_(writer) {} std::unique_ptr, FstWriter>> writer_; }; template class STListFarWriter : public FarWriter { public: using Arc = A; static STListFarWriter *Create(const string &filename) { auto *writer = STListWriter, FstWriter>::Create(filename); return new STListFarWriter(writer); } void Add(const string &key, const Fst &fst) final { writer_->Add(key, fst); } constexpr FarType Type() const final { return FAR_STLIST; } bool Error() const final { return writer_->Error(); } private: explicit STListFarWriter(STListWriter, FstWriter> *writer) : writer_(writer) {} std::unique_ptr, FstWriter>> writer_; }; template class FstFarWriter : public FarWriter { public: using Arc = A; explicit FstFarWriter(const string &filename) : filename_(filename), error_(false), written_(false) {} static FstFarWriter *Create(const string &filename) { return new FstFarWriter(filename); } void Add(const string &key, const Fst &fst) final { if (written_) { LOG(WARNING) << "FstFarWriter::Add: only one FST supported," << " subsequent entries discarded."; } else { error_ = !fst.Write(filename_); written_ = true; } } constexpr FarType Type() const final { return FAR_FST; } bool Error() const final { return error_; } ~FstFarWriter() final {} private: string filename_; bool error_; bool written_; }; template FarWriter *FarWriter::Create(const string &filename, FarType type) { switch (type) { case FAR_DEFAULT: if (filename.empty()) return STListFarWriter::Create(filename); case FAR_STTABLE: return STTableFarWriter::Create(filename); case FAR_STLIST: return STListFarWriter::Create(filename); case FAR_FST: return FstFarWriter::Create(filename); default: LOG(ERROR) << "FarWriter::Create: Unknown FAR type"; return nullptr; } } template class FstReader { public: Fst *operator()(std::istream &strm) const { return Fst::Read(strm, FstReadOptions()); } }; template class STTableFarReader : public FarReader { public: using Arc = A; static STTableFarReader *Open(const string &filename) { auto *reader = STTableReader, FstReader>::Open(filename); if (!reader || reader->Error()) return nullptr; return new STTableFarReader(reader); } static STTableFarReader *Open(const std::vector &filenames) { auto *reader = STTableReader, FstReader>::Open(filenames); if (!reader || reader->Error()) return nullptr; return new STTableFarReader(reader); } void Reset() final { reader_->Reset(); } bool Find(const string &key) final { return reader_->Find(key); } bool Done() const final { return reader_->Done(); } void Next() final { return reader_->Next(); } const string &GetKey() const final { return reader_->GetKey(); } const Fst *GetFst() const final { return reader_->GetEntry(); } constexpr FarType Type() const final { return FAR_STTABLE; } bool Error() const final { return reader_->Error(); } private: explicit STTableFarReader(STTableReader, FstReader> *reader) : reader_(reader) {} std::unique_ptr, FstReader>> reader_; }; template class STListFarReader : public FarReader { public: using Arc = A; static STListFarReader *Open(const string &filename) { auto *reader = STListReader, FstReader>::Open(filename); if (!reader || reader->Error()) return nullptr; return new STListFarReader(reader); } static STListFarReader *Open(const std::vector &filenames) { auto *reader = STListReader, FstReader>::Open(filenames); if (!reader || reader->Error()) return nullptr; return new STListFarReader(reader); } void Reset() final { reader_->Reset(); } bool Find(const string &key) final { return reader_->Find(key); } bool Done() const final { return reader_->Done(); } void Next() final { return reader_->Next(); } const string &GetKey() const final { return reader_->GetKey(); } const Fst *GetFst() const final { return reader_->GetEntry(); } constexpr FarType Type() const final { return FAR_STLIST; } bool Error() const final { return reader_->Error(); } private: explicit STListFarReader(STListReader, FstReader> *reader) : reader_(reader) {} std::unique_ptr, FstReader>> reader_; }; template class FstFarReader : public FarReader { public: using Arc = A; static FstFarReader *Open(const string &filename) { std::vector filenames; filenames.push_back(filename); return new FstFarReader(filenames); } static FstFarReader *Open(const std::vector &filenames) { return new FstFarReader(filenames); } explicit FstFarReader(const std::vector &filenames) : keys_(filenames), has_stdin_(false), pos_(0), error_(false) { std::sort(keys_.begin(), keys_.end()); streams_.resize(keys_.size(), 0); for (size_t i = 0; i < keys_.size(); ++i) { if (keys_[i].empty()) { if (!has_stdin_) { streams_[i] = &std::cin; has_stdin_ = true; } else { FSTERROR() << "FstFarReader::FstFarReader: standard input should " "only appear once in the input file list"; error_ = true; return; } } else { streams_[i] = new std::ifstream( keys_[i], std::ios_base::in | std::ios_base::binary); if (streams_[i]->fail()) { FSTERROR() << "FstFarReader::FstFarReader: Error reading file: " << filenames[i]; error_ = true; return; } } } if (pos_ >= keys_.size()) return; ReadFst(); } void Reset() final { if (has_stdin_) { FSTERROR() << "FstFarReader::Reset: Operation not supported on standard input"; error_ = true; return; } pos_ = 0; ReadFst(); } bool Find(const string &key) final { if (has_stdin_) { FSTERROR() << "FstFarReader::Find: Operation not supported on standard input"; error_ = true; return false; } pos_ = 0; // TODO ReadFst(); return true; } bool Done() const final { return error_ || pos_ >= keys_.size(); } void Next() final { ++pos_; ReadFst(); } const string &GetKey() const final { return keys_[pos_]; } const Fst *GetFst() const final { return fst_.get(); } constexpr FarType Type() const final { return FAR_FST; } bool Error() const final { return error_; } ~FstFarReader() final { for (size_t i = 0; i < keys_.size(); ++i) { if (streams_[i] != &std::cin) { delete streams_[i]; } } } private: void ReadFst() { fst_.reset(); if (pos_ >= keys_.size()) return; streams_[pos_]->seekg(0); fst_.reset(Fst::Read(*streams_[pos_], FstReadOptions())); if (!fst_) { FSTERROR() << "FstFarReader: Error reading Fst from: " << keys_[pos_]; error_ = true; } } std::vector keys_; std::vector streams_; bool has_stdin_; size_t pos_; mutable std::unique_ptr> fst_; mutable bool error_; }; template FarReader *FarReader::Open(const string &filename) { if (filename.empty()) return STListFarReader::Open(filename); else if (IsSTTable(filename)) return STTableFarReader::Open(filename); else if (IsSTList(filename)) return STListFarReader::Open(filename); else if (IsFst(filename)) return FstFarReader::Open(filename); return nullptr; } template FarReader *FarReader::Open(const std::vector &filenames) { if (!filenames.empty() && filenames[0].empty()) return STListFarReader::Open(filenames); else if (!filenames.empty() && IsSTTable(filenames[0])) return STTableFarReader::Open(filenames); else if (!filenames.empty() && IsSTList(filenames[0])) return STListFarReader::Open(filenames); else if (!filenames.empty() && IsFst(filenames[0])) return FstFarReader::Open(filenames); return nullptr; } } // namespace fst #endif // FST_EXTENSIONS_FAR_FAR_H_