// See www.openfst.org for extensive documentation on this weighted // finite-state transducer library. // // Common classes for Multi Pushdown Transducer (MPDT) expansion/traversal. #ifndef FST_EXTENSIONS_MPDT_MPDT_H_ #define FST_EXTENSIONS_MPDT_MPDT_H_ #include #include #include #include #include #include namespace fst { enum MPdtType { MPDT_READ_RESTRICT, // Can only read from first empty stack MPDT_WRITE_RESTRICT, // Can only write to first empty stack MPDT_NO_RESTRICT, // No read-write restrictions }; namespace internal { // PLEASE READ THIS CAREFULLY: // // When USEVECTOR is set, the stack configurations --- the statewise // representation of the StackId's for each substack --- is stored in a vector. // I would like to do this using an array for efficiency reasons, thus the // definition of StackConfig below. However, while this *works* in that tests // pass, etc. It causes a memory leak in the compose and expand tests, evidently // in the map[] that is being used to store the mapping between these // StackConfigs and the external StackId that the caller sees. There are no // memory leaks when I use a vector, only with this StackId. Why there should be // memory leaks given that I am not mallocing anything is a mystery. In case you // were wondering, clearing the map at the end does not help. template struct StackConfig { StackConfig() : array_() {} StackConfig(const StackConfig &config) { array_ = config.array_; } StackId &operator[](const int index) { return array_[index]; } const StackId &operator[](const int index) const { return array_[index]; } StackConfig &operator=(const StackConfig &config) { if (this == &config) return *this; array_ = config.array_; return *this; } std::array array_; }; template class CompConfig { public: using Config = StackConfig; bool operator()(const Config &x, const Config &y) const { for (Level level = 0; level < nlevels; ++level) { if (x.array_[level] < y.array_[level]) { return true; } else if (x.array_[level] > y.array_[level]) { return false; } } return false; } }; // Defines the KeyPair type used as the key to MPdtStack.paren_id_map_. The hash // function is provided as a separate struct to match templating syntax. template struct KeyPair { Level level; size_t underlying_id; KeyPair(Level level, size_t id) : level(level), underlying_id(id) {} inline bool operator==(const KeyPair &rhs) const { return level == rhs.level && underlying_id == rhs.underlying_id; } }; template struct KeyPairHasher { inline size_t operator()(const KeyPair &keypair) const { return std::hash()(keypair.level) ^ (std::hash()(keypair.underlying_id) << 1); } }; template class MPdtStack { public: using Label = Level; using Config = StackConfig; using ConfigToStackId = std::map>; MPdtStack(const std::vector> &parens, const std::vector &assignments); MPdtStack(const MPdtStack &mstack); ~MPdtStack() { for (Level level = 0; level < nlevels; ++level) delete stacks_[level]; } StackId Find(StackId stack_id, Label label); // For now we do not implement Pop since this is needed only for // ShortestPath(). // For Top we find the first non-empty config, and find the paren ID of that // (or -1) if there is none, then map that to the external stack_id to return. ssize_t Top(StackId stack_id) const { if (stack_id == -1) return -1; const auto config = InternalStackIds(stack_id); Level level = 0; StackId underlying_id = -1; for (; level < nlevels; ++level) { if (!Empty(config, level)) { underlying_id = stacks_[level]->Top(config[level]); break; } } if (underlying_id == -1) return -1; const auto it = paren_id_map_.find(KeyPair(level, underlying_id)); if (it == paren_id_map_.end()) return -1; // NB: shouldn't happen. return it->second; } ssize_t ParenId(Label label) const { const auto it = paren_map_.find(label); return it != paren_map_.end() ? it->second : -1; } // TODO(rws): For debugging purposes only: remove later. string PrintConfig(const Config &config) const { string result = "["; for (Level i = 0; i < nlevels; ++i) { char s[128]; snprintf(s, sizeof(s), "%d", config[i]); result += string(s); if (i < nlevels - 1) result += ", "; } result += "]"; return result; } bool Error() { return error_; } // Each component stack has an internal stack ID for a given configuration and // label. // This function maps a configuration of those to the stack ID the caller // sees. inline StackId ExternalStackId(const Config &config) { const auto it = config_to_stack_id_map_.find(config); StackId result; if (it == config_to_stack_id_map_.end()) { result = next_stack_id_++; config_to_stack_id_map_.insert( std::pair(config, result)); stack_id_to_config_map_[result] = config; } else { result = it->second; } return result; } // Retrieves the internal stack ID for a corresponding external stack ID. inline const Config InternalStackIds(StackId stack_id) const { auto it = stack_id_to_config_map_.find(stack_id); if (it == stack_id_to_config_map_.end()) { it = stack_id_to_config_map_.find(-1); } return it->second; } inline bool Empty(const Config &config, Level level) const { return config[level] <= 0; } inline bool AllEmpty(const Config &config) { for (Level level = 0; level < nlevels; ++level) { if (!Empty(config, level)) return false; } return true; } bool error_; Label min_paren_; Label max_paren_; // Stores level of each paren. std::unordered_map paren_levels_; std::vector> parens_; // As in pdt.h. std::unordered_map paren_map_; // As in pdt.h. // Maps between internal paren_id and external paren_id. std::unordered_map, size_t, KeyPairHasher> paren_id_map_; // Maps between internal stack ids and external stack id. ConfigToStackId config_to_stack_id_map_; std::unordered_map stack_id_to_config_map_; StackId next_stack_id_; // Array of stacks. PdtStack *stacks_[nlevels]; }; template MPdtStack::MPdtStack( const std::vector> &parens, // NB: Label = Level. const std::vector &assignments) : error_(false), min_paren_(kNoLabel), max_paren_(kNoLabel), parens_(parens), next_stack_id_(1) { using Label = Level; if (parens.size() != assignments.size()) { FSTERROR() << "MPdtStack: Parens of different size from assignments"; error_ = true; return; } std::vector> vectors[nlevels]; for (Level i = 0; i < assignments.size(); ++i) { // Assignments here start at 0, so assuming the human-readable version has // them starting at 1, we should subtract 1 here const auto level = assignments[i] - 1; if (level < 0 || level >= nlevels) { FSTERROR() << "MPdtStack: Specified level " << level << " out of bounds"; error_ = true; return; } const auto &pair = parens[i]; vectors[level].push_back(pair); paren_levels_[pair.first] = level; paren_levels_[pair.second] = level; paren_map_[pair.first] = i; paren_map_[pair.second] = i; const KeyPair key(level, vectors[level].size() - 1); paren_id_map_[key] = i; if (min_paren_ == kNoLabel || pair.first < min_paren_) { min_paren_ = pair.first; } if (pair.second < min_paren_) min_paren_ = pair.second; if (max_paren_ == kNoLabel || pair.first > max_paren_) { max_paren_ = pair.first; } if (pair.second > max_paren_) max_paren_ = pair.second; } using Config = StackConfig; Config neg_one; Config zero; for (Level level = 0; level < nlevels; ++level) { stacks_[level] = new PdtStack(vectors[level]); neg_one[level] = -1; zero[level] = 0; } config_to_stack_id_map_[neg_one] = -1; config_to_stack_id_map_[zero] = 0; stack_id_to_config_map_[-1] = neg_one; stack_id_to_config_map_[0] = zero; } template MPdtStack::MPdtStack( const MPdtStack &mstack) : error_(mstack.error_), min_paren_(mstack.min_paren_), max_paren_(mstack.max_paren_), parens_(mstack.parens_), next_stack_id_(mstack.next_stack_id_) { for (const auto &kv : mstack.paren_levels_) { paren_levels_[kv.first] = kv.second; } for (const auto &paren : mstack.parens_) parens_.push_back(paren); for (const auto &kv : mstack.paren_map_) { paren_map_[kv.first] = kv.second; } for (const auto &kv : mstack.paren_id_map_) { paren_id_map_[kv.first] = kv.second; } for (auto it = mstack.config_to_stack_id_map_.begin(); it != mstack.config_to_stack_id_map_.end(); ++it) { config_to_stack_id_map_[it->first] = it->second; } for (const auto &kv : mstack.stack_id_to_config_map_) { using Config = StackConfig; const Config config(kv.second); stack_id_to_config_map_[kv.first] = config; } for (Level level = 0; level < nlevels; ++level) stacks_[level] = mstack.stacks_[level]; } template StackId MPdtStack::Find(StackId stack_id, Level label) { // Non-paren. if (min_paren_ == kNoLabel || label < min_paren_ || label > max_paren_) { return stack_id; } const auto it = paren_map_.find(label); // Non-paren. if (it == paren_map_.end()) return stack_id; ssize_t paren_id = it->second; // Gets the configuration associated with this stack_id. const auto config = InternalStackIds(stack_id); // Gets the level. const auto level = paren_levels_.find(label)->second; // If the label is an open paren we push: // // 1) if the restrict type is not MPDT_WRITE_RESTRICT, or // 2) the restrict type is MPDT_WRITE_RESTRICT, and all the stacks above the // level are empty. if (label == parens_[paren_id].first) { // Open paren. if (restrict == MPDT_WRITE_RESTRICT) { for (Level upper_level = 0; upper_level < level; ++upper_level) { if (!Empty(config, upper_level)) return -1; // Non-empty stack blocks. } } // If the label is an close paren we pop: // // 1) if the restrict type is not MPDT_READ_RESTRICT, or // 2) the restrict type is MPDT_READ_RESTRICT, and all the stacks above the // level are empty. } else if (restrict == MPDT_READ_RESTRICT) { for (Level lower_level = 0; lower_level < level; ++lower_level) { if (!Empty(config, lower_level)) return -1; // Non-empty stack blocks. } } const auto nid = stacks_[level]->Find(config[level], label); // If the new ID is -1, that means that there is no valid transition at the // level we want. if (nid == -1) { return -1; } else { using Config = StackConfig; Config nconfig(config); nconfig[level] = nid; return ExternalStackId(nconfig); } } } // namespace internal } // namespace fst #endif // FST_EXTENSIONS_MPDT_MPDT_H_