Logo Search packages:      
Sourcecode: speech-tools version File versions  Download package

wfst_ops.cc

/*************************************************************************/
/*                                                                       */
/*                Centre for Speech Technology Research                  */
/*                     University of Edinburgh, UK                       */
/*                         Copyright (c) 1997                            */
/*                        All Rights Reserved.                           */
/*                                                                       */
/*  Permission is hereby granted, free of charge, to use and distribute  */
/*  this software and its documentation without restriction, including   */
/*  without limitation the rights to use, copy, modify, merge, publish,  */
/*  distribute, sublicense, and/or sell copies of this work, and to      */
/*  permit persons to whom this work is furnished to do so, subject to   */
/*  the following conditions:                                            */
/*   1. The code must retain the above copyright notice, this list of    */
/*      conditions and the following disclaimer.                         */
/*   2. Any modifications must be clearly marked as such.                */
/*   3. Original authors' names are not deleted.                         */
/*   4. The authors' names are not used to endorse or promote products   */
/*      derived from this software without specific prior written        */
/*      permission.                                                      */
/*                                                                       */
/*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
/*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
/*  THIS SOFTWARE.                                                       */
/*                                                                       */
/*************************************************************************/
/*                     Author :  Alan W Black                            */
/*                     Date   :  November 1997                           */
/*-----------------------------------------------------------------------*/
/*                                                                       */
/* Basic WFST operations: minimization, determinization, intersection,   */
/*   union, composition                                                  */
/*                                                                       */
/*=======================================================================*/
#include <iostream.h>
#include <stdlib.h>
#include "EST_WFST.h"
#include "wfst_aux.h"
#include "EST_String.h"
#include "EST_TList.h"
#include "EST_TKVL.h"
#include "EST_THash.h"

Declare_TList_T(EST_WFST_MultiState *,EST_WFST_MultiStateP)

  // Declare_KVL(int, EST_IList)

      typedef EST_TKVI<int, EST_IList> KVI_int_EST_IList_t; 
      typedef EST_TKVL<int, EST_IList> KVL_int_EST_IList_t; 
      
      static EST_IList int_EST_IList_kv_def_EST_IList_s; 
      static int int_EST_IList_kv_def_int_s; 
      
      EST_IList *EST_TKVL< int, EST_IList >::default_val=&int_EST_IList_kv_def_EST_IList_s; 
      int *EST_TKVL< int, EST_IList >::default_key=&int_EST_IList_kv_def_int_s; 
      
      Declare_TList_N(KVI_int_EST_IList_t, 0)


#if defined(INSTANTIATE_TEMPLATES)
#include "../base_class/EST_TList.cc"

  Instantiate_TList_T_MIN(EST_WFST_MultiState *,EST_WFST_MultiStateP)

#include "../base_class/EST_TKVL.cc"

        //  Instantiate_KVL(int, EST_IList)

        template class EST_TKVL<int, EST_IList>; 
        template class EST_TKVI<int, EST_IList>; 
// ostream &operator<<(ostream &s, EST_TKVI< int , EST_IList > const &i){  return s << i.k << "\t" << i.v << "\n"; } 
//    ostream& operator << (ostream& s,  EST_TKVL< int , EST_IList > const &l) {EST_Litem *p; for (p = l.list.head(); p ; p = next(p)) s << l.list(p).k << "\t" << l.list(p).v << endl; return s;} 
        Instantiate_TIterator_T(KVL_int_EST_IList_t, KVL_int_EST_IList_t::IPointer_k, int, KVL_int_EST_IList_kitt) 
        Instantiate_TStructIterator_T(KVL_int_EST_IList_t, KVL_int_EST_IList_t::IPointer, KVI_int_EST_IList_t, KVL_int_EST_IList_itt) 
        Instantiate_TIterator_T(KVL_int_EST_IList_t, KVL_int_EST_IList_t::IPointer, KVI_int_EST_IList_t, KVL_int_EST_IList_itt) 

        // Instantiate_TList(KVI_int_EST_IList_t)

      template class EST_TList< TLIST_KVI_int_EST_IList_t_VAL >; 
      template class EST_TItem< TLIST_KVI_int_EST_IList_t_VAL >; 
      template const char *error_name(EST_TList< KVI_int_EST_IList_t > val); 

        Instantiate_TIterator_T( EST_TList<KVI_int_EST_IList_t>, EST_TList<KVI_int_EST_IList_t>::IPointer, KVI_int_EST_IList_t, TList_KVI_int_EST_IList_t_itt);


#endif

typedef EST_TList<EST_WFST_MultiState *> Agenda;

static enum wfst_state_type intersect_state_type(wfst_list &wl,
                                     EST_WFST_MultiState *ms);
static int check_distinguished(const EST_WFST &nmwfst,
                         int p, int q,
                         wfst_marks &marks,
                         wfst_assumes &assumptions);

void EST_WFST_MultiState::add(int i)
{
    // If of set type only add it if its not already there
    EST_Litem *p;

    if (p_type == wfst_ms_set)
      for (p=head(); p != 0; p=next(p))
      {
          if ((*this)(p) == i)
            return;
          else if (i < (*this)(p))  // keep the list ordered 
          {
            insert_before(p,i);
            return;
          }
      }

    append(i);
}

int multistate_index(EST_WFST_MultiStateIndex &index,
                 EST_WFST_MultiState *ms,int proposed) 
{
    // Returns proposed if ms is not already in index, otherwise
    // returns the value that was proposed when it was first new.

    // I'll have to make this more efficient in future.
    EST_String istring("");
    EST_Litem *p;
    int ns,found;

    for (p=ms->head(); p != 0; p = next(p))
      istring += itoString((*ms)(p)) + " ";

    ns = index.val(istring,found);
    if (found)
      return ns;
    else
    {
        index.add_item(istring,proposed);
      return proposed;
    }
}

static int pair_check(EST_THash<int,int> &pairs_done, int i, int o, int odim)
{
    int p;
    int found;

    p = (i*odim)+o;  // unique number representing i/o pair

    pairs_done.val(p,found);
    if (!found)
    {   // first time seeing this pair
      pairs_done.add_item(p,1);
      return 0;
    }
    return 1;

}

00164 void EST_WFST::determinize(const EST_WFST &ndwfst)
{
    // Determinise a non-deterministic WFST
    EST_WFST_MultiState *start_state,*nms,*current;
    int ns;
    Agenda multistate_agenda;
    EST_WFST_MultiStateIndex index(100);
    int i,o, new_name;
    int c=0;
    EST_Litem *sp, *tp;

    clear();
    p_in_symbols.copy(ndwfst.p_in_symbols);
    p_out_symbols.copy(ndwfst.p_out_symbols);

    // Create a starting state and add it to this WFST
    start_state = new EST_WFST_MultiState(wfst_ms_set);
    start_state->add(ndwfst.start_state());
    ndwfst.add_epsilon_reachable(start_state);
    
    p_start_state = add_state(ndwfst.ms_type(start_state));
    start_state->set_name(p_start_state);

    multistate_agenda.append(start_state);  // initialize agenda

    while (multistate_agenda.length() > 0)
    {
      EST_THash<int,int> pairs_done(100);
      current = multistate_agenda.first();
      multistate_agenda.remove(multistate_agenda.head());
      if ((c % 100) == 99)
          cout << "Determinizing " << summary() << " Agenda " 
            << multistate_agenda.length() << endl;
      c++;

      for (sp=current->head(); sp != 0; sp=next(sp))
      {
          const EST_WFST_State *s = ndwfst.state((*current)(sp));
          for (tp=s->transitions.head(); tp != 0; tp = next(tp))
          {
            i = s->transitions(tp)->in_symbol();
            o = s->transitions(tp)->out_symbol();
            // Need to check if i/o has already been proposed
            if (pair_check(pairs_done,i,o,p_out_symbols.length()) == 1)
                continue;  // already prosed those
//    for (i=0; i < p_in_symbols.length(); i++)
//    {   // start at 2 to skip any and epsilon characters -- hmm bad
//        for (o=0; o < p_out_symbols.length(); o++)
//        {
            if ((i==o) && (i==0)) 
                continue;  // don't deal here with epsilon transitions
            nms = apply_multistate(ndwfst,current,i,o);
            if ((nms->length() == 0) ||
                (ndwfst.ms_type(nms) == wfst_error))
            {
                delete nms;
                continue;    // no state to go to
            }
            new_name = multistate_index(index,nms,p_num_states);
            if (new_name == p_num_states)  // genuinely new
            {   // create a real state and add it to the agenda
                ns = add_state(ndwfst.ms_type(nms));
                nms->set_name(ns);
                multistate_agenda.append(nms);  
            }
            else
            {
                nms->set_name(new_name);
                delete nms;
            }

            // Add new transition to current state
            p_states[current->name()]
                ->add_transition(nms->weight(),
                             nms->name(),
                             i,o);
            
          }
      }
      delete current;

      // Probably want some progress summary
    }

}

00250 EST_WFST_MultiState *EST_WFST::apply_multistate(const EST_WFST &wfst,
                                    EST_WFST_MultiState *ms,
                                    int in, int out) const
{
    // Apply in and out to ms and find all new states it becomes
    EST_Litem *p;
    EST_WFST_MultiState *new_ms = new EST_WFST_MultiState(wfst_ms_set);
    
    new_ms->clear();
    
    for (p=ms->head(); p != 0; p=next(p))
      // Add all new possible states from ms(p) given in/out
      wfst.transition_all((*ms)(p),in,out,new_ms);

    // Add epsilon reachable states from any states in multistates
    wfst.add_epsilon_reachable(new_ms);

    return new_ms;

}

00271 enum wfst_state_type EST_WFST::ms_type(EST_WFST_MultiState *ms) const
{
    // Returns wfst_error if ms contains an error state, wfst_final
    // if there is at least one final and wfst_non_final
    EST_Litem *p;
    enum wfst_state_type r = wfst_nonfinal;

    for (p=ms->head(); p != 0; p = next(p))
      if (p_states((*ms)(p))->type() == wfst_error)
          return wfst_error;
      else if (p_states((*ms)(p))->type() == wfst_licence)
          // wfst_licence states are generated in KK compilation
          r = wfst_licence;
      else if ((p_states((*ms)(p))->type() == wfst_final) &&
             (r != wfst_licence))
          r = wfst_final;

    if (r == wfst_licence)
      return wfst_nonfinal;
    else 
      return r;
}

00294 void EST_WFST::transition_all(int state,
                        int in,
                        int out,
                        EST_WFST_MultiState *ms) const
{
    // Find all possible new states from state given in/out
    const EST_WFST_State *s = p_states(state);
    EST_Litem *i;

    for (i=s->transitions.head(); i != 0; i=next(i))
    {
      if ((in == s->transitions(i)->in_symbol()) &&
          (out == s->transitions(i)->out_symbol()))
          ms->add(s->transitions(i)->state());
    }
}

static int is_a_member(const EST_IList &ii, int i)
{
    for (EST_Litem *p=ii.head(); p != 0; p=next(p))
      if (ii(p) == i)
          return TRUE;
    return FALSE;
}

00319 void EST_WFST::add_epsilon_reachable(EST_WFST_MultiState *ms) const
{
    // As ms->add() adds in order we need to copy to a new list and append
    // to it any new eplison accessible states
    EST_Litem *p;
    EST_IList ii;
    int ie = p_in_symbols.name(get_c_string(epsilon_label()));
    int oe = p_out_symbols.name(get_c_string(epsilon_label()));

    for (p=ms->head(); p != 0; p=next(p))
      ii.append((*ms)(p));

    for (p=ii.head(); p != 0; p=next(p))
    {
      const EST_WFST_State *s = p_states(ii(p));
      EST_Litem *i;

      for (i=s->transitions.head(); i != 0; i=next(i))
      {
          if ((ie == s->transitions(i)->in_symbol()) &&
            (oe == s->transitions(i)->out_symbol()))
          {
            // Add to end of ii if not already there
            int nstate = s->transitions(i)->state();
            if (!is_a_member(ii,nstate));
            {
                ii.append(nstate);
                ms->add(nstate); // gets added in order
            }
          }
      }
    }
}

/************************************************************************/
/*  Intersection of a list of transducers, i.e. what is accepted by all */
/************************************************************************/
void EST_WFST::intersection(wfst_list &wl)
{
    // This is very similar to determinisation, similar complexity too
    EST_WFST_MultiState *start_state = new EST_WFST_MultiState(wfst_ms_list);
    EST_WFST_MultiState *nms,*current;
    int ns;
    Agenda multistate_agenda;
    EST_WFST_MultiStateIndex index(100);
    int i,o, new_name, n;
    EST_Litem *p,*q;
    int c=0;

    // Initialize this WFST from the given ones
    clear();
    p_in_symbols.copy(wl.first().p_in_symbols);
    p_out_symbols.copy(wl.first().p_out_symbols);

    // Determinize each input WFST and make a new start state consisting of 
    // the start states of each of the input WFSTs
    for (p=wl.tail(); p != 0; p=prev(p))
    {
      if (!wl(p).deterministic())
      {
          cout << "...intersection deterministing" << endl;
          EST_WFST tt = wl(p);
          wl(p).determinize(tt);
      }
      start_state->add(wl(p).start_state());
    }
    
    p_start_state = add_state(intersect_state_type(wl,start_state));
    // Label multistate start start with single-state name
    start_state->set_name(p_start_state);

    multistate_agenda.append(start_state);  // initialize agenda

    while (multistate_agenda.length() > 0)
    {
      current = multistate_agenda.first();
      multistate_agenda.remove(multistate_agenda.head());
      if ((c % 100) == 99)
          cout << "Intersection " << summary() << " Agenda " 
            << multistate_agenda.length() << endl;
      c++;

      // For all possible in/out pairs
      for (i=0; i < p_in_symbols.length(); i++)
      {   // start at 2 to skip any and epsilon characters -- hmm bad
          for (o=0; o < p_out_symbols.length(); o++)
          {
            if ((i==o) && (i==0)) // shouldn't be epsilon/epsilon here 
                continue;
            nms = new EST_WFST_MultiState(wfst_ms_list);
            // Increment multistate to new mulitstate for each individual
            // state using each WFST
            for (n=0,p=wl.head(),q=current->head(); 
                 (p != 0) && (q != 0);
                 p=next(p),q=next(q),n++)
                nms->add(wl(p).transition((*current)(q),i,o));
            if (intersect_state_type(wl,nms) == wfst_error)
            {
                delete nms;
                continue;    // no state to go to
            }
            new_name = multistate_index(index,nms,p_num_states);
            if (new_name == p_num_states)  // genuinely new and unseen
            {   // create a real state and add it to the agenda
                ns = add_state(intersect_state_type(wl,nms));
                nms->set_name(ns);
                multistate_agenda.append(nms);  
            }
            else  // already seen this state, and is already named
            {
                nms->set_name(new_name);
                delete nms;
            }

            // Add new transition to current state
            p_states[current->name()]
                ->add_transition(nms->weight(),nms->name(),i,o);
          }
      }
      delete current;
      // Probably want some progress summary
    }

}

static enum wfst_state_type intersect_state_type(wfst_list &wl,
                                     EST_WFST_MultiState *ms)
{
    // Find the state type of the combined states
    // If any are error, return error, if one is nonfinal return nonfinal
    // otherwise its final
    EST_Litem *p,*q;
    enum wfst_state_type r = wfst_final;

    for (p=wl.head(),q=ms->head(); 
       (p != 0) && (q != 0); 
       p=next(p),q=next(q))
    {
      if ((*ms)(q) == WFST_ERROR_STATE)
          return wfst_error;

      enum wfst_state_type dd = wl(p).state((*ms)(q))->type();

      if (dd == wfst_error)
          return wfst_error;
      else if (dd == wfst_nonfinal)
          r = wfst_nonfinal;
    }
    return r;
}

00470 void EST_WFST::intersection(const EST_WFST &a, const EST_WFST &b)
{
    // Intersect two WFSTs
    wfst_list wl;

    wl.append(a);
    wl.append(b);
    
    intersection(wl);
}

/*******************************************************************/
/*  Minimization is of complexity of O(n^2)                        */
/*******************************************************************/
00484 void EST_WFST::minimize(const EST_WFST &nmwfst)
{
    // Minimize a WFST
    int p,q;
    wfst_marks marks(nmwfst.num_states());
    wfst_assumes assumptions;

    //  For each combination of states
    for (p=0; p < nmwfst.num_states()-1; p++)
      for (q=p+1; q < nmwfst.num_states(); q++)
          check_distinguished(nmwfst,p,q,marks,assumptions);

    // The marked array now has all different and lists to equivalent states
    // Build an array mapping old name to new name.
    int num_new_states;
    int i;
    EST_IVector state_map;

    marks.find_state_map(state_map,num_new_states);

    // Build the new minimized WFST mapping existing transitions
    clear();
    p_in_symbols.copy(nmwfst.p_in_symbols);
    p_out_symbols.copy(nmwfst.p_out_symbols);

    init(num_new_states);
    p_start_state = state_map(nmwfst.p_start_state);

    for (i=0; i < nmwfst.num_states(); i++)
    {
      if (p_states[state_map(i)] == 0)
          p_states[state_map(i)] = 
            copy_and_map_states(state_map,nmwfst.state(i),nmwfst);
    }

}

static int check_distinguished(const EST_WFST &nmwfst,
                         int p, int q,
                         wfst_marks &marks,
                         wfst_assumes &assumptions)
{
    // Check to see if these two are equivalent
    EST_Litem *t;
    EST_IList from_p,from_q;
    
    if (marks.distinguished(p,q))   // been here, done that
      return TRUE;
    else if (marks.undistinguished(p,q)) // been here too
      return FALSE;
    // Not been here yet so do some work to try to find out if
    // these states can be distinguished
    else if ((nmwfst.state(p)->type() != nmwfst.state(q)->type()) ||
           (nmwfst.state(p)->num_transitions() != 
            nmwfst.state(q)->num_transitions()))
    { // Different final/non-final type or different number
      // of transitions so obviously different states
      marks.distinguish(p,q);
      return TRUE;
    }
    else 
    {   // Have to check their transitions individually
      for (t=nmwfst.state(p)->transitions.head(); t != 0; t=next(t))
      {
          int in = nmwfst.state(p)->transitions(t)->in_symbol();
          int out = nmwfst.state(p)->transitions(t)->out_symbol();
          int y = nmwfst.state(p)->transitions(t)->state();
          int z = nmwfst.transition(q,in,out);
          if ((z == WFST_ERROR_STATE) || 
            (marks.distinguished(y,z)))
          {   // no equiv transition or obviously different states
            marks.distinguish(p,q);
            return TRUE;
          }
          else if (equivalent_to(y,z,assumptions))
            continue;
          else   // Potential equivalence, record y,z for later
          {
            from_p.append(y);
            from_q.append(z);
          }
      }
      // All transitions had potential match so only now
      // actually check their follow sets
      EST_Litem *yp, *zp;
      int tl = FALSE;
      if (assumptions.length() == 0)
          tl = TRUE;
      // assume they are undistinguished
      add_assumption(p,q,assumptions);
      for (yp=from_p.head(),zp=from_q.head(); 
           yp != 0; yp=next(yp),zp=next(zp))
          if (check_distinguished(nmwfst,from_p(yp),from_q(zp),
                            marks,
                            assumptions))
          {
            marks.distinguish(p,q);  // set the distinguished
            assumptions.clear();
            return TRUE;
          }
      // ok I give up, they are the same
      if (tl)
      {   // This is equivalent given the assumptions (and no
          // higher level assumptions) so we can mark these states
          // as undistinguished (and all the assumptions)
          mark_undistinguished(marks,assumptions);
          assumptions.clear();
      }
      return FALSE;
    }
}

void EST_WFST::extend_alphabets(const EST_WFST &b)
{
    // Extend current in/out alphabets to accomodate anthing in b's
    // that are not in a's
    // This guarantees that the number in this will still be valid
    EST_StrList ivocab, ovocab;
    int i;
    
    for (i=0; i<p_in_symbols.length(); i++)
      ivocab.append(in_symbol(i));
    for (i=0; i<b.p_in_symbols.length(); i++)
      if (!strlist_member(ivocab,b.in_symbol(i)))
          ivocab.append(b.in_symbol(i));
    for (i=0; i<p_out_symbols.length(); i++)
      ovocab.append(out_symbol(i));
    for (i=0; i<b.p_out_symbols.length(); i++)
      if (!strlist_member(ovocab,b.out_symbol(i)))
          ovocab.append(b.out_symbol(i));

    p_in_symbols.init(ivocab);
    p_out_symbols.init(ovocab);
}

EST_WFST_State *EST_WFST::copy_and_map_states(const EST_IVector &state_map,
                                    const EST_WFST_State *s,
                                    const EST_WFST &b) const
{
    // Copy s into new state mapping new states to those in state map
    EST_WFST_State *ns = new EST_WFST_State(state_map(s->name()));
    EST_Litem *i;

    ns->set_type(s->type());

    for (i=s->transitions.head(); i != 0; i=next(i))
    {
      int mapped_state= state_map(s->transitions(i)->state());
      if (mapped_state != WFST_ERROR_STATE)
          ns->add_transition(s->transitions(i)->weight(),
                mapped_state,
                in_symbol(b.in_symbol(s->transitions(i)->in_symbol())),
                out_symbol(b.out_symbol(s->transitions(i)->out_symbol())));
    }

    return ns;
}

/***********************************************************************/
/* Build a WFST which doesn't accept any of the strings that a accepts */
/* This keeps the same in/out alphabet                                 */
/***********************************************************************/
00646 void EST_WFST::complement(const EST_WFST &a)
{
    int i;

    copy(a);  

    for (i=0; i < num_states(); i++)
    {
      if (p_states[i]->type() == wfst_final)
          p_states[i]->set_type(wfst_nonfinal);
      else if (p_states[i]->type() == wfst_nonfinal)
          p_states[i]->set_type(wfst_final);
      // errors remain errors
    }
}

static int noloopstostart(const EST_WFST &a)
{
    // TRUE if there are no transitions leading to the start state
    // when this is true there is a union operation which preserves
    // deterministicness
    int i;

    for (i=0; i < a.num_states(); i++)
    {
      const EST_WFST_State *s = a.state(i);
      EST_Litem *p;
      for (p=s->transitions.head(); p != 0; p=next(p))
      {
          if (s->transitions(p)->state() == a.start_state())
            return FALSE;
      }
    }
    return TRUE;
}

int EST_WFST::deterministiconstartstates(const EST_WFST &a, const EST_WFST &b) const
{
    // TRUE if there are no common transition labels from a and b's
    // start state
    EST_IMatrix tab;
    int in,out;

    tab.resize(a.p_in_symbols.length(),a.p_out_symbols.length());
    tab.fill(0);

    for (EST_Litem *t=a.state(a.start_state())->transitions.head();
       t != 0; t=next(t))
    {
      tab(a.state(a.start_state())->transitions(t)->in_symbol(),
          a.state(a.start_state())->transitions(t)->out_symbol()) = 1;
    }

    for (EST_Litem *tt=b.state(b.start_state())->transitions.head();
       tt != 0; tt=next(tt))
    {
      in = a.in_symbol(b.in_symbol(b.state(b.start_state())->transitions(tt)->in_symbol()));
      out = a.out_symbol(b.out_symbol(b.state(b.start_state())->transitions(tt)->out_symbol()));
      if (in == -1) 
          continue;  // obviously not a clash
      else if (out == -1) 
          continue;  // obviously not a clash
      else if (tab(in,out) == 1)
          return FALSE;
    }
    return TRUE;
}

/***********************************************************************/
/* Build a WFST which accepts both strings of a and of b               */
/***********************************************************************/
00717 void EST_WFST::uunion(const EST_WFST &a,const EST_WFST &b)
{
    EST_IVector smap;
    int i;

    copy(a);
    extend_alphabets(b);

    if (a.deterministic() && b.deterministic() &&
      noloopstostart(a) && noloopstostart(b) &&
      deterministiconstartstates(a,b))
    {
      // This does the union without the epsilon and will preserve
      // determistic wfsts in this special case
      smap.resize(b.num_states());
      smap[0] = start_state();
      for (i=1; i < b.num_states(); ++i)
          smap[i] = i+a.num_states()-1;

      more_states(a.num_states()+b.num_states()-1);
      p_num_states += b.num_states()-1;
      for (i=1; i < b.num_states(); i++)
          p_states[smap(i)] = copy_and_map_states(smap,b.state(i),b);

      const EST_WFST_State *s = b.state(b.start_state());
      EST_Litem *p;
      for (p=s->transitions.head(); p != 0; p=next(p))
      {
          int mapped_state= smap(s->transitions(p)->state());
          if (mapped_state != WFST_ERROR_STATE)
            p_states[start_state()]->
             add_transition(s->transitions(p)->weight(),
               mapped_state,
               in_symbol(b.in_symbol(s->transitions(p)->in_symbol())),
               out_symbol(b.out_symbol(s->transitions(p)->out_symbol())));
      }
    }
    else
    {   // do it the hard way
      smap.resize(b.num_states());
      for (i=0; i < b.num_states(); ++i)
          smap[i] = i+a.num_states();

      more_states(a.num_states()+b.num_states());
      p_num_states += b.num_states();
      for (i=0; i < b.num_states(); i++)
          p_states[smap(i)] = copy_and_map_states(smap,b.state(i),b);

      // Actually do the union bit by adding an epsilon transition from
      // the start of a to start state of b
      p_states[start_state()]->add_transition(0.0,smap[b.start_state()],
                                    in_epsilon(), out_epsilon());
    }

}

/***********************************************************************/
/* Build a WFST which a followed by b                                  */
/***********************************************************************/
00776 void EST_WFST::concat(const EST_WFST &a,const EST_WFST &b)
{
    EST_IVector smap;
    int i;

    copy(a);  
    extend_alphabets(b);

    smap.resize(b.num_states());
    for (i=0; i < b.num_states(); ++i)
      smap[i] = i+a.num_states();

    more_states(a.num_states()+b.num_states());

    // everything final in a becomes non final and an epsilon transition
    // goes from them to the start of b
    for (i=0; i < num_states(); i++)
    {
      if (p_states[i]->type() == wfst_final)
      {
          p_states[i]->set_type(wfst_nonfinal);
          p_states[i]->add_transition(0.0,smap[b.start_state()],
                              in_epsilon(), out_epsilon());
      }
    }

    p_num_states += b.num_states();
    for (i=0; i < b.num_states(); i++)
      p_states[smap(i)] = copy_and_map_states(smap,b.state(i),b);

}

/***********************************************************************/
/* Build a WFST from composing a and b (feeding output of a to         */
/* input of b)                                                         */
/***********************************************************************/
00812 void EST_WFST::compose(const EST_WFST &a,const EST_WFST &b)
{
    EST_WFST_MultiState *start_state = new EST_WFST_MultiState(wfst_ms_list);
    EST_WFST_MultiState *nms,*current;
    Agenda multistate_agenda;
    EST_WFST_MultiStateIndex index(100);
    wfst_list wl;
    EST_WFST t;
    int i,new_name;
    EST_Litem *p,*q;

    clear();
    p_in_symbols.copy(a.p_in_symbols);
    p_out_symbols.copy(b.p_out_symbols);

    // Unfortunately need to needlessly copy a and b here
    wl.append(a);
    start_state->add(a.start_state());
    wl.append(b);
    start_state->add(b.start_state());

    p_start_state = add_state(intersect_state_type(wl,start_state));
    // Label multistate start start with single-state name
    start_state->set_name(p_start_state);

    multistate_agenda.append(start_state);  // initialize agenda

    while (multistate_agenda.length() > 0)
    {
      current = multistate_agenda.first();
      multistate_agenda.remove(multistate_agenda.head());

      // For all possible in/out pairs
      for (i=0; i < p_in_symbols.length(); i++)
      {   // start at 2 to skip any and epsilon characters -- hmm bad
          // find transitions
          wfst_translist transa;

          wl.first().transduce(current->first(),i,transa);
          for (p=transa.head(); p != 0; p=next(p))
          {
            wfst_translist transb;
            // feed a's out to b'is in
            wl.last().transduce(
                          current->last(),
                    b.in_symbol(a.out_symbol(transa(p)->out_symbol())),
                    transb);
            for (q = transb.head(); q != 0; q=next(q))
            {
                nms = new EST_WFST_MultiState(wfst_ms_list);
                nms->add(transa(p)->state());
                nms->add(transb(q)->state());
                
                if (intersect_state_type(wl,nms) == wfst_error)
                {
                  delete nms;
                  continue;    // no state to go to
                }
                new_name = multistate_index(index,nms,p_num_states);
                if (new_name == p_num_states)  // genuinely new and unseen
                {   // create a real state and add it to the agenda
                  int ns = add_state(intersect_state_type(wl,nms));
                  nms->set_name(ns);
                  multistate_agenda.append(nms);  
                }
                else  // already seen this state, and is already named
                  nms->set_name(new_name);

                // Add new transition to current state
                p_states[current->name()]
                  ->add_transition(nms->weight(),nms->name(),
                               i,transb(q)->out_symbol());
                
            }
            
          }
      }
      delete current;
      // Probably want some progress summary
    }

}

/***********************************************************************/
/* Build a WFST which accepts strings in a but not in b                */
/***********************************************************************/
00898 void EST_WFST::difference(const EST_WFST &a,const EST_WFST &b)
{
    EST_WFST compb;

    // This is sort of a complement, but not quite
    // But what would the name of this operation be?
    compb.copy(b);
    for (int i=0; i < compb.num_states(); i++)
    {
      if (compb.p_states[i]->type() == wfst_final)
          compb.p_states[i]->set_type(wfst_error);
    }

    uunion(a,compb);
}

/***********************************************************************/
/* Remove states from which a final state can't be reached             */
/***********************************************************************/
00917 void EST_WFST::remove_error_states(const EST_WFST &a)
{
    // Find all states which are reachable from the start state, and
    // can reach a final state.  Mark all others as error states
    wfst_list wl;

    wl.append(a);
    EST_WFST &ab = wl.first();

    ab.current_tag = ++traverse_tag;
    for (int i=0; i < ab.p_num_states; i++)
      ab.can_reach_final(i);
    // This will copy only the non-error states
    intersection(wl);

}

int EST_WFST::can_reach_final(int state)
{
    // Return TRUE iff this state is final or can reach a final state

    if (p_states[state]->type() == wfst_final)
      return TRUE;
    else if (p_states[state]->type() == wfst_error)
      return FALSE;
    else if (p_states[state]->tag() == current_tag)  
      // Been here and it is reachable
      return TRUE;
    else
    {
      EST_Litem *i;
      enum wfst_state_type current_val = p_states[state]->type();
      enum wfst_state_type r = wfst_error;
      // temporarily set this to error to stop infinite recursion
      p_states[state]->set_type(wfst_error);

      for (i=p_states[state]->transitions.head(); i != 0; i=next(i))
          // if any transition goes to something that reaches a final state
          // set the value back to its original
          if (can_reach_final(p_states[state]->transitions(i)->state()))
            r = current_val;

      // Will be set back to original value iff a final state
      // is reachable from here
      p_states[state]->set_type(r);
      if (r == wfst_error)
          return FALSE;
      else
      {
          p_states[state]->set_tag(current_tag);
          return TRUE;
      }
    }
}

/***********************************************************************/
/* True is wfst is deterministic                                       */
/***********************************************************************/
00975 int EST_WFST::deterministic() const
{
    // True if all states contains no multiple arcs with the same symbol
    EST_IMatrix tab;

    tab.resize(p_in_symbols.length(),p_out_symbols.length());

    for (int i=0; i < p_num_states; i++)
    {
      tab.fill(0);
      for (EST_Litem *t=state(i)->transitions.head(); t != 0; t=next(t))
      {
          if (tab(state(i)->transitions(t)->in_symbol(),
                state(i)->transitions(t)->out_symbol()) == 1)
            return FALSE;
          else
            tab(state(i)->transitions(t)->in_symbol(),
                state(i)->transitions(t)->out_symbol()) = 1;
      }
    }
    return TRUE;
}

Generated by  Doxygen 1.6.0   Back to index