Logo Search packages:      
Sourcecode: speech-tools version File versions  Download package

relation_io.cc

/*************************************************************************/
/*                                                                       */
/*                Centre for Speech Technology Research                  */
/*                     University of Edinburgh, UK                       */
/*                      Copyright (c) 1995,1996                          */
/*                        All Rights Reserved.                           */
/*                                                                       */
/*  Permission is hereby granted, free of charge, to use and distribute  */
/*  this software and its documentation without restriction, including   */
/*  without limitation the rights to use, copy, modify, merge, publish,  */
/*  distribute, sublicense, and/or sell copies of this work, and to      */
/*  permit persons to whom this work is furnished to do so, subject to   */
/*  the following conditions:                                            */
/*   1. The code must retain the above copyright notice, this list of    */
/*      conditions and the following disclaimer.                         */
/*   2. Any modifications must be clearly marked as such.                */
/*   3. Original authors' names are not deleted.                         */
/*   4. The authors' names are not used to endorse or promote products   */
/*      derived from this software without specific prior written        */
/*      permission.                                                      */
/*                                                                       */
/*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
/*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
/*  THIS SOFTWARE.                                                       */
/*                                                                       */
/*************************************************************************/
/*                      Author :  Paul Taylor updated by awb             */
/*                      Date   :  Feb 1999                               */
/*-----------------------------------------------------------------------*/
/*                      Relation class file i/o, label files             */
/*                                                                       */
/*=======================================================================*/
#include <stdlib.h>
#include <stdio.h>
#include <fstream.h>
#include "EST_unix.h"
#include "EST_types.h"
#include "ling_class/EST_Relation.h"
#include "EST_string_aux.h"
#include "EST_cutils.h"
#include "EST_TList.h"
#include "EST_Option.h"
#include "relation_io.h"

#define DEF_SAMPLE_RATE 16000
#define HTK_UNITS_PER_SECOND 10000000

static EST_Regex RXleadingwhitespace("^[ \t\n\r][ \t\n\r]*.*$");

EST_read_status read_label_portion(EST_TokenStream &ts, EST_Relation &s, 
                           int sample);

EST_read_status load_esps_label(EST_TokenStream &ts,EST_Relation &rel)
{
    ts.set_SingleCharSymbols(";");
    ts.set_quotes('"','\\');
    EST_String key, val;

    // Skip the header
    while (!ts.eof())
    {
      key = ts.get().string();
        if (key == "#")
            break;

      val = ts.get_upto_eoln().string();
      // delete leading whitespace
      if (val.matches(RXleadingwhitespace))
          val = val.after(RXwhite);
      rel.f.set(key, val);
    }
          
    if (ts.peek() == "") return format_ok;

    while (!ts.eof())
    {
      EST_Item *si = rel.append();
      EST_String name;
      
      si->set("end",(float)atof(ts.get().string()));
      ts.get();  // skip the color;

      for (name = ""; (!ts.eoln()) && (ts.peek() != ";"); )
      {
          EST_Token &t = ts.get();
          if (name.length() > 0)    // preserve internal whitespace
            name += t.whitespace();  
          name += t.string();
      }
      si->set_name(name);
      
      if (ts.peek().string() == ";") // absorb separator
      {
          ts.get();
          si->features().load(ts);
      }
    }
    return format_ok;
}

EST_write_status save_esps_label(const EST_String &filename, 
                         const EST_Relation &s,
                         bool evaluate_ff)
{
    ostream *outf;
    if (filename == "-")
      outf = &cout;
    else
      outf = new ofstream(filename);
    
    if (!(*outf))  
    {
      cerr << "save_esps_label: can't open label output file \"" << 
          filename << "\"" << endl;
      return write_fail;
    }

    EST_write_status st=save_esps_label(outf, s, evaluate_ff);
  
    if (outf != &cout)
      delete outf;

    return st;
}

EST_write_status save_esps_label(ostream *outf,
                         const EST_Relation &s,
                         bool evaluate_ff)
{
    EST_Item *ptr;
    
    *outf << "separator ;\n";
    if (!s.f.present("nfields"))
      *outf << "nfields 1\n";

    EST_Features::Entries p;
    for (p.begin(s.f); p; ++p)
      *outf << p->k << " " << p->v << endl;

    *outf << "#\n";
/*    if (f("timing_style") == "event")
        *outf << "timing_style event\n";
    else if (f("timing_style") == "unit")
        *outf << "timing_style unit\n";
*/
    
    for (ptr = s.head(); ptr != 0; ptr = next(ptr))
    {
      *outf << "\t";
      outf->precision(5);
      outf->setf(ios::fixed, ios::floatfield);
      outf->width(8);
      //    outf->fill('0');
      if (s.f("timing_style","0") == "event")
          *outf << ptr->F("time",0);
      else
          *outf << ptr->F("end",0);
      
      *outf << " 26 \t" << ptr->S("name","0");

      EST_Features f2;
      f2 = ptr->features();
      f2.remove("name");
      f2.remove("end");
      if (evaluate_ff)
          evaluate(ptr,f2);

      if (f2.length() > 0)
      {
          *outf << " ; ";
          f2.save(*outf);
      }
      *outf << endl;
    }
    
    return write_ok;
}

EST_read_status load_ogi_label(EST_TokenStream &ts, EST_Relation &s)
{
    // This function reads OGI style label files. The start, end
    // time and names of the labels are mandatory. 
    EST_String key, val;
    float sr;
    int isr;
    
    // set up the character constant values for this stream
    ts.set_SingleCharSymbols(";");

    // Skip over header

    while(!ts.eof())
      {
      if ((ts.peek().col() == 0) && (ts.peek() == "END"))
        {
          if (ts.peek() == "END")
            { // read rest of header
            ts.get();
            ts.get();
            ts.get();
            }
          break;
        }
      key = ts.get().string();
      val = ts.get().string();
      }

    sr = 1000.0 / atof(val);
    isr = (int)sr;
    
    if (ts.eof())
    {
      cerr << "Error: couldn't find header in label file " 
           << ts.filename() << endl;
      return wrong_format;
    }

    if (read_label_portion(ts, s, isr) == misc_read_error)
    {
      cerr << "error: in label file " << ts.filename() << " at line " <<
          ts.linenum() << endl;
      return misc_read_error;
    }
    return format_ok;
}

EST_read_status load_words_label(EST_TokenStream &ts, EST_Relation &s)
{
    // This function reads label files in the form of simple word strings 
    // with no timing information.
    EST_Item *item;

    while (!ts.eof())
    {
      item = s.append();
      item->set("name",(EST_String)ts.get());
      item->set("end",0.0);
    }

    return format_ok;
}

static float convert_long_num_string_to_time(const char *s,int sample)
{
    // For those label files that think 100 nanosecond times are cool
    // we have to proveide a special function to convert them as 
    // this quickly gets beyond the capabilities of ints.

    if (strlen(s) < 15)
      return atof(s)/sample;
    else
    {
      double a = 0,d;
      int i=0;
      for (i=0; 
           (strchr(" \n\r\t",s[i]) != NULL) && (s[i] != '\0');
           i++);

      for ( ;
            (s[i] != '\0') && (s[i] >= '0') && (s[i] <= '9');
            i++)
      {
          a = a*10;
          d = s[i]-'0';
          a += (d/(double)sample);
      }
      return a;
    }
}

EST_read_status read_label_portion(EST_TokenStream &ts, EST_Relation &s, 
                           int sample)
{
    EST_Item *item;
    float hstart, hend;
    EST_String str;
    
    while(!ts.eof())
    {
      str = ts.get().string();
      if (str == ".")
          return format_ok;
      
      item = s.append();
      
      hstart = convert_long_num_string_to_time(str,sample);
      str = ts.get().string();
      hend = convert_long_num_string_to_time(str,sample);
      
      item->set("end",hend);                   // time 
      item->set("name",ts.get().string());       // name
      
      if (!ts.eoln())
          item->set("rest_lab",ts.get_upto_eoln().string());
    }
    
    return format_ok;
}    

EST_read_status load_sample_label(EST_TokenStream &ts,
                          EST_Relation &s, int sample)
{
    
    if (sample == 0)    // maybe this should be an error
      sample = DEF_SAMPLE_RATE;
    
    // set up the character constant values for this stream
    ts.set_SingleCharSymbols(";");
    
    s.clear();
    if (read_label_portion(ts, s, sample) == misc_read_error)
    {
      cerr << "error: in label file " << ts.filename() << " at line " <<
          ts.linenum() << endl;
      return misc_read_error;
    }
    return format_ok;
}

EST_write_status save_htk_label(const EST_String &filename, 
                        const EST_Relation &a)
{
    ostream *outf;
    if (filename == "-")
      outf = &cout;
    else
      outf = new ofstream(filename);
    
    if (!(*outf))
    {
      cerr << "save_htk_label: can't open label output file \"" << 
          filename << "\"" << endl;
      return write_fail;
    }

    EST_write_status s = save_htk_label(outf, a);

    
    if (outf != &cout)
      delete outf;
    
    return s;
}

EST_write_status save_htk_label(ostream *outf,
                        const EST_Relation &a)
{
    EST_Item *ptr;
    float end,start;
    
    outf->precision(6);

    start = end = 0;
    for (ptr = a.head(); ptr != 0; ptr = next(ptr))
    {
      outf->width(15);
      cout.setf(ios::left,ios::adjustfield);
      *outf << (int)(start * HTK_UNITS_PER_SECOND);
      outf->width(15);
      end = ptr->F("end",0.0);
      *outf << (int)(end * HTK_UNITS_PER_SECOND);
      *outf << " " << ptr->name() << endl;
      start = end;
    }

    return write_ok;
}

#if 0
EST_write_status save_label_spn(const EST_String &filename, 
                        const EST_Relation &a)
{
    EST_Stream_Item *ptr;
    
    ostream *outf;
    if (filename == "-")
      outf = &cout;
    else
      outf = new ofstream(filename);
    
    if (!(*outf))
    {
      cerr << "save_label_spn: can't open label output file \"" 
          << filename << "\"" << endl;
      return write_fail;
    }
    
    ptr = a.head();
    outf->precision(3);
    outf->setf(ios::left, ios::adjustfield);
    outf->width(8);
    *outf << ptr->name();
    outf->setf(ios::fixed, ios::floatfield);
    outf->width(8);
    *outf << (ptr->dur() * 1000.0) << "\t (0,140)" << endl;
    
    for (; next(ptr) != 0; ptr = next(ptr))
    {
      outf->precision(3);
      outf->setf(ios::left, ios::adjustfield);
      outf->width(8);
      *outf << ptr->name();
      outf->setf(ios::fixed, ios::floatfield);
      outf->width(8);
      *outf << (ptr->dur() * 1000.0) << endl;
    }
    //    outf->precision(3);
    //    outf->setf(ios::left, ios::adjustfield);
    outf->width(8);
    *outf << ptr->name();
    outf->setf(ios::fixed, ios::floatfield);
    outf->width(8);
    *outf << (ptr->dur() * 1000.0) << "\t (99,80)" << endl;
    
    if (outf != &cout)
      delete outf;
    
    return write_ok;
}

EST_write_status save_label_names(const EST_String &filename, 
                          const EST_Relation &a, 
                          const EST_String &features)
{
    EST_Stream_Item *ptr;
    
    ostream *outf;
    if (filename == "-")
      outf = &cout;
    else
      outf = new ofstream(filename);
    
    if (!(*outf))  
    {
      cerr << "save_label_name: can't open label output file \"" 
          << filename << "\"" << endl;
      return misc_write_error;
    }
    
    for (ptr = a.head(); next(ptr) != 0; ptr = next(ptr))
    {
      *outf << ptr->name();
      if ((features != "") && (features != "OneLine"))
          *outf << endl;
      else
          *outf << " ";
    }
    
    *outf << ptr->name() << endl;
    
    if (outf != &cout)
      delete outf;
    return write_ok;
}
#endif

EST_write_status save_RelationList(const EST_String &filename, 
                           const EST_RelationList &plist, 
                           int time, int path)
{
    EST_Litem *p;
    EST_Item *ptr;
    EST_String outname;
    float start,end;
    
    ostream *outf;
    if (filename == "-")
      outf = &cout;
    else
      outf = new ofstream(filename);
    
    if (!(*outf))
    {
      cerr << "save_StreamList: can't open MLF output file \"" 
          << filename << "\"\n";
      return write_fail;
    }
    
    *outf << "#!MLF!#\n";     // MLF header/identifier
    outf->precision(6);

    start = end = 0;
    for (p = plist.head(); p != 0; p = next(p))
    {
      outname = path ? plist(p).name() : basename(plist(p).name());
      *outf << "\"*/" << outname<<"\"\n";
      for (ptr = plist(p).head(); ptr != 0; ptr = next(ptr))
      {
          if (time)
          {
            outf->width(15);
            cout.setf(ios::left,ios::adjustfield);
            *outf << (int)(start * HTK_UNITS_PER_SECOND);
            outf->width(15);
            end = ptr->F("end",0.0);
            *outf << (int)(end * HTK_UNITS_PER_SECOND) << " ";
            start = end;
          }
          *outf << ptr->S("name","0") << endl;
      }
      *outf << ".\n";
    }
    
    if (outf != &cout)
      delete outf;
    return write_ok;
}    

EST_write_status save_WordList(const EST_String &filename, 
                         const EST_RelationList &plist, 
                         int style)
{
    EST_Litem *p;
    EST_Item *ptr;
    
    ostream *outf;
    if (filename == "-")
      outf = &cout;
    else
      outf = new ofstream(filename);
    
    if (!(*outf))
    {
      cerr << "save:WordList: can't open WordList output file \"" 
          << filename << "\"\n";
      return write_fail;
    }
    
    for (p = plist.head(); p != 0; p = next(p))
    {
      for (ptr = plist(p).head(); next(ptr) != 0; ptr = next(ptr))
      {
          *outf << ptr->name();
          if (style == 0)
            *outf << endl;
          else
            *outf << " ";
      }
      if (ptr != 0)
          *outf << ptr->name() << endl;
    }
    
    if (outf != &cout)
      delete outf;
    return write_ok;
}    

EST_write_status save_ind_RelationList(const EST_String &filename, 
                               const EST_RelationList &plist, 
                               const EST_String &features, 
                               int path)
{
    EST_Litem *p;
    EST_String outname;
    (void) filename;
    (void) features;
    
    for (p = plist.head(); p != 0; p = next(p))
    {
      outname = path ? plist(p).name() : basename(plist(p).name());
      if (plist(p).save(outname,false) != write_ok)
          return misc_write_error;
    }
    
    return write_ok;
}    

EST_read_status load_RelationList(const EST_String &filename, 
                          EST_RelationList &plist)
{
    EST_TokenStream ts;
    EST_String fns, name;
    
    if (((filename == "-") ? ts.open(cin) : ts.open(filename)) != 0)
    {
      cerr << "Can't open label input file " << filename << endl;
      return misc_read_error;
    }
    // set up the character constant values for this stream
    ts.set_SingleCharSymbols(";");
    
    // Skip over header
    if (ts.get().string() != "#!MLF!#")
    {
      cerr << "Not MLF file\n";
      return wrong_format;
    }

    while(!ts.eof())
    {
      // put filename in as stream name. The filename is usually surrounded
      // by quotes, so remove these.
      fns = ts.get().string();
      strip_quotes(fns);
      EST_Relation s(fns);
      s.f.set("name", fns); // simonk
      plist.append(s);

      if (read_label_portion(ts, plist.last(), 10000000) == misc_read_error)
      {
          cerr << "error: in reading MLF file\n";
          cerr << "section for file " << fns << 
            " at line " << ts.linenum() << " is badly formatted\n";
          
          return misc_read_error;
      }
    }

    return format_ok;
}

static void pad_ends(EST_Relation &s, float length)
{
    // add evenly spaced dummy end values to Relation
    EST_Item *p;
    int i;
    
    for (i = 0, p = s.head(); p; p = next(p), ++i)
      p->set("end",(length * float(i)/float(s.length())));
}

EST_read_status read_RelationList(EST_RelationList &plist, 
                          EST_StrList &files, EST_Option &al)
{
    EST_Litem *p, *plp;
    
    if (al.val("-itype", 0) == "mlf")
    {
      if (load_RelationList(files.first(), plist) != format_ok)
          exit (-1);
    }
    else
      for (p = files.head(); p; p = next(p))
      {
          EST_Relation s(files(p));
          plist.append(s);
          plp = plist.tail();
          if (al.present("-itype"))
          {
            if (plist(plp).load(files(p), al.val("-itype")) != format_ok)
                exit (-1);
          }
          else if (plist(plp).load(files(p)) != format_ok)
            exit (-1);
          if ((al.val("-itype", 0) == "words") && (al.present("-length")))
            pad_ends(s, al.fval("-length"));
          
      }
    
    return format_ok;
}

Generated by  Doxygen 1.6.0   Back to index