#!/bin/csh -f
#
# Preprocesses and segments a TDC text using Segmenter.
# The segmentation result is printed on standard output.
#
# Usage: segment TDC_file
#
# Pipeline:
#   1. blankline TDC_file      -> scratch file 2
#   2. ".START" + 2 empty lines -> scratch file 1 (header)
#   3. header + scratch file 2 -> scratch file 3
#   4. layoutRecognizer        -> scratch file 3 with ".sgml" appended
#   5. termer | segmenter      -> standard output
#
# Scratch files are created in the current working directory, so that
# directory must be writable. Their names carry the shell's process id
# so that concurrent runs in the same directory do not overwrite each other.

# Commands
set BLANKLINE = '/a/kaml4/usr2/staff/ychali/project/tools/blankline'
set ECHOS     = '/usr/bin/echo .START'
set ECHO      = '/usr/bin/echo '
set CAT       = cat
set LAYOUT    = '/a/kaml4/usr2/staff/ychali/segmenter/segmenter/bin/util/layoutRecognizer -m wsj'
set TERMER    = '/a/kaml4/usr2/staff/ychali/segmenter/segmenter/bin/termer'
set SEGMENTER = '/a/kaml4/usr2/staff/ychali/segmenter/segmenter/bin/segmenter'
# -f: cleanup may run before every scratch file exists (e.g. on interrupt).
set RM        = 'rm -f'

# Validate the command line before touching the file system.
# csh has no direct stderr-only redirection, hence the /bin/sh helper.
if ($#argv != 1) then
    /bin/sh -c 'echo "Usage: segment TDC_file" 1>&2'
    exit 1
endif

if (! -r "$1") then
    # Inside sh -c the word after the script text is $0 and the next is $1.
    /bin/sh -c 'echo "segment: cannot read input file: $1" 1>&2' segment "$1"
    exit 1
endif

# Variables: per-process scratch file names ($$ is this shell's process id).
set TMP1 = temp1.$$
set TMP2 = temp2.$$
set TMP3 = temp3.$$

# Assume failure until the whole pipeline has run; an interrupt jumps
# straight to the cleanup label with this value still set.
set EXITCODE = 1
onintr cleanup

# call blankline
# NOTE(review): presumably reads the input file and writes $TMP2 (the cat
# step below reads $TMP2) -- confirm against the blankline tool.
$BLANKLINE "$1" $TMP2

# call echo: build the header, a ".START" line followed by two empty lines
($ECHOS; $ECHO; $ECHO) > $TMP1

# call cat: header followed by the blankline output
$CAT $TMP1 $TMP2 > $TMP3

# call layout
$LAYOUT $TMP3 > "$TMP3.sgml"

# call termer and segmenter
$TERMER "$TMP3.sgml" | $SEGMENTER

set EXITCODE = 0

cleanup:
# Ignore further interrupts so the scratch files are always removed.
onintr -

# call rm
$RM $TMP1 $TMP2 $TMP3 "$TMP3.sgml"

exit $EXITCODE