=========================================================
	=							=
	=  A simple BNF description of the DIPETT/HAIKU output	=
	=							=
	=========================================================

jfd, 1 oct


Notation:
---------
&Uppercase_initialled is a variable standing for a terminal identifier e.g. &Atom
Upper-case initial denotes a category, further decomposed		e.g. Sentence1
| 	is for alternatives
//  	is for optionals
[...] 	is for any list
* 	is for iteration
etc.	means just that; will have to be completed

% precedes comments, and ? is just where I'm not sure

Sources:
--------
 - trees, especially Claire's trees (in ~kate/..share/tmp/cevans/DIPETT/trees, 
	most of them compressed)
 - DIPETT source files

Comment on format and utilization:
----------------------------------
There are the usual differences between this BNF and the implemented DCG format: 
 - constraints not shown - this BNF is overgenerating
 - rippling of vars expressed differently: here with Variables
	standing for terms, vs in the DCG where variables are args and calls to
	further rules
 - there are fewer arguments; 
 - structures of optionality are a bit simplified here - 
		another cause for overgenerating.

In all, it is less than obvious to retrieve the relevant structures
from the source files only.

About 'arities':
----------------

Only 'parse_tree' is a real predicate name. All other identifiers
appearing before parentheses are just functors.

The 'arity' of these functors is not fixed, because of the optionality
of some of their 'arguments', notably inside the description of
entities, and from the presence or absence of token lists.


===================================================================

		PARSE TREES FROM DIPETT

===================================================================


ParseTree ::= parse_tree(&Number, Inputstring, ParseTree1).    
		| parse_tree(&Number, Inputstring, error).
	% note dot. 'parse_tree' is the only actual predicate name; 
	% 	the rest, hereunder, are mere functors

Inputstring ::= '[...]'  
	%e.g. '[this,amount,is,already,included,in,your,etc.]'

ParseTree1 ::=	ParseTree1complete
	      | ParseTree1fragmentary

ParseTree1complete ::= 
		parse_tree__decla_or_imper(Sentence1, end_of_input(period))
	      | parse_tree__inter(Sentence1, end_of_input(period))

ParseTree1fragmentary ::=
	parse_tree__frag(sentence_fragment(...))
	| parse_tree__frag_series([ Sentence_fragment*
		/skipped_token(''Quotedatom'')/
		Sentence_fragment*], end_of_input(period))

Quotedatom ::= '&Atom'

Sentence1 ::= simple_sentence(structure(Clause), /Tokens/)
	    | complex_sentence(structure(single_main_clause(Clause), 
			next_main_clause(Clause)))

Clause ::= declarative(Statement)  
	| imperative(Statement)  
	| etc.

Statement ::= statement( Subj, Predicate, /Complement/, /Tokens/)      
			% note different "arities" of functor 'statement'

Sentence_fragment ::= sentence_fragment(Statement)
		   |  sentence_fragment(NP_equiv)
		   |  sentence_fragment(PP)
		   |  etc.

Tokens ::= tokens([...])

----------------------------
Statement
----------------------------

Subj ::= subj(NP_equiv)

Predicate ::= predicate( regular(VP), Voice, Complement, /Tokens/)

--------------------
Verb
--------------------

VP ::= verb(&Verb,tense([Tense]),neg(Neg),trans(Trans))
		% note brackets around Tense, 
		% 	necessary because some tense idents have a hyphen

Tense::= infinitive 
	| present_simple  | present_continuous | present_perfect_continuous 
	| past_simple | pre_past_simple | pre_past_continuous 
	| future_continuous | future_perfect_simple | future_perfect_continuous 
	| could_conditional_present_simple | should_conditional_present_simple 
	| would_conditional_present_simple | etc.

Neg ::= yes  |  no

Trans ::= tr  |  intr  |  tr_intr  | stat

Voice ::= active |  passive


--------------------
Complement
--------------------
	% DIPETT_6_PARSER_V.PL, l 1476

Complement ::= complement(SVpattern(Complement0))
		| s_qualifier(SVpattern(S_qualifier))

	% in the Haiku output, what I call the SVpattern is processed into 
	%       a CMP and a CP
	% Constraints between the SVpattern and Complement0 are unclear

SVpattern ::=  SVelement/_SVelement/*

SVelement ::= sva | svc | svo | svoc | svoa

Complement0 ::=   NP_equiv			% no brackets, no tokens
		| [Complement1 /,Complement1/ /,Tokens/ ]  
				% if any other simple type or if 
				% complex complement, there are brackets and tokens
		| nil

Complement1 ::=   NP_equiv
		| CDirobj
		| CIobj
		| CAttr
		| PP		% note insertion of tokens

% rem: It is not that there are no []s when we have a simple complement. 
% rather: a simple noun_phrase, possibly coordinated, has no brackets.
% all other simple complements, and all complex complements, have brackets

CDirobj ::= dirobj(NP_equiv)	% Tokens?

CIobj ::= iobj(PP)   % new

CAttr ::= [attrs(adj(Adj,Deg)), Tokens]	% i.e.  [Attr,Tokens]

Cattrs_compl ::= attrs_compl( [E, PP] )   	% ?

% PP : see under


S_qualifier ::= [Attr, Tokens]		% for statives
		| etc. ?


% Counting examples of various types of Complement1, in file ~trees/may26.trees
%   
%   entity()			............
%   conj_nps()			..
%   nominal_clause()		..
%   [ pp(), T]   		.....
%   [entity(),[pp(),T] ]	.....
%   [iobj( [pp(),T] ), T]	.
%   [attrs(), T]
%
% s_qualifier(svc_sva(attrs_compl([attrs(adj(_,_)), T])))	....


--------------------
NP_equiv, especially Entity
--------------------
	% in file DIPETT_8_PARSER_N.PL

NP_equiv ::= Nounphrase			% the most common
	   | Nominal_clause
	   | Special_function_word

Nounphrase ::= Entity
	| conj_nps(Coord, Entity, Nounphrase)      % to check

Nominal_clause ::= To_infinitive_clause
		 | Ing_clause
		 | Wh_interro_declarative_clause
		 | That_clause

To_infinitive_clause ::= simple_to_infinitive_clause(Clause)		?
Ing_clause ::= ing_clause(Clause) 					?
Wh_interro_declarative_clause ::= whether_if_clause(&Atom, Clause) 	?

Special_function_word ::= 'quiz-example-command'
		| etc.


Entity ::= entity( /Intensifier/ 
		    /,Determiner/
		    /,Attributes/ 
		     ,Number
		     ,Headnoun
		    /,NP_postmodifiers/ 
		    /,Tokens/ )

Intensifier ::= intensifier(Intensifier0)

Intensifier0 ::= Adv

Determiner ::= determinatives( deter(Det), /Predeterminer/, /Postdeterminer/)

Det ::= a | an | the | each | etc.

Predeterminer ::= predeter(X) 		?

Postdeterminer ::= postdeter(X) 	?

Number ::= sg | sg3 | pl

Headnoun ::= head_noun(noun(&Ident)  /,Noun_Premodifier/ /,Noun_post_modifier/)
	   | head_noun(&Propernoun)
	   | Ref

Ref ::= pers_pron(Pers_pron)
	| &Atom
	| [&Numeric]

Pers_pron ::= you | etc.

Noun_Premodifier ::= pre_modif(&Atom)
		   | pre_modif([' Hyphenated '])	% e.g. ['common-law']

Noun_post_modifier ::= quiz_token(&A) | ref__or__var(&A) | date_form(&A) 
		| number_special(&A) | dollars(&A) | percentage(&A) | nil

Coord ::= and  |  or  | etc. 

Clause ::= To_infinitive_clause  | etc.      ??

NP_postmodifiers ::= [NP_postmodifiers1 /,NP_postmodifiers1/*  /,Tokens/]

NP_postmodifiers1 ::= PP  |  Adv

Adv ::= &Atom
	| Adverbial

Adverbial ::= adv_clause(Subordinator, Statement)


----------------------------
PP
----------------------------
	% DIPETT_8_PARSER_N.PL, line 992 and foll.

PP ::=  PP_simple
      | PP_conj

PP_simple ::= pp(Prep, Entity  /,Tokens/)		% note: not a list

PP_conj ::= [conj_pps( Conj, [ PP_simple, PP]) /,Tokens/ ]	  % note brackets
								% note tokens
	% ayayay, are Tokens added at analysis of PP level, or complement level??

	  | [conj_pps( implicit_and, [PP_simple, p_asyndetic_conj_pp( PP)]) /,Tokens/ ]

Prep ::= in | for | to | at | etc.     % 96 in dictionary


===================================================================

		STRUCTURES OF HAIKU_CA OUTPUT

===================================================================

Clausal_Case_Structures ::=
	[clauses([ SStatement /SStatement/* ])]

SStatement ::= [ '*statementN*', Case_structure]       % e.g. '*statement1*'

Case_structure ::= case_structure(Verb, CMP, CP, Subject, Complement0)
		% Subject and Complement0 as in parse tree; see above

CMP ::= CMPelement /-CMPelement/*

CMPelement ::= psubj | pobj | Prep

CP ::=  CPelement /-CPelement/*

CPelement ::= agt | etc.


	% no other difference with parse trees