#
# 2014 Jun 09
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#-------------------------------------------------------------------------
#
# This script generates the implementations of the following C functions,
# which are part of the porter tokenizer implementation:
#
#     static int fts5PorterStep1B(char *aBuf, int *pnBuf);
#     static int fts5PorterStep1B2(char *aBuf, int *pnBuf);
#     static int fts5PorterStep2(char *aBuf, int *pnBuf);
#     static int fts5PorterStep3(char *aBuf, int *pnBuf);
#     static int fts5PorterStep4(char *aBuf, int *pnBuf);
#
# The generated C text is written to standard output.
#
# Rule tables
# -----------
# Each element of array O is a list of rules for one generated function.
# A rule is a list of three or four fields:
#
#     suffix  condition  replacement  ?ret?
#
#   suffix       Text that the end of the buffer is compared against.
#   condition    Name of a C function invoked as condition(aBuf, nBuf-N),
#                where N is the suffix length, or an empty string if the
#                rule has no condition.
#   replacement  Text copied over the suffix, or an empty string if the
#                suffix is simply removed.
#   ret          Optional.  If present and true, the generated code also
#                sets "ret = 1" when the rule is applied.
#
# Rules that share the same second-to-last suffix character are emitted
# as a single if/else-if chain in the order they appear below, so the
# order of rules within a table is significant.
#

set O(Step1B2) {
  { at  {} ate 1 }
  { bl  {} ble 1 }
  { iz  {} ize 1 }
}

set O(Step1B) {
  { "eed" fts5Porter_MGt0  "ee" 0 }
  { "ed"  fts5Porter_Vowel ""   1 }
  { "ing" fts5Porter_Vowel ""   1 }
}

set O(Step2) {
  { "ational" fts5Porter_MGt0 "ate"  }
  { "tional"  fts5Porter_MGt0 "tion" }
  { "enci"    fts5Porter_MGt0 "ence" }
  { "anci"    fts5Porter_MGt0 "ance" }
  { "izer"    fts5Porter_MGt0 "ize"  }
  { "logi"    fts5Porter_MGt0 "log"  }
  { "bli"     fts5Porter_MGt0 "ble"  }
  { "alli"    fts5Porter_MGt0 "al"   }
  { "entli"   fts5Porter_MGt0 "ent"  }
  { "eli"     fts5Porter_MGt0 "e"    }
  { "ousli"   fts5Porter_MGt0 "ous"  }
  { "ization" fts5Porter_MGt0 "ize"  }
  { "ation"   fts5Porter_MGt0 "ate"  }
  { "ator"    fts5Porter_MGt0 "ate"  }
  { "alism"   fts5Porter_MGt0 "al"   }
  { "iveness" fts5Porter_MGt0 "ive"  }
  { "fulness" fts5Porter_MGt0 "ful"  }
  { "ousness" fts5Porter_MGt0 "ous"  }
  { "aliti"   fts5Porter_MGt0 "al"   }
  { "iviti"   fts5Porter_MGt0 "ive"  }
  { "biliti"  fts5Porter_MGt0 "ble"  }
}

set O(Step3) {
  { "icate" fts5Porter_MGt0 "ic" }
  { "ative" fts5Porter_MGt0 ""   }
  { "alize" fts5Porter_MGt0 "al" }
  { "iciti" fts5Porter_MGt0 "ic" }
  { "ical"  fts5Porter_MGt0 "ic" }
  { "ful"   fts5Porter_MGt0 ""   }
  { "ness"  fts5Porter_MGt0 ""   }
}

set O(Step4) {
  { "al"    fts5Porter_MGt1 "" }
  { "ance"  fts5Porter_MGt1 "" }
  { "ence"  fts5Porter_MGt1 "" }
  { "er"    fts5Porter_MGt1 "" }
  { "ic"    fts5Porter_MGt1 "" }
  { "able"  fts5Porter_MGt1 "" }
  { "ible"  fts5Porter_MGt1 "" }
  { "ant"   fts5Porter_MGt1 "" }
  { "ement" fts5Porter_MGt1 "" }
  { "ment"  fts5Porter_MGt1 "" }
  { "ent"   fts5Porter_MGt1 "" }
  { "ion"   fts5Porter_MGt1_and_S_or_T "" }
  { "ou"    fts5Porter_MGt1 "" }
  { "ism"   fts5Porter_MGt1 "" }
  { "ate"   fts5Porter_MGt1 "" }
  { "iti"   fts5Porter_MGt1 "" }
  { "ous"   fts5Porter_MGt1 "" }
  { "ive"   fts5Porter_MGt1 "" }
  { "ize"   fts5Porter_MGt1 "" }
}

# Comparison callback for two rules: orders them by the second-to-last
# character of their suffix (field 0), which is the character the
# generated switch statement dispatches on.  Returns the result of
# [string compare] on those two characters.
#
# This procedure is not referenced anywhere else in this script; it is
# retained so that anything relying on its existence keeps working.
#
proc sort_cb {lhs rhs} {
  set L [string index [lindex $lhs 0] end-1]
  set R [string index [lindex $rhs 0] end-1]
  string compare $L $R
}

# Write the C implementation of function fts5Porter<name> to stdout.
#
#   name   Suffix of the generated function name, e.g. "Step2".
#   data   List of rules in the format described at the top of this file.
#
# The generated function switches on aBuf[nBuf-2].  Every rule whose
# suffix has that character in its second-to-last position contributes
# one branch to an if/else-if chain inside the matching case, in the same
# order as the rules appear in $data.
#
# Raises an error if a suffix is shorter than two characters, because no
# case label could be derived for it.
#
proc create_step_function {name data} {

  # Outer skeleton of the generated function.
  set T(function) {
static int fts5Porter${name}(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  switch( aBuf[nBuf-2] ){
    ${switchbody}
  }
  return ret;
}
  }

  # One case of the switch statement.
  set T(case) {
    case '${k}':
      ${ifstmts}
      break;
  }

  # Templates for a single rule.  The array key is if_C_M_R, where:
  #   C is 1 if the rule has a condition callback,
  #   M is 1 if the rule has a non-empty replacement (needs a memcpy),
  #   R is 1 if applying the rule sets ret to 1.
  set T(if_0_0_0) {
    if( ${match} ){
      *pnBuf = nBuf - $n;
    }
  }
  set T(if_1_0_0) {
    if( ${match} ){
      if( ${cond} ){
        *pnBuf = nBuf - $n;
      }
    }
  }
  set T(if_0_1_0) {
    if( ${match} ){
      ${memcpy}
      *pnBuf = nBuf - $n + $nRep;
    }
  }
  set T(if_1_1_0) {
    if( ${match} ){
      if( ${cond} ){
        ${memcpy}
        *pnBuf = nBuf - $n + $nRep;
      }
    }
  }
  # This combination is not used by the current rule tables.  It is
  # provided so that such a rule generates code instead of failing with
  # an unknown-array-element error.
  set T(if_0_0_1) {
    if( ${match} ){
      *pnBuf = nBuf - $n;
      ret = 1;
    }
  }
  set T(if_1_0_1) {
    if( ${match} ){
      if( ${cond} ){
        *pnBuf = nBuf - $n;
        ret = 1;
      }
    }
  }
  set T(if_0_1_1) {
    if( ${match} ){
      ${memcpy}
      *pnBuf = nBuf - $n + $nRep;
      ret = 1;
    }
  }
  set T(if_1_1_1) {
    if( ${match} ){
      if( ${cond} ){
        ${memcpy}
        *pnBuf = nBuf - $n + $nRep;
        ret = 1;
      }
    }
  }

  set switchbody ""

  # Group the rules by the second-to-last character of their suffix,
  # preserving the relative order of the rules within each group.
  foreach I $data {
    set zSuffix [lindex $I 0]
    if {[string length $zSuffix] < 2} {
      error "suffix \"$zSuffix\" for $name must be at least 2 characters"
    }
    lappend aCase([string index $zSuffix end-1]) $I
  }

  # Emit the cases in sorted order of their case character.
  foreach k [lsort [array names aCase]] {
    set ifstmts ""
    foreach I $aCase($k) {
      set zSuffix [lindex $I 0]         ;# Suffix text for this rule
      set zRep [lindex $I 2]            ;# Replacement text for rule
      set xCond [lindex $I 1]           ;# Condition callback (or "")

      set n [string length $zSuffix]
      set nRep [string length $zRep]

      # C fragments referenced by the templates above.
      set match "nBuf>$n && 0==memcmp(\"$zSuffix\", &aBuf\[nBuf-$n\], $n)"
      set memcpy "memcpy(&aBuf\[nBuf-$n\], \"$zRep\", $nRep);"
      set cond "${xCond}(aBuf, nBuf-$n)"

      # Flags selecting which template applies to this rule.
      set bMemcpy [expr {$nRep>0}]
      set bCond [expr {$xCond ne ""}]
      set bRet [expr {[llength $I]>3 && [lindex $I 3]}]

      # -nocommands leaves the square brackets of the C text untouched;
      # only variable references in the template are expanded.
      set t $T(if_${bCond}_${bMemcpy}_${bRet})
      lappend ifstmts [string trim [subst -nocommands $t]]
    }

    # Each trimmed statement ends with a closing brace and starts with
    # "if(", so joining with "else " produces an else-if chain.
    set ifstmts [join $ifstmts "else "]

    append switchbody [subst -nocommands $T(case)]
  }

  puts [subst -nocommands $T(function)]
}


puts [string trim {
/**************************************************************************
***************************************************************************
** GENERATED CODE STARTS HERE (mkportersteps.tcl)
*/
}]

# [array names] returns element names in no particular order, so sort
# them to keep the order of the generated functions reproducible.
foreach step [lsort [array names O]] {
  create_step_function $step $O($step)
}

puts [string trim {
/*
** GENERATED CODE ENDS HERE (mkportersteps.tcl)
***************************************************************************
**************************************************************************/
}]