";
// echo "
// ".$perc_key. " VAL ".$perc_val."";
// echo "The file $file_name exists";
// Scan the first lines of $file_name for a "the_title:" header line and tally
// it per corpus in $dtls_rslt. At most three lines are processed per file.
// Reads $file_name and $tref_corp from the surrounding script. On exit
// $str_repl_line still holds the last line that was processed; the code that
// follows this loop reads it when building its own keys.
$sw_corp=1;
$word_count=0;
$line_count=0;
$nl=0;
// $srelt remembers the previous title line so an immediate repeat is skipped.
// Give it a defined value when nothing has assigned it yet.
if (!isset($srelt))
{
    $srelt=null;
}
$f = fopen($file_name, "r");
if ($f !== false)
{
    // fgets() is evaluated before the $nl test, so a fourth line may be read
    // from the file but is never processed.
    while ($line = fgets($f, 100000) and $nl < 3)
    {
        $nl=$nl+1;
        $str_repl_line=$line;
        // NOTE(review): require_once executes the file only the first time it
        // is reached during a request. If ustr-repl-nl.php is meant to
        // transform $str_repl_line for every line, later lines do not get that
        // treatment - confirm. A plain require would re-run it, but is only
        // safe when the file declares no functions or classes.
        require_once("ustr-repl-nl.php");
        $str_repl_line=strtolower(trim($str_repl_line));
        $lstr_repl_line=str_replace("the_title:","",$str_repl_line);

        // Only lines that start with the 10-byte "the_title:" marker count.
        if (substr($str_repl_line,0,10) != "the_title:")
        {
            continue;
        }
        // Skip empty lines and a title identical to the previous one.
        if ($str_repl_line == null || $srelt == $str_repl_line)
        {
            continue;
        }

        // Tally key is "<line without marker>!!<corpus>". It is built once and
        // bumped for every space-separated token longer than three bytes.
        $ltref_domn="".$tref_corp."";
        $sstr_repl_line=str_replace(" ","+",$lstr_repl_line);
        $dtls_rslt_key=$lstr_repl_line."!!".$ltref_domn;
        $bzv1_dtls=explode(" ",$str_repl_line);
        foreach($bzv1_dtls as $dtls_key => $dtls_val)
        {
            if (strlen($dtls_val) > 3)
            {
                // Start from 0 when the key is new instead of reading an
                // undefined array element.
                $dtls_rslt[$dtls_rslt_key]=(isset($dtls_rslt[$dtls_rslt_key]) ? $dtls_rslt[$dtls_rslt_key] : 0)+1;
            }
        }

        // Title text after the marker, and a "+"-joined variant of it. Neither
        // is read in the visible part of the script; kept in case other code
        // relies on these globals.
        $invr_bzv1=substr($str_repl_line,10,256);
        $sinvr_bzv1=str_replace(" ","+",$invr_bzv1);
        $srelt = $str_repl_line;
    }
    // Release the handle; a new one is opened for every file that is scanned.
    fclose($f);
}
$corp_array[$perc_val]=$corp_array[$perc_val]+1;
}
else
{
// echo "
// ".$perc_val."";
}
////////////////////////////////////
$chbt_array[$tref_corp."@".$tref_chbt."@".$tref_taal."@".$tref_domn."@".$tref_ipad."@".$str_repl_line]=$chbt_array[$tref_corp."@".$tref_chbt."@".$tref_taal."@".$tref_domn."@".$tref_ipad."@".$str_repl_line]+1;
echo "".substr($str_repl_line,10,256)." | ".$tref_chbt." | ".$tref_corp." |
";
}
}
}
}
}
// Print the title tallies collected in $dtls_rslt, lowest count first.
// Each key has the form "<title>!!<corpus>". A corpus is printed followed by
// " | " and the title whenever it differs from the previously printed corpus;
// otherwise the title is appended after ", ".
if (!isset($dtls_rslt))
{
    // No title was tallied: iterate over an empty list instead of an
    // undefined variable.
    $dtls_rslt=[];
}
// $drelt holds the corpus printed last; give it a defined start value when
// nothing has assigned it yet.
if (!isset($drelt))
{
    $drelt=null;
}
// NOTE(review): asort() orders by count, not by corpus, so entries of one
// corpus are not guaranteed to be adjacent and the same corpus may be printed
// as a heading more than once - confirm this is the intended order.
asort($dtls_rslt);
// NOTE(review): the empty echo statements below output nothing as written;
// presumably markup belonged in these strings - confirm.
echo "";
foreach($dtls_rslt as $rslt_key => $rslt_val)
{
    echo "";
    // [0] = title, [1] = corpus.
    $rslt_rslt=explode("!!",$rslt_key);
    if ($drelt != $rslt_rslt[1])
    {
        echo "".$rslt_rslt[1]." | ".$rslt_rslt[0];
        $drelt = $rslt_rslt[1];
    }
    else
    {
        echo ", ".$rslt_rslt[0];
    }
}
echo " |
";
// Print the topic heading and walk the chatbot index in key order.
// Keys of $chbt_array are built elsewhere in this script as
// $tref_corp@$tref_chbt@$tref_taal@$tref_domn@$tref_ipad@<line>.
// The loop itself prints nothing: it only leaves the $domn_*, $frst_letr and
// $ltref_domn variables set to the values derived from the last key.
echo "Voor Semanta zijn de volgende onderwerpen geidentificeerd door Lingvistica:
";
if (!isset($chbt_array))
{
    // Nothing was indexed: sort and iterate an empty list instead of an
    // undefined variable.
    $chbt_array=[];
}
// $frelt holds the first byte of the previous key; give it a defined start
// value when nothing has assigned it yet.
if (!isset($frelt))
{
    $frelt=null;
}
ksort($chbt_array);
foreach($chbt_array as $chbt_key => $chbt_val)
{
    // Limit the split to six parts so an "@" inside the trailing line text
    // stays in the last field instead of cutting it short.
    $domn_dtls=explode("@",$chbt_key,6);
    $domn_corp=$domn_dtls[0];
    $domn_chbt=$domn_dtls[1];
    $domn_taal=$domn_dtls[2];
    $domn_domn=$domn_dtls[3];
    $domn_srvr=$domn_dtls[4];
    $domn_titl=$domn_dtls[5];
    $domn_tabl=str_replace("@","",$chbt_key);
    // Track a change of the key's first byte and keep its upper-cased form
    // wrapped in dashes.
    if ($frelt != $chbt_key[0])
    {
        $frst_letr="-".strtoupper($chbt_key[0])."-";
        $frelt=$chbt_key[0];
    }
    $ltref_domn="".$chbt_key."";
}
echo "
";
?>