Copyright 2004 The Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Lucene Domain Index is full integration of Lucene project running inside the Oracle database using Oracle JVM.
Oracle provides a full featured JVM inside your Oracle Database compliant with JDK 1.4 in 10g release and 1.5 in 11g.
Oracle include a full feature Enterprise Engine named Oracle Text made in C and fully integrated to the Oracle Engine, but:
on Oracle Text you can not:
db.str=test
db.usr=LUCENE
db.pwd=LUCENE
dba.usr=sys
dba.pwd=change_on_install
javac.debug=true
javac.source=1.4
javac.target=1.4
MAVEN_HOME=/usr/local/mavenUpload, install and test your code into the database
ORACLE_BASE=/u01/app/oracle
ORACLE_HOME=$ORACLE_BASE/product/11.1.0.6.0/db_1
ORACLE_SID=test
JAVA_HOME=$ORACLE_HOME/jdk
PATH=$MAVEN_HOME/bin:$HOME/bin:$ORACLE_HOME/bin:$JAVA_HOME/bin:/usr/local/bin:$PATH
LD_LIBRARY_PATH=$ORACLE_HOME/lib:/usr/local/lib
CVS_RSH=ssh
umask 022
export PATH LD_LIBRARY_PATH ORACLE_HOME ORACLE_BASE ORACLE_SID JAVA_HOME CVS_RSH NLS_LANG
# ant install-ojvm
# ant test-domain-index
# ant jit-lucene-classesThis target force to translate all Lucene, Snowball and OJVMDirectory classes to assembler.
db.str=orcl
db.usr=LUCENE
db.pwd=LUCENE
dba.usr=sys
dba.pwd=change_on_install
javac.debug=true
javac.source=1.4
javac.target=1.4
MAVEN_HOME=/usr/local/mavenIf you are re-installing Oracle Lucene OJVM integration first drop any Lucene Domain Index not installed at Lucene's schema.
ORACLE_BASE=/u01/app/oracle
ORACLE_HOME=$ORACLE_BASE/product/10.2.0/db_1
ORACLE_SID=orcl
JAVA_HOME=$ORACLE_HOME/jdk
PATH=$MAVEN_HOME/bin:$HOME/bin:$ORACLE_HOME/bin:$JAVA_HOME/bin:/usr/local/bin:$PATH
LD_LIBRARY_PATH=$ORACLE_HOME/lib:/usr/local/lib
CVS_RSH=ssh
umask 022
export PATH LD_LIBRARY_PATH ORACLE_HOME ORACLE_BASE ORACLE_SID JAVA_HOME CVS_RSH NLS_LANG
# ant ncomp-ojvm
# ant test-domain-index
cd /tmp
cvs -d:pserver:anonymous@dbprism.cvs.sourceforge.net:/cvsroot/dbprism login
cvs -z3 -d:pserver:anonymous@dbprism.cvs.sourceforge.net:/cvsroot/dbprism co -P ojvm
# cd $LUCENE_ROOT/contrib
# cp -rp /tmp/ojvm .
<target name="jar-test" depends="compile-test">
<jar destfile="${build.dir}/${final.name}-test.jar" basedir="${build.dir}/classes/test" excludes="**/*.java"/>
</target>
<target name="test" depends="compile-test" description="Runs unit tests">
<fail unless="junit.present">
##################################################################
JUnit not found.
Please make sure junit.jar is in ANT_HOME/lib, or made available
to Ant using other mechanisms like -lib or CLASSPATH.
##################################################################
</fail>
............
<!-- contrib/ojvm uses these system properties to connect to the target database -->
<sysproperty key="db.str" value="${db.str}"/>
<sysproperty key="db.usr" value="${db.usr}"/>
<sysproperty key="db.pwd" value="${db.pwd}"/>
............
<delete file="${build.dir}/test/junitfailed.flag" />
</target>
# cd $LUCENE_ROOT/contrib/ojvm
# ant jar-core
# ant jar-test
db.str=orcl
db.usr=LUCENE
db.pwd=LUCENE
dba.usr=sys
dba.pwd=change_on_install
javac.debug=true
javac.source=1.4
javac.target=1.4
# ant install-ojvm
# ant generate-maven-artifacts
# ant ncomp-ojvm
ant jit-lucene-classes
ant jit-oracle-classes
-- connected as sysdba
begin
dbms_java.grant_permission('SCOTT','SYS:java.util.logging.LoggingPermission', 'control', '' );
commit;
end;
/
[mochoa@mochoa ojvm]$ ant test-domain-index
Buildfile: build.xml
[echo] Building ojvm...
test-domain-index:
[exec]
[exec] SQL*Plus: Release 11.1.0.6.0 - Production on Wed Dec 5 17:43:24 2007
[exec]
[exec] Copyright (c) 1982, 2007, Oracle. All rights reserved.
[exec]
[exec]
[exec] Connected to:
[exec] Oracle Database 11g Release 11.1.0.6.0 - Production
[exec]
[exec]
[exec] Table dropped.
[exec]
[exec]
[exec] Table created.
[exec]
[exec] SQL> Disconnected from Oracle Database 11g Release 11.1.0.6.0 - Production
[echo] See output at ../../build/testLuceneDomainIndex.txt
[mochoa@mochoa ojvm]$ ant -Ddb.usr=scott -Ddb.pwd=tiger -Ddb.str=test "-Djunit.includes=**/AllTests.java" ojvm-test
Buildfile: build.xml
[echo] Building ojvm...
ojvm-test:
[echoproperties] #Ant properties
[echoproperties] #Wed Dec 05 17:56:30 ART 2007
.........
common.test:
[junit] Testsuite: org.apache.lucene.index.TestDBIndex
[junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 5.883 sec
[junit]
[junit] ------------- Standard Output ---------------
[junit] Table created: T1
[junit] Index created: IT1
[junit] Index altered: IT1
[junit] Inserted rows: 40 total bytes inserted: 421 avg text length: 10
[junit] Index synced: IT1 elapsed time: 249 ms.
[junit] Avg Sync time: 6
[junit] Index optimized: IT1 elapsed time: 46 ms.
[junit] Avg Optimize time: 1
[junit] Row deleted 41, from: 10 to: 50 elapsed time: 2005 ms. Avg time: 48 ms.
[junit] Index droped: IT1
[junit] Table droped: T1
[junit] ------------- ---------------- ---------------
.............
[junit] Testsuite: org.apache.lucene.indexer.TestQueryHits
[junit] Tests run: 2, Failures: 0, Errors: 0, Time elapsed: 4.158 sec
[junit]
[junit] ------------- Standard Output ---------------
[junit] iteration from: 13775 to: 13785
[junit] Step time: 1291 ms.
[junit] iteration from: 13785 to: 13795
[junit] Step time: 157 ms.
[junit] iteration from: 13795 to: 13805
[junit] Step time: 144 ms.
[junit] iteration from: 13805 to: 13815
[junit] Step time: 147 ms.
[junit] iteration from: 13815 to: 13825
[junit] Step time: 145 ms.
[junit] iteration from: 13825 to: 13835
[junit] Step time: 147 ms.
[junit] iteration from: 13835 to: 13845
[junit] Step time: 145 ms.
[junit] iteration from: 13845 to: 13855
[junit] Step time: 150 ms.
[junit] iteration from: 13855 to: 13865
[junit] Step time: 278 ms.
[junit] iteration from: 13865 to: 13875
[junit] Step time: 146 ms.
[junit] Elapsed time: 3159
[junit] Hits: 18387
[junit] Elapsed time: 653
[junit] ------------- ---------------- ---------------
[junit] Testsuite: org.apache.lucene.indexer.TestTableIndexer
[junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 0.685 sec
[junit]
[delete] Deleting: /u01/src/lucene-2.2.0/build/contrib/ojvm/test/junitfailed.flag
BUILD SUCCESSFUL
Total time: 8 minutes 22 seconds
org.apache.lucene.indexer.TestQueryHits use a table which is very big to create and destroy it at setup() and tearDown() methods. Before run this test create the table with:
create table test_source_big as (select * from all_source);
and his index on 10g with:
create index source_big_lidx on test_source_big(text)
indextype is lucene.LuceneIndex parameters('AutoTuneMemory:true;Analyzer:org.apache.lucene.analysis.SimpleAnalyzer;MergeFactor:500;FormatCols:line(0000);ExtraCols:line "line"');
create index source_big_lidx on test_source_big(text)
indextype is lucene.LuceneIndex
parameters('AutoTuneMemory:true;FormatCols:line(0000);ExtraCols:line "line";Analyzer:org.apache.lucene.analysis.SimpleAnalyzer;MergeFactor:500;LobStorageParameters:PCTVERSION 0 DISABLE STORAGE IN ROW CHUNK 32768 CACHE READS FILESYSTEM_LIKE_LOGGING');
If you want to execute only one or some specific test override Ant's property junit.includes, for example:
ant -Ddb.usr=scott -Ddb.pwd=tiger -Ddb.str=test "-Djunit.includes=**/TestDBIndex.java" ojvm-test
Note that this argument is enclosed by "" to prevent Unix shell replacement.
-- connected as sysdba
begin
dbms_java.grant_permission('SCOTT','SYS:java.util.logging.LoggingPermission', 'control', '' );
commit;
end;
/
create table t1 (
f1 number,
f2 varchar2(200),
f3 varchar2(200),
f4 number unique);
create index it1 on t1(f2) indextype is lucene.LuceneIndex
parameters('Analyzer:org.apache.lucene.analysis.SimpleAnalyzer');
create index it1 on t1(f2) indextype is lucene.LuceneIndex
parameters('Stemmer:English');
This limiis due a limitation in Oracle DBMS AQ table name. Every Lucene Domain Index have associated a AQ queue named idx_name$Q and his queue table idx_name$QT.
create index it1 on t1(f2) indextype is lucene.LuceneIndex
parameters('Stemmer:English;ExtraCols:F1 "f1"');
CREATE OR REPLACE TRIGGER L$IT1
BEFORE UPDATE OF f1 ON t1
FOR EACH ROW
BEGIN
:new.f2 := :new.f2;
END;
/
create table t2 (
f4 number primary key,
f5 VARCHAR2(200));
create table t1 (
f1 number,
f2 VARCHAR2(4000),
f3 number,
CONSTRAINT t1_t2_fk FOREIGN KEY (f3)
REFERENCES t2(f4) ON DELETE cascade);
create index it1 on t1(f3) indextype is lucene.LuceneIndex
parameters('ExtraCols:L$MT.f2 "f2",t2.f5 "f5";ExtraTabs:t2;WhereCondition:L$MT.f3=t2.f4');
select L$MT.f3,L$MT.f2 "f2",t2.f5 "f5" from t1 L$MT,t2 where L$MT.rowid=? and L$MT.f3=t2.f4;
CREATE OR REPLACE TRIGGER L$IT1
BEFORE UPDATE OF f2 ON t1
FOR EACH ROW
BEGIN
:new.f3 := :new.f3;
END;
/
CREATE OR REPLACE TRIGGER LT$IT1
AFTER UPDATE OF f5 ON t2
FOR EACH ROW
DECLARE
ridlist sys.ODCIRidList;
BEGIN
SELECT ROWID
BULK COLLECT INTO ridlist
FROM T1 WHERE F3=:NEW.f4;
LuceneDomainIndex.enqueueChange(USER||'.IT1',ridlist,'update');
END;
/
Lucene Domain Index can be customized with a parameter named UserDataStore, it defines which class is responsible for creating Lucene documents. Lucene documents are a list of Field one for each column indexed plus and extra field named rowid stored compressed and untokenized. By default UserDataStore is defined to org.apache.lucene.indexer.DefaultUserDataStore.
Default User Data Store supports left padding for NUMBER or FLOAT columns, and left character padding for VARCHAR2 or CHAR columns. To define padding, FormatCols parameter at create or alter index DDL command can be used. Here an example:
create table t1 (f1 number primary key, f2 varchar2(200), f3 number(4,2)) ORGANIZATION INDEX;
insert into t1 values (1, 'ravi', 3.46);
insert into t1 values (3, 'murthy', 15.87);
commit;
create index it1 on t1(f2) indextype is lucene.LuceneIndex
parameters('Stemmer:English;FormatCols:F2(zzzzzzzzzzzzzzz),F3(00.00);ExtraCols:F3');
Document<stored/compressed,indexed<rowid:*BAEAPBQCwQL+> indexed,tokenized<F2:zzzzzzzzzzzravi> indexed<F3:03.46>>
Document<stored/compressed,indexed<rowid:*BAEAPBQCwQT+> indexed,tokenized<F2:zzzzzzzzzmurthy> indexed<F3:15.87>>
create table t1 (f1 VARCHAR2(10), f2 XMLType);
insert into t1 values ('1', XMLType('<emp id="1"><name>ravi</name></emp>'));
insert into t1 values ('3', XMLType('<emp id="3"><name>murthy</name></emp>'));
commit;
create index it1 on t1(f1) indextype is lucene.LuceneIndex
parameters('ExtraCols:F2;FormatCols:F1(000),F2(/emp/name)');
Document<stored/compressed,indexed<rowid:AAATciAAEAAADwcAAA> indexed,tokenized<F1:001> indexed,tokenized<F2:ravi >>
Document<stored/compressed,indexed<rowid:AAATciAAEAAADwcAAB> indexed,tokenized<F1:003> indexed,tokenized<F2:murthy >>
create table emails (
emailFrom VARCHAR2(256),
emailTo VARCHAR2(256),
subject VARCHAR2(4000),
emailDate DATE,
bodyText CLOB)
/
create index emailbodyText on emails(bodyText) indextype is lucene.LuceneIndex
parameters('Analyzer:org.apache.lucene.analysis.StopAnalyzer;ExtraCols:emailDate "emailDate",subject "subject",emailFrom "emailFrom",emailTo "emailTo"');
-- required to Sort by subject
alter index emailbodyText parameters('LogLevel:INFO;FormatCols:subject(NOT_ANALYZED),emailFrom(NOT_ANALYZED),emailTo(NOT_ANALYZED)');
The translation rules to Lucene Fields is:
create index it1 on t1(f1) indextype is lucene.LuceneIndex
parameters('ExtraCols:F2,extractValue(F2,''/emp/@id'') "id";FormatCols:F1(000),F2(/emp/name),id(00)');
Document<stored/compressed,indexed<rowid:AAATciAAEAAADwcAAA> indexed,tokenized<F1:001> indexed,tokenized<F2:ravi > indexed,tokenized<id:01>>
Document<stored/compressed,indexed<rowid:AAATciAAEAAADwcAAB> indexed,tokenized<F1:003> indexed,tokenized<F2:murthy >
indexed,tokenized<id:03>>
create index pages_lidx_all on pages p (value(p))
indextype is Lucene.LuceneIndex
parameters('SyncMode:OnLine;LogLevel:WARNING;Stemmer:Spanish;ExtraCols:extractValue(object_value,''/page/title'') "title",extractValue(object_value,''/page/revision/comment'') "comment",extract(object_value,''/page/revision/text/text()'') "text",extractValue(object_value,''/page/revision/timestamp'') "revisionDate";IncludeMasterColumn:false;LobStorageParameters:PCTVERSION 0 ENABLE STORAGE IN ROW CHUNK 32768 CACHE READS FILESYSTEM_LIKE_LOGGING');
create index it1 on t1(f2) indextype is lucene.LuceneIndex
parameters('PopulateIndex:false;LogLevel:ALL;IncludeMasterColumn:false;ExtraCols:F1,extractValue(F2,''/emp/name/text()'') "name",extractValue(F2,''/emp/@id'') "id";FormatCols:F1(000),id(00)');
-- A this point it1 index is ready but not populated
select lscore(1),f2 from t1 where lcontains(f2, 'name:ravi',1) > 0;
-- Populate Index
alter index it1 rebuild parameters('Analyzer:org.apache.lucene.analysis.WhitespaceAnalyzer');
-- query, returns one row
select lscore(1),f2 from t1 where lcontains(f2, 'name:ravi',1) > 0;
alter index it1
parameters('MaxBufferedDocs:500;AutoTuneMemory:false');
alter index it1
parameters('MaxBufferedDocs:500;AutoTuneMemory:false;SyncMode:OnLine');
alter index it1 parameters('~SyncMode:OnLine');
alter index it1 rebuild
parameters('Analyzer:org.apache.lucene.analysis.StopAnalyzer;MaxBufferedDocs:500;AutoTuneMemory:false);
alter index it1 rebuild parameters('SyncMode:OnLine;MergeFactor:100;BatchCount:1000');
commit; -- notify change to AQ Callback
drop index it1;
SQL> select lscore(1),f2 from t1 where lcontains(f1, 'F2:ravi',1) > 0;
LSCORE(1)
----------
F2
--------------------------------------------------------------------------------------------------------------------------------------------
1
<emp id="1">
<name>ravi</name>
</emp>
SQL>
SQL> select lscore(1) from t1 where lcontains(f1,'001',1)>0;
LSCORE(1)
----------
1
SQL>
SQL> select f1,lscore(1) sc,extractValue(f2,'/emp/@id') id from t1
2 where lcontains(f1, '003 OR (F2:ravi AND id:01)',1)>0;
F1 SC ID
---------- ---------------- -----------------------------
1 .577350259 1
3 .288675129 3
select /*+ DOMAIN_INDEX_SORT */ ... where lcontains(col,'rownum:[20 TO 40] AND word1',1)>0 order by lscore(1) DESC;
SQL> SELECT /*+ DOMAIN_INDEX_SORT */ subject FROM emails
2 where lcontains(bodytext,'security','subject:ASC',1)>0;
SUBJECT
--------------------------------------------------------------------------------------------------------------------------------------------
Re: lucene injection
Re: lucene injection
Re: lucene injection
Re: lucene injection
lucene injection
Elapsed: 00:00:00.04
SQL> SELECT /*+ DOMAIN_INDEX_SORT */ subject FROM emails
2 where lcontains(bodytext,'security','subject:DESC',1)>0;
SUBJECT
--------------------------------------------------------------------------------------------------------------------------------------------
lucene injection
Re: lucene injection
Re: lucene injection
Re: lucene injection
Re: lucene injection
Elapsed: 00:00:00.17
SQL> SELECT /*+ DOMAIN_INDEX_SORT */ subject,emailFrom FROM emails
2 where lcontains(bodytext,'security','subject:ASC:string,emailFrom:DESC:string',1)>0;
SUBJECT EMAILFROM
----------------------------------------- -------------------------------------------------------------------------
Re: lucene injection lucenelist2005@danielnaber.de
Re: lucene injection lucenelist2005@danielnaber.de
Re: lucene injection erik@ehatchersolutions.com
Re: lucene injection codeshepherd@gmail.com
lucene injection codeshepherd@gmail.com
Elapsed: 00:00:00.06
SQL> SELECT /*+ DOMAIN_INDEX_SORT */ subject,emailFrom FROM emails
2 where lcontains(bodytext,'security','subject:ASC:string,emailFrom:ASC:string',1)>0;
SUBJECT EMAILFROM
------------------------------------------ ------------------------------------------------------------------------
Re: lucene injection codeshepherd@gmail.com
Re: lucene injection erik@ehatchersolutions.com
Re: lucene injection lucenelist2005@danielnaber.de
Re: lucene injection lucenelist2005@danielnaber.de
lucene injection codeshepherd@gmail.com
Elapsed: 00:00:00.05
SQL> SELECT /*+ DOMAIN_INDEX_SORT */ subject,emailFrom FROM emails
2 where lcontains(bodytext,'security',1)>0;
SUBJECT EMAILFROM
------------------------------------------- ----------------------------------------------------------------------
lucene injection codeshepherd@gmail.com
Re: lucene injection erik@ehatchersolutions.com
Re: lucene injection codeshepherd@gmail.com
Re: lucene injection lucenelist2005@danielnaber.de
Re: lucene injection lucenelist2005@danielnaber.de
Elapsed: 00:00:00.09
SQL> SELECT lscore(1),subject FROM emails
2 where lcontains(bodytext,'security',1)>0;
LSCORE(1) SUBJECT
-------------- ------------------------------------------------------------------------------------------------------
.241440386 lucene injection
.22763218 Re: lucene injection
.199178159 Re: lucene injection
.140840232 Re: lucene injection
.140840232 Re: lucene injection
Elapsed: 00:00:00.10
SQL> SELECT lscore(1),subject FROM emails
2 where lcontains(bodytext,'security',1)>0 order by lscore(1) asc;
LSCORE(1) SUBJECT
------------- -----------------------------------------------------------------------------------------------------
.140840232 Re: lucene injection
.140840232 Re: lucene injection
.199178159 Re: lucene injection
.22763218 Re: lucene injection
.241440386 lucene injection
Elapsed: 00:00:00.11
SQL> SELECT /*+ DOMAIN_INDEX_SORT */ lscore(1),subject FROM emails
2 where lcontains(bodytext,'security','subject:DESC',1)>0;
LSCORE(1) SUBJECT
------------- ----------------------------------------------------------------------------------------------------
.241440386 lucene injection
.22763218 Re: lucene injection
.199178159 Re: lucene injection
.140840232 Re: lucene injection
.140840232 Re: lucene injection
Elapsed: 00:00:00.07
SQL> SELECT /*+ DOMAIN_INDEX_SORT */ lscore(1),subject FROM emails
2 where lcontains(bodytext,'security','subject:ASC',1)>0;
LSCORE(1) SUBJECT
------------- -------------------------------------------------------------------------------------------------
.140840232 Re: lucene injection
.140840232 Re: lucene injection
.199178159 Re: lucene injection
.22763218 Re: lucene injection
.241440386 lucene injection
Elapsed: 00:00:00.07
SQL> SELECT /*+ DOMAIN_INDEX_SORT BAD */ lscore(1),subject FROM emails
2 where lcontains(bodytext,'security','subject:ASC',1)>0;
LSCORE(1) SUBJECT
------------- -------------------------------------------------------------------------------------------------
.140840232 Re: lucene injection
.140840232 Re: lucene injection
.199178159 Re: lucene injection
.22763218 Re: lucene injection
.241440386 lucene injection
Elapsed: 00:00:00.07
SQL> SELECT /*+ DOMAIN_INDEX_SORT GOOD */ subject FROM emails
2 where lcontains(bodytext,'security','subject:ASC')>0;
SUBJECT
-------------------------------------------------------------------------------------------------
Re: lucene injection
Re: lucene injection
Re: lucene injection
Re: lucene injection
lucene injection
Elapsed: 00:00:00.02
SQL> select LuceneDomainIndex.countHits('EMAILBODYTEXT','security') hits from dual;
HITS
----------
5
Elapsed: 00:00:00.02
SQL> select LuceneDomainIndex.countHits('EMAILBODYTEXT','security') hits from dual;
HITS
----------
5
Elapsed: 00:00:00.02
SQL> select emailFrom FROM emails
2 where lcontains(bodytext,'security',1)>0;
EMAILFROM
--------------------------------------------------------------------------------------------------------------------------------------------
codeshepherd@gmail.com
erik@ehatchersolutions.com
codeshepherd@gmail.com
lucenelist2005@danielnaber.de
lucenelist2005@danielnaber.de
Elapsed: 00:00:00.08
SQL> select LuceneDomainIndex.countHits('EMAILBODYTEXT','security') from dual;
LUCENEDOMAININDEX.COUNTHITS('EMAILBODYTEXT','SECURITY')
------------------------------------------------------------------------------
5
Elapsed: 00:00:00.02
SQL> select emailFrom FROM emails
2 where lcontains(bodytext,'security','emailFrom:ASC',1)>0;
EMAILFROM
--------------------------------------------------------------------------------------------------------------------------------------------
codeshepherd@gmail.com
codeshepherd@gmail.com
erik@ehatchersolutions.com
lucenelist2005@danielnaber.de
lucenelist2005@danielnaber.de
Elapsed: 00:00:00.04
SQL> select /*+ FIRST_ROWS DOMAIN_INDEX_SORT */ lhighlight(1),extractValue(object_value,'/page/title')
from pages where lcontains(object_value,'rownum:[1 TO 10] AND (musica tango rock)',1)>0;
SQL> select count(*) from pages where lcontains(object_value,'musica tango rock')>0;
SQL> SELECT /*+ DOMAIN_INDEX_SORT */ lhighlight(1) txt,lscore(1) sc,subject
2 FROM emails where lcontains(bodytext,'security OR mysql','subject:ASC',1)>0;
TXT SC SUBJECT
On Dec 21, 2006, at 4:56 AM, Deepan wrote:> I am bothered about <B>security</B> .27477634 Re: lucene injection
problems with lucene. Is it vulnerable to> any kind of injection like <B>mysql</
B> injection? many times the query from> user is passed to lucene for search wit
hout validating.Rest easy. There are no known <B>security</B> issues with Lucen
e, and ithas even undergone a recent static code analysis by Fortify (see theluc
ene-dev e-mail list
create index pages_lidx_all on pages p (value(p))
indextype is Lucene.LuceneIndex
parameters('PopulateIndex:false;DefaultColumn:text;SyncMode:Deferred;LogLevel:INFO;
Analyzer:org.apache.lucene.analysis.SpanishWikipediaAnalyzer;
ExtraCols:extractValue(object_value,''/page/title'') "title",
extractValue(object_value,''/page/revision/comment'') "comment",
extract(object_value,''/page/revision/text/text()'') "text",
extractValue(object_value,''/page/revision/timestamp'') "revisionDate";
FormatCols:revisionDate(day);IncludeMasterColumn:false;
LobStorageParameters:PCTVERSION 0 ENABLE STORAGE IN ROW CHUNK 32768 CACHE READS FILESYSTEM_LIKE_LOGGING');
SQL> select /*+ DOMAIN_INDEX_SORT */ lhighlight(1),extractValue(object_value,'/page/title') from pages where lcontains(object_value,'rownum:[1 TO 10] AND (musica tango rock)',1)>0;
<page xmlns="http://www.mediawiki.org/xml/export-0.3/">
<title><B>Música</B> de Argentina... [[Latinoamérica|latinoamericanos]] con más desarrollo en su [[<B>música</B>]].
Se encuentra una gran... argentinos, un instrumento tradicional andino]]
Aún se mantiene la <B>música</B> de los [[IndÃgenas_en_Argentina... de grandes corrientes de [[inmigración|inmigrantes]] europeos, la <B>música</B> argentina se enriqueció
Música de Argentina
musical emparentado con la [[habanera]] y el [[<B>tango</B> (<B>música</B>)|<B>tango</B>]].
==Diferencias con el <B>tango</B>==
Aunque tanto la milonga como el <B>tango</B> están en [[compás]] de 2/4, las 8 [[semicorchea]]s de la milonga están distribuidas en 3 + 3 + 2 en cambio el <B>tango</B> posee un ritmo más «cuadrado». Las letras...]] criticó en algún momento el <B>tango</B> y prefirió la milonga, que no trasmite la melancolÃa
Milonga (género musical)
create or replace and compile java source named "org.apache.lucene.search.highlight.MyHTMLFormatter" as
package org.apache.lucene.search.highlight;
public class MyHTMLFormatter extends SimpleHTMLFormatter {
public MyHTMLFormatter() {
super("<span class=\"myhighlightclass\">","</span>");
}
}
/
show errors
alter index emailbodyText
parameters('Formatter:org.apache.lucene.search.highlight.MyHTMLFormatter;MaxNumFragmentsRequired:3;FragmentSeparator:...;FragmentSize:50');
SELECT * FROM
TABLE(phighlight(
'EMAILBODYTEXT',
'lucene OR mysql',
'SUBJECT,BODYTEXT',
'select /*+ DOMAIN_INDEX_SORT FIRST_ROW */ lscore(1) sc,e.*
from eMails e where lcontains(bodytext,''security OR mysql'',''subject:ASC'',1)>0'
));
SELECT * FROM
TABLE(rhighlight(
'EMAILBODYTEXT',
'lucene OR mysql',
'SUBJECT,BODYTEXT',
'EMAILRSET',
CURSOR(select /*+ DOMAIN_INDEX_SORT FIRST_ROW */ lscore(1) sc,e.*
from eMails e where lcontains(bodytext,'security OR mysql','subject:ASC',1)>0)
));
CREATE TYPE EMAILR AS OBJECT
(
sc NUMBER,
emailFrom VARCHAR2(256),
emailTo VARCHAR2(256),
subject VARCHAR2(4000),
emailDate DATE,
bodyText CLOB
);
CREATE OR REPLACE TYPE EMAILRSET AS TABLE OF EMAILR;
select rowid,lscore(1),text from test_source_big where lcontains(text,'"procedure java"~10',1)>0 order by lscore(1) desc;
AAAOaPAAEAAAAnnABV 1.00000003 procedure (C, Java or PL/SQL), optionally qualified
AAAOaPAAEAAAA0aAAV .84852819 STATIC PROCEDURE refreshParameterCache as LANGUAGE JAVA NAME
.........
declare
ridlist sys.odciridlist;
begin
ridlist := MoreLike.this(index_name=>'SOURCE_BIG_LIDX',x=>'AAAOaPAAEAAAAnnABV',minTermFreq=>1);
FOR i IN (select rowid,text from test_source_big where rowid in (select * from table(ridlist_table(ridlist)))) LOOP
dbms_output.put_line('rowid: '||i.rowid||' text: '||i.text);
END LOOP;
end;
/
rowid: AAAOaPAAEAAAAhLAAc text: -- after issuing insert, update, delete or anonymous PL/SQL calls
rowid: AAAOaPAAEAAAAjrAAo text: -- QUALIFIED_SQL_NAME
rowid: AAAOaPAAEAAAAk5AAe text: -- ORA-06502: PL/SQL: numeric or value error: character string buffer
.....
rowid: AAAOaPAAEAAAAtXAAb text: -- The name of the Java class, PL/SQL package or object type implementing
SQL> select lfacets('SOURCE_BIG_LIDX,TEXT:procedure,TEXT:java') from dual;
create table source_categories (
cat_code number(4),
cat_name varchar2(256),
cat_parent number(4),
CONSTRAINT PK_SOURCE_CATEGORIES PRIMARY KEY (cat_code),
CONSTRAINT FK_CAT_PARENT FOREIGN KEY (cat_parent)
REFERENCES source_categories (cat_code)
);
insert into source_categories values (1,'TEXT:procedure',null);
insert into source_categories values (2,'TEXT:function',null);
...
insert into source_categories values (6,'TEXT:java',1);
insert into source_categories values (7,'TEXT:(pl sql)',1);
insert into source_categories values (8,'TEXT:wrapped',1);
...
insert into source_categories values (21,'line:[1 TO 1000]',1);
insert into source_categories values (22,'line:[1001 TO 2000]',1);
insert into source_categories values (23,'line:[2001 TO 3000]',1);
SQL> select ljoin(lfacets('SOURCE_BIG_LIDX,'||
case level when 1 then cat_name
ELSE PRIOR cat_name||','|| cat_name
END
)), cat_code,level
FROM source_categories
start with cat_parent is null
CONNECT BY PRIOR cat_code = cat_parent
group by cat_code,level;
LJOIN(LFACETS('SOURCE_BIG_LIDX,'||CASELEVELWHEN1THENCAT_NAMEELSEPRIORCAT_NAME||','||CAT_NAMEEND)) CAT_CODE LEVEL
TEXT:procedure(5116) 1 1
TEXT:function(5574) 2 1
TEXT:trigger(96) 3 1
TEXT:package(860) 4 1
TEXT:(object type)(5140) 5 1
TEXT:procedure,TEXT:java(9) 6 2
.....
TEXT:procedure,line:[1 TO 1000](3) 21 2
TEXT:procedure,line:[1001 TO 2000](615) 22 2
...
SQL> select ljoin(lfacets('SOURCE_BIG_LIDX,'||
case level when 1 then cat_name
ELSE PRIOR cat_name||','|| cat_name
END
)), cat_parent
FROM source_categories
start with cat_parent is null
CONNECT BY PRIOR cat_code = cat_parent
group by cat_parent;
LJOIN(LFACETS('SOURCE_BIG_LIDX,'||CASELEVELWHEN1THENCAT_NAMEELSEPRIORCAT_NAME||','||CAT_NAMEEND)) CAT_PARENT
----------------------------------------------------------------------------------------------------------------------------------------- ---------------
TEXT:procedure,TEXT:java(11),TEXT:procedure,TEXT:(pl sql)(70),TEXT:procedure,line:[1 TO 1000](3),TEXT:procedure,TEXT:wrapped(21),TEXT:proced
ure,line:[1001 TO 2000](675),TEXT:procedure,line:[3001 TO 4000](105),TEXT:procedure,line:[4001 TO 5000](10),TEXT:procedure,line:[2001 TO 300
0](199) 1
TEXT:function,TEXT:java(22),TEXT:function,TEXT:wrapped(85),TEXT:function,line:[1 TO 1000](0),TEXT:function,TEXT:(pl sql)(87),TEXT:function,l
ine:[1001 TO 2000](835),TEXT:function,line:[3001 TO 4000](21),TEXT:function,line:[4001 TO 5000](0),TEXT:function,line:[2001 TO 3000](338) 2
TEXT:trigger,TEXT:java(1),TEXT:trigger,line:[1 TO 1000](0),TEXT:trigger,TEXT:wrapped(0),TEXT:trigger,TEXT:(pl sql)(1),TEXT:trigger,line:[100
1 TO 2000](33),TEXT:trigger,line:[3001 TO 4000](0),TEXT:trigger,line:[4001 TO 5000](0),TEXT:trigger,line:[2001 TO 3000](0) 3
TEXT:package,TEXT:java(7),TEXT:package,line:[1 TO 1000](0),TEXT:package,TEXT:(pl sql)(25),TEXT:package,TEXT:wrapped(137),TEXT:package,line:[
1001 TO 2000](54),TEXT:package,line:[3001 TO 4000](5),TEXT:package,line:[4001 TO 5000](0),TEXT:package,line:[2001 TO 3000](5) 4
TEXT:(object type),TEXT:java(56),TEXT:(object type),TEXT:(pl sql)(106),TEXT:(object type),line:[1 TO 1000](1),TEXT:(object type),TEXT:wrappe
d(76),TEXT:(object type),line:[1001 TO 2000](441),TEXT:(object type),line:[4001 TO 5000](0),TEXT:(object type),line:[3001 TO 4000](28),TEXT:
(object type),line:[2001 TO 3000](119) 5
TEXT:procedure(5574),TEXT:(object type)(5584),TEXT:package(868),TEXT:trigger(114),TEXT:function(6167)
6 rows selected.
CREATE MATERIALIZED VIEW source_facets
AS
select ljoin(lfacets('SOURCE_BIG_LIDX,'||
case level when 1 then cat_name
ELSE PRIOR cat_name||','|| cat_name
END
)), cat_code,level
FROM source_categories
start with cat_parent is null
CONNECT BY PRIOR cat_code = cat_parent
group by cat_code,level;
select * from table(high_freq_terms('SOURCE_BIG_LIDX','TEXT',10)) order by docFreq desc;
select * from table(high_freq_terms('SOURCE_BIG_LIDX',null,10));
select * from table(high_freq_terms('SOURCE_BIG_LIDX','line',100));
begin
LuceneDomainIndex.sync(USER||'.IT1');
commit; -- release locks
end;
begin
LuceneDomainIndex.optimize(USER||'.IT1');
commit; -- release locks
end;
begin
LuceneDomainIndex.xdbExport('IT1');
commit; -- makes change visible to Ftp or WebDAV
end;
SQL> create table SOURCE_BIG_LIDX$T$BK as (select * from SOURCE_BIG_LIDX$T);
... you can export now using exp tool SOURCE_BIG_LIDX$T$BK because is regular table ...
-bash-3.2$ exp
Export: Release 10.2.0.3.0 - Production on Fri Mar 27 02:46:18 2009
Copyright (c) 1982, 2005, Oracle. All rights reserved.
Username: scott/tiger
Connected to: Oracle Database 10g Release 10.2.0.3.0 - Production
Enter array fetch buffer size: 4096 >
Export file: expdat.dmp > SOURCE_BIG_LIDX_BK.dmp
(2)U(sers), or (3)T(ables): (2)U > 3
Export table data (yes/no): yes > yes
Compress extents (yes/no): yes >
Export done in US7ASCII character set and AL16UTF16 NCHAR character set
server uses AL32UTF8 character set (possible charset conversion)
About to export specified tables via Conventional Path ...
Table(T) or Partition(T:P) to be exported: (RETURN to quit) > SOURCE_BIG_LIDX$T$BK
. . exporting table SOURCE_BIG_LIDX$T$BK 19 rows exported
Table(T) or Partition(T:P) to be exported: (RETURN to quit) >
Export terminated successfully without warnings.
.... Now you can drop your index and re-create again without populating it ....
SQL> select count(*) from test_source_big where lcontains(text,'function')>0;
COUNT(*)
----------
6167
SQL> drop index SOURCE_BIG_LIDX;
Index dropped.
SQL> create index source_big_lidx on test_source_big(text)
2 indextype is lucene.LuceneIndex
3 parameters('PopulateIndex:false;AutoTuneMemory:true;Analyzer:org.apache.lucene.analysis.SimpleAnalyzer;MergeFactor:500;FormatCols:line(0000);ExtraCols:line "line"');
Index created.
SQL> drop table SOURCE_BIG_LIDX$T$BK;
Table dropped.
.... Restore your .dmp now and check again if your index returns a correct result ....
-bash-3.2$ imp scott/tiger
Import: Release 10.2.0.3.0 - Production on Fri Mar 27 02:49:40 2009
Copyright (c) 1982, 2005, Oracle. All rights reserved.
Connected to: Oracle Database 10g Release 10.2.0.3.0 - Production
Import file: expdat.dmp > SOURCE_BIG_LIDX_BK.dmp
Enter insert buffer size (minimum is 8192) 30720>
Export file created by EXPORT:V10.02.01 via conventional path
import done in US7ASCII character set and AL16UTF16 NCHAR character set
import server uses AL32UTF8 character set (possible charset conversion)
List contents of import file only (yes/no): no >
Ignore create error due to object existence (yes/no): no >
Import grants (yes/no): yes >
Import table data (yes/no): yes >
Import entire export file (yes/no): no > yes
. importing SCOTT's objects into SCOTT
. importing SCOTT's objects into SCOTT
. . importing table "SOURCE_BIG_LIDX$T$BK" 19 rows imported
Import terminated successfully without warnings.
.... Check first that your index do not have information and populate them with Lucene Index information ....
SQL> conn scott/tiger
Connected.
SQL> select count(*) from test_source_big where lcontains(text,'function')>0;
COUNT(*)
----------
0
SQL> truncate table SOURCE_BIG_LIDX$T;
Table truncated.
SQL> insert into SOURCE_BIG_LIDX$T (select * from SOURCE_BIG_LIDX$T$BK);
19 rows created.
SQL> exit
..... and connect again to refresh Lucene Domain Index in memory structures ....
SQL> conn scott/tiger
Connected.
SQL> select count(*) from test_source_big where lcontains(text,'function')>0;
COUNT(*)
----------
6167
|
Operation |
Base Table (row/table) |
Index Table (SCHEMA.IDX$T) |
Queue Table (SCHEMA.IDX$QT) |
|
Insert |
X/RX (1) |
NONE | NONE |
|
Update |
X/RX |
NONE | NONE |
|
Delete | X/RX |
X/RS(updateCount)|X/RX (3) | NONE |
|
Manually Sync |
X/RS (2) |
X/RS(updateCount)|X/RX |
DBMS_AQ.BLOCKED (4) |
|
Automatically Sync | X/RS | X/RS(updateCount)|X/RX | DBMS_AQ.BLOCKED |
|
Optimize |
NONE | X/RS(updateCount)|X/RX | NONE |
create index source_small_lidx on test_source_small(text)
indextype is lucene.LuceneIndex
parameters('FormatCols:line(0000);ExtraCols:line "line";Analyzer:org.apache.lucene.analysis.StopAnalyzer;MergeFactor:100');
alter index source_small_lidx parameters('MergeFactor:2');
alter table source_small_lidx$t storage (buffer_pool keep) modify lob (data) (storage (buffer_pool keep));
SQL> select sum(file_size) from source_small_lidx$t where deleted='N';
SUM(FILE_SIZE)
--------------
147444
SQL> explain plan for
2 SELECT subject FROM emails where lcontains(bodytext,'security',1)>0
3 order by subject ASC;
Explained.
Elapsed: 00:00:00.58
SQL> set echo off
PLAN_TABLE_OUTPUT
-----------------------------------------------------------------------------------------------------
Plan hash value: 1542204867
Predicate Information (identified by operation id):
Id
Operation
Name
Rows
Bytes
Cost (%CPU) Time
0
SELECT STATEMENT
1
4016 3 (34) 00:00:01 1
SORT ORDER BY
1
4016 3 (34) 00:00:01 2
TABLE ACCESS BY INDEX ROWID EMAILS 1
4016 2 (0) 00:00:01 * 3
DOMAIN INDEX EMAILBODYTEXT
---------------------------------------------------
3 - access("LUCENE"."LCONTAINS"("BODYTEXT",'security',1)>0)
SQL> explain plan for
2 SELECT /*+ DOMAIN_INDEX_SORT */ subject FROM emails
3 where lcontains(bodytext,'security','subject:ASC',1)>0;
Explained.
Elapsed: 00:00:00.01
SQL> set echo off
PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------------------------------------------
Plan hash value: 1450245214
Predicate Information (identified by operation id):
Id
Operation
Name
Rows
Bytes
Cost (%CPU) Time
0
SELECT STATEMENT
1
4016 2 (0) 00:00:01 1
TABLE ACCESS BY INDEX ROWID EMAILS 1
4016 2 (0) 00:00:01 * 2
DOMAIN INDEX EMAILBODYTEXT
---------------------------------------------------
2 - access("LUCENE"."LCONTAINS"("BODYTEXT",'security','subject:ASC',1)>0)
dbms_java.grant_permission( 'USER_NAME', 'SYS:java.util.logging.LoggingPermission', 'control', '' )
create index it1 on t1(f2) indextype is lucene.LuceneIndexYou can use ExtraCols with f3 or F3 but FormatCols should be F3 because f3 is returned by the SQL select operation as F3 during the table full scan, also Lucene Index will have a document with a Field F3 instead of f3. If you want to use f3 as is you can re-write DDL index creation with:
parameters('Stemmer:English;FormatCols:F2(zzzzzzzzzzzzzzz),F3(00.00);ExtraCols:F3');
create index it1 on t1(f2) indextype is lucene.LuceneIndexWith this sentence Lucene will create documents with two field F2 and f3, F2 is uppercase because is the master column of the index and his passed as "F2" by ODCI API but, due is the default Field of the query, you can omit his name at lcontains syntax, F3 now is lowercase and will be indexed as a Field "f3".
parameters('Stemmer:English;FormatCols:F2(zzzzzzzzzzzzzzz),f3(00.00);ExtraCols:F3 "f3"');
SQL> drop index source_big_lidx force;Note that "drop index ... force" will de-register Lucene Domain Index from Oracle's system views, then Lucene Domain Index storage's table is manually dropped, finally connected as SYS Lucene Domain Index AQ's table is dropped.
Index dropped.
SQL> select table_name from tabs;
TABLE_NAME
------------------------------
DEPT
EMP
BONUS
SALGRADE
SOURCE_BIG_LIDX$QT
DR$SOURCE_BIG_IDX$I
DR$SOURCE_BIG_IDX$R
SOURCE_BIG_LIDX$T
TEST_SOURCE_BIG
DR$SOURCE_BIG_IDX$N
DR$SOURCE_BIG_IDX$K
11 rows selected.
SQL> drop table SOURCE_BIG_LIDX$T;
Table dropped.
SQL> conn / as sysdba
connected.
SQL>exec DBMS_AQADM.DROP_QUEUE ('SCOTT.SOURCE_BIG_LIDX$Q')
BEGIN DBMS_AQADM.DROP_QUEUE ('SCOTT.SOURCE_BIG_LIDX$Q'); END;
*
ERROR at line 1:
ORA-01403: no data found
ORA-06512: at "SYS.DBMS_AQADM_SYS", line 3359
ORA-06512: at "SYS.DBMS_AQADM", line 167
ORA-06512: at line 1
SQL> exec DBMS_AQADM.DROP_QUEUE_TABLE(queue_table => 'SCOTT.SOURCE_BIG_LIDX$QT', force=>true);
PL/SQL procedure successfully completed.
SQL> exit
An Analyzer builds TokenStreams, which analyze text. It thus represents a policy for extracting index terms from text.
Typical implementations first build a Tokenizer, which breaks the stream of characters from the Reader into raw Tokens. One or more TokenFilters may then be applied to the output of the Tokenizer.
Analyzer, PerFieldAnalyzer or Stemmer parameter affects indexing and query expressions, so if you want to change this parameter on a exists index you to must rebuild it, the priority of these three parameters is first check for the Stemmer if its not present check for PerFieldAnalyzer if its not present checks for Analyzer parameter, finally if none of them are defined will use SimpleAnalyzer.This parameter is fully qualified Java class name which extends org.apache.lucene.analysis.Analyzer. For example:
create table t1 (f1 VARCHAR2(10), f2 XMLType);
insert into t1 values ('1', XMLType('<emp id="1"><name>ravi</name></emp>'));
insert into t1 values ('3', XMLType('<emp id="3"><name>murthy</name></emp>'));
create index it1 on t1(f2) indextype is lucene.LuceneIndex
parameters('IncludeMasterColumn:false;
ExtraCols:F1,extractValue(F2,''/emp/name/text()'') "name",extractValue(F2,''/emp/@id'') "id";
FormatCols:F1(000),id(00)');
alter index it1 rebuild parameters('PerFieldAnalyzer:F1(org.apache.lucene.analysis.KeywordAnalyzer),id(org.apache.lucene.analysis.KeywordAnalyzer)');
Full table scan (create index statement):
SELECT rowid,MasterTable.MasterColumn,ExtraCols FROM MasterTable,ExtraTabs where WhereCondition;
Find a particular rowid (insert,update operations):
SELECT MasterTable.MasterColumn,ExtraCols FROM MasterTable,ExtraTabs where MasterTable.rowid=:rowid AND WhereCondition;
Text in italic are injected by Lucene Domain Index and text in bold are user defined.
Note: Up to Lucene Domain Index 2.9.0, if you use a WhereCondition which have an OR operator put this where condition enclosed with () because the precedence of the OR over the AND operator makes that some queries returns more rows that the correct behavior, for example instead of:WhereCondition:T1.F1='AA' OR T1.F1='BB'put:WhereCondition:(T1.F1='AA' OR T1.F1='BB')this workaround fix some problems when working in OnLine mode. Starting with 2.9.1 version this extra () are not required.
# Limit the message that are printed on the console to INFO and above.
java.util.logging.ConsoleHandler.level = ALL
java.util.logging.ConsoleHandler.formatter = java.util.logging.SimpleFormatter
create index pages_lidx_all on pages p (value(p))
indextype is Lucene.LuceneIndex
parameters('PopulateIndex:false;DefaultColumn:text;SyncMode:Deferred;LogLevel:WARNING;Analyzer:org.apache.lucene.analysis.SpanishWikipediaAnalyzer;ExtraCols:extractValue(object_value,''/page/title'') "title",extractValue(object_value,''/page/revision/comment'') "comment",extract(object_value,''/page/revision/text/text()'') "text",extractValue(object_value,''/page/revision/timestamp'') "revisionDate";FormatCols:revisionDate(day);IncludeMasterColumn:false;LobStorageParameters:PCTVERSION 0 ENABLE STORAGE IN ROW CHUNK 32768 CACHE READS FILESYSTEM_LIKE_LOGGING');
Note the correlation between DefaultColumn and ExtraCols. ExtraCols defines a Lucene Field named "text" with a value calculated by the SQL expression extract(object_value,''/page/revision/text/text()''), then you can use a Lucene Field text as default Field in QueryParser syntax.
|
Name |
Null? |
Type |
|
NAME |
NOT NULL |
VARCHAR2(30) |
|
LAST_MODIFIED |
|
TIMESTAMP(6) |
|
FILE_SIZE |
|
NUMBER(38) |
|
DATA |
|
BLOB |
|
DELETED |
|
CHAR(1) |
|
Name |
Null? |
Type |
|
RIDLIST |
|
SYS.ODCIRIDLIST |
|
OPERATION |
|
VARCHAR2(32) |
create table T1 (
f1 number primary key,
f2 varchar2(200),
f3 varchar2(200),
f4 number)
create index IT1 on T1(f2) indextype is lucene.LuceneIndex
parameters('LogLevel:WARNING;Analyzer:org.apache.lucene.analysis.StopAnalyzer;MergeFactor:500;ExtraCols:F1;FormatCols:F1(0000)')
[junit] Testsuite: org.apache.lucene.index.TestDBIndex
[junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 3.836 sec
[junit]
[junit] ------------- Standard Output ---------------
[junit] Table created: T1
[junit] Index created: IT1
[junit] Index altered: IT1
[junit] Inserted rows: 40 total char inserted: 415 avg text length: 10
[junit] Index synced: IT1 elapsed time: 265 ms.
[junit] Avg Sync time: 6
[junit] Index optimized: IT1 elapsed time: 40 ms.
[junit] Avg Optimize time: 1
[junit] Row deleted 40, from: 10 to: 49 elapsed time: 1303 ms. Avg time: 32 ms.
[junit] Index droped: IT1
[junit] Table droped: T1
[junit] Testsuite: org.apache.lucene.index.TestDBIndexAddDoc
[junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 64.696 sec
[junit]
[junit] ------------- Standard Output ---------------
[junit] Table created: T1
[junit] Index created: IT1
[junit] Index altered: IT1
[junit] Index synced: IT1 elapsed time: 126 ms.
[junit] Inserted rows: 10 total char inserted: 49 avg text length: 4
[junit] Index synced: IT1 elapsed time: 142 ms.
[junit] Avg Sync time: 14
[junit] Inserted rows: 90 total char inserted: 988 avg text length: 10
[junit] Index synced: IT1 elapsed time: 374 ms.
[junit] Avg Sync time: 4
[junit] Inserted rows: 400 total char inserted: 9201 avg text length: 23
[junit] Index synced: IT1 elapsed time: 1276 ms.
[junit] Avg Sync time: 3
[junit] Inserted rows: 500 total char inserted: 11726 avg text length: 23
[junit] Index synced: IT1 elapsed time: 1601 ms.
[junit] Avg Sync time: 3
[junit] Inserted rows: 1000 total char inserted: 35950 avg text length: 35
[junit] Index synced: IT1 elapsed time: 4675 ms.
[junit] Avg Sync time: 4
[junit] Inserted rows: 3000 total char inserted: 110851 avg text length: 36
[junit] Index synced: IT1 elapsed time: 25480 ms.
[junit] Avg Sync time: 8
[junit] Index droped: IT1
[junit] Table droped: T1
[junit] Testsuite: org.apache.lucene.index.TestDBIndexDelDoc
[junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 20.543 sec
[junit]
[junit] ------------- Standard Output ---------------
[junit] Table created: T1
[junit] Index created: IT1
[junit] Index altered: IT1
[junit] Inserted rows: 500 total char inserted: 10238 avg text length: 20
[junit] Index synced: IT1 elapsed time: 1643 ms.
[junit] Row deleted 10, from: 1 to: 10 elapsed time: 356 ms. Avg time: 35 ms.
[junit] Row deleted 90, from: 11 to: 100 elapsed time: 2535 ms. Avg time: 28 ms.
[junit] Row deleted 400, from: 101 to: 500 elapsed time: 11526 ms. Avg time: 28 ms.
[junit] Index droped: IT1
[junit] Table droped: T1
[junit] Testsuite: org.apache.lucene.index.TestDBIndexParallel
[junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 97.7 sec
[junit]
[junit] ------------- Standard Output ---------------
[junit] Table created: T1
[junit] Index created: IT1
[junit] Index altered: IT1
[junit] FastSync: true
[junit] Deleter 1 deleting at block 70
[junit] Updater 1 updating at block 70
[junit] Inserter 2 inserting at block 90
[junit] No Row deleted at: 70 to: 79 elapsed time: 131 ms.
[junit] No Row updated at: 70 to: 79 elapsed time: 12 ms.
[junit] Searcher 2 searching row 30
[junit] Searcher 1 searching row 77
[junit] Not Found rows with: thirty elapsed time: 211 ms.
[junit] Not Found rows with: seventy-seven elapsed time: 170 ms.
[junit] Inserted rows: 10 total char inserted: 115 avg text length: 11
[junit] Searcher 2 searching row 62
[junit] Searcher 0 searching row 63
[junit] Searcher 1 searching row 49
[junit] Not Found rows with: sixty-two elapsed time: 64 ms.
[junit] Index synced: IT1 elapsed time: 283 ms.
[junit] Not Found rows with: sixty-three elapsed time: 215 ms.
[junit] Searcher 2 searching row 74
[junit] Not Found rows with: seventy-four elapsed time: 39 ms.
[junit] Not Found rows with: forty-nine elapsed time: 137 ms.
[junit] Searcher 1 searching row 95
[junit] Searcher 2 searching row 46
[junit] Found rows with: ninety-five elapsed time: 103 ms.
....
[junit] Updater 2 updating at block 20
[junit] No Row updated at: 20 to: 29 elapsed time: 3 ms.
[junit] Inserted rows: 10 total char inserted: 80 avg text length: 8
[junit] Searcher 0 searching row 97
[junit] Found rows with: ninety-seven elapsed time: 60 ms.
[junit] Index synced: IT1 elapsed time: 147 ms.
.....
[junit] Searcher 2 searching row 39
[junit] Searcher 1 searching row 84
[junit] Not Found rows with: thirty-nine elapsed time: 33 ms.
[junit] Not Found rows with: eighty-four elapsed time: 38 ms.
[junit] Updater 0 updating at block 90
[junit] Row updated 10, from: 90 to: 99 elapsed time: 16 ms. Avg time: 1 ms.
[junit] Index synced: IT1 elapsed time: 162 ms.
......
[junit] Inserted rows: 10 total char inserted: 125 avg text length: 12
[junit] Searcher 0 searching row 57
[junit] Searcher 1 searching row 28
[junit] Deleter 1 deleting at block 80
[junit] Searcher 2 searching row 64
[junit] No Row deleted at: 80 to: 89 elapsed time: 58 ms.
[junit] Not Found rows with: twenty-eight elapsed time: 112 ms.
[junit] Not Found rows with: fifty-seven elapsed time: 155 ms.
[junit] Index synced: IT1 elapsed time: 242 ms.
[junit] Searcher 0 searching row 98
[junit] Found rows with: ninety-eight elapsed time: 72 ms.
[junit] Not Found rows with: sixty-four elapsed time: 175 ms.
[junit] Searcher 0 searching row 27
[junit] Not Found rows with: twenty-seven elapsed time: 75 ms.
[junit] Searcher 1 searching row 5
[junit] Deleter 2 deleting at block 50
[junit] Searcher 2 searching row 84
[junit] Not Found rows with: eighty-four elapsed time: 20 ms.
[junit] Updater 2 updating at block 10
[junit] No Row deleted at: 50 to: 59 elapsed time: 28 ms.
[junit] Row updated 10, from: 10 to: 19 elapsed time: 36 ms. Avg time: 3 ms.
[junit] Found rows with: five elapsed time: 216 ms.
.................
[junit] Inserter 1 inserting at block 50
[junit] Found rows at: 50 position, ignoring insertions
[junit] Index droped: IT1
[junit] Table droped: T1
This test check some special features of lcontains operator such as in-line pagination, sort by and filter by expressions.
First create a table with 200 rows and then query them, his output look like:
[junit] Testsuite: org.apache.lucene.index.TestDBIndexSearchDoc
[junit] Tests run: 5, Failures: 0, Errors: 0, Time elapsed: 14.001 sec
[junit]
[junit] ------------- Standard Output ---------------
[junit] Table created: T1
[junit] Index created: IT1
[junit] Index altered: IT1
[junit] Inserted rows: 200 total char inserted: 3262 avg text length: 16
[junit] Index synced: IT1 elapsed time: 746 ms.
[junit] testFilterAll()
[junit] Excecution time: 129 ms.
[junit] 120 Score: 0.9606395 str: one hundred twenty
[junit] 119 Score: 0.25453204 str: one hundred nineteen
[junit] 118 Score: 0.25453204 str: one hundred eighteen
[junit] 117 Score: 0.25453204 str: one hundred seventeen
[junit] 116 Score: 0.25453204 str: one hundred sixteen
[junit] 115 Score: 0.25453204 str: one hundred fifteen
[junit] 114 Score: 0.25453204 str: one hundred fourteen
[junit] 113 Score: 0.25453204 str: one hundred thirteen
[junit] 112 Score: 0.25453204 str: one hundred twelve
[junit] 111 Score: 0.25453204 str: one hundred eleven
[junit] Index droped: IT1
[junit] Table droped: T1
[junit] Table created: T1
[junit] Index created: IT1
[junit] Index altered: IT1
[junit] Inserted rows: 200 total char inserted: 3262 avg text length: 16
[junit] Index synced: IT1 elapsed time: 721 ms.
[junit] testFilterBy()
[junit] Excecution time: 162 ms.
[junit] 103 Score: 1.0 str: one hundred three
[junit] 120 Score: 0.9606395 str: one hundred twenty
[junit] 101 Score: 0.28600293 str: one hundred one
[junit] 100 Score: 0.27352643 str: one hundred
....
[junit] 115 Score: 0.25453204 str: one hundred fifteen
[junit] 116 Score: 0.25453204 str: one hundred sixteen
[junit] Index droped: IT1
[junit] Table droped: T1
[junit] Table created: T1
[junit] Index created: IT1
[junit] Index altered: IT1
[junit] Inserted rows: 200 total char inserted: 3262 avg text length: 16
[junit] Index synced: IT1 elapsed time: 751 ms.
[junit] testFilterByOrderBy()
[junit] Excecution time: 138 ms.
[junit] 120 Score: 0.9606395 str: one hundred twenty
[junit] 119 Score: 0.25453204 str: one hundred nineteen
....
[junit] 103 Score: 1.0 str: one hundred three
[junit] 102 Score: 0.25453204 str: one hundred two
[junit] 101 Score: 0.28600293 str: one hundred one
[junit] 100 Score: 0.27352643 str: one hundred
[junit] Index droped: IT1
[junit] Table droped: T1
[junit] Table created: T1
[junit] Index created: IT1
[junit] Index altered: IT1
[junit] Inserted rows: 200 total char inserted: 3262 avg text length: 16
[junit] Index synced: IT1 elapsed time: 761 ms.
[junit] testPagination()
[junit] Excecution time: 193 ms.
[junit] 117 Score: 0.03489425 str: one hundred seventeen
[junit] 118 Score: 0.03489425 str: one hundred eighteen
....
[junit] 132 Score: 0.03489425 str: one hundred thirty-two
[junit] 134 Score: 0.03489425 str: one hundred thirty-four
[junit] Index droped: IT1
[junit] Table droped: T1
[junit] Table created: T1
[junit] Index created: IT1
[junit] Index altered: IT1
[junit] Inserted rows: 200 total char inserted: 3262 avg text length: 16
[junit] Index synced: IT1 elapsed time: 743 ms.
[junit] testCountHits()
[junit] Excecution time: 53 ms.
[junit] Hits: 126
[junit] Index droped: IT1
[junit] Table droped: T1
create table test_source_big as (select * from all_source);
create index source_big_lidx on test_source_big(text)
indextype is lucene.LuceneIndex
parameters('AutoTuneMemory:true;MergeFactor:500;FormatCols:line(0000);ExtraCols:line "line"');
create index source_big_lidx on test_source_big(text)
indextype is lucene.LuceneIndex
parameters('FormatCols:line(0000);ExtraCols:line "line";Analyzer:org.apache.lucene.analysis.StopAnalyzer;MergeFactor:500;LobStorageParameters:PCTVERSION 0 ENABLE STORAGE IN ROW CHUNK 32768 CACHE READS FILESYSTEM_LIKE_LOGGING');
[junit] Testsuite: org.apache.lucene.indexer.TestQueryHits
[junit] Tests run: 2, Failures: 0, Errors: 0, Time elapsed: 2.656 sec
[junit]
[junit] ------------- Standard Output ---------------
[junit] iteration from: 13775 to: 13785
[junit] Step time: 791 ms.
[junit] iteration from: 13785 to: 13795
[junit] Step time: 49 ms.
[junit] iteration from: 13795 to: 13805
[junit] Step time: 40 ms.
[junit] iteration from: 13805 to: 13815
[junit] Step time: 44 ms.
[junit] iteration from: 13815 to: 13825
[junit] Step time: 40 ms.
[junit] iteration from: 13825 to: 13835
[junit] Step time: 42 ms.
[junit] iteration from: 13835 to: 13845
[junit] Step time: 41 ms.
[junit] iteration from: 13845 to: 13855
[junit] Step time: 50 ms.
[junit] iteration from: 13855 to: 13865
[junit] Step time: 41 ms.
[junit] iteration from: 13865 to: 13875
[junit] Step time: 41 ms.
[junit] Elapsed time: 1877
[junit] Hits: 18387
[junit] Elapsed time: 564
LCONTAINS([schema.]column
[schema.]column,
text_query VARCHAR2
[,sort VARCHAR2]
[,label NUMBER])
RETURN NUMBER;
title:"The Right Way" AND text:go
title:"Do it right" AND right
title:Do it right
te?t
test*
te*t
roam~
roam~0.8
"jakarta apache"~10
mod_date:[20020101 TO 20030101]
title:{Aida TO Carmen}
jakarta apache
jakarta^4 apache
"jakarta apache"^4 "Apache Lucene"
"jakarta apache" jakarta
"jakarta apache" OR jakarta
"jakarta apache" AND "Apache Lucene"
+jakarta lucene
"jakarta apache" NOT "Apache Lucene"
"jakarta apache" -"Apache Lucene"
(jakarta OR apache) AND website
title:(+return +"pink panther")
+ - && || ! ( ) { } [ ] ^ " ~ * ? : \
\(1\+1\)\:2
LSCORE(label NUMBER)label
SELECT /*+ DOMAIN_INDEX_SORT */ lscore(1),subject FROM emails
where lcontains(bodytext,'security',1)>0;
LHIGHLIGHT(label NUMBER):VARCHAR2label
SELECT /*+ DOMAIN_INDEX_SORT */ lhighlight(1) txt,lscore(1) sc,subject
FROM emails where lcontains(bodytext,'security OR mysql','subject:ASC',1)>0;
PHIGHLIGHT(index_name VARCHAR2, qry VARCHAR2, cols VARCHAR2, stmt IN VARCHAR2) RETURN ANYDATASET
SELECT * FROM
TABLE(phighlight(
'EMAILBODYTEXT',
'lucene OR mysql',
'SUBJECT,BODYTEXT',
'select lscore(1) sc,e.* from eMails e where lcontains(bodytext,''rownum:[1 TO 10] AND (security OR mysql)'',''subject:ASC'',1)>0'
));
RHIGHLIGHT(index_name VARCHAR2, qry VARCHAR2, cols VARCHAR2, rType IN VARCHAR2, rws IN SYS_REFCURSOR) RETURN ANYDATASET
CREATE TYPE EMAILR AS OBJECT
( sc NUMBER,
emailFrom VARCHAR2(256),
emailTo VARCHAR2(256),
subject VARCHAR2(4000),
emailDate DATE,
bodyText CLOB);
CREATE OR REPLACE TYPE EMAILRSET AS TABLE OF EMAILR;
SELECT * FROM
TABLE(rhighlight(
'EMAILBODYTEXT',
'lucene OR mysql',
'SUBJECT,BODYTEXT',
'EMAILRSET',
CURSOR(select /*+ DOMAIN_INDEX_SORT FIRST_ROW */ lscore(1) sc,e.*
from eMails e where lcontains(bodytext,'rownum:[1 TO 10] AND (security OR mysql)','subject:ASC',1)>0)
));
FUNCTION this(index_name IN VARCHAR2,
x IN ROWID,
f IN NUMBER DEFAULT 1,
t IN NUMBER DEFAULT 10,
minTermFreq IN NUMBER DEFAULT 2,
minDocFreq IN NUMBER DEFAULT 5) RETURN sys.odciridlist
FUNCTION this(owner IN VARCHAR2,
index_name IN VARCHAR2,
x IN ROWID,
f IN NUMBER DEFAULT 1,
t IN NUMBER DEFAULT 10,
minTermFreq IN NUMBER DEFAULT 2,
minDocFreq IN NUMBER DEFAULT 5) RETURN sys.odciridlist
FUNCTION lfacets(input IN VARCHAR2) RETURN LUCENE.agg_tbl
FUNCTION ljoin(i_tbl in agg_tbl,
i_glue IN VARCHAR2 := ',') RETURN VARCHAR2
ant upgrade-domain-index
ant ncomp-lucene-ojvm (10g only)
ant jit-lucene-classes (11g only)
create index it1 on t1(f2) indextype is lucene.LuceneIndex parameters('Stemmer:English');.