|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
fr.gouv.culture.sdx.utils.AbstractSdxObject
fr.gouv.culture.sdx.utils.database.DatabaseBacked
fr.gouv.culture.sdx.documentbase.AbstractDocumentBase
fr.gouv.culture.sdx.documentbase.SDXDocumentBase
fr.gouv.culture.sdx.documentbase.LuceneDocumentBase
fr.gouv.culture.sdx.thesaurus.LuceneThesaurus
A Lucene implementation of a thesaurus. This class uses a Lucene index where each concept is stored as one document, and all relations of a concept are stored as fields of that document.
| Nested Class Summary |
| Nested classes inherited from class fr.gouv.culture.sdx.thesaurus.SDXThesaurus |
SDXThesaurus.ConfigurationNode |
| Nested classes inherited from class fr.gouv.culture.sdx.documentbase.SDXDocumentBaseTarget |
SDXDocumentBaseTarget.ConfigurationNode |
| Field Summary | |
protected int |
defaultDepth
|
protected int[] |
defaultRelations
|
static java.lang.String |
FIELD_NAME_FTERM
Two field names |
static java.lang.String |
FIELD_NAME_FUF
|
protected org.apache.excalibur.source.impl.URLSource |
source
|
| Fields inherited from class fr.gouv.culture.sdx.documentbase.LuceneDocumentBase |
_fieldList, _xmlFieldList, DBELEM_ATTRIBUTE_REMOTE_ACCESS, ELEMENT_NAME_LUCENE_SDX_INTERNAL_FIELDS, INDEX_DIR_CURRENT, INDEX_DIR_MAIN, lastDocCount, luceneActiveIndex, luceneCurrentIndex, luceneSearchIndexList, SEARCH_INDEX_DIRECTORY_NAME, subIndexCount |
| Fields inherited from class fr.gouv.culture.sdx.documentbase.SDXDocumentBase |
_documentAdditionStatus, _isIndexOptimized, autoOptimize, baseIndexDir, DOC_ADD_STATUS_ADDED, DOC_ADD_STATUS_FAILURE, DOC_ADD_STATUS_IGNORED, DOC_ADD_STATUS_REFRESHED, DOC_ADD_STATUS_REPLACED, DOC_URL, ELEMENT_NAME_DEFAULT_HPP, ELEMENT_NAME_DEFAULT_MAXSORT, keepOriginalDocuments, scheduler, SDX_DATABASE_FORMAT, SDX_DATABASE_VERSION, SDX_DATABASE_VERSION_2_3, SDX_DATE, SDX_DATE_MILLISECONDS, SDX_ISO8601_DATE, SDX_USER, splitActive, splitDoc, splitSize, splitUnit, useCompoundFiles |
| Fields inherited from class fr.gouv.culture.sdx.utils.database.DatabaseBacked |
_database, CLASS_NAME_SUFFIX, DATABASE_DIR_NAME, databaseConf, dbLocation, dbPath, DEFAULT_DATABASE_TYPE |
| Fields inherited from class fr.gouv.culture.sdx.utils.AbstractSdxObject |
_configuration, _context, _description, _encoding, _id, _locale, _logger, _manager, _xmlizable_objects, _xmlLang, isToSaxInitialized |
| Fields inherited from interface fr.gouv.culture.sdx.thesaurus.SDXThesaurus |
CLASS_NAME_SUFFIX, PACKAGE_QUALNAME |
| Fields inherited from interface fr.gouv.culture.sdx.thesaurus.Thesaurus |
NAMESPACE_URI, RELATION_BROADER_TERM, RELATION_BROADER_TERMS, RELATION_EQUIVALENT_TERM, RELATION_NARROWER_TERM, RELATION_PARTIAL_EQIUVALENCE, RELATION_RELATED_TERM, RELATION_SCOPE_NOTE, RELATION_USE, RELATION_USED_FOR |
| Fields inherited from interface fr.gouv.culture.sdx.utils.Encodable |
DEFAULT_ENCODING |
| Fields inherited from interface fr.gouv.culture.sdx.utils.save.Saveable |
ALL_SAVE_ATTRIB, PATH_ATTRIB, SAVE_DIRECTORY_PARAM |
| Constructor Summary | |
LuceneThesaurus()
|
|
| Method Summary | |
void |
addConcept(Concept concept)
Adds a document. |
void |
addConcepts(Concept[] concepts)
Adds a group of documents. |
void |
build(org.xml.sax.InputSource source)
Builds a thesaurus from a SAX input source. |
void |
build(java.lang.String url)
Builds a thesaurus from a File. |
void |
compile()
|
void |
configure(org.apache.avalon.framework.configuration.Configuration configuration)
Sets the configuration options for this document base. |
protected void |
configureBase(org.apache.avalon.framework.configuration.Configuration configuration)
Overrides the parent method to allow for the configuration element <sdx:thesaurus>, and sets the path for this thesaurus/document base |
void |
deleteConcept(Concept concept)
Removes a concept document with the given id and any sub concepts. |
void |
deleteConcepts(Concept[] concepts)
Removes a concept document with the given id and any sub concepts. |
Results |
expandQuery(Query query)
Expands a query returning the results of the expanded query |
Results |
expandQuery(Query query,
java.lang.String fieldName)
Expands a query returning the results of the expanded query |
Results |
expandQuery(Query query,
java.lang.String fieldName,
int[] relations,
int depth)
Expands a query returning the results of the expanded query |
Results |
expandQuery(Query query,
java.lang.String fieldName,
int[] relations,
int depth,
java.lang.String[] langs)
Expands a query returning the results of the expanded query |
Results |
expandQuery(Query query,
java.lang.String fieldName,
int relation,
int depth)
Expands a query returning the results of the expanded query |
Concept[] |
filterByLangs(Concept[] concepts,
java.lang.String[] langs)
Filters concepts by a list of languages |
protected LuceneConcept |
getConcept(org.apache.lucene.document.Document ldoc)
Builds a concept from a Lucene document. |
Concept |
getConceptById(java.lang.String id)
Returns a document using its id. |
Concept |
getConceptByName(java.lang.String name)
Returns a document using its term. |
int |
getDefaultDepth()
Returns the default depth for searching relations |
int[] |
getDefaultRelations()
Returns the default set of relation ints for searching relations |
Concept[] |
getRelations(Concept concept)
Returns related concepts |
Concept[] |
getRelations(Concept[] concepts)
Returns related concepts |
Concept[] |
getRelations(Concept[] concepts,
int[] relations,
int depth)
Returns concepts related to a list of concepts. |
Concept[] |
getRelations(Concept[] concepts,
int[] relations,
int depth,
java.lang.String[] langs)
|
Concept[] |
getRelations(Concept[] concepts,
int relation,
int depth)
Returns concepts related to a list of concepts. |
Concept[] |
getRelations(Concept[] concepts,
int relation,
int depth,
java.lang.String[] langs)
Returns concepts related to a list of concepts, filtered by languages. |
Concept[] |
getRelations(Concept concept,
int[] relations,
int depth)
Returns concepts related to a concept. |
Concept[] |
getRelations(Concept concept,
int[] relations,
int depth,
java.lang.String[] langs)
|
Concept[] |
getRelations(Concept concept,
int relation,
int depth)
Returns concepts related to a document. |
Concept[] |
getRelations(Concept concept,
int relation,
int depth,
java.lang.String[] langs)
Returns concepts related to a document, filtered by languages. |
Concept[] |
getRelations(java.lang.String searchTerm,
Concept[] concepts,
int[] relations,
int depth)
Returns concepts related to a list of concepts. |
Concept[] |
getRelations(java.lang.String searchTerm,
Concept[] concepts,
int relation,
int depth)
Returns concepts related to a list of concepts. |
Concept[] |
getRelations(java.lang.String searchTerm,
Concept concept,
int[] relations,
int depth)
Returns concepts related to a concept. |
Concept[] |
getRelations(java.lang.String searchTerm,
Concept concept,
int relation,
int depth)
Returns concepts related to a document. |
java.lang.String |
getRelationTypeAbbreviation(int type)
Returns an abbreviation String for a relation type defined in the thesaurus |
int |
getRelationTypeInt(java.lang.String abbr)
Returns an int for a relation type String defined in the thesaurus |
protected org.apache.avalon.framework.configuration.Configuration[] |
getRepositoryConfigurationList(org.apache.avalon.framework.configuration.Configuration configuration)
|
void |
init()
Initializes the document base. |
protected boolean |
initToSax()
Initializes the LinkedHashMap _xmlizable_objects with the objects in order to describe them in XML |
protected void |
initVolatileObjectsToSax()
Initializes the LinkedHashMap _xmlizable_volatile_objects with the objects in order to describe them in XML. Some objects need to be refreshed each time toSAX is called |
void |
load()
Loads a thesaurus in memory. |
void |
save()
Saves the contents. |
Concept[] |
search(java.lang.String query)
Searches for concepts. |
long |
size()
Returns the number of terms in the thesaurus. |
void |
unload()
Unloads the memory representation of the thesaurus. |
| Methods inherited from class fr.gouv.culture.sdx.documentbase.SDXDocumentBase |
add, checkIntegrity, configureIdGenerator, configureOAIComponents, configureOptimizeTriggers, configureRepositories, configureSplit, delete, deleteIndexableDocumentComponents, deleteRelationsToMastersFromDatabase, getByteSplitSize, getDocument, getDocument, getDocument, getDocument, getOwners, getRelated, getRepositoryForDocument, getRepositoryForStorage, getSplitDoc, getSplitSize, getSplitUnit, getUseCompoundFiles, handleParameters, index, index, isAutoOptimized, isIndexOptimized, rollbackIndexation, targetTriggered |
| Methods inherited from class fr.gouv.culture.sdx.documentbase.AbstractDocumentBase |
addOaiDeletedRecord, configurePipeline, createEntityForDocMetaData, delete, deletePhysicalDocument, getDefaultHitsPerPage, getDefaultMaxSort, getDefaultRepository, getIdGenerator, getIndexationPipeline, getMimeType, getOAIHarvester, getOAIRepository, getPooledRepositoryConnection, getRepository, getSourceValidity, isDefault, isUseMetadata, optimizeDatabase, optimizeRepositories, releasePooledRepositoryConnections, removeOaiDeletedRecord |
| Methods inherited from class fr.gouv.culture.sdx.utils.database.DatabaseBacked |
configure, getClassNameSuffix, getDatabase |
| Methods inherited from class fr.gouv.culture.sdx.utils.AbstractSdxObject |
configureDescription, contextualize, enableLogging, getBaseAttributes, getConfiguration, getContext, getDescription, getEncoding, getId, getLocale, getLog, getServiceManager, getXmlLang, service, setDescription, setEncoding, setId, setLocale, setUpSdxObject, setUpSdxObject, setXmlLang, toSAX, verifyConfigurationResources |
| Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Methods inherited from interface org.apache.excalibur.xml.sax.XMLizable |
toSAX |
| Methods inherited from interface fr.gouv.culture.sdx.documentbase.DocumentBase |
checkIntegrity, creationDate, delete, delete, getDefaultHitsPerPage, getDefaultMaxSort, getDefaultRepository, getDocument, getDocument, getDocument, getDocument, getIndexationPipeline, getMimeType, getOAIHarvester, getOAIRepository, getRepository, getSourceValidity, index, index, isDefault, lastModificationDate, optimize |
| Methods inherited from interface fr.gouv.culture.sdx.utils.SdxObject |
getLog |
| Methods inherited from interface org.apache.avalon.framework.logger.LogEnabled |
enableLogging |
| Methods inherited from interface org.apache.avalon.framework.context.Contextualizable |
contextualize |
| Methods inherited from interface org.apache.avalon.framework.service.Serviceable |
service |
| Methods inherited from interface fr.gouv.culture.sdx.utils.Identifiable |
getId, setId |
| Methods inherited from interface fr.gouv.culture.sdx.utils.Describable |
getDescription, setDescription |
| Methods inherited from interface fr.gouv.culture.sdx.utils.Encodable |
getEncoding, setEncoding |
| Methods inherited from interface fr.gouv.culture.sdx.utils.Localizable |
getLocale, getXmlLang, setLocale, setXmlLang |
| Methods inherited from interface fr.gouv.culture.sdx.search.Searchable |
getId, getIndex |
| Methods inherited from interface fr.gouv.culture.sdx.utils.save.Saveable |
backup, restore |
| Field Detail |
protected int defaultDepth
protected int[] defaultRelations
protected org.apache.excalibur.source.impl.URLSource source
public static final java.lang.String FIELD_NAME_FTERM
public static final java.lang.String FIELD_NAME_FUF
| Constructor Detail |
public LuceneThesaurus()
| Method Detail |
protected void configureBase(org.apache.avalon.framework.configuration.Configuration configuration)
throws org.apache.avalon.framework.configuration.ConfigurationException
configureBase in class SDXDocumentBaseconfiguration -
org.apache.avalon.framework.configuration.ConfigurationException
protected org.apache.avalon.framework.configuration.Configuration[] getRepositoryConfigurationList(org.apache.avalon.framework.configuration.Configuration configuration)
throws org.apache.avalon.framework.configuration.ConfigurationException
getRepositoryConfigurationList in class SDXDocumentBase
org.apache.avalon.framework.configuration.ConfigurationException
public long size()
size in interface SDXThesaurus
public Concept[] search(java.lang.String query)
throws SDXException
search in interface SDXThesaurusquery - The concept term, already analyzed by an appropriate analyzer.
SDXException
public Results expandQuery(Query query)
throws SDXException
SDXThesaurus
expandQuery in interface SDXThesaurusquery - The query object which should be expanded using this thesaurus
SDXException
public Results expandQuery(Query query,
java.lang.String fieldName)
throws SDXException
SDXThesaurus
expandQuery in interface SDXThesaurusquery - The query object which should be expanded using this thesaurusfieldName - The name of field on which expansion should take place
SDXException
public Results expandQuery(Query query,
java.lang.String fieldName,
int relation,
int depth)
throws SDXException
SDXThesaurus
expandQuery in interface SDXThesaurus
query - The query object which should be expanded using this thesaurus
fieldName - The name of the field on which expansion should take place
relation - The relation type, for finding terms which should be used to expand the query
depth - The depth to which a relation should be followed: 0 finds the relation within the matching concept,
1 finds the relation within the concept for the matching relation found by the "0 case", etc.
SDXException
public Results expandQuery(Query query,
java.lang.String fieldName,
int[] relations,
int depth)
throws SDXException
SDXThesaurus
expandQuery in interface SDXThesaurus
query - The query object which should be expanded using this thesaurus
fieldName - The name of the field on which expansion should take place
relations - The relation types, for finding terms which should be used to expand the query
depth - The depth to which a relation should be followed: 0 finds the relation within the matching concept,
1 finds the relation within the concept for the matching relation found by the "0 case", etc.
Returns: A results object of the executed expanded query
SDXException
public Results expandQuery(Query query,
java.lang.String fieldName,
int[] relations,
int depth,
java.lang.String[] langs)
throws SDXException
SDXThesaurus
expandQuery in interface SDXThesaurus
query - The query object which should be expanded using this thesaurus
fieldName - The name of the field on which expansion should take place
relations - The relation types, for finding terms which should be used to expand the query
depth - The depth to which a relation should be followed: 0 finds the relation within the matching concept,
1 finds the relation within the concept for the matching relation found by the "0 case", etc.
Returns: A results object of the executed expanded query
SDXException
public void addConcept(Concept concept)
throws SDXException
addConcept in interface SDXThesaurusconcept - The document to add.
SDXException
public void addConcepts(Concept[] concepts)
throws SDXException
SDXThesaurus
addConcepts in interface SDXThesaurusconcepts - The documents to add.
SDXException
public void deleteConcept(Concept concept)
throws SDXException
deleteConcept in interface SDXThesaurusconcept - The document.
SDXException
public void deleteConcepts(Concept[] concepts)
throws SDXException
deleteConcepts in interface SDXThesaurusconcepts - The documents.
SDXException
public Concept[] getRelations(Concept[] concepts)
throws SDXException
SDXThesaurus
getRelations in interface SDXThesaurusconcepts - The concepts for which relations are desired
SDXException
public Concept[] getRelations(Concept concept)
throws SDXException
SDXThesaurus
getRelations in interface SDXThesaurusconcept - The concept for which relations are desired
SDXException
public Concept[] getRelations(Concept concept,
int relation,
int depth)
throws SDXException
getRelations in interface SDXThesaurus
concept - The document.
relation - The relation to use.
depth - The depth to which a relation should be evaluated: 0 finds the relation within the matching concept,
1 finds the relation within the concept for the matching relation found by the "0 case", etc.
SDXException
public Concept[] getRelations(java.lang.String searchTerm,
Concept concept,
int relation,
int depth)
throws SDXException
getRelations in interface SDXThesaurus
searchTerm - The search term which was used to find the concept;
in the case that the searchTerm and the related concept value are
equal, the relation value will be returned as a Concept. This is useful
for relation fields which are of relation word and would be found by a search(searchTerm)
concept - The document.
relation - The relation to use.
depth - The levels up or down indicating the extent to which the relation search should be executed
SDXException
public Concept[] getRelations(Concept[] concepts,
int[] relations,
int depth)
throws SDXException
getRelations in interface SDXThesaurus
concepts - The list of concepts.
relations - The relation types.
depth - The depth to which a relation should be evaluated: 0 finds the relation within the matching concept,
1 finds the relation within the concept for the matching relation found by the "0 case", etc.
SDXException
public Concept[] getRelations(Concept concept,
int[] relations,
int depth)
throws SDXException
getRelations in interface SDXThesaurus
concept - The concept.
relations - The relation types.
depth - The depth to which a relation should be evaluated: 0 finds the relation within the matching concept,
1 finds the relation within the concept for the matching relation found by the "0 case", etc.
SDXException
public Concept[] getRelations(Concept[] concepts,
int relation,
int depth)
throws SDXException
getRelations in interface SDXThesaurus
concepts - The list of concepts.
relation - The relation type.
depth - The depth to which a relation should be evaluated: 0 finds the relation within the matching concept,
1 finds the relation within the concept for the matching relation found by the "0 case", etc.
SDXException
public Concept[] getRelations(java.lang.String searchTerm,
Concept[] concepts,
int relation,
int depth)
throws SDXException
getRelations in interface SDXThesaurus
searchTerm - The search term which was used to find the concept
concepts - The list of concepts.
relation - The relation type.
depth - The levels up or down indicating the extent to which the relation search should be executed
SDXException
public Concept[] getRelations(java.lang.String searchTerm,
Concept[] concepts,
int[] relations,
int depth)
throws SDXException
getRelations in interface SDXThesaurus
searchTerm - The search term which was used to find the concept
concepts - The list of concepts.
relations - The relation types.
depth - The levels up or down indicating the extent to which the relation search should be executed
SDXException
public Concept[] getRelations(java.lang.String searchTerm,
Concept concept,
int[] relations,
int depth)
throws SDXException
getRelations in interface SDXThesaurus
searchTerm - The search term which was used to find the concept
concept - The concept.
relations - The relation types.
depth - The levels up or down indicating the extent to which the relation search should be executed
SDXException
public Concept getConceptByName(java.lang.String name)
throws SDXException
getConceptByName in interface SDXThesaurusname - The document's term name.
SDXException
public Concept getConceptById(java.lang.String id)
throws SDXException
getConceptById in interface SDXThesaurusid - The document's id.
SDXException
public void save()
save in interface SDXThesaurus
public void load()
load in interface SDXThesaurus
public void unload()
unload in interface SDXThesaurus
protected LuceneConcept getConcept(org.apache.lucene.document.Document ldoc)
public Concept[] getRelations(Concept concept,
int[] relations,
int depth,
java.lang.String[] langs)
throws SDXException
getRelations in interface SDXThesaurusSDXException
public Concept[] getRelations(Concept[] concepts,
int[] relations,
int depth,
java.lang.String[] langs)
throws SDXException
getRelations in interface SDXThesaurusSDXException
public Concept[] getRelations(Concept concept,
int relation,
int depth,
java.lang.String[] langs)
throws SDXException
getRelations in interface SDXThesaurusconcept - The document.relation - The relation to use.depth - The depth to which relations should be retrievedlangs - The list of languages for filtering concepts.
SDXException
public Concept[] getRelations(Concept[] concepts,
int relation,
int depth,
java.lang.String[] langs)
throws SDXException
getRelations in interface SDXThesaurusconcepts - The list of concepts.relation - The relation.depth - The depth to which relations should be retrievedlangs - The list of languages for filtering concepts.
SDXException
public void configure(org.apache.avalon.framework.configuration.Configuration configuration)
throws org.apache.avalon.framework.configuration.ConfigurationException
LuceneDocumentBase
configure in interface org.apache.avalon.framework.configuration.Configurable
configure in class LuceneDocumentBase
configuration - The configuration object from which to build a document base.
Sample configuration entry:
<sdx:documentBase sdx:id = "myDocumentBaseName" sdx:type = "lucene">
<sdx:fieldList xml:lang = "fr-FR" sdx:variant = "" sdx:analyzerConf = "" sdx:analyzerClass = "">
<sdx:field code = "fieldName" type = "word" xml:lang = "fr-FR" sdx:analyzerClass = "" sdx:analyzerConf = ""/>
<sdx:field code = "fieldName2" type = "field" xml:lang = "fr-FR" brief = "true"/>
<sdx:field code = "fieldName3" type = "date" xml:lang = "fr-FR"/>
<sdx:field code = "fieldName4" type = "unindexed" xml:lang = "fr-FR"/>
</sdx:fieldList>
<sdx:index>
<sdx:pipeline sdx:id = "sdxIndexationPipeline">
<sdx:transformation src = "path to stylesheet, can be absolute or relative to the directory containing this file" sdx:id = "step2" sdx:type = "xslt"/>
<sdx:transformation src = "path to stylesheet, can be absolute or relative to the directory containing this file" sdx:id = "step3" sdx:type = "xslt" keep = "true"/>
</sdx:pipeline>
</sdx:index>
<sdx:repositories>
<sdx:repository baseDirectory = "blah4" depth = "3" extent = "100" sdx:type = "FS" sdx:default = "true" sdx:id = "blah4"/>
<sdx:repository ref = "blah2"/>
</sdx:repositories>
</sdx:documentBase>
org.apache.avalon.framework.configuration.ConfigurationException
We should link to this in the future when we have better documentation capabilities.
public int getDefaultDepth()
SDXThesaurus
getDefaultDepth in interface SDXThesaurus
public int[] getDefaultRelations()
SDXThesaurus
getDefaultRelations in interface SDXThesaurus
public Concept[] filterByLangs(Concept[] concepts,
java.lang.String[] langs)
SDXThesaurus
filterByLangs in interface SDXThesaurus
concepts - List of concepts for filtering
langs - List of languages (in xml:lang format) which are desired
public void init()
throws SDXException
DocumentBase
This method must be called after super.getLog() has been set and the configuration is done.
init in interface SDXThesaurus
init in class LuceneDocumentBase
SDXException
public void build(java.lang.String url)
throws SDXException,
org.apache.avalon.framework.configuration.ConfigurationException
SDXThesaurus
build in interface SDXThesaurusurl - The url to the file containing the thesaurus.
SDXException
org.apache.avalon.framework.configuration.ConfigurationException
public void build(org.xml.sax.InputSource source)
throws SDXException
SDXThesaurus
build in interface SDXThesaurussource - The SAX input source where the thesaurus is.
SDXException
public void compile()
throws SDXException
compile in interface Thesaurus
SDXException
public java.lang.String getRelationTypeAbbreviation(int type)
Thesaurus
getRelationTypeAbbreviation in interface Thesaurus
public int getRelationTypeInt(java.lang.String abbr)
Thesaurus
getRelationTypeInt in interface Thesaurus
protected boolean initToSax()
AbstractSdxObject
initToSax in class LuceneDocumentBase
protected void initVolatileObjectsToSax()
initVolatileObjectsToSax in class LuceneDocumentBase
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||