WSL/SLF GitLab Repository

Commit e9df4c7c authored by Sam's avatar Sam
Browse files

add solr config

parent 95d6fbe6
......@@ -4,4 +4,6 @@
# Allow files and directories
!envidat_extensions.*
!ckan-entrypoint.sh
!config/who.ini
\ No newline at end of file
!config/who.ini
!solr/*.xml
!solr/*.txt
\ No newline at end of file
......@@ -30,6 +30,8 @@ services:
- MAINTAINER=${MAINTAINER}
depends_on:
- db
- solr
- redis
ports:
- "8989:5000"
env_file:
......
......@@ -11,16 +11,8 @@ ENV SOLR_CORE ckan
RUN mkdir -p /opt/solr/server/solr/$SOLR_CORE/conf \
&& mkdir -p /opt/solr/server/solr/$SOLR_CORE/data
# Adding Files
COPY ./solrconfig.xml \
./schema.xml \
/opt/solr/server/solr/$SOLR_CORE/conf/
ADD https://raw.githubusercontent.com/apache/lucene-solr/releases/lucene-solr/6.6.5/solr/server/solr/configsets/basic_configs/conf/currency.xml \
https://raw.githubusercontent.com/apache/lucene-solr/releases/lucene-solr/6.6.5/solr/server/solr/configsets/basic_configs/conf/synonyms.txt \
https://raw.githubusercontent.com/apache/lucene-solr/releases/lucene-solr/6.6.5/solr/server/solr/configsets/basic_configs/conf/stopwords.txt \
https://raw.githubusercontent.com/apache/lucene-solr/releases/lucene-solr/6.6.5/solr/server/solr/configsets/basic_configs/conf/protwords.txt \
https://raw.githubusercontent.com/apache/lucene-solr/releases/lucene-solr/6.6.5/solr/server/solr/configsets/data_driven_schema_configs/conf/elevate.xml \
/opt/solr/server/solr/$SOLR_CORE/conf/
# Add Config Files
COPY . /opt/solr/server/solr/$SOLR_CORE/conf/
# Create Core.properties
RUN echo name=$SOLR_CORE > /opt/solr/server/solr/$SOLR_CORE/core.properties
......
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
identifier
institute
research
university
and
for
affiliation
affiliation_02
affiliation_03
affiliation_04
affiliation_05
data_credit
datacredit
data
credit
collection
curation
publication
validation
software
email
name
given_name
givenname
given
supervision
wsl
slf
epfl
eth
ethz
01
02
03
04
05
ch
snow
swiss
federal
birmensdorf
com
gmail
switzerland
0000
0001
0002
0003
0004
111
8903
school
env
sciences
landscape
org
https
http
scheme
davos
avalanche
lausanne
civil
department
engineering
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- If this file is found in the config directory, it will only be
loaded once at startup. If it is found in Solr's data
directory, it will be re-loaded every commit.
See http://wiki.apache.org/solr/QueryElevationComponent for more info
-->
<elevate>
<!-- Query elevation examples
<query text="foo bar">
<doc id="1" />
<doc id="2" />
<doc id="3" />
</query>
for use with techproducts example
<query text="ipod">
<doc id="MA147LL/A" /> put the actual ipod at the top
<doc id="IW-02" exclude="true" /> exclude this cable
</query>
-->
</elevate>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
citation
next
https
doi
org
www
published
switzerland
publication
available
maps
used
between
result
paper
contains
university
600
100042
inc
eddy
httpsdoiorg
web
online
year
can
set
also
therefore
much
more
after
before
dataset
data
dataset
datasets
data
set
\ No newline at end of file
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#-----------------------------------------------------------------------
# Use a protected word file to protect against the stemmer reducing two
# unrelated words to the same base word.
# Some non-words that normally won't be encountered,
# just to test that they won't be stemmed.
nfi
lfi
wsl
lwf
slf
snow
ice
\ No newline at end of file
......@@ -30,23 +30,23 @@ schema. In this case the version should be set to the next CKAN version number.
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
<fieldtype name="binary" class="solr.BinaryField"/>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
<fieldType name="tdates" class="solr.TrieDateField" precisionStep="7" positionIncrementGap="0" multiValued="true"/>
<fieldType name="int" class="solr.IntPointField" docValues="true" omitNorms="true"/>
<fieldType name="float" class="solr.FloatPointField" docValues="true" omitNorms="true"/>
<fieldType name="long" class="solr.LongPointField" docValues="true" omitNorms="true"/>
<fieldType name="double" class="solr.DoublePointField" docValues="true" omitNorms="true"/>
<fieldType name="tint" class="solr.IntPointField" docValues="true" omitNorms="true"/>
<fieldType name="tfloat" class="solr.FloatPointField" docValues="true" omitNorms="true"/>
<fieldType name="tlong" class="solr.LongPointField" docValues="true" omitNorms="true"/>
<fieldType name="tdouble" class="solr.DoublePointField" docValues="true" omitNorms="true"/>
<fieldType name="date" class="solr.DatePointField" omitNorms="true" docValues="true"/>
<fieldType name="tdate" class="solr.DatePointField" omitNorms="true"/>
<fieldType name="tdates" class="solr.DatePointField" docValues="true" multiValued="true"/>
<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
<fieldType name="tints" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
<fieldType name="tfloats" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
<fieldType name="tlongs" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
<fieldType name="tdoubles" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
<fieldType name="tints" class="solr.IntPointField" docValues="true" multiValued="true"/>
<fieldType name="tfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/>
<fieldType name="tlongs" class="solr.LongPointField" docValues="true" multiValued="true"/>
<fieldType name="tdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/>
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
......@@ -54,6 +54,8 @@ schema. In this case the version should be set to the next CKAN version number.
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
<filter class="solr.LengthFilterFactory" min="2" max="100" />
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
<analyzer type="query">
......@@ -62,21 +64,67 @@ schema. In this case the version should be set to the next CKAN version number.
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="mlt_authors" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.LengthFilterFactory" min="4" max="12" />
<filter class="solr.StopFilterFactory" words="stopwords.txt,author_stopwords.txt" ignoreCase="true"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt,author_stopwords.txt" ignoreCase="true"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="mlt_notes" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.LengthFilterFactory" min="3" max="20" />
<filter class="solr.StopFilterFactory" words="stopwords.txt,notes_stopwords.txt" ignoreCase="true"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt,notes_stopwords.txt" ignoreCase="true"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
</fieldType>
<!-- spatial search field type (ckanext-spatial) -->
<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
spatialContextFactory="org.locationtech.spatial4j.context.jts.JtsSpatialContextFactory"
autoIndex="true"
distErrPct="0.025" maxDistErr="0.000009" distanceUnits="degrees" />
<!-- A general unstemmed text field - good if one does not know the language of the field -->
<fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
......@@ -138,6 +186,9 @@ schema. In this case the version should be set to the next CKAN version number.
via copyField further on in this schema -->
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="urls" type="text" indexed="true" stored="false" multiValued="true"/>
<!-- copy fields for the MorLikeThis handler-->
<field name="mlt_authors" type="mlt_authors" indexed="true" stored="false" multiValued="true"/>
<field name="mlt_notes" type="mlt_notes" indexed="true" stored="false" multiValued="true"/>
<field name="depends_on" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="dependency_of" type="text" indexed="true" stored="false" multiValued="true"/>
......@@ -166,6 +217,10 @@ schema. In this case the version should be set to the next CKAN version number.
<field name="_version_" type="string" indexed="true" stored="true"/>
<!-- spatial search field (ckanext-spatial) -->
<field name="spatial_geom" type="location_rpt" indexed="true" stored="true" multiValued="true" />
<!-- dynamic fields -->
<dynamicField name="*_date" type="date" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="extras_*" type="text" indexed="true" stored="true" multiValued="false"/>
......@@ -175,8 +230,8 @@ schema. In this case the version should be set to the next CKAN version number.
</fields>
<uniqueKey>index_id</uniqueKey>
<defaultSearchField>text</defaultSearchField>
<solrQueryParser defaultOperator="AND"/>
<!-- <defaultSearchField>text</defaultSearchField> -->
<!-- <solrQueryParser defaultOperator="AND"/> -->
<copyField source="url" dest="urls"/>
<copyField source="title" dest="title_ngram"/>
......@@ -201,4 +256,8 @@ schema. In this case the version should be set to the next CKAN version number.
<copyField source="maintainer" dest="text"/>
<copyField source="author" dest="text"/>
</schema>
\ No newline at end of file
<copyField source="author" dest="mlt_authors"/>
<copyField source="notes" dest="mlt_notes"/>
</schema>
This diff is collapsed.
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#Standard english stop words taken from Lucene's StopAnalyzer
an
and
are
as
at
be
but
by
for
if
in
into
is
it
no
not
of
on
or
s
such
t
that
the
their
then
there
these
they
this
to
was
will
with
were
follow
other
all
each
one
previous
did
done
done
has
have
had
got
is
over
edu
what
where
which
given
but
from
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#-----------------------------------------------------------------------
#some test synonym mappings unlikely to appear in real input text
aaafoo => aaabar
bbbfoo => bbbfoo bbbbar
cccfoo => cccbar cccbaz
fooaaa,baraaa,bazaaa
# Some synonym groups specific to this example
GB,gib,gigabyte,gigabytes
MB,mib,megabyte,megabytes
Television, Televisions, TV, TVs
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
#after us won't split it into two words.
# Synonym mappings can be used for spelling correction too
pixima => pixma
# EnviDat synonyms
lfi => nfi
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment