PHPCMS官方的设置参考如下:
英文介绍:http://www.sphinxsearch.com/docs/manual-0.9.9.html
一、首先需要在服务器上安装sphinx
在Windows上安装sphinx
1.下载支持mysql的包 http://www.sphinxsearch.com/downloads/sphinx-0.9.9-win32.zip
2.解压缩 sphinx-0.9.9-win32.zip 到 D:\sphinx
3.安装sphinx服务,在命令行执行命令
D:\sphinx\bin\searchd --install --config D:\sphinx\sphinx.conf --servicename SphinxSearch
英文参照:http://www.sphinxsearch.com/docs/manual-0.9.9.html#installing-windows
在Linux服务器上安装sphinx
1.下载源码包 http://www.sphinxsearch.com/downloads/sphinx-0.9.9.tar.gz
$ tar xzvf sphinx-0.9.9.tar.gz
$ cd sphinx-0.9.9
$ ./configure --prefix=/usr/local/sphinx --with-mysql=/usr/local/mysql
$ make
$ make install
常见问题1
/usr/local/sphinx-0.9.9/src/sphinx.cpp:20060: undefined reference to `libiconv_open'
/usr/local/sphinx-0.9.9/src/sphinx.cpp:20078: undefined reference to `libiconv'
/usr/local/sphinx-0.9.9/src/sphinx.cpp:20084: undefined reference to `libiconv_close'
collect2: ld returned 1 exit status
make[2]: *** [indexer] Error 1
make[2]: Leaving directory `/home/jling/sphinx-0.9.9/src'
make[1]: *** [all] Error 2
make[1]: Leaving directory `/home/jling/sphinx-0.9.9/src'
make: *** [all-recursive] Error 1
解决办法:打开configure文件,找到“#define USE_LIBICONV 1”,将注释去掉,并将1改成0。
常见问题2
error while loading shared libraries: libmysqlclient.so.16: cannot open shared object file: No such file or directory
解决办法:
64位系统
ln -s /usr/local/webserver/mysql/lib/mysql/libmysqlclient.so.16.0.0 /usr/lib64/libmysqlclient.so.16
32位系统
ln -s /usr/local/webserver/mysql/lib/mysql/libmysqlclient.so.16.0.0 /usr/lib/libmysqlclient.so.16
sphinx.conf样例
source main{
type = mysql #数据库类型
sql_host = 10.228.134.211 #数据库ip
sql_user = admin #数据库用户名
sql_pass = admin #数据库密码
sql_db = phpcms_v9 #数据库名
sql_port = 3306 # 数据库端口
sql_query_pre = SET NAMES utf8
sql_query_pre = REPLACE INTO v9_sphinx_counter SELECT 1, MAX(searchid) FROM v9_search
sql_query = SELECT searchid, adddate, siteid, typeid, id, data FROM v9_search \
WHERE searchid>=$start AND searchid<=$end
sql_query_range = SELECT 1,max_doc_id FROM v9_sphinx_counter WHERE counter_id=1
sql_range_step = 5000
#字符串属性设置、需要过滤、排序的时候用到
sql_attr_uint = typeid
sql_attr_uint = siteid
sql_attr_uint = id
sql_attr_timestamp = adddate
sql_query_info = SELECT * FROM v9_search WHERE searchid=$id
}
source delta
{
type = mysql #数据库类型
sql_host = 10.228.134.211 #数据库ip
sql_user = admin #数据库用户名
sql_pass = admin #数据库密码
sql_db = phpcms_v9 #数据库名
sql_port = 3306 # 数据库端口
sql_query_pre = SET NAMES utf8
sql_query = SELECT searchid, adddate, siteid, typeid, id, data FROM v9_search \
WHERE searchid >( SELECT max_doc_id FROM v9_sphinx_counter WHERE counter_id=1 )
sql_query_post = REPLACE INTO v9_sphinx_counter SELECT 1, MAX(searchid) FROM v9_search
#字符串属性设置、需要过滤、排序的时候用到
sql_attr_uint = typeid
sql_attr_uint = siteid
sql_attr_uint = id
sql_attr_timestamp = adddate
sql_query_info = SELECT * FROM v9_search WHERE searchid=$id
}
#主索引
index main
{
source = main
# 放索引的目录
path = D:\sphinx\data\main
# 编码
charset_type = utf-8
# 指定utf-8的编码表
charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
# 简单分词,只支持0和1,如果要搜索中文,请指定为1
ngram_len = 1
# 需要分词的字符,如果要搜索中文,去掉前面的注释
ngram_chars = U+3000..U+2FA1F
}
#增量索引
index delta
{
source = delta
path = D:\sphinx\data\delta
# 编码
charset_type = utf-8
# 指定utf-8的编码表
charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
# 简单分词,只支持0和1,如果要搜索中文,请指定为1
ngram_len = 1
# 需要分词的字符,如果要搜索中文,去掉前面的注释
ngram_chars = U+3000..U+2FA1F
}
indexer
{
mem_limit = 128M
}
searchd
{
port = 9312
log = D:\sphinx\data\phpcms\searchd.log
query_log = D:\sphinx\data\phpcms\query.log
read_timeout = 5
max_children = 30
pid_file = D:\sphinx\data\phpcms\searchd.pid
max_matches = 2000
seamless_rotate = 0
preopen_indexes = 0
unlink_old = 1
}
附件:设置计划任务更新索引
1.windows下
需要设置计划任务
#凌晨4点合并索引,执行merge.bat
#其余时间每分钟更新索引,执行delta.bat
merge.bat
@ECHO off
D:\sphinx\bin\indexer.exe --config D:\sphinx\sphinx.conf --merge main delta --rotate
echo indexing, window will close when complete
delta.bat
@ECHO off
D:\sphinx\bin\indexer.exe --config D:\sphinx\sphinx.conf delta --rotate
echo indexing, window will close when complete
2.linux下编辑定时任务 crontab -e
#凌晨4点合并索引,其余时间每分钟更新索引
* 0-3 * * * /usr/local/sphinx/bin/indexer --config /usr/local/sphinx/etc/sphinx.conf delta --rotate
* 6-23 * * * /usr/local/sphinx/bin/indexer --config /usr/local/sphinx/etc/sphinx.conf delta --rotate
0 4 * * * /usr/local/sphinx/bin/indexer --config /usr/local/sphinx/etc/sphinx.conf --merge main delta --rotate
各种路径、权限需要与应用所在服务器保持一致,如:
sphinx.conf 中需要配置
sql_host 数据库主机地址
sql_user 数据库用户名
sql_pass 数据库密码
sql_db 数据库名
sql_port 数据库端口
phpcms表前缀(样例中为 v9_,请按实际表前缀修改)
索引路径 D:\sphinx\data\delta
参考示例1:
sphinx官方网站 http://sphinxsearch.com/downloads/release/
下载对应服务器版本
安装示例:
dpkg -i sphinxsearch_2.2.11-release-1-wheezy_i386.deb
配置文件参考:
# # Sphinx configuration file sample # # WARNING! While this sample file mentions all available options, # it contains (very) short helper descriptions only. Please refer to # doc/sphinx.html for details. # ############################################################################# ## data source definition ############################################################################# source src1 { # data source type. mandatory, no default value # known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc type = mysql ##################################################################### ## SQL settings (for 'mysql' and 'pgsql' types) ##################################################################### # some straightforward parameters for SQL source types sql_host = localhost sql_user = root sql_pass = ****** sql_db = fxgw sql_port = 3306 # optional, default is 3306 # UNIX socket name # optional, default is empty (reuse client library defaults) # usually '/var/lib/mysql/mysql.sock' on Linux # usually '/tmp/mysql.sock' on FreeBSD # sql_sock = /tmp/mysql.sock # MySQL specific client connection flags # optional, default is 0 # # mysql_connect_flags = 32 # enable compression # MySQL specific SSL certificate settings # optional, defaults are empty # # mysql_ssl_cert = /etc/ssl/client-cert.pem # mysql_ssl_key = /etc/ssl/client-key.pem # mysql_ssl_ca = /etc/ssl/cacert.pem # MS SQL specific Windows authentication mode flag # MUST be in sync with charset_type index-level setting # optional, default is 0 # # mssql_winauth = 1 # use currently logged on user credentials # ODBC specific DSN (data source name) # mandatory for odbc source type, no default value # # odbc_dsn = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)}; # sql_query = SELECT id, data FROM documents.csv # ODBC and MS SQL specific, per-column buffer sizes # optional, default is auto-detect # # sql_column_buffers = content=12M, comments=1M # pre-query, executed before the main 
fetch query # multi-value, optional, default is empty list of queries # sql_query_pre = SET NAMES utf8 # sql_query_pre = SET SESSION query_cache_type=OFF # main document fetch query # mandatory, integer document ID field MUST be the first selected column sql_query = \ SELECT a.id, a.catid, a.inputtime AS date_added, a.title, b.content,1 AS typeid \ FROM v9_news a,v9_news_data b,v9_category c where a.id = b.id and a.catid = c.catid and c.parentid in (22,27,68,69,63,26) \ union \ SELECT a.id, a.catid, a.inputtime AS date_added, a.title, b.content,2 AS typeid \ FROM v9_news a,v9_news_data b,v9_category c where a.id = b.id and a.catid = c.catid and c.parentid in (20,39,100,101,102,103) \ union \ SELECT a.id, a.catid, a.inputtime AS date_added, a.title, b.content,3 AS typeid \ FROM v9_news a,v9_news_data b,v9_category c where a.id = b.id and a.catid = c.catid and c.parentid in (338) \ union \ SELECT a.qid,a.catid,a.addtime AS date_added,a.question AS title,a.content,4 AS typeid FROM v9_ask_question a \ union \ SELECT a.qid,a.catid,b.addtime AS date_added,a.question AS title,b.content,4 AS typeid FROM v9_ask_question a left join v9_ask_answer b on a.qid = b.qid # joined/payload field fetch query # joined fields let you avoid (slow) JOIN and GROUP_CONCAT # payload fields let you attach custom per-keyword values (eg. for ranking) # # syntax is FIELD-NAME 'from' ( 'query' | 'payload-query' ); QUERY # joined field QUERY should return 2 columns (docid, text) # payload field QUERY should return 3 columns (docid, keyword, weight) # # REQUIRES that query results are in ascending document ID order! 
# multi-value, optional, default is empty list of queries # # sql_joined_field = tags from query; SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC # sql_joined_field = wtags from payload-query; SELECT docid, tag, tagweight FROM tags ORDER BY docid ASC # file based field declaration # # content of this field is treated as a file name # and the file gets loaded and indexed in place of a field # # max file size is limited by max_file_field_buffer indexer setting # file IO errors are non-fatal and get reported as warnings # # sql_file_field = content_file_path # range query setup, query that must return min and max ID values # optional, default is empty # # sql_query will need to reference $start and $end boundaries # if using ranged query: # # sql_query = \ # SELECT doc.id, doc.id AS group, doc.title, doc.data \ # FROM documents doc \ # WHERE id>=$start AND id<=$end # # sql_query_range = SELECT MIN(id),MAX(id) FROM documents # range query step # optional, default is 1024 # sql_range_step = 5000 # unsigned integer attribute declaration # multi-value (an arbitrary number of attributes is allowed), optional # optional bit size can be specified, default is 32 # # sql_attr_uint = author_id # sql_attr_uint = forum_id:9 # 9 bits fositeidr forum_id sql_attr_uint = typeid #sql_attr_uint = siteid sql_attr_uint = id # boolean attribute declaration # multi-value (an arbitrary number of attributes is allowed), optional # equivalent to sql_attr_uint with 1-bit size # # sql_attr_bool = is_deleted # bigint attribute declaration # multi-value (an arbitrary number of attributes is allowed), optional # declares a signed (unlike uint!) 
64-bit attribute # # sql_attr_bigint = my_bigint_id # UNIX timestamp attribute declaration # multi-value (an arbitrary number of attributes is allowed), optional # similar to integer, but can also be used in date functions # # sql_attr_timestamp = posted_ts # sql_attr_timestamp = last_edited_ts sql_attr_timestamp = date_added # floating point attribute declaration # multi-value (an arbitrary number of attributes is allowed), optional # values are stored in single precision, 32-bit IEEE 754 format # # sql_attr_float = lat_radians # sql_attr_float = long_radians # multi-valued attribute (MVA) attribute declaration # multi-value (an arbitrary number of attributes is allowed), optional # MVA values are variable length lists of unsigned 32-bit integers # # syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY] # ATTR-TYPE is 'uint' or 'timestamp' # SOURCE-TYPE is 'field', 'query', or 'ranged-query' # QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs # RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range' # # sql_attr_multi = uint tag from query; SELECT docid, tagid FROM tags # sql_attr_multi = uint tag from ranged-query; \ # SELECT docid, tagid FROM tags WHERE id>=$start AND id<=$end; \ # SELECT MIN(docid), MAX(docid) FROM tags # string attribute declaration # multi-value (an arbitrary number of these is allowed), optional # lets you store and retrieve strings # # sql_attr_string = stitle # JSON attribute declaration # multi-value (an arbitrary number of these is allowed), optional # lets you store a JSON document as an (in-memory) attribute for later use # # sql_attr_json = properties # combined field plus attribute declaration (from a single column) # stores column as an attribute, but also indexes it as a full-text field # # sql_field_string = author # post-query, executed on sql_query completion # optional, default is empty # # sql_query_post = # post-index-query, executed on successful indexing 
completion # optional, default is empty # $maxid expands to max document ID actually fetched from DB # # sql_query_post_index = REPLACE INTO counters ( id, val ) \ # VALUES ( 'max_indexed_id', $maxid ) # ranged query throttling, in milliseconds # optional, default is 0 which means no delay # enforces given delay before each query step sql_ranged_throttle = 0 # kill-list query, fetches the document IDs for kill-list # k-list will suppress matches from preceding indexes in the same query # optional, default is empty # # sql_query_killlist = SELECT id FROM documents WHERE edited>=@last_reindex # columns to unpack on indexer side when indexing # multi-value, optional, default is empty list # # unpack_zlib = zlib_column # unpack_mysqlcompress = compressed_column # unpack_mysqlcompress = compressed_column_2 # maximum unpacked length allowed in MySQL COMPRESS() unpacker # optional, default is 16M # # unpack_mysqlcompress_maxsize = 16M # hook command to run when SQL connection succeeds # optional, default value is empty (do nothing) # # hook_connect = bash sql_connect.sh # hook command to run after (any) SQL range query # it may print out "minid maxid" (w/o quotes) to override the range # optional, default value is empty (do nothing) # # hook_query_range = bash sql_query_range.sh # hook command to run on successful indexing completion # $maxid expands to max document ID actually fetched from DB # optional, default value is empty (do nothing) # # hook_post_index = bash sql_post_index.sh $maxid ##################################################################### ## xmlpipe2 settings ##################################################################### # type = xmlpipe # shell command to invoke xmlpipe stream producer # mandatory # # xmlpipe_command = cat /var/lib/sphinxsearch/test.xml # xmlpipe2 field declaration # multi-value, optional, default is empty # # xmlpipe_field = subject # xmlpipe_field = content # xmlpipe2 attribute declaration # multi-value, optional, default 
is empty # all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX # examples: # # xmlpipe_attr_timestamp = published # xmlpipe_attr_uint = author_id # xmlpipe_attr_bool = is_enabled # xmlpipe_attr_float = latitude # xmlpipe_attr_bigint = guid # xmlpipe_attr_multi = tags # xmlpipe_attr_multi_64 = tags64 # xmlpipe_attr_string = title # xmlpipe_attr_json = extra_data # xmlpipe_field_string = content # perform UTF-8 validation, and filter out incorrect codes # avoids XML parser choking on non-UTF-8 documents # optional, default is 0 # # xmlpipe_fixup_utf8 = 1 } # inherited source example # # all the parameters are copied from the parent source, # and may then be overridden in this source definition source src1throttled : src1 { sql_ranged_throttle = 100 } ############################################################################# ## index definition ############################################################################# # local index example # # this is an index which is stored locally in the filesystem # # all indexing-time options (such as morphology and charsets) # are configured per local index index test1 { # index type # optional, default is 'plain' # known values are 'plain', 'distributed', and 'rt' (see samples below) # type = plain # document source(s) to index # multi-value, mandatory # document IDs must be globally unique across all sources source = src1 # index files path and file name, without extension # mandatory, path must be writable, extensions will be auto-appended path = /var/lib/sphinxsearch/data/test1 # document attribute values (docinfo) storage mode # optional, default is 'extern' # known values are 'none', 'extern' and 'inline' docinfo = extern # dictionary type, 'crc' or 'keywords' # crc is faster to index when no substring/wildcards searches are needed # crc with substrings might be faster to search but is much slower to index # (because all substrings are pre-extracted as individual keywords) # keywords is much faster to 
index with substrings, and index is much (3-10x) smaller # keywords supports wildcards, crc does not, and never will # optional, default is 'keywords' dict = keywords # memory locking for cached data (.spa and .spi), to prevent swapping # optional, default is 0 (do not mlock) # requires searchd to be run from root mlock = 0 # a list of morphology preprocessors to apply # optional, default is empty # # builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru', # 'soundex', and 'metaphone'; additional preprocessors available from # libstemmer are 'libstemmer_XXX', where XXX is algorithm code # (see libstemmer_c/libstemmer/modules.txt) # # morphology = stem_en, stem_ru, soundex # morphology = libstemmer_german # morphology = libstemmer_sv morphology = none # minimum word length at which to enable stemming # optional, default is 1 (stem everything) # # min_stemming_len = 1 # stopword files list (space separated) # optional, default is empty # contents are plain text, charset_table and stemming are both applied # # stopwords = /var/lib/sphinxsearch/data/stopwords.txt charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F # wordforms file, in "mapfrom > mapto" plain text format # optional, default is empty # # wordforms = /var/lib/sphinxsearch/data/wordforms.txt # tokenizing exceptions file # optional, default is empty # # plain text, case sensitive, space insensitive in map-from part # one "Map Several Words => ToASingleOne" entry per line # # exceptions = /var/lib/sphinxsearch/data/exceptions.txt # embedded file size limit # optional, default is 16K # # exceptions, wordforms, and stopwords files smaller than this limit # are stored in the index; otherwise, their paths and sizes are stored # # embedded_limit = 16K # minimum indexed word length # default is 1 (index everything) min_word_len = 1 # ignored characters list # optional, default value is empty # # ignore_chars = U+00AD # minimum word prefix length to index # optional, 
default is 0 (do not index prefixes) # # min_prefix_len = 0 # minimum word infix length to index # optional, default is 0 (do not index infixes) # # min_infix_len = 0 # maximum substring (prefix or infix) length to index # optional, default is 0 (do not limit substring length) # # max_substring_len = 8 # list of fields to limit prefix/infix indexing to # optional, default value is empty (index all fields in prefix/infix mode) # # prefix_fields = filename # infix_fields = url, domain # expand keywords with exact forms and/or stars when searching fit indexes # search-time only, does not affect indexing, can be 0 or 1 # optional, default is 0 (do not expand keywords) # # expand_keywords = 1 # n-gram length to index, for CJK indexing # only supports 0 and 1 for now, other lengths to be implemented # optional, default is 0 (disable n-grams) # ngram_len = 1 # n-gram characters list, for CJK indexing # optional, default is empty # ngram_chars = U+3000..U+2FA1F # phrase boundary characters list # optional, default is empty # # phrase_boundary = ., ?, !, U+2026 # horizontal ellipsis # phrase boundary word position increment # optional, default is 0 # # phrase_boundary_step = 100 # blended characters list # blended chars are indexed both as separators and valid characters # for instance, AT&T will results in 3 tokens ("at", "t", and "at&t") # optional, default is empty # # blend_chars = +, &, U+23 # blended token indexing mode # a comma separated list of blended token indexing variants # known variants are trim_none, trim_head, trim_tail, trim_both, skip_pure # optional, default is trim_none # # blend_mode = trim_tail, skip_pure # whether to strip HTML tags from incoming documents # known values are 0 (do not strip) and 1 (do strip) # optional, default is 0 html_strip = 0 # what HTML attributes to index if stripping HTML # optional, default is empty (do not index anything) # # html_index_attrs = img=alt,title; a=title; # what HTML elements contents to strip # optional, 
default is empty (do not strip element contents) # # html_remove_elements = style, script # whether to preopen index data files on startup # optional, default is 0 (do not preopen), searchd-only # # preopen = 1 # whether to enable in-place inversion (2x less disk, 90-95% speed) # optional, default is 0 (use separate temporary files), indexer-only # # inplace_enable = 1 # in-place fine-tuning options # optional, defaults are listed below # # inplace_hit_gap = 0 # preallocated hitlist gap size # inplace_docinfo_gap = 0 # preallocated docinfo gap size # inplace_reloc_factor = 0.1 # relocation buffer size within arena # inplace_write_factor = 0.1 # write buffer size within arena # whether to index original keywords along with stemmed versions # enables "=exactform" operator to work # optional, default is 0 # # index_exact_words = 1 # position increment on overshort (less that min_word_len) words # optional, allowed values are 0 and 1, default is 1 # # overshort_step = 1 # position increment on stopword # optional, allowed values are 0 and 1, default is 1 # # stopword_step = 1 # hitless words list # positions for these keywords will not be stored in the index # optional, allowed values are 'all', or a list file name # # hitless_words = all # hitless_words = hitless.txt # detect and index sentence and paragraph boundaries # required for the SENTENCE and PARAGRAPH operators to work # optional, allowed values are 0 and 1, default is 0 # # index_sp = 1 # index zones, delimited by HTML/XML tags # a comma separated list of tags and wildcards # required for the ZONE operator to work # optional, default is empty string (do not index zones) # # index_zones = title, h*, th # index per-document and average per-index field lengths, in tokens # required for the BM25A(), BM25F() in expression ranker # optional, default is 0 (do not index field lenghts) # # index_field_lengths = 1 # regular expressions (regexps) to filter the fields and queries with # gets applied to data source 
fields when indexing # gets applied to search queries when searching # multi-value, optional, default is empty list of regexps # # regexp_filter = \b(\d+)\" => \1inch # regexp_filter = (blue|red) => color # list of the words considered frequent with respect to bigram indexing # optional, default is empty # # bigram_freq_words = the, a, i, you, my # bigram indexing mode # known values are none, all, first_freq, both_freq # option, default is none (do not index bigrams) # # bigram_index = both_freq # snippet document file name prefix # preprended to file names when generating snippets using load_files option # WARNING, this is a prefix (not a path), trailing slash matters! # optional, default is empty # # snippets_file_prefix = /mnt/mydocs/server1 # whether to apply stopwords before or after stemming # optional, default is 0 (apply stopwords after stemming) # # stopwords_unstemmed = 0 # path to a global (cluster-wide) keyword IDFs file # optional, default is empty (use local IDFs) # # global_idf = /usr/local/sphinx/var/global.idf } # inherited index example # # all the parameters are copied from the parent index, # and may then be overridden in this index definition index test1stemmed : test1 { path = /var/lib/sphinxsearch/data/test1stemmed morphology = stem_en } # distributed index example # # this is a virtual index which can NOT be directly indexed, # and only contains references to other local and/or remote indexes index dist1 { # 'distributed' index type MUST be specified type = distributed # local index to be searched # there can be many local indexes configured local = test1 local = test1stemmed # remote agent # multiple remote agents may be specified # syntax for TCP connections is 'hostname:port:index1,[index2[,...]]' # syntax for local UNIX connections is '/path/to/socket:index1,[index2[,...]]' agent = localhost:9313:remote1 agent = localhost:9314:remote2,remote3 # agent = /var/run/searchd.sock:remote4 # remote agent mirrors groups, aka mirrors, aka HA 
agents # defines 2 or more interchangeable mirrors for a given index part # # agent = server3:9312 | server4:9312 :indexchunk2 # agent = server3:9312:chunk2server3 | server4:9312:chunk2server4 # agent = server3:chunk2server3 | server4:chunk2server4 # agent = server21|server22|server23:chunk2 # blackhole remote agent, for debugging/testing # network errors and search results will be ignored # # agent_blackhole = testbox:9312:testindex1,testindex2 # persistenly connected remote agent # reduces connect() pressure, requires that workers IS threads # # agent_persistent = testbox:9312:testindex1,testindex2 # remote agent connection timeout, milliseconds # optional, default is 1000 ms, ie. 1 sec agent_connect_timeout = 1000 # remote agent query timeout, milliseconds # optional, default is 3000 ms, ie. 3 sec agent_query_timeout = 3000 # HA mirror agent strategy # optional, defaults to ??? (random mirror) # know values are nodeads, noerrors, roundrobin, nodeadstm, noerrorstm # # ha_strategy = nodeads # path to RLP context file # optional, defaut is empty # # rlp_context = /usr/local/share/sphinx/rlp/rlp-context.xml } # realtime index example # # you can run INSERT, REPLACE, and DELETE on this index on the fly # using MySQL protocol (see 'listen' directive below) index rt { # 'rt' index type must be specified to use RT index type = rt # index files path and file name, without extension # mandatory, path must be writable, extensions will be auto-appended path = /var/lib/sphinxsearch/data/rt # RAM chunk size limit # RT index will keep at most this much data in RAM, then flush to disk # optional, default is 128M # # rt_mem_limit = 512M # full-text field declaration # multi-value, mandatory rt_field = title rt_field = content # unsigned integer attribute declaration # multi-value (an arbitrary number of attributes is allowed), optional # declares an unsigned 32-bit attribute rt_attr_uint = gid # RT indexes currently support the following attribute types: # uint, bigint, float, 
timestamp, string, mva, mva64, json # # rt_attr_bigint = guid # rt_attr_float = gpa # rt_attr_timestamp = ts_added # rt_attr_string = author # rt_attr_multi = tags # rt_attr_multi_64 = tags64 # rt_attr_json = extra_data } ############################################################################# ## indexer settings ############################################################################# indexer { # memory limit, in bytes, kiloytes (16384K) or megabytes (256M) # optional, default is 128M, max is 2047M, recommended is 256M to 1024M mem_limit = 128M # maximum IO calls per second (for I/O throttling) # optional, default is 0 (unlimited) # # max_iops = 40 # maximum IO call size, bytes (for I/O throttling) # optional, default is 0 (unlimited) # # max_iosize = 1048576 # maximum xmlpipe2 field length, bytes # optional, default is 2M # # max_xmlpipe2_field = 4M # write buffer size, bytes # several (currently up to 4) buffers will be allocated # write buffers are allocated in addition to mem_limit # optional, default is 1M # # write_buffer = 1M # maximum file field adaptive buffer size # optional, default is 8M, minimum is 1M # # max_file_field_buffer = 32M # how to handle IO errors in file fields # known values are 'ignore_field', 'skip_document', and 'fail_index' # optional, default is 'ignore_field' # # on_file_field_error = skip_document # lemmatizer cache size # improves the indexing time when the lemmatization is enabled # optional, default is 256K # # lemmatizer_cache = 512M } ############################################################################# ## searchd settings ############################################################################# searchd { # [hostname:]port[:protocol], or /unix/socket/path to listen on # known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL) # # multi-value, multiple listen points are allowed # optional, defaults are 9312:sphinx and 9306:mysql41, as below # # listen = 127.0.0.1 # listen = 192.168.0.1:9312 # 
listen = 9312 # listen = /var/run/searchd.sock listen = 9312 listen = 9306:mysql41 # log file, searchd run info is logged here # optional, default is 'searchd.log' log = /var/log/sphinxsearch/searchd.log # query log file, all search queries are logged here # optional, default is empty (do not log queries) query_log = /var/log/sphinxsearch/query.log # client read timeout, seconds # optional, default is 5 read_timeout = 5 # request timeout, seconds # optional, default is 5 minutes client_timeout = 300 # maximum amount of children to fork (concurrent searches to run) # optional, default is 0 (unlimited) max_children = 30 # maximum amount of persistent connections from this master to each agent host # optional, but necessary if you use agent_persistent. It is reasonable to set the value # as max_children, or less on the agent's hosts. persistent_connections_limit = 30 # PID file, searchd process ID file name # mandatory pid_file = /var/run/sphinxsearch/searchd.pid # seamless rotate, prevents rotate stalls if precaching huge datasets # optional, default is 1 seamless_rotate = 1 # whether to forcibly preopen all indexes on startup # optional, default is 1 (preopen everything) preopen_indexes = 1 # whether to unlink .old index copies on succesful rotation. # optional, default is 1 (do unlink) unlink_old = 1 # attribute updates periodic flush timeout, seconds # updates will be automatically dumped to disk this frequently # optional, default is 0 (disable periodic flush) # # attr_flush_period = 900 # MVA updates pool size # shared between all instances of searchd, disables attr flushes! 
# optional, default size is 1M mva_updates_pool = 1M # max allowed network packet size # limits both query packets from clients, and responses from agents # optional, default size is 8M max_packet_size = 8M # max allowed per-query filter count # optional, default is 256 max_filters = 256 # max allowed per-filter values count # optional, default is 4096 max_filter_values = 4096 # socket listen queue length # optional, default is 5 # # listen_backlog = 5 # per-keyword read buffer size # optional, default is 256K # # read_buffer = 256K # unhinted read size (currently used when reading hits) # optional, default is 32K # # read_unhinted = 32K # max allowed per-batch query count (aka multi-query count) # optional, default is 32 max_batch_queries = 32 # max common subtree document cache size, per-query # optional, default is 0 (disable subtree optimization) # # subtree_docs_cache = 4M # max common subtree hit cache size, per-query # optional, default is 0 (disable subtree optimization) # # subtree_hits_cache = 8M # multi-processing mode (MPM) # known values are none, fork, prefork, and threads # threads is required for RT backend to work # optional, default is threads workers = threads # for RT to work # max threads to create for searching local parts of a distributed index # optional, default is 0, which means disable multi-threaded searching # should work with all MPMs (ie. 
does NOT require workers=threads) # # dist_threads = 4 # binlog files path; use empty string to disable binlog # optional, default is build-time configured data directory # # binlog_path = # disable logging # binlog_path = /var/lib/sphinxsearch/data # binlog.001 etc will be created there # binlog flush/sync mode # 0 means flush and sync every second # 1 means flush and sync every transaction # 2 means flush every transaction, sync every second # optional, default is 2 # # binlog_flush = 2 # binlog per-file size limit # optional, default is 128M, 0 means no limit # # binlog_max_log_size = 256M # per-thread stack size, only affects workers=threads mode # optional, default is 64K # # thread_stack = 128K # per-keyword expansion limit (for dict=keywords prefix searches) # optional, default is 0 (no limit) # # expansion_limit = 1000 # RT RAM chunks flush period # optional, default is 0 (no periodic flush) # # rt_flush_period = 900 # query log file format # optional, known values are plain and sphinxql, default is plain # # query_log_format = sphinxql # version string returned to MySQL network protocol clients # optional, default is empty (use Sphinx version) # # mysql_version_string = 5.0.37 # default server-wide collation # optional, default is libc_ci # # collation_server = utf8_general_ci # server-wide locale for libc based collations # optional, default is C # # collation_libc_locale = ru_RU.UTF-8 # threaded server watchdog (only used in workers=threads mode) # optional, values are 0 and 1, default is 1 (watchdog on) # # watchdog = 1 # costs for max_predicted_time model, in (imaginary) nanoseconds # optional, default is "doc=64, hit=48, skip=2048, match=64" # # predicted_time_costs = doc=64, hit=48, skip=2048, match=64 # current SphinxQL state (uservars etc) serialization path # optional, default is none (do not serialize SphinxQL state) # # sphinxql_state = sphinxvars.sql # maximum RT merge thread IO calls per second, and per-call IO size # useful for throttling 
(the background) OPTIMIZE INDEX impact # optional, default is 0 (unlimited) # # rt_merge_iops = 40 # rt_merge_maxiosize = 1M # interval between agent mirror pings, in milliseconds # 0 means disable pings # optional, default is 1000 # # ha_ping_interval = 0 # agent mirror statistics window size, in seconds # stats older than the window size (karma) are retired # that is, they will not affect master choice of agents in any way # optional, default is 60 seconds # # ha_period_karma = 60 # delay between preforked children restarts on rotation, in milliseconds # optional, default is 0 (no delay) # # prefork_rotation_throttle = 100 # a prefix to prepend to the local file names when creating snippets # with load_files and/or load_files_scatter options # optional, default is empty # # snippets_file_prefix = /mnt/common/server1/ } ############################################################################# ## common settings ############################################################################# common { # lemmatizer dictionaries base path # optional, defaut is /usr/local/share (see ./configure --datadir) # # lemmatizer_base = /usr/local/share/sphinx/dicts # how to handle syntax errors in JSON attributes # known values are 'ignore_attr' and 'fail_index' # optional, default is 'ignore_attr' # # on_json_attr_error = fail_index # whether to auto-convert numeric values from strings in JSON attributes # with auto-conversion, string value with actually numeric data # (as in {"key":"12345"}) gets stored as a number, rather than string # optional, allowed values are 0 and 1, default is 0 (do not convert) # # json_autoconv_numbers = 1 # whether and how to auto-convert key names in JSON attributes # known value is 'lowercase' # optional, default is unspecified (do nothing) # # json_autoconv_keynames = lowercase # path to RLP root directory # optional, defaut is /usr/local/share (see ./configure --datadir) # # rlp_root = /usr/local/share/sphinx/rlp # path to RLP environment file 
# optional, defaut is /usr/local/share/rlp-environment.xml (see ./configure --datadir) # # rlp_environment = /usr/local/share/sphinx/rlp/rlp/etc/rlp-environment.xml # maximum total size of documents batched before processing them by the RLP # optional, default is 51200 # # rlp_max_batch_size = 100k # maximum number of documents batched before processing them by the RLP # optional, default is 50 # # rlp_max_batch_docs = 100 # trusted plugin directory # optional, default is empty (disable UDFs) # # plugin_dir = /usr/local/sphinx/lib } # --eof--
执行分词索引:
/usr/bin/indexer --config /etc/sphinxsearch/sphinx.conf test1 --rotate
微信扫码添加微信好友