各位 Postgres 用户好,我该如何为下面这个全文搜索(full-text search)查询建立索引?
长话短说:我有一张地址表(addresses),它关联到州表(states),每个地址都属于一个州。我运行以下查询进行搜索:
-- Full-text search over addresses for one company: every address is ranked by
-- how well its name plus the aggregated name of its state matches the prefix
-- query built from 'test'. Best rank first, ties broken by ascending id.
SELECT
    addresses.*,
    ts_rank(
        to_tsvector('simple', coalesce(addresses.name::text, ''))
            || to_tsvector('simple', coalesce(pg_search_85240c410826a2b0e0f0e5.pg_search_442c4ad3183a256248ef8d::text, '')),
        to_tsquery('simple', ''' ' || 'test' || ' ''' || ':*'),
        0
    ) AS pg_search_rank
FROM addresses
-- Derived table: one row per address id carrying its state name(s) joined by spaces.
LEFT JOIN (
    SELECT
        addresses.id AS id,
        string_agg(states.name::text, ' ') AS pg_search_442c4ad3183a256248ef8d
    FROM addresses
    INNER JOIN states
        ON states.id = addresses.state_id
    GROUP BY addresses.id
) pg_search_85240c410826a2b0e0f0e5
    ON pg_search_85240c410826a2b0e0f0e5.id = addresses.id
WHERE (
        (
            to_tsvector('simple', coalesce(addresses.name::text, ''))
                || to_tsvector('simple', coalesce(pg_search_85240c410826a2b0e0f0e5.pg_search_442c4ad3183a256248ef8d::text, ''))
        ) @@ (to_tsquery('simple', ''' ' || 'test' || ' ''' || ':*'))
    )
    AND (company_id = 2142)
ORDER BY
    pg_search_rank DESC,
    addresses.id ASC
很直接,对吧?在一台不错的机器上、共 8581 条地址记录的情况下,这个查询大约需要 1.226 ms。我需要改进这一点,所以创建了下面这 2 个索引:
-- GIN expression indexes over the 'simple' tsvector of addresses.name and of
-- states.name. Each statement is terminated with ';' so the two can be run
-- together as a script.
-- NOTE(review): the search query's WHERE clause matches against the
-- concatenation of two tsvectors (one of them built from a string_agg inside a
-- subquery), not against either indexed expression on its own, so the planner
-- presumably cannot use these indexes for that predicate -- confirm with EXPLAIN.
CREATE INDEX index_addresses_search_by_name ON addresses USING gin(to_tsvector('simple', COALESCE((public.addresses.name)::text, '')));
CREATE INDEX index_state_search_by_name ON states USING gin(to_tsvector('simple', COALESCE((public.states.name)::text, '')));
我原以为在 addresses 表上建一个索引、在 states 表上建一个索引会有帮助,但并没有 :( 查询和以前一样慢,EXPLAIN 显示这些索引没有被使用。
请给我一些建议。
编辑:
下面是 EXPLAIN ANALYZE 的输出:
"Sort (cost=11.39..11.40 rows=1 width=4824) (actual time=0.947..0.947 rows=0 loops=1)"
" Sort Key: (ts_rank((to_tsvector('simple'::regconfig, COALESCE((public.addresses.name)::text, ''::text)) || to_tsvector('simple'::regconfig, COALESCE((string_agg((states.name)::text, ' '::text)), ''::text))), '''test'':*'::tsquery, 0)), public.addresses.id"
" Sort Method: quicksort Memory: 17kB"
" -> Nested Loop Left Join (cost=6.19..11.38 rows=1 width=4824) (actual time=0.905..0.905 rows=0 loops=1)"
" Join Filter: (public.addresses.id = public.addresses.id)"
" Filter: ((to_tsvector('simple'::regconfig, COALESCE((public.addresses.name)::text, ''::text)) || to_tsvector('simple'::regconfig, COALESCE((string_agg((states.name)::text, ' '::text)), ''::text))) @@ '''test'':*'::tsquery)"
" -> Seq Scan on addresses (cost=0.00..5.14 rows=1 width=4792) (actual time=0.904..0.904 rows=0 loops=1)"
" Filter: (company_id = 2142)"
" -> HashAggregate (cost=6.19..6.20 rows=1 width=520) (never executed)"
" -> Hash Join (cost=1.02..6.18 rows=1 width=520) (never executed)"
" Hash Cond: (public.addresses.state_id = states.id)"
" -> Seq Scan on addresses (cost=0.00..5.11 rows=11 width=8) (never executed)"
" -> Hash (cost=1.01..1.01 rows=1 width=520) (never executed)"
" -> Seq Scan on states (cost=0.00..1.01 rows=1 width=520) (never executed)"
"Total runtime: 1.226 ms"
EDIT2
addresses 表的 DDL
--
-- PostgreSQL database dump
--
-- Session settings emitted at the top of the addresses dump.
SET statement_timeout TO 0;
SET client_encoding TO 'UTF8';
SET standard_conforming_strings TO on;
SET check_function_bodies TO false;
SET client_min_messages TO warning;
-- Unqualified object names in the statements below are looked up in public
-- first, then pg_catalog.
SET search_path TO public, pg_catalog;
SET default_tablespace TO '';
SET default_with_oids TO false;
--
-- Name: addresses; Type: TABLE; Schema: public; Owner: trucking; Tablespace:
--
-- Address records searched by the full-text query. company_id is mandatory
-- (default 0); state_id is the column joined to states.id by the search query,
-- but no foreign key is declared in this statement.
CREATE TABLE addresses (
    id                   int NOT NULL,
    name                 varchar(255),
    street               varchar(255),
    city                 varchar(255),
    zip_code             varchar(255),
    primary_phone        varchar(255),
    alternate_phone      varchar(255),
    fax                  varchar(255),
    email                varchar(255),
    contact              varchar(255),
    company_id           int DEFAULT 0 NOT NULL,
    motor_carrier_number varchar(12),
    state_id             int,
    created_at           timestamp,  -- i.e. timestamp without time zone
    updated_at           timestamp,  -- i.e. timestamp without time zone
    alternate_phone2     varchar(12),
    insurance_expires_on date,
    notes                text
);

ALTER TABLE public.addresses
    OWNER TO trucking;
--
-- Name: addresses_id_seq; Type: SEQUENCE; Schema: public; Owner: trucking
--
-- Sequence backing addresses.id: starts at 1, steps by 1, no explicit bounds.
CREATE SEQUENCE addresses_id_seq
    START WITH 1
    INCREMENT BY 1
    NO MINVALUE
    NO MAXVALUE
    CACHE 1;

ALTER TABLE public.addresses_id_seq
    OWNER TO trucking;

-- Tie the sequence to the column it feeds.
ALTER SEQUENCE addresses_id_seq
    OWNED BY addresses.id;

-- addresses.id takes its default from the sequence.
ALTER TABLE ONLY addresses
    ALTER COLUMN id SET DEFAULT nextval('addresses_id_seq'::regclass);

-- Primary key on addresses.id.
ALTER TABLE ONLY addresses
    ADD CONSTRAINT addresses_pkey PRIMARY KEY (id);
--
-- Name: index_addresses_on_company_id; Type: INDEX; Schema: public; Owner: trucking; Tablespace:
--
-- B-tree indexes on the columns the search query filters on (company_id) and
-- joins on (state_id).
CREATE INDEX index_addresses_on_company_id
    ON addresses USING btree (company_id);

CREATE INDEX index_addresses_on_state_id
    ON addresses USING btree (state_id);

-- GIN expression indexes, one per text column. Each one indexes
-- to_tsvector('simple', COALESCE(<column>::text, '')) for that single column.
CREATE INDEX index_addresses_search_by_city
    ON addresses USING gin (to_tsvector('simple'::regconfig, COALESCE(city::text, ''::text)));

CREATE INDEX index_addresses_search_by_email
    ON addresses USING gin (to_tsvector('simple'::regconfig, COALESCE(email::text, ''::text)));

CREATE INDEX index_addresses_search_by_name
    ON addresses USING gin (to_tsvector('simple'::regconfig, COALESCE(name::text, ''::text)));

CREATE INDEX index_addresses_search_by_street
    ON addresses USING gin (to_tsvector('simple'::regconfig, COALESCE(street::text, ''::text)));
--
-- PostgreSQL database dump complete
--
states 表的 DDL
--
-- PostgreSQL database dump
--
-- Session settings emitted at the top of the states dump.
SET statement_timeout TO 0;
SET client_encoding TO 'UTF8';
SET standard_conforming_strings TO on;
SET check_function_bodies TO false;
SET client_min_messages TO warning;
-- Unqualified object names in the statements below are looked up in public
-- first, then pg_catalog.
SET search_path TO public, pg_catalog;
SET default_tablespace TO '';
SET default_with_oids TO false;
--
-- Name: states; Type: TABLE; Schema: public; Owner: trucking; Tablespace:
--
-- Lookup table of states; its name column is what the search query aggregates
-- per address. name and abbrev are mandatory, country is optional.
CREATE TABLE states (
    id      int NOT NULL,
    name    varchar(255) NOT NULL,
    abbrev  varchar(255) NOT NULL,
    country varchar(255)
);

ALTER TABLE public.states
    OWNER TO trucking;
--
-- Name: states_id_seq; Type: SEQUENCE; Schema: public; Owner: trucking
--
-- Sequence backing states.id: starts at 1, steps by 1, no explicit bounds.
CREATE SEQUENCE states_id_seq
    START WITH 1
    INCREMENT BY 1
    NO MINVALUE
    NO MAXVALUE
    CACHE 1;

ALTER TABLE public.states_id_seq
    OWNER TO trucking;

-- Tie the sequence to the column it feeds.
ALTER SEQUENCE states_id_seq
    OWNED BY states.id;

-- states.id takes its default from the sequence.
ALTER TABLE ONLY states
    ALTER COLUMN id SET DEFAULT nextval('states_id_seq'::regclass);

-- Primary key on states.id.
ALTER TABLE ONLY states
    ADD CONSTRAINT states_pkey PRIMARY KEY (id);
--
-- Name: index_states_search_by_abbrev; Type: INDEX; Schema: public; Owner: trucking; Tablespace:
--
-- GIN expression indexes on states, one per text column. Each one indexes
-- to_tsvector('simple', COALESCE(<column>::text, '')) for that single column.
CREATE INDEX index_states_search_by_abbrev
    ON states USING gin (to_tsvector('simple'::regconfig, COALESCE(abbrev::text, ''::text)));

CREATE INDEX index_states_search_by_name
    ON states USING gin (to_tsvector('simple'::regconfig, COALESCE(name::text, ''::text)));
--
-- PostgreSQL database dump complete
--
目前为止最好的改写:
-- Rewritten search: first build one row per address holding its name and the
-- aggregated state name, then rank and filter on the combined tsvector against
-- the prefix query built from 'Gallaway'.
-- NOTE: unlike the first query, this one has no company_id filter and no ORDER BY.
SELECT
    consolidated_address.id,
    ts_rank(
        to_tsvector('simple', coalesce(consolidated_address.name::text, ''))
            || to_tsvector('simple', coalesce(consolidated_address.state_name::text, '')),
        to_tsquery('simple', ''' ' || 'Gallaway' || ' ''' || ':*'),
        0
    ) AS pg_search_rank
FROM (
    -- Addresses without a matching state are kept (LEFT JOIN); state_name is NULL for them.
    SELECT
        addresses.id,
        addresses.name,
        string_agg(states.name::text, ' ') AS state_name
    FROM addresses
    LEFT JOIN states
        ON states.id = addresses.state_id
    GROUP BY addresses.id
) consolidated_address
WHERE (
        to_tsvector('simple', coalesce(consolidated_address.name::text, ''))
            || to_tsvector('simple', coalesce(consolidated_address.state_name::text, ''))
    ) @@ (to_tsquery('simple', ''' ' || 'Gallaway' || ' ''' || ':*'))
这个版本稍微快一点,但仍然没有使用任何索引。
谢谢!
以后提问时,请务必注明您的 PostgreSQL 版本;对于慢查询问题,还请附上“EXPLAIN ANALYZE SELECT ...您的查询...”的输出。参见 https://wiki.postgresql.org/wiki/Slow_Query_Questions – 2012-08-14 00:17:41
编辑了这个问题,谢谢@CraigRinger – Calin 2012-08-14 12:14:59
创建索引之后,你对这些表执行过 ANALYZE 吗?能否让我们看看相关表及其约束的 DDL? – vyegorov 2012-08-14 12:44:36