我需要遍历超过200,000个用户的设置,用它们来过滤30,000个产品。如何优化这个庞大的嵌套循环以获得最佳性能?是否应该重构循环?
// Match every user setting against every product and collect the new
// (user, product) notifications, then persist them with save_to_msg().

// User filter settings: at most 5 per user, up to 200,000 users.
$settings = array(...);
// All products, up to 30,000.
$prods = array(...);
// Product/category relation rows, up to 2 * 30,000.
$prods_cate_ref_all = array(...);
// Messages already produced by yesterday's run, more than 100 * 200,000 rows.
$msg_all = array(...);
// Number of rows appended to $res so far.
$j = 0;
// Filter result: one row per (uuid, product) match.
$res = array();

// Build hash lookups once, so each membership test inside the hot loop is a
// single isset() instead of a linear scan over the whole array.
// Keys are compared as array keys (decimal-integer strings and ints are the
// same key) rather than with loose ==. The indexes also cost extra memory on
// top of the source arrays.
$cate_index = array();   // [cate_id][prod_id] => true
foreach ($prods_cate_ref_all as $ref) {
    $cate_index[$ref['cate_id']][$ref['prod_id']] = true;
}
$msg_index = array();    // [uuid][prod_id] => true
foreach ($msg_all as $msg) {
    $msg_index[$msg['uuid']][$msg['prod_id']] = true;
}
$res_index = array();    // [uuid][prod_id] => true, mirrors what is in $res

foreach ($settings as $set) {
    $uuid = $set['uuid'];
    foreach ($prods as $p) {
        $prod_id = $p['id'];
        // The product must belong to the site chosen in the setting.
        if ($set['site_id'] != $p['site_id']) continue;
        // A product with city_id == 0 is nationwide and matches any city.
        if ($set['city_id'] != $p['city_id'] && $p['city_id'] > 0) continue;
        // Several settings of one user may select the same product: keep it once.
        if (isset($res_index[$uuid][$prod_id])) continue;
        // Skip products already saved to the msg table for this user yesterday.
        if (isset($msg_index[$uuid][$prod_id])) continue;
        // The product must be in the category chosen in the setting.
        if (!isset($cate_index[$set['cate_id']][$prod_id])) continue;
        // At least one tag of the setting must occur in the product's text fields.
        $arr = array($p['title'], $p['website'], $p['detail'], $p['shop'], $p['tags']);
        if (!tags_in($set['tags'], $arr)) continue;

        // The original read $v here, which is undefined in this scope; the
        // values come from the current setting row.
        // NOTE(review): the `setting` table shown below has no `name` column,
        // so confirm that the $settings rows actually carry 'name'.
        $res[$j] = array(
            'name'    => $set['name'],
            'prod_id' => $prod_id,
            'uuid'    => $uuid,
            'msg'     => '...',
        );
        $res_index[$uuid][$prod_id] = true;
        $j++;
    }
}
save_to_msg($res);
// Tell whether $prod_all already holds a row for this product and user.
// $prod_id   product id to look for
// $uuid      user identifier to look for
// $prod_all  rows, each with 'prod_id' and 'uuid' keys
// Returns true on the first row matching both values (loose ==), else false.
function prod_in($prod_id, $uuid, $prod_all){
    foreach ($prod_all as $row) {
        // Reject on the product id first; uuid is only read when the id matches.
        if ($row['prod_id'] != $prod_id) {
            continue;
        }
        if ($row['uuid'] != $uuid) {
            continue;
        }
        return true;
    }
    return false;
}
// Tell whether a product is linked to a category in the relation rows.
// $prod_id        product id to look for
// $cate_id        category id to look for
// $prod_cate_all  rows, each with 'prod_id' and 'cate_id' keys
// Returns true if some row matches both values (loose ==), else false.
function prod_cate_in($prod_id, $cate_id, $prod_cate_all){
    $found = false;
    foreach ($prod_cate_all as $ref) {
        $same_prod = ($ref['prod_id'] == $prod_id);
        // cate_id is only inspected once the product id has matched.
        if ($same_prod && $ref['cate_id'] == $cate_id) {
            $found = true;
            break;
        }
    }
    return $found;
}
// Report whether any tag of a setting occurs in any of the given text fields.
// $tags  comma-separated tag list; both the ASCII ',' and the full-width ','
//        are accepted as separators
// $arr   array of strings to search (the caller passes product title,
//        website, detail, shop and tags)
// Returns true on the first case-insensitive substring hit, false otherwise
// (including when $tags holds no non-blank tag or $arr is empty).
function tags_in($tags, $arr){
    // The original replaced ',' with ',' (a no-op); normalising the full-width
    // comma is presumably what was intended, so both delimiters split the list.
    $tag_arr = explode(',', str_replace(',', ',', $tags));
    foreach ($tag_arr as $v) {
        $tag = trim($v);
        // Skip blank tags (empty $tags, trailing comma). An empty needle
        // matches every string on PHP 8 and raises a warning on PHP 7, so it
        // must not decide the result.
        if ($tag === '') {
            continue;
        }
        foreach ($arr as $a) {
            // stripos ignores case on both sides; the original lower-cased only
            // the tag, so product text containing capitals could never match.
            if (stripos($a, $tag) !== false) {
                return true;
            }
        }
    }
    return false;
}
// Tell whether a message for this product and user is already in $msg_all.
// $prod_id  product id to look for
// $uuid     user identifier to look for
// $msg_all  rows, each with 'prod_id' and 'uuid' keys
// Returns true on the first row matching both values (loose ==), else false.
function msg_in($prod_id, $uuid, $msg_all){
    foreach ($msg_all as $msg) {
        // Negated form of "prod_id matches AND uuid matches"; the uuid is not
        // read when the product id already differs.
        if ($msg['prod_id'] != $prod_id || $msg['uuid'] != $uuid) {
            continue;
        }
        return true;
    }
    return false;
}
更新: 非常感谢。 是的,数据是在MySQL中,以下是主要结构:
-- User settings used to filter products; at most 5 rows per user.
CREATE TABLE setting (
    id      INT                NOT NULL AUTO_INCREMENT,
    uuid    VARCHAR(100)       NOT NULL DEFAULT '',  -- presumably the owner's users.uuid (no FK declared)
    tags    VARCHAR(100)       NOT NULL DEFAULT '',  -- split on commas by the PHP tags_in() helper
    site_id SMALLINT UNSIGNED  NOT NULL DEFAULT 0,
    city_id MEDIUMINT UNSIGNED NOT NULL DEFAULT 0,
    cate_id MEDIUMINT UNSIGNED NOT NULL DEFAULT 0,
    addtime INT UNSIGNED       NOT NULL DEFAULT 0,   -- presumably a Unix timestamp; confirm
    PRIMARY KEY (id),
    INDEX idx_setting_uuid (uuid),
    INDEX idx_setting_tags (tags),
    INDEX idx_setting_city_id (city_id),
    INDEX idx_setting_cate_id (cate_id)
) DEFAULT CHARACTER SET utf8;
-- Users: surrogate integer id plus a unique uuid per user.
CREATE TABLE users (
    id   INT          NOT NULL AUTO_INCREMENT,
    uuid VARCHAR(100) NOT NULL DEFAULT '',
    PRIMARY KEY (id),
    UNIQUE INDEX idx_unique_uuid (uuid)
) DEFAULT CHARACTER SET utf8;
-- Filtered products: one message row per (user uuid, product) produced by the filter run.
CREATE TABLE msg_list(
id INT NOT NULL AUTO_INCREMENT,
uuid VARCHAR(100) NOT NULL DEFAULT '',
prod_id INT UNSIGNED NOT NULL DEFAULT 0,
-- TEXT columns cannot take a literal DEFAULT in MySQL, so the DEFAULT '' was
-- removed; writers must always supply msg.
msg TEXT NOT NULL,
PRIMARY KEY (`id`),
KEY `idx_ml_uuid` (`uuid`)
) DEFAULT CHARSET=utf8;
-- The prod and prod_cate_ref tables live in another database, so they cannot
-- be joined with the tables above.
CREATE TABLE prod (
    id      INT                NOT NULL AUTO_INCREMENT,
    website VARCHAR(100)       NOT NULL DEFAULT '' COMMENT ' site name ',
    site_id MEDIUMINT UNSIGNED NOT NULL DEFAULT 0,
    city_id MEDIUMINT UNSIGNED NOT NULL DEFAULT 0,  -- 0 means nationwide, per the PHP filter
    title   VARCHAR(50)        NOT NULL DEFAULT '',
    tags    VARCHAR(50)        NOT NULL DEFAULT '',
    detail  VARCHAR(500)       NOT NULL DEFAULT '',
    shop    VARCHAR(300)       NOT NULL DEFAULT '',
    PRIMARY KEY (id),
    INDEX idx_prod_tags (tags),
    INDEX idx_prod_site_id (site_id),
    INDEX idx_prod_city_id (city_id),
    INDEX idx_prod_mix (site_id, city_id, tags)
) DEFAULT CHARACTER SET utf8;
-- Product/category relation: one row per (prod_id, cate_id) link.
CREATE TABLE prod_cate_ref(
id MEDIUMINT NOT NULL AUTO_INCREMENT,
-- The original declared these two columns as "NOT NULL NULL", which is
-- contradictory; NOT NULL is kept to match the DEFAULT 0 and the other tables.
prod_id INT NOT NULL DEFAULT 0,
cate_id MEDIUMINT NOT NULL DEFAULT 0,
PRIMARY KEY (`id`),
KEY `idx_pcr_mix` (`prod_id`,`cate_id`)
) DEFAULT CHARSET=utf8;
-- All tables use the MyISAM engine.
我不知道如何只用一条SQL语句获取全部结果。
我假设你从数据库中获取这个数据。我还假设你正在使用SQL数据库。那为什么不使用'JOINs'和'WHERE'子句呢? – NullUserException
你从哪里得到设置?数据库? – svick
在PHP数组中存储这么多数据,量太大了。这些数据是从数据库之类的地方取出来的吗? –