|
Oracle 版本:
/*
drop table cust;
drop table phone;
create table cust(id int primary key,name varchar2(30),gid int);
insert into cust(id,name) values(1,'A');
insert into cust(id,name) values(2,'B');
insert into cust(id,name) values(3,'C');
insert into cust(id,name) values(4,'D');
insert into cust(id,name) values(5,'E');
create table phone(id int, phone_num varchar2(40));
insert into phone values(1,3);
insert into phone values(1,4);
insert into phone values(2,4);
insert into phone values(3,6);
insert into phone values(2,6);
insert into phone values(4,1);
insert into phone values(5,1);
-- large test
delete from cust;
delete from phone;
declare
N int := 1000;
M int := 2;
L int := 500;
begin
insert into cust (id, name) SELECT LEVEL, 'cust' ||LEVEL FROM DUAL CONNECT BY LEVEL<=N;
insert into phone (id, phone_num)
SELECT TRUNC(dbms_random.value(1,N+1)),num
FROM (SELECT LEVEL FROM DUAL CONNECT BY LEVEL<=M),(SELECT LEVEL num FROM DUAL CONNECT BY LEVEL<L);
end;
/
select * from cust;
select * from phone order by phone_num;
*/
------下面代码被我改写过,三个雷同的地方抽出来变成了子函数f_find_parent
DECLARE
cust_count int;
TYPE t_num IS TABLE OF NUMBER INDEX BY PLS_INTEGER;
parents t_num;
ranks t_num;
lv_id t_num;
parentX int;
parentY int;
rankX int;
rankY int;
last_phone int;
FUNCTION f_find_parent(p_cur_cust IN INTEGER) RETURN INTEGER
AS
stack t_num;
stack_cnt int;
cur_cust int := p_cur_cust;
parent int;
BEGIN
stack_cnt :=0;
LOOP
parent := parents(cur_cust);
EXIT WHEN parent = cur_cust ;
stack_cnt := stack_cnt+1;
stack(stack_cnt):=cur_cust;
cur_cust := parent;
end LOOP;
FOR i IN 1..stack_cnt LOOP
parents(stack(i)) := parent;
END LOOP;
RETURN parent;
END f_find_parent;
BEGIN
SELECT count(*) INTO cust_count from cust;
FOR i IN 1..cust_count LOOP
parents(i) := i;
ranks(i) := 0;
END LOOP;
lv_id := parents;
last_phone :=0;
-- union
FOR cur IN (select id, phone_num from phone order by phone_num, id) LOOP
if cur.phone_num <> last_phone then
-- find parent for x
parentX := f_find_parent(cur.id);
rankX := ranks(parentX);
last_phone := cur.phone_num;
CONTINUE;
END IF;
-- find parent for y
parentY := f_find_parent(cur.id);
rankY := ranks(parentY);
CASE
WHEN parentX = parentY THEN
continue;
WHEN rankX < rankY THEN
parents(parentX) := parentY;
WHEN rankX > rankY THEN
parents(parentY) := parentX;
WHEN rankX = rankY THEN
parents(parentY) := parentX;
ranks(parentX) := rankX;
END CASE;
END LOOP;
-- find for each cust
FOR i IN 1..cust_count LOOP
parentY := f_find_parent(i);
END LOOP;
FORALL i IN 1..cust_count
UPDATE cust SET gid=parents(i) WHERE id=lv_id(i);
END;
/
该算法的限制是:假定客户号是从1开始的连续整数,这个很容易改写;
另外要求在内存里分配几个大数组,楼主的客户表如果大了很容易撑爆。 |
|