|
Naldonado 发表于 2014-3-19 23:18 ![]()
把你的方法拿出来看看。
我的方法就是根据每个客户id做一次递归,然后找出同组的客户
id集合,然后再对这些集合进行去重:
--疑似相似客户表创建- create type varchartable is table of varchar2(4000);
- /
- create table retval_tmp(id int,ref_var varchartable)
- nested table
- ref_var
- store as
- next_ref_var;
- /
复制代码 --集合转hash函数创建
注:由于utl_raw.cast_to_raw函数的限制,导致listagg拼出的
字符长度不能超过1500字节。- create or replace function coll2hash(in_coll in varchartable)
- return varchar2 as
- v_retval varchar2(3000);
- begin
- select dbms_crypto.hash(src => utl_raw.cast_to_raw(listagg(column_value,
- ',') within
- group(order by null)),
- typ => dbms_crypto.HASH_MD5)
- into v_retval
- from table(in_coll);
- return v_retval;
- end;
- /
复制代码 --每个id进行循环递归- declare
- v_cur_varchar varcharTable;
- begin
- execute immediate 'truncate table retval_tmp';
- for loop_idx in (select id from cus_tab) loop
-
- with t as
- (select /*+materialize*/
- id, phone_num
- from phone_tab),
-
- t2 as
- (select distinct t1.id
- from t t1
- start with t1.id = loop_idx.id
- connect by nocycle prior t1.id = t1.id
- or prior t1.phone_num = t1.phone_num)
- select id bulk collect into v_cur_varchar from t2;
-
- insert into retval_tmp values (loop_idx.id, v_cur_varchar);
- commit;
- end loop;
- end;
- /
复制代码 --集合去重- with t as
- (select t1.*,
- row_number() over(partition by coll2hash(t1.ref_var) order by id) rn
- from retval_tmp t1)
- select ref_var from t where rn = 1;
复制代码 |
|