|
|
http://oraqa.com/2007/06/24/how- ... in-a-sql-statement/
The following SQL pattern can be used to find matched rows between 2 tables based on required matching criteria in a SQL statement.
把两张表的数据一一配对,条件是t2_compare BETWEEN t1_compare-2 AND t1_compare+2, 并且每一行最多能和一行匹配。
要求匹配对数最多的配对方式。
Here are the requirements.
1) matching rows in t1 to t2
(t2_compare BETWEEN t1_compare-2 AND t1_compare+2 )
2) match to a t2 row at most once, once a row is matched, it can't be used again.
3) The goal is to match as many pairs as possible
create table t1 ( pk integer primary key, t1_compare integer, t1_data varchar2(10));
create table t2 ( pk integer primary key, t2_compare integer, t2_data varchar2(10));
insert into t1 values ( 1, 10, 'T1 ROW1' );
insert into t1 values ( 2, 12, 'T1 ROW2' );
insert into t1 values ( 3, 14, 'T1 ROW3' );
insert into t1 values ( 4, 16, 'T1 ROW4' );
insert into t1 values ( 5, 18, 'T1 ROW5' );
insert into t1 values ( 6, 19, 'T1 ROW6' );
insert into t1 values ( 7, 128, 'T1 ROW8');
insert into t2 values ( 1, 11, 'T2 ROW1' );
insert into t2 values ( 2, 12, 'T2 ROW2' );
insert into t2 values ( 3, 14, 'T2 ROW3' );
insert into t2 values ( 4, 15, 'T2 ROW4' );
insert into t2 values ( 5, 19, 'T2 ROW5' );
insert into t2 values ( 6, 19, 'T2 ROW6' );
insert into t2 values ( 8, 28, 'T2 ROW18');
insert into t2 values ( 9, 68, 'T2 ROW28');
SQL> select * from t1;
PK T1_COMPARE T1_DATA
---------- ---------- ----------
1 10 T1 ROW1
2 12 T1 ROW2
3 14 T1 ROW3
4 16 T1 ROW4
5 18 T1 ROW5
6 19 T1 ROW6
7 128 T1 ROW8
7 rows selected.
SQL> select * from t2;
PK T2_COMPARE T2_DATA
---------- ---------- ----------
1 11 T2 ROW1
2 12 T2 ROW2
3 14 T2 ROW3
4 15 T2 ROW4
5 19 T2 ROW5
6 19 T2 ROW6
8 28 T2 ROW18
9 68 T2 ROW28
8 rows selected.
SQL> COLUMN t2_pk FORMAT A8
SQL> COLUMN unmatched_t2 FORMAT A8
SQL> COLUMN STATUS FORMAT A10
SELECT t1_pk, match as t2_pk, tmp_str as unmatched_t2,
CASE WHEN t1_pk IS NULL
THEN 'NO T1'
WHEN match IS NULL
THEN 'NO T2'
ELSE 'MATCHED'
END AS STATUS
FROM
(SELECT t1.pk t1_pk, row_number() OVER (ORDER BY t1.pk) position,
ltrim(regexp_replace(XMLAgg(XMLElement(x,t2.pk)
order by t2.pk),'<X>|</X><X>|</X>',','),',') str,
CASE WHEN count(t2.pk) != 0 AND t1.pk IS NOT NULL
THEN count(t2.pk)
ELSE NULL
END ct ,
count(t1.pk) OVER ( ) counter
FROM t1 FULL OUTER JOIN t2
ON ( t2_compare BETWEEN t1_compare-2 AND t1_compare+2 )
GROUP BY t1.pk
)
MODEL
DIMENSION BY (position)
MEASURES (t1_pk, str, ct, str as tmp_str, CAST(NULL AS NUMBER) min_tmp,
counter, ct ct_tmp, CAST(NULL AS VARCHAR2(4000)) match,
CAST(NULL AS NUMBER) dup, CAST(NULL AS NUMBER) min_dup,
CAST(NULL AS VARCHAR2(4000)) tmp_ch ,
CAST(NULL AS VARCHAR2(4000)) tmp_cp
)
RULES
ITERATE (1000000) UNTIL (ITERATION_NUMBER>= counter[1])
(
min_tmp[ANY] = min(ct_tmp)[ANY],
dup[ANY] = CASE WHEN ct_tmp[CV()] = min_tmp[1]
THEN t1_pk[CV()]
ELSE NULL
END,
-----------------------------------------------------------------------
min_dup[ANY] = min(dup)[ANY],
tmp_ch[ANY] = CASE WHEN t1_pk[CV()] = min_dup[1]
THEN substr(tmp_str[CV()],0,
instr(tmp_str[CV()],',',1)-1)
END,
-----------------------------------------------------------------------
tmp_cp[ANY] = min(tmp_ch)[ANY],
-----------------------------------------------------------------------
match[ANY] = CASE WHEN t1_pk[CV()] = min_dup[1] THEN tmp_cp[CV()]
ELSE match[CV()]
END,
-----------------------------------------------------------------------
tmp_str[ANY]=CASE WHEN CT[CV()] IS NOT NULL
THEN REGEXP_REPLACE(ltrim(REPLACE(
tmp_str[CV()],tmp_cp[CV()] ),','),'[,]+',',')
ELSE tmp_str[CV()]
END,
-----------------------------------------------------------------------
ct_tmp[ANY] = CASE WHEN match[CV()] IS NULL AND t1_pk[CV()] IS NOT NULL
THEN LENGTH(tmp_str[CV()])-
LENGTH(REPLACE(tmp_str[CV()],','))
ELSE NULL
END
);
T1_PK T2_PK UNMATCHE STATUS
---------- -------- -------- ----------
1 1 MATCHED
2 2 MATCHED
3 3 MATCHED
4 4 MATCHED
5 5 MATCHED
6 6 MATCHED
7 NO T2
8,9, NO T1
8 rows selected.
11GR2, 用强大的递归WITH:
WITH d AS (SELECT t1.pk pk1,t2.pk pk2,DENSE_RANK() OVER (ORDER BY t1.pk) rn,COUNT(DISTINCT t1.pk) OVER() cnt
FROM t1,t2
WHERE t2_compare BETWEEN t1_compare-2 AND t1_compare+2
)
, m AS (SELECT pk1,pk2,rn,cnt FROM d
UNION ALL
SELECT DISTINCT pk1,NULL,rn,cnt FROM d
)
,t(rn,match_path,match_cnt,cnt) AS (
SELECT rn,TO_CHAR(pk1)||'-'||TO_CHAR(pk2),(CASE WHEN m.pk2 IS NULL THEN 0 ELSE 1 END),cnt
FROM m
WHERE rn=1
UNION ALL
SELECT m.rn,t.match_path||','||m.pk1||'-'||m.pk2,t.match_cnt + (CASE WHEN m.pk2 IS NULL THEN 0 ELSE 1 END),t.cnt
FROM t,m
WHERE t.rn<t.cnt
AND t.rn+1=m.rn
AND INSTR(match_path||',','-'||m.pk2||',')=0
)
SELECT match_path FROM (SELECT t.*,RANK() OVER(ORDER BY match_cnt DESC) rnk FROM T ) WHERE rnk=1;
MATCH_PATH
--------------------------------
1-2,2-1,3-4,4-3,5-6,6-5
1-1,2-3,3-2,4-4,5-5,6-6
1-1,2-2,3-3,4-4,5-5,6-6
1-2,2-1,3-3,4-4,5-5,6-6
1-1,2-2,3-4,4-3,5-5,6-6
1-2,2-1,3-4,4-3,5-5,6-6
1-1,2-3,3-2,4-4,5-6,6-5
1-1,2-2,3-3,4-4,5-6,6-5
1-2,2-1,3-3,4-4,5-6,6-5
1-1,2-2,3-4,4-3,5-6,6-5
10 rows selected. |
|