見出し画像

SQL 再帰クエリ

┃使い方

うまいことデータを用意できれば、loop処理の代用になる。

┃Ⅰ. 統合元の抽出

1)統合データを用意する

DROP TABLE IF EXISTS TEMP_DATA;
CREATE TEMP TABLE TEMP_DATA AS (

   /*
    ・C000、C002、C005は統合されてないID
    ・C001はC002に統合されたID
    ・C003はC004に、さらにC004はC005に統合されたID
   */
   SELECT 'C000' AS ID, NULL   AS NEXT_ID UNION
   SELECT 'C001' AS ID, 'C002' AS NEXT_ID UNION
   SELECT 'C002' AS ID, NULL   AS NEXT_ID UNION
   SELECT 'C003' AS ID, 'C004' AS NEXT_ID UNION
   SELECT 'C004' AS ID, 'C005' AS NEXT_ID UNION
   SELECT 'C005' AS ID, NULL   AS NEXT_ID

);

2)再帰クエリで統合元のIDを探索する

WITH RECURSIVE TEMP_CTE (ORG_ID, ID, NEXT_ID, SRCH) AS (

   SELECT
       T1.ID AS ORG_ID
       , T1.ID
       , T1.NEXT_ID
       , 1 AS SRCH
   FROM
       TEMP_DATA T1
   WHERE
       T1.NEXT_ID IS NULL
   
   UNION
   
   SELECT
       T1.ORG_ID
       , T2.ID
       , T2.NEXT_ID
       , T1.SRCH + 1 AS SRCH
   FROM
       TEMP_CTE T1
       , TEMP_DATA T2
   WHERE
       T1.ID = T2.NEXT_ID

)
, TEMP_SRCH_RSLT AS (

   SELECT DISTINCT
       T1.ORG_ID
       , T1.ID
       , T1.NEXT_ID
       , T1.SRCH
       , MAX(T1.SRCH) OVER (PARTITION BY T1.ORG_ID) AS MAX_SRCH
   FROM
       TEMP_CTE T1

)
SELECT
   T1.ORG_ID
   , T1.ID
FROM
   TEMP_SRCH_RSLT T1
WHERE
   T1.SRCH = T1.MAX_SRCH
ORDER BY
   T1.ORG_ID
;

3)結果

org_id	id
-----------------
C000	C000
C002	C001
C005	C003

┃Ⅱ. n日間隔の抽出

1)時系列データを用意する

DROP TABLE IF EXISTS TEMP_YMD;
CREATE TEMP TABLE TEMP_YMD AS (

    SELECT  20210901 AS YMD UNION
    SELECT  20210907 AS YMD UNION
    SELECT  20210908 AS YMD UNION
    SELECT  20210913 AS YMD UNION
    SELECT  20210914 AS YMD UNION
    SELECT  20210918 AS YMD UNION
    SELECT  20210925 AS YMD UNION
    SELECT  20210926 AS YMD UNION
    SELECT  20210928 AS YMD UNION
    SELECT  20210930 AS YMD

);

2)各データの日付からn日間隔の最小を導出する

DROP TABLE IF EXISTS TEMP_YMD_WITH_NEXT;
CREATE TEMP TABLE TEMP_YMD_WITH_NEXT AS (

   SELECT
       T1.YMD
       , MIN(T2.YMD) AS NEXT_YMD
   FROM
       TEMP_YMD T1
       
       LEFT OUTER JOIN
           TEMP_YMD T2
           ON
               TO_DATE(CAST(T1.YMD AS VARCHAR), 'YYYYMMDD') + 4
                    < TO_DATE(CAST(T2.YMD AS VARCHAR), 'YYYYMMDD')
   GROUP BY
       T1.YMD
);

3)再帰クエリで初回と初回以降のデータを抽出する

WITH RECURSIVE TEMP_CTE (YMD, NEXT_YMD) AS (

   -- 初回
   SELECT
       T1.YMD
       , T1.NEXT_YMD
   FROM
       TEMP_YMD_WITH_NEXT T1
   WHERE
       T1.YMD = 20210901
   
   UNION 
   
   -- 初回以降
   SELECT
       T2.YMD
       , T2.NEXT_YMD
   FROM
       TEMP_CTE T1
       , TEMP_YMD_WITH_NEXT T2
   WHERE
       T1.NEXT_YMD = T2.YMD

)
SELECT DISTINCT
   T1.YMD
FROM
   TEMP_CTE T1
ORDER BY
   T1.YMD
;

4)結果

ymd
---------
20210901
20210907
20210913
20210918
20210925
20210930




この記事が気に入ったらサポートをしてみませんか?