Let's create a simple database structure as follows. We can use a tool like SQL Fiddle to run queries and see how it works. Make sure you have selected Oracle 11g R2 as DB type before running any queries.
CREATE TABLE STUDENT (
STUDENT_ID VARCHAR2(8),
STUDENT_NAME VARCHAR2(50) NOT NULL,
SEX VARCHAR2(1) NOT NULL,
ADDRESS VARCHAR2(200),
CREATED_DATE DATE DEFAULT SYSDATE NOT NULL,
MODIFIED_DATE DATE DEFAULT SYSDATE NOT NULL,
PRIMARY KEY (STUDENT_ID)
);
CREATE TABLE SEMISTER_GPA(
STUDENT_ID VARCHAR2(8),
SEMISTER_ID VARCHAR2(4),
GPA NUMBER(5,4),
PRIMARY KEY (STUDENT_ID,SEMISTER_ID)
);
Adding some values to above tables.
INSERT INTO STUDENT(STUDENT_ID,STUDENT_NAME,SEX,ADDRESS,CREATED_DATE,MODIFIED_DATE) VALUES('ST000001','JOHN','M','No.112, Prakstreet, Colombo 07',SYSDATE,SYSDATE);
INSERT INTO STUDENT(STUDENT_ID,STUDENT_NAME,SEX,ADDRESS,CREATED_DATE,MODIFIED_DATE) VALUES('ST000002','SAM','M','No.15, Main Street, Colombo 04',SYSDATE,SYSDATE+1);
INSERT INTO STUDENT(STUDENT_ID,STUDENT_NAME,SEX,ADDRESS,CREATED_DATE,MODIFIED_DATE) VALUES('ST000003','TINA','F','No.225, Parstreet Mews, Colombo 05',SYSDATE,SYSDATE+2);
INSERT INTO SEMISTER_GPA(STUDENT_ID,SEMISTER_ID,GPA) VALUES ('ST000001','L1S1',3.5245);
INSERT INTO SEMISTER_GPA(STUDENT_ID,SEMISTER_ID,GPA) VALUES ('ST000002','L1S1',3.6100);
INSERT INTO SEMISTER_GPA(STUDENT_ID,SEMISTER_ID,GPA) VALUES ('ST000003','L1S1',3.0589);
INSERT INTO SEMISTER_GPA(STUDENT_ID,SEMISTER_ID,GPA) VALUES ('ST000001','L1S2',3.7241);
INSERT INTO SEMISTER_GPA(STUDENT_ID,SEMISTER_ID,GPA) VALUES ('ST000002','L1S3',3.8180);
INSERT INTO SEMISTER_GPA(STUDENT_ID,SEMISTER_ID,GPA) VALUES ('ST000003','L1S2',3.2589);
INSERT INTO SEMISTER_GPA(STUDENT_ID,SEMISTER_ID,GPA) VALUES ('ST000002','L1S4',3.8675);
INSERT INTO SEMISTER_GPA(STUDENT_ID,SEMISTER_ID,GPA) VALUES ('ST000002','L2S1',3.2369);
INSERT INTO SEMISTER_GPA(STUDENT_ID,SEMISTER_ID,GPA) VALUES ('ST000003','L1S4',3.6523);
INSERT INTO SEMISTER_GPA(STUDENT_ID,SEMISTER_ID,GPA) VALUES ('ST000001','L1S4',3.2675);
INSERT INTO SEMISTER_GPA(STUDENT_ID,SEMISTER_ID,GPA) VALUES ('ST000003','L2S1',3.4389);
INSERT INTO SEMISTER_GPA(STUDENT_ID,SEMISTER_ID,GPA) VALUES ('ST000003','L2S2',3.0523);
See in SQL Fiddle.
01. Simple grouping: Get the no. of semesters each student has completed.
SELECT STUDENT_ID,COUNT(*) FROM SEMISTER_GPA GROUP BY STUDENT_ID;
02. DENSE_RANK()and PARTITION BY
'PARTITION BY' is used partition a table into different segments which makes easier to manipulate each segment individually. Here 'SEMISTER_GPA'table can be partition by STUDENT_ID. In order to get the row which has the highest 'GPA' for each student, each segment is ordered by 'GPA' in descending order. Then the row which has the DENSE_RANK()=1 gives the expected result.
SELECT DT.STUDENT_ID,DT.SEMISTER_ID, DT.GPA FROM (
SELECT STUDENT_ID,SEMISTER_ID,GPA, DENSE_RANK() OVER (PARTITION BY STUDENT_ID ORDER BY GPA DESC) RNK FROM SEMISTER_GPA
) DT WHERE (DT.RNK = 1 )
03. LISTAGG () function
LISTAGG () function gives a single row, concatenated using specified column within each group ordered. To get the list of semesters each student has completed, we can group table using STUDENT_ID order each group by GPA and concatenate column values using above function.
SELECT STUDENT_ID, LISTAGG(SEMISTER_ID,',') WITHIN GROUP (ORDER BY SEMISTER_ID) AS SEMISTER_LIST FROM SEMISTER_GPA GROUP BY STUDENT_ID
Below mentioned is another sample data set. Timezone data is stored in two tables TIME_ZONE and TIMEZONE_DESCRIPTIONS. In order to migrate data into a single denormalized table 'MERGE INTO .. USING.. ' statement can be used. Merging can handle insert or update unconditionally without knowing it's presence in target table. This is a more flexible way of moving large amounts of data extracted from one or more tables. See in SQL Fiddle.
CREATE TABLE TIME_ZONE (
TIMEZONE_ID NUMBER(3) NOT NULL,
TIMEZONE_OFFSET VARCHAR2(10),
MODIFIED_DATE DATE DEFAULT SYSDATE NOT NULL,
PRIMARY KEY (TIMEZONE_ID)
);
CREATE TABLE TIMEZONE_DESCRIPTIONS (
TIMEZONE_ID NUMBER(3) NOT NULL,
LANGUAGE_ID VARCHAR2(2) DEFAULT 'EN' NOT NULL,
DESCRIPTION NVARCHAR2(100),
MODIFIED_DATE DATE DEFAULT SYSDATE NOT NULL,
PRIMARY KEY (TIMEZONE_ID,LANGUAGE_ID)
);
Insert into TIME_ZONE (TIMEZONE_ID,TIMEZONE_OFFSET,MODIFIED_DATE) values (1,'+0300',SYSDATE);
Insert into TIME_ZONE (TIMEZONE_ID,TIMEZONE_OFFSET,MODIFIED_DATE) values (2,'+0200',SYSDATE);
Insert into TIME_ZONE (TIMEZONE_ID,TIMEZONE_OFFSET,MODIFIED_DATE) values (3,'+0400',SYSDATE);
Insert into TIME_ZONE (TIMEZONE_ID,TIMEZONE_OFFSET,MODIFIED_DATE) values (5,'-0400',SYSDATE);
Insert into TIME_ZONE (TIMEZONE_ID,TIMEZONE_OFFSET,MODIFIED_DATE) values (6,'+0400',SYSDATE);
Insert into TIMEZONE_DESCRIPTIONS (TIMEZONE_ID,LANGUAGE_ID,DESCRIPTION,MODIFIED_DATE) values (1,'EN','Saudi Arabia',SYSDATE);
Insert into TIMEZONE_DESCRIPTIONS (TIMEZONE_ID,LANGUAGE_ID,DESCRIPTION,MODIFIED_DATE) values (2,'EN','Egypt',SYSDATE);
Insert into TIMEZONE_DESCRIPTIONS (TIMEZONE_ID,LANGUAGE_ID,DESCRIPTION,MODIFIED_DATE) values (3,'EN','UAE',SYSDATE);
Insert into TIMEZONE_DESCRIPTIONS (TIMEZONE_ID,LANGUAGE_ID,DESCRIPTION,MODIFIED_DATE) values (5,'EN','USA',SYSDATE);
Insert into TIMEZONE_DESCRIPTIONS (TIMEZONE_ID,LANGUAGE_ID,DESCRIPTION,MODIFIED_DATE) values (6,'EN','Oman',SYSDATE);
Insert into TIMEZONE_DESCRIPTIONS (TIMEZONE_ID,LANGUAGE_ID,DESCRIPTION,MODIFIED_DATE) values (1,'FR','Arabie Saoudite',SYSDATE);
Insert into TIMEZONE_DESCRIPTIONS (TIMEZONE_ID,LANGUAGE_ID,DESCRIPTION,MODIFIED_DATE) values (2,'FR','Egypte',SYSDATE);
Insert into TIMEZONE_DESCRIPTIONS (TIMEZONE_ID,LANGUAGE_ID,DESCRIPTION,MODIFIED_DATE) values (3,'FR','Émirats arabes unis',SYSDATE);
Insert into TIMEZONE_DESCRIPTIONS (TIMEZONE_ID,LANGUAGE_ID,DESCRIPTION,MODIFIED_DATE) values (5,'FR','USA',SYSDATE);
Insert into TIMEZONE_DESCRIPTIONS (TIMEZONE_ID,LANGUAGE_ID,DESCRIPTION,MODIFIED_DATE) values (6,'FR','Oman',SYSDATE);
Denormalized table structure.
CREATE TABLE TIME_ZONE_MASTER (
TIMEZONE_ID NUMBER(3) NOT NULL,
TIMEZONE_OFFSET VARCHAR2(10),
DESCRIPTION NVARCHAR2(100),
MODIFIED_DATE DATE DEFAULT SYSDATE NOT NULL,
PRIMARY KEY (TIMEZONE_ID)
);
Select data set from TIMEZONE_DESCRIPTIONS table to join with TIME_ZONE table. In the target table, language wise descriptions will be stored in the same row according to a predefined format.
SELECT X.TIMEZONE_ID,
MAX(X.MODIFIED_DATE) AS MODIFIED_DATE,
'EN:'
|| MAX(DECODE(X.LANGUAGE_ID, 'EN', X.DESCRIPTION))
|| ',FR:'
|| MAX(DECODE(X.LANGUAGE_ID, 'FR', X.DESCRIPTION)) TIME_ZONE_DESC
FROM (
SELECT TZD.TIMEZONE_ID, TZD.LANGUAGE_ID, TZD.DESCRIPTION,TZD.MODIFIED_DATE FROM TIMEZONE_DESCRIPTIONS TZD
) X GROUP BY X.TIMEZONE_ID;
To get the full select statement, join with TIME_ZONE table using TIMEZONE_ID.
SELECT tz.TIMEZONE_ID,
tz.TIMEZONE_OFFSET,
v.TIME_ZONE_DESC,
GREATEST(v.MODIFIED_DATE,tz.MODIFIED_DATE) AS MODIFIED_DATE
FROM TIME_ZONE tz
LEFT OUTER JOIN
(SELECT x.TIMEZONE_ID,
MAX(x.MODIFIED_DATE) AS MODIFIED_DATE,
'EN:'
|| MAX(DECODE(x.language_id, 'EN', x.DESCRIPTION))
|| ',FR:'
|| MAX(DECODE(x.language_id, 'FR', x.DESCRIPTION)) TIME_ZONE_DESC
FROM
(SELECT tzd.TIMEZONE_ID,
tzd.LANGUAGE_ID,
tzd.DESCRIPTION,
tzd.MODIFIED_DATE
FROM TIMEZONE_DESCRIPTIONS tzd
) x
GROUP BY x.TIMEZONE_ID
) v
ON (tz.TIMEZONE_ID = v.TIMEZONE_ID);
Merge statement syntax,
MERGE INTO TARET_TABLE USING
(SELECT
)ON (JOIN)
WHEN MATCHED THEN UPDATE
SET
WHEN NOT MATCHED THEN
INSERT
()
VALUES
();
Finally, we can create the full merge statement to update TIME_ZONE_MASTER as follows.
MERGE INTO TIME_ZONE_MASTER t USING
(SELECT tz.TIMEZONE_ID,
tz.TIMEZONE_OFFSET,
v.TIME_ZONE_DESC,
GREATEST(v.MODIFIED_DATE,tz.MODIFIED_DATE) AS MODIFIED_DATE
FROM TIME_ZONE tz
LEFT OUTER JOIN
(SELECT x.TIMEZONE_ID,
MAX(x.MODIFIED_DATE) AS MODIFIED_DATE,
'EN:'
|| MAX(DECODE(x.language_id, 'EN', x.DESCRIPTION))
|| ',FR:'
|| MAX(DECODE(x.language_id, 'FR', x.DESCRIPTION)) TIME_ZONE_DESC
FROM
(SELECT tzd.TIMEZONE_ID,
tzd.LANGUAGE_ID,
tzd.DESCRIPTION,
tzd.MODIFIED_DATE
FROM TIMEZONE_DESCRIPTIONS tzd
) x
GROUP BY x.TIMEZONE_ID
) v
ON (tz.TIMEZONE_ID = v.TIMEZONE_ID)
) td ON (t.TIMEZONE_ID = td.TIMEZONE_ID)
WHEN MATCHED THEN UPDATE
SET t.DESCRIPTION = td.TIME_ZONE_DESC,
t.TIMEZONE_OFFSET = td.TIMEZONE_OFFSET,
t.MODIFIED_DATE = td.MODIFIED_DATE
WHEN NOT MATCHED THEN
INSERT
(
TIMEZONE_ID,
TIMEZONE_OFFSET,
DESCRIPTION,
MODIFIED_DATE
)
VALUES
(
td.TIMEZONE_ID,
td.TIMEZONE_OFFSET,
td.TIME_ZONE_DESC,
td.MODIFIED_DATE
);
Final outcome would be like this.
Thoughts are welcome...