1. Presto Quick References

Create Table … AS SELECT

CREATE TABLE IF NOT EXISTS schema.table_name
    WITH (
        format = 'parquet', 
        external_location = 's3://bucket/location/v0',
        partitioned_by = ARRAY['created_date']
      )
AS
    WITH temp_table AS (
        SELECT user_id, created_date
        FROM some.table_name_haha
      )

SELECT * 
FROM temp_table

2. INSERT INTO … SELECT …

INSERT INTO schema.table_name
WITH temp_table AS (
    SELECT user_id, created_date
    FROM some.table_name_haha
)

SELECT *
FROM temp_table

2. Hive Quick References

Create External Table

CREATE EXTERNAL TABLE IF NOT EXISTS haha.created_table
(
    id      bigint, 
    name    string,
    age     int, 
    married boolean,
    score   float
)
    PARTITIONED BY (dt string)
    STORED AS PARQUET
    LOCATION 's3://bucket-name/parquet/location'

INSERT OVERWRITE TABLE

위에서 만든 테이블에 데이터를 넣으려면 다음과 같이 합니다.
WITH clause 는 때에 따라서 써도 되고 빼도 됩니다.

SET hive.mapred.mode = nonstrict;
SET hive.exec.dynamic.partition = true;
SET hive.exec.dynamic.partition.mode = nonstrict;
SET hive.exec.parallel = true;
SET hive.exec.reducers.max = 1;
SET fs.s3.consistent.throwExceptionOnInconsistency=false;
SET fs.s3.consistent=false;

WITH temp_table1 AS
    (SELECT id,
            married
     FROM <some_db>.<table_name>),
WITH temp_table2 AS
    (SELECT id,
            score
     FROM <some_db>.<table_name>)

INSERT OVERWRITE TABLE haha.created_table PARTITION (dt)
SELECT a.id, 
       a.name, 
       a.age,
       b.married, 
       c.score
FROM <db>.<table_name> 
    JOIN temp_table1 b on a.id = b.id
    JOIN temp_table2 c on a.id = c.id;

실행하고 나서 s3 를 보면.. temporary directory 가 만들어짐..
예를 들어서 .hive-staging_hive_2023-02-01_12-34-45_1234567890-12345 이런 temporary 디렉토리가 만들어지고..
완료가 되면 dt=20230201 같은 디렉토리로 변경이 됨.