/// BANGBOO BLOG ///
/// BANGBOO BLOG ///
February 2, 2021 12:00 AM
BigQuery part2
■BQタイムアウトは6時間■マテリアライズドビュー実体データを保持しリフレッシュ更新で早いため集計等に向くベーステーブルは一つ、カウントができない、使用できない関数がある等の制約があるまたマテビューはビューを元に作成できずテーブルからである必要があるストレージコストは掛かるが、通常ビューで時間掛かる計算を頻繁にする場合は早く安くなる可能性があるBigQueryのMaterialized Viewを使う #データ分析 - Qiita■BQ同時実行数オンデマンドでは使用可能なスロット数に基づき自動的に同時実行数が決定され超えるとスロットに 空きがでるまでキューに保管されるプロジェクトごとにクエリの最大同時実行数は動的に決まる同時実行数の最大値は指定できるが、大きくしても実行数が増えることはなく、あくまで自動決定 内の方が優先されるEditionsが割り当てられているプロジェクトでは最大同時実行…
コメント (0)

February 2, 2021 01:00 AM
BigQuery
■Big queryリファレンス標準SQLとレガシーSQLがある、違いは?標準 SQL のクエリ構文  |  BigQuery  |  Google Cloud標準 SQL への移行  |  BigQuery  |  Google Cloud標準 SQL のデータ型  |  BigQuery  |  Google Cloudレガシー SQL 関数と演算子  |  BigQuery  |  Google Cloudレガシー SQL のデータ型  |  BigQuery  |  Google CloudBigQuery: クラウド データ ウェアハウス  |  Google C…
///BigQuery
■nullが最小か調べる(BigQueryではnullが最小値)
select * from bangboo_data.x_data order by value

■NULLの件数を事前に数えておく(nullを0に変換してカウントした)
SELECT *
 FROM (SELECT 'value', count(COALESCE(value,0))  FROM bangboo_data.x_data WHERE value IS NULL)
 union all
 (SELECT 'value2', count(COALESCE(value2,0))  FROM bangboo_data.x_data WHERE value2 IS NULL)
 union all
 (SELECT 'v_no_null', count(COALESCE(v_no_null,0))  FROM bangboo_data.x_data WHERE v_no_null IS NULL)
->4件、4件、0件で正常なのでcount(COALESCE(value,0))条件はis nullを使う

横並びなら、単一テーブルなのでFromはCross joinでOK、SQLは0件のカウントをGroup byできないので↑が安全かも
SELECT count_null_1, count_null_2, count_null_3
 FROM (SELECT count(COALESCE(value,0)) as count_null_1 FROM bangboo_data.x_data WHERE value IS NULL) as t1,
 (SELECT count(COALESCE(value2,0))  as count_null_2 FROM bangboo_data.x_data WHERE value2 IS NULL) as t2,
 (SELECT count(COALESCE(v_no_null,0))  as count_null_3 FROM bangboo_data.x_data WHERE v_no_null IS NULL) as t3

SQLは0件のカウントをGroup byできない(下記SQL駄目、URLを参照し対策を)→カウント以外のカラムを出すには副問い合わせが必要だったりと、、、
SELECT v_no_null, count(COALESCE(v_no_null,0))  as count_null_3 FROM bangboo_data.x_data WHERE v_no_null IS NULL GROUP BY v_no_null
https://www.ksakae1216.com/entry/2017/02/17/063000

■nullと0と''の違い
select * from bangboo_data.x_data where v_no_null = 0
 ->4行でる
select * from bangboo_data.x_data where v_no_null is null
 ->0行
==>0はnullでなくwhere a = 0で検索せよ

select * from bangboo_data.x_purchase where item = ''
 ->2行でる
select * from bangboo_data.x_purchase where item is null
 ->0行
==>''はnullでなくwhere a = ''で検索せよ

■全削除
delete from bangboo_data.x_employee where true
truncate table bangboo_data.x_employee

■テーブル削除
drop table bangboo_data.x_xxx

■DDLとデータ入力
CREATE TABLE IF NOT EXISTS `bangboo_data.x_data` (
  `no` INT64 NOT NULL,
  `value` INT64,
  `value2` INT64,
  `v_no_null` INT64 NOT NULL,
)

INSERT INTO `bangboo_data.x_data` (`no`, `value`, `value2`, `v_no_null`) VALUES
(1, 100, NULL, 0),
(2, NULL, 200, 0),
(3, NULL, 200, 5),
(4, NULL, 100, 10),
(5, NULL, 300, 0),
(6, 200, NULL, 0),
(7, 200, NULL, 5),
(8, 100, NULL, 10);


CREATE TABLE IF NOT EXISTS bangboo_data.x_bill (
  `no` INT64 NOT NULL,
  `user_no` INT64 NOT NULL,
  `total_bill` INT64 NOT NULL,
  `date` DATETIME,
)

INSERT INTO `bangboo_data.x_bill` (`no`, `user_no`, `total_bill`, `date`) VALUES
(1, 1, 1000, '2021-01-01 00:00:00'),
(2, 1, 1500, '2021-01-02 00:00:00'),
(3, 1, 500, '2021-01-03 00:00:00'),
(4, 2, 1100, '2021-01-01 00:00:00'),
(5, 2, 1700, '2021-01-02 00:00:00'),
(6, 2, 800, '2021-01-03 00:00:00'),
(7, 3, 1100, '2021-01-01 00:00:00'),
(8, 3, 1700, '2021-01-02 00:00:00'),
(9, 3, 800, '2021-01-03 00:00:00'),
(10, 3, 800, '2021-01-04 00:00:00');

CREATE TABLE IF NOT EXISTS `bangboo_data.x_compensation` (
  `no` INT64 NOT NULL,
  `allow` STRING
)

INSERT INTO `bangboo_data.x_compensation` (`no`, `allow`) VALUES
(1, '住宅'),
(1, '交通'),
(2, '住宅');

CREATE TABLE IF NOT EXISTS `bangboo_data.x_employee` (
  `no` INT64 NOT NULL,
  `name` STRING
)

INSERT INTO `bangboo_data.x_employee` (`no`, `name`) VALUES
(1, '田中'),
(2, '鈴木'),
(3, '佐藤');

CREATE TABLE IF NOT EXISTS `bangboo_data.x_joblevel` (
  `no` INT64 NOT NULL,
  `res` STRING
)

INSERT INTO `bangboo_data.x_joblevel` (`no`, `res`) VALUES
(1, '部長'),
(1, '課長'),
(2, '平');

CREATE TABLE IF NOT EXISTS `bangboo_data.x_purchase` (
  `no` INT64 NOT NULL,
  `p_no` INT64 NOT NULL,
  `item` STRING,
  `price` INT64 NOT NULL,
  `unit` INT64 NOT NULL,
)

INSERT INTO `bangboo_data.x_purchase` (`no`, `p_no`, `item`, `price`, `unit`) VALUES
(1, 1, 'aaa', 100, 10),
(2, 2, 'aaa', 100, 10),
(3, 2, '', 100, 5),
(4, 3, 'bbb', 100, 5),
(5, 4, 'ccc', 300, 3),
(6, 4, 'aaa', 100, 2),
(7, 5, 'ccc', 300, 5),
(8, 5, 'bbb', 100, 2),
(9, 6, 'aaa', 100, 3),
(10, 6, 'bbb', 100, 2),
(11, 6, 'ccc', 300, 1),
(12, 7, 'aaa', 100, 11),
(13, 8, 'bbb', 100, 17),
(14, 9, 'ccc', 300, 2),
(15, 9, 'aaa', 100, 2),
(16, 10, 'ccc', 300, 2),
(17, 10, '', 100, 2);

■view
CREATE VIEW myProject.myDataset.myView AS SELECT * FROM anotherDataset.myTable;
ちなみにdeclareが入っているSQLはviewにできない

■簡易RFM(recency frequenctry monetary)
SELECT name, MAX(date), count(DISTINCT x_bill.no), SUM(price * unit) FROM bangboo_data.x_employee
 RIGHT JOIN bangboo_data.x_bill ON x_employee.no = x_bill.user_no
 RIGHT JOIN bangboo_data.x_purchase ON x_bill.no = x_purchase.p_no
 GROUP BY name ORDER BY SUM(price * unit) DESC

■RFM
SELECT RANK() OVER (ORDER BY rank_recent+rank_frequent+rank_money) as rank, rank_recent+rank_frequent+rank_money as least_score_wins, t1_r.no as user_no, t1_r.name, recent, frequent, money, rank_recent, rank_frequent, rank_money
FROM (
   SELECT RANK() OVER (ORDER BY recent DESC) as rank_recent, t1.no, t1.name, recent
   FROM (SELECT x_employee.no, x_employee.name, MAX(date) as recent FROM bangboo_data.x_employee RIGHT JOIN bangboo_data.x_bill ON x_employee.no = x_bill.user_no RIGHT JOIN bangboo_data.x_purchase ON x_bill.no = x_purchase.p_no GROUP BY x_employee.no, x_employee.name ORDER BY MAX(date) DESC) as t1
) as t1_r
LEFT OUTER JOIN (
   SELECT RANK() OVER (ORDER BY frequent DESC) as rank_frequent, t2.user_no, t2.name, frequent
   FROM (SELECT x_bill.user_no, x_employee.name, count(distinct x_bill.no) as frequent FROM bangboo_data.x_employee RIGHT JOIN bangboo_data.x_bill ON x_employee.no = x_bill.user_no RIGHT JOIN bangboo_data.x_purchase ON x_bill.no = x_purchase.p_no GROUP BY x_bill.user_no, x_employee.name ORDER BY count(distinct x_bill.no) DESC) as t2
) as t2_f
ON t1_r.no = t2_f.user_no
LEFT OUTER JOIN (
   SELECT RANK() OVER (ORDER BY money DESC) as rank_money, t3.no, t3.name, money
   FROM (SELECT x_employee.no, x_employee.name, SUM(price * unit) as money FROM bangboo_data.x_employee RIGHT JOIN bangboo_data.x_bill ON x_employee.no = x_bill.user_no RIGHT JOIN bangboo_data.x_purchase ON x_bill.no = x_purchase.p_no GROUP BY x_employee.no, x_employee.name ORDER BY SUM(price * unit) DESC) as t3
) as t3_m
ON t2_f.user_no = t3_m.no
ORDER BY least_score_wins asc

■SQLのルールは重要なので下記要チェック
/// BANGBOO BLOG /// - Ora Ora Ora Ora Ora

コメント (0)

February 2, 2021 12:00 AM
Python Python
■importとfromPythonのモジュールとimportとfrom入門 - QiitaPython, importの使い方(from, as, PEP8の推奨スタイル, 注意点など) | note.nkmk.mePythonインポート周り徹底理解への道 - QiitaPythonの相対インポートで上位ディレクトリ・サブディレクトリを指定 | note.nkmk.me標準ライブラリならimport文を本体に書いていれば良い 標準ライブラリ以外はPyPI(Python Package Index)と呼ばれる3rdパーティライブラリから pip(The Python Package Installer)インスコimport文を使って下記の3つなどをインポートし使う 標準ライブラリ pipでインスコしたパッケージ 自作のパッケージ自作は大体わかるがそれ以外はどこにあるのか? $ python…
コメント (0)

ナビ:  <  18 | 19 | 20 | 21 | 22  >
(C)Blog.Bangboo.COM