{"id":17850,"date":"2019-03-18T12:15:12","date_gmt":"2019-03-18T03:15:12","guid":{"rendered":"https:\/\/labs.gree.jp\/blog\/?p=17850"},"modified":"2019-05-13T20:57:09","modified_gmt":"2019-05-13T11:57:09","slug":"post17850","status":"publish","type":"post","link":"https:\/\/labs.gree.jp\/blog\/2019\/03\/17850\/","title":{"rendered":"Fargate\u74b0\u5883\u3067embulk\u3092\u4f7f\u3063\u3066S3\u304b\u3089BigQuery\u3078\u306eALB\u3068CloudFront\u306e\u30ed\u30b0\u8ee2\u9001"},"content":{"rendered":"<p>\u4f4f\u307e\u3044\u66ae\u3089\u3057\u30e1\u30c7\u30a3\u30a2<a href=\"https:\/\/limia.jp\/\">LIMIA<\/a>\u3067\u958b\u767a\u3092\u62c5\u5f53\u3057\u3066\u3044\u308b\u6a0b\u53e3(<a href=\"https:\/\/twitter.com\/mahiguch1\">@mahiguch1<\/a>)\u3067\u3059\u3002<br \/>\nLIMIA\u3067\u306fBigQuery\u3092\u4f7f\u3063\u3066\u30c7\u30fc\u30bf\u5206\u6790\u3092\u884c\u306a\u3063\u3066\u3044\u307e\u3059\u3002<br \/>\n\u524d\u56de<a href=\"https:\/\/labs.gree.jp\/blog\/2019\/03\/17834\/\">MySQL\u304b\u3089BigQuery\u3078\u306e\u30c7\u30fc\u30bf\u8ee2\u9001\u306b\u95a2\u3059\u308b\u8a18\u4e8b<\/a>\u3092\u66f8\u3044\u305f\u3068\u3053\u308d\u3001S3\u306b\u3042\u308bALB\u3084CloudFront\u306e\u30ed\u30b0\u306e\u8ee2\u9001\u306b\u3064\u3044\u3066\u8cea\u554f\u3092\u53d7\u3051\u305f\u305f\u3081\u3001\u305d\u308c\u306b\u3064\u3044\u3066\u5171\u6709\u3057\u307e\u3059\u3002<\/p>\n<h2>ALB\u30ed\u30b0\u8ee2\u9001\u8a2d\u5b9a<\/h2>\n<p>ALB\u30ed\u30b0\u306eSchema\u306f\u6c7a\u307e\u3063\u3066\u3044\u308b\u305f\u3081\u3001\u4ee5\u4e0b\u306e\u3088\u3046\u306b\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>files\/etc\/embulk\/schema\/alb_access_log.json<\/p>\n<pre class=\"lang:default decode:true \">[\n    {\"name\": \"protocol\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"timestamp\", \"type\": \"timestamp\", \"mode\": \"nullable\"},\n    {\"name\": \"elb\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"client_port\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"backend_port\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"request_processing_time\", \"type\": \"float\", \"mode\": \"nullable\"},\n    {\"name\": \"backend_processing_time\", \"type\": \"float\", \"mode\": \"nullable\"},\n    {\"name\": \"response_processing_time\", \"type\": \"float\", \"mode\": \"nullable\"},\n    {\"name\": \"elb_status_code\", \"type\": \"integer\", \"mode\": \"nullable\"},\n    {\"name\": \"backend_status_code\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"received_bytes\", \"type\": \"integer\", \"mode\": \"nullable\"},\n    {\"name\": \"send_bytes\", \"type\": \"integer\", \"mode\": \"nullable\"},\n    {\"name\": \"request\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"user_agent\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"ssl_cipher\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"ssl_protocol\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"target_group_arn\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"trace_id\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"domain_name\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"chosen_cert_arn\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"matched_rule_priority\", \"type\": \"integer\", \"mode\": \"nullable\"}\n]<\/pre>\n<p>ALB\u306e\u30a2\u30af\u30bb\u30b9\u30ed\u30b0\u306f\u3001S3\u306b\u683c\u7d0d\u3055\u308c\u3066\u3044\u307e\u3059\u3002<br \/>\nS3\u306b\u3042\u308b\u306e\u3067\u3001embulk-input-s3\u304c\u4f7f\u3048\u305d\u3046\u3067\u3059\u3002<br \/>\nEC2\u3067\u52d5\u304b\u3057\u3066\u3044\u305f\u3068\u304d\u306f\u305d\u308c\u3067\u554f\u984c\u306a\u304b\u3063\u305f\u306e\u3067\u3059\u304c\u3001ECS\/Fargate\u3067\u52d5\u304b\u3059\u3068\u6a29\u9650\u4e0d\u8db3\u3068\u306e\u30a8\u30e9\u30fc\u304c\u51fa\u3066\u51fa\u3066\u4e0a\u624b\u304f\u884c\u304d\u307e\u305b\u3093\u3067\u3057\u305f\u3002<br \/>\n\u305d\u3053\u3067\u3001awscli\u3092\u4f7f\u3063\u3066\u30d5\u30a1\u30a4\u30eb\u3092\u8db3\u5143\u306b\u8ee2\u9001\u3057\u3066\u3001\u305d\u308c\u3092BigQuery\u306b\u683c\u7d0d\u3059\u308b\u8a2d\u5b9a\u3092\u884c\u3044\u307e\u3057\u305f\u3002<\/p>\n<p>files\/etc\/embulk\/logs\/production\/access-log-elb-limia-jp-alb.yml.liquid<\/p>\n<pre class=\"lang:default decode:true \">in:\n  type: file\n  path_prefix: \/tmp\/access-log-elb-limia-jp-alb\n  parser:\n    charset: UTF-8\n    newline: CRLF\n    type: csv\n    delimiter: \" \"\n    trim_if_not_quoted: false\n    skip_header_lines: 0\n    allow_extra_columns: true\n    allow_optional_columns: true\n    null_string: \"-\"\n    columns:\n    - {name: protocol, type: string}\n    - {name: timestamp, type: timestamp, format: '%Y-%m-%dT%H:%M:%S.%NZ'}\n    - {name: elb, type: string}\n    - {name: client_port, type: string}\n    - {name: backend_port, type: string}\n    - {name: request_processing_time, type: double}\n    - {name: backend_processing_time, type: double}\n    - {name: response_processing_time, type: double}\n    - {name: elb_status_code, type: long}\n    - {name: backend_status_code, type: string}\n    - {name: received_bytes, type: long}\n    - {name: send_bytes, type: long}\n    - {name: request, type: string}\n    - {name: user_agent, type: string}\n    - {name: ssl_cipher, type: string}\n    - {name: ssl_protocol, type: string}\n    - {name: target_group_arn, type: string}\n    - {name: trace_id, type: string}\n    - {name: domain_name, type: string}\n    - {name: chosen_cert_arn, type: string}\n    - {name: matched_rule_priority, type: long}\n  decoders:\n  - {type: gzip}\nout:\n  type: bigquery\n  mode: replace\n  prevent_duplicate_insert: false\n  auth_method: json_key\n  json_keyfile: \/etc\/bigquery_service_account.json\n  project: {{ env.GCP_PROJECT_ID }}\n  dataset: alb_logs\n  compression: GZIP\n  source_format: NEWLINE_DELIMITED_JSON\n  table: limia_jp_{{ \"now\" | date: \"%s\" | minus: 86400 | date: \"%Y%m%d\" }}\n  schema_file: \/etc\/embulk\/schema\/alb_access_log.json\n  auto_create_table: true<\/pre>\n<h2>CloudFront\u30ed\u30b0\u8ee2\u9001\u8a2d\u5b9a<\/h2>\n<p>ALB\u540c\u69d8CloudFront\u306eSchema\u306f\u56fa\u5b9a\u7684\u306a\u306e\u3067\u3001\u4ee5\u4e0b\u306e\u3088\u3046\u306b\u3057\u307e\u3057\u305f\u3002<\/p>\n<p>files\/etc\/embulk\/schema\/cloudfront_access_log.json<\/p>\n<pre class=\"lang:default decode:true \">[\n    {\"name\": \"date\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"time\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"x_edge_location\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"sc_bytes\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"c_ip\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"cs_method\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"cs_host\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"cs_uri_stem\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"sc_status\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"cs_referer\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"cs_user_agent\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"cs_uri_query\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"cs_cookie\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"x_edge_result_type\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"x_edge_request_id\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"x_host_header\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"cs_protocol\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"cs_bytes\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"time_taken\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"x_forwarded_for\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"ssl_protocol\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"ssl_cipher\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"x_edge_response_result_type\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"cs_protocol_version\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"fle_status\", \"type\": \"string\", \"mode\": \"nullable\"},\n    {\"name\": \"fle_encrypted_fields\", \"type\": \"string\", \"mode\": \"nullable\"}\n]<\/pre>\n<p>\u8ee2\u9001\u8a2d\u5b9a\u3082ALB\u540c\u69d8\u3001\u8db3\u5143\u306e\u30d5\u30a1\u30a4\u30eb\u3092\u8ee2\u9001\u3059\u308b\u3082\u306e\u3067\u3059\u3002<\/p>\n<p>files\/etc\/embulk\/logs\/production\/access-log-cloudfront-d-limia-jp.yml.liquid<\/p>\n<pre class=\"lang:default decode:true \">in:\n  type: file\n  path_prefix: \/tmp\/access-log-cloudfront-d-limia-jp\n  parser:\n    charset: UTF-8\n    newline: CRLF\n    type: csv\n    delimiter: \"\\t\"\n    trim_if_not_quoted: false\n    skip_header_lines: 2\n    allow_extra_columns: true\n    allow_optional_columns: true\n    null_string: \"-\"\n    columns:\n    - {name: date, type: string}\n    - {name: time, type: string}\n    - {name: x_edge_location, type: string}\n    - {name: sc_bytes, type: string}\n    - {name: c_ip, type: string}\n    - {name: cs_method, type: string}\n    - {name: cs_host, type: string}\n    - {name: cs_uri_stem, type: string}\n    - {name: sc_status, type: string}\n    - {name: cs_referer, type: string}\n    - {name: cs_user_agent, type: string}\n    - {name: cs_uri_query, type: string}\n    - {name: cs_cookie, type: string}\n    - {name: x_edge_result_type, type: string}\n    - {name: x_edge_request_id, type: string}\n    - {name: x_host_header, type: string}\n    - {name: cs_protocol, type: string}\n    - {name: cs_bytes, type: string}\n    - {name: time_taken, type: string}\n    - {name: x_forwarded_for, type: string}\n    - {name: ssl_protocol, type: string}\n    - {name: ssl_cipher, type: string}\n    - {name: x_edge_response_result_type, type: string}\n    - {name: cs_protocol_version, type: string}\n    - {name: fle_status, type: string}\n    - {name: fle_encrypted_fields, type: string}\n  decoders:\n  - {type: gzip}\nout:\n  type: bigquery\n  mode: replace\n  prevent_duplicate_insert: false\n  auth_method: json_key\n  json_keyfile: \/etc\/bigquery_service_account.json\n  project: {{ env.GCP_PROJECT_ID }}\n  dataset: alb_logs\n  compression: GZIP\n  source_format: NEWLINE_DELIMITED_JSON\n  table: d_limia_jp_{{ \"now\" | date: \"%s\" | minus: 86400 | date: \"%Y%m%d\" }}\n  schema_file: \/etc\/embulk\/schema\/cloudfront_access_log.json\n  auto_create_table: true<\/pre>\n<h2>\u30b9\u30af\u30ea\u30d7\u30c8<\/h2>\n<p>\u524d\u56de\u306e\u30b9\u30af\u30ea\u30d7\u30c8\u306b\u30ed\u30b0\u8ee2\u9001\u51e6\u7406\u3092\u8ffd\u52a0\u3057\u307e\u3057\u305f\u3002<br \/>\n\u5177\u4f53\u7684\u306b\u306f\u3001\u5f15\u6570\u306b logs \u304c\u6e21\u3055\u308c\u305f\u3089\u3001 LIMIA_ENV.sh \u3092\u5b9f\u884c\u3057\u307e\u3059\u3002<\/p>\n<p>files\/entrypoint.sh<\/p>\n<pre class=\"lang:sh decode:true \">#!\/bin\/sh\n\ndecrypt() {\n    aws kms decrypt --ciphertext-blob fileb:\/\/\/etc\/embulk\/blob\/${LIMIA_ENV}-bigquery-service-account.blob --output text --query Plaintext | base64 -d &gt; \/etc\/bigquery_service_account.json\n}\n\nprocess_tables() {\n    ls \/etc\/embulk\/tables\/*.yml.liquid | xargs -n1 java -jar \/usr\/local\/bin\/embulk run 2&gt;&amp;1\n}\n\nprocess_specified() {\n    if [ $# -eq 0 ]; then\n        exit\n    fi\n\n    if [ \"$1\" = \"logs\" ]; then\n        if [ -e \/etc\/embulk\/logs\/${LIMIA_ENV} ]; then\n          \/etc\/embulk\/logs\/${LIMIA_ENV}.sh\n        fi\n    else\n        for var in $@; do\n            ls \/etc\/embulk\/tables\/${var}.yml.liquid | xargs -n1 java -jar \/usr\/local\/bin\/embulk run 2&gt;&amp;1\n        done\n    fi\n}\n\ninit() {\n    if [ -z \"${GCP_PROJECT_ID}\" ]; then\n      echo no GCP_PROJECT_ID\n      exit\n    fi\n    if [ -z \"${LIMIA_ENV}\" ]; then\n      echo no LIMIA_ENV\n      exit\n    fi\n\n    decrypt\n\n    if [ $# -eq 0 ]; then\n        process_tables\n    else\n        process_specified \"$@\"\n    fi\n}\n\ninit \"$@\"<\/pre>\n<p>entrypoint\u304b\u3089\u547c\u3073\u51fa\u3055\u308c\u308bshell script\u306f\u3001\u4ee5\u4e0b\u306e\u3088\u3046\u306b\u306a\u308a\u307e\u3059\u3002<br \/>\nawscli\u3092\u4f7f\u3063\u3066\u3001\u524d\u65e5\u5206\u306e\u30ed\u30b0\u3092\u8db3\u5143\u306b\u8ee2\u9001\u3057\u3001embulk\u3092\u5b9f\u884c\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>files\/etc\/embulk\/logs\/production.sh<\/p>\n<pre class=\"lang:sh decode:true \">#!\/bin\/sh\naws s3 sync s3:\/\/bucket_name\/cf-logs\/ \/tmp\/access-log-cloudfront-d-limia-jp\/ --exclude \"*\" --include \"*.`date --date '1 day ago' +%Y-%m-%d`-*\" --quiet\njava -jar \/usr\/local\/bin\/embulk run \/etc\/embulk\/logs\/${LIMIA_ENV}\/access-log-cloudfront-d-limia-jp.yml.liquid 2&gt;&amp;1\nrm -rf \/tmp\/access-log-cloudfront-d-limia-jp\/\n\naws s3 sync s3:\/\/bucket_name\/AWSLogs\/aws_account_id\/elasticloadbalancing\/ap-northeast-1\/`date --date '1 day ago' +%Y\/%m\/%d`\/ \/tmp\/access-log-elb-limia-jp-alb\/ --quiet\njava -jar \/usr\/local\/bin\/embulk run \/etc\/embulk\/logs\/${LIMIA_ENV}\/access-log-elb-limia-jp-alb.yml.liquid 2&gt;&amp;1\nrm -rf \/tmp\/access-log-elb-limia-jp-alb\/<\/pre>\n<h2>\u307e\u3068\u3081<\/h2>\n<p>ALB\u3084CloudFront\u306e\u30ed\u30b0\u3092BigQuery\u306b\u8ee2\u9001\u3059\u308b\u65b9\u6cd5\u3092\u5171\u6709\u3057\u307e\u3057\u305f\u3002<br \/>\n\u540c\u3058\u3088\u3046\u306a\u554f\u984c\u306b\u906d\u9047\u3057\u305f\u65b9\u306e\u5f79\u306b\u7acb\u3066\u3070\u5e78\u3044\u3067\u3059\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u4f4f\u307e\u3044\u66ae\u3089\u3057\u30e1\u30c7\u30a3\u30a2LIMIA\u3067\u958b\u767a\u3092\u62c5\u5f53\u3057\u3066\u3044\u308b\u6a0b\u53e3(@mahiguch1)\u3067\u3059\u3002 LIMIA\u3067\u306fBigQuery\u3092\u4f7f\u3063\u3066\u30c7\u30fc\u30bf\u5206\u6790\u3092\u884c\u306a\u3063\u3066\u3044\u307e\u3059\u3002 \u524d\u56deMySQL\u304b\u3089BigQuery\u3078\u306e\u30c7\u30fc\u30bf\u8ee2\u9001\u306b\u95a2\u3059\u308b\u8a18\u4e8b\u3092\u66f8\u3044 [&hellip;]<\/p>\n","protected":false},"author":195,"featured_media":0,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[9],"tags":[164,162,114,163,141,101],"class_list":["post-17850","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-info","tag-accesslog","tag-alb","tag-aws","tag-cloudfront","tag-gcp","tag-s3"],"jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"_links":{"self":[{"href":"https:\/\/labs.gree.jp\/blog\/wp-json\/wp\/v2\/posts\/17850","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/labs.gree.jp\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/labs.gree.jp\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/labs.gree.jp\/blog\/wp-json\/wp\/v2\/users\/195"}],"replies":[{"embeddable":true,"href":"https:\/\/labs.gree.jp\/blog\/wp-json\/wp\/v2\/comments?post=17850"}],"version-history":[{"count":3,"href":"https:\/\/labs.gree.jp\/blog\/wp-json\/wp\/v2\/posts\/17850\/revisions"}],"predecessor-version":[{"id":17903,"href":"https:\/\/labs.gree.jp\/blog\/wp-json\/wp\/v2\/posts\/17850\/revisions\/17903"}],"wp:attachment":[{"href":"https:\/\/labs.gree.jp\/blog\/wp-json\/wp\/v2\/media?parent=17850"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/labs.gree.jp\/blog\/wp-json\/wp\/v2\/categories?post=17850"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/labs.gree.jp\/blog\/wp-json\/wp\/v2\/tags?post=17850"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}