diff --git a/elasticsearch-ingestion/.gitignore b/elasticsearch-ingestion/.gitignore new file mode 100644 index 0000000..060be5e --- /dev/null +++ b/elasticsearch-ingestion/.gitignore @@ -0,0 +1 @@ +access.log diff --git a/elasticsearch-ingestion/README.md b/elasticsearch-ingestion/README.md new file mode 100644 index 0000000..79c7263 --- /dev/null +++ b/elasticsearch-ingestion/README.md @@ -0,0 +1,80 @@ +# Elasticsearch Ingestion Demo + +This demo shows how to ingest logs into GreptimeDB through its Elasticsearch-compatible API, using Logstash's `elasticsearch` output plugin. The related documentation is [here](https://docs.greptime.com/nightly/user-guide/ingest-data/for-observability/elasticsearch). + +## How to run this demo + +Ensure you have `git`, `docker` and the `docker compose` plugin installed. To run this +demo: + +```console +git clone https://github.com/GreptimeTeam/demo-scene.git +cd demo-scene/elasticsearch-ingestion +docker compose up +``` + +It can take a while for the first run to pull down images and also build +necessary components. + +You can access GreptimeDB using the `mysql` client. Just run `mysql -h 127.0.0.1 -P +4002` to connect to the database and use SQL queries like `SHOW TABLES` as a +start. You can query the `nginx` table to see the data that has been ingested. + +```console +$ mysql -h 127.0.0.1 -P 4002 +Welcome to the MySQL monitor. Commands end with ; or \g. +Your MySQL connection id is 8 +Server version: 8.4.2 Greptime + +Copyright (c) 2000, 2024, Oracle and/or its affiliates. + +Oracle is a registered trademark of Oracle Corporation and/or its +affiliates. Other names may be trademarks of their respective +owners. + +Type 'help;' or '\h' for help. Type '\c' to clear the current input statement. + +mysql> show tables; ++---------+ +| Tables | ++---------+ +| nginx | +| numbers | ++---------+ +2 rows in set (0.02 sec) + +mysql> SELECT * FROM nginx LIMIT 3 \G; +*************************** 1. 
row *************************** + ip_address: 104.165.107.159 + http_method: DELETE + status_code: 200 + request_line: /contact HTTP/2.0 + user_agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60 +response_size: 927 + timestamp: 2025-02-07 08:12:52 +*************************** 2. row *************************** + ip_address: 110.181.64.38 + http_method: DELETE + status_code: 404 + request_line: /signup HTTP/2.0 + user_agent: Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16 +response_size: 789 + timestamp: 2025-02-07 08:05:45 +*************************** 3. row *************************** + ip_address: 118.236.67.182 + http_method: DELETE + status_code: 500 + request_line: /blog HTTP/1.1 + user_agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36 +response_size: 684 + timestamp: 2025-02-07 08:12:46 +3 rows in set (0.02 sec) +``` + +## How to stop this demo + +```console +docker compose down +``` + +It will stop all the services and remove the containers (including the data). 
diff --git a/elasticsearch-ingestion/configs/greptimedb/nginx-logs-pipeline.yml b/elasticsearch-ingestion/configs/greptimedb/nginx-logs-pipeline.yml new file mode 100644 index 0000000..ed06e30 --- /dev/null +++ b/elasticsearch-ingestion/configs/greptimedb/nginx-logs-pipeline.yml @@ -0,0 +1,35 @@ +processors: + - dissect: + fields: + - line + patterns: + - '%{ip_address} - - [%{timestamp}] "%{http_method} %{request_line}" %{status_code} %{response_size} "-" "%{user_agent}"' + ignore_missing: true + - date: + fields: + - timestamp + formats: + - "%d/%b/%Y:%H:%M:%S %z" + +transform: + - fields: + - ip_address + - http_method + type: string + index: tag + - fields: + - status_code + type: int32 + index: tag + - fields: + - request_line + - user_agent + type: string + index: fulltext + - fields: + - response_size + type: int32 + - fields: + - timestamp + type: time + index: timestamp diff --git a/elasticsearch-ingestion/configs/logstash/logstash.yml b/elasticsearch-ingestion/configs/logstash/logstash.yml new file mode 100644 index 0000000..a03cdff --- /dev/null +++ b/elasticsearch-ingestion/configs/logstash/logstash.yml @@ -0,0 +1,2 @@ +http.host: "0.0.0.0" +xpack.monitoring.enabled: false diff --git a/elasticsearch-ingestion/configs/logstash/pipeline.conf b/elasticsearch-ingestion/configs/logstash/pipeline.conf new file mode 100644 index 0000000..60fd3de --- /dev/null +++ b/elasticsearch-ingestion/configs/logstash/pipeline.conf @@ -0,0 +1,18 @@ +input { + file { + path => "/var/log/nginx/access.log" + start_position => "beginning" + sincedb_path => "/dev/null" + } +} + +output { + elasticsearch { + hosts => ["http://greptimedb:4000/v1/elasticsearch"] + index => "nginx" + parameters => { + "pipeline_name" => "nginx-logs-pipeline" + "msg_field" => "message" + } + } +} diff --git a/elasticsearch-ingestion/docker-compose.yaml b/elasticsearch-ingestion/docker-compose.yaml new file mode 100644 index 0000000..1b91344 --- /dev/null +++ 
b/elasticsearch-ingestion/docker-compose.yaml @@ -0,0 +1,45 @@ +include: + - ../greptimedb-common.yml + +services: + create-nginx-logs-pipeline: + image: docker.io/alpine/curl:latest + networks: + - demo-network + depends_on: + greptimedb: + condition: service_healthy + volumes: + - ./configs/greptimedb:/configs + command: sh -c "sleep 5 && curl -X 'POST' 'http://greptimedb:4000/v1/events/pipelines/nginx-logs-pipeline' -F 'file=@/configs/nginx-logs-pipeline.yml' -v" + init: yes + + nginx-log-generator: + image: docker.io/library/python:3.10 + networks: + - demo-network + volumes: + - ./nginx-log-generator:/app + working_dir: /app + command: ["sh", "-c", "sleep 10 && python3 app.py --interval 3 --output ./access.log"] + healthcheck: + test: ["CMD", "test", "-f", "./access.log"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 5s + + logstash: + image: docker.elastic.co/logstash/logstash:8.17.0 + networks: + - demo-network + depends_on: + nginx-log-generator: + condition: service_healthy + volumes: + - ./configs/logstash/pipeline.conf:/usr/share/logstash/pipeline/logstash.conf + - ./configs/logstash/logstash.yml:/usr/share/logstash/config/logstash.yml + - ./nginx-log-generator:/var/log/nginx + +networks: + demo-network: diff --git a/elasticsearch-ingestion/nginx-log-generator/app.py b/elasticsearch-ingestion/nginx-log-generator/app.py new file mode 100644 index 0000000..4f05e5b --- /dev/null +++ b/elasticsearch-ingestion/nginx-log-generator/app.py @@ -0,0 +1,118 @@ +import argparse +import time +import random +from datetime import datetime, timezone, timedelta + +parser = argparse.ArgumentParser() +parser.add_argument('--interval', type=int, default=3, help='Log generation interval in seconds') +parser.add_argument('--output', type=str, default='access.log', help='Output log file path') +args = parser.parse_args() + +GENERATE_INTERVAL = args.interval # seconds +STORED_LOGS_FILE = args.output + +def nginx_log_generator(): + ip = 
".".join(str(random.randint(1, 255)) for _ in range(4)) + current_time = datetime.now(timezone(timedelta(hours=-7))) + date_time = current_time.strftime("%d/%b/%Y:%H:%M:%S %z") + method = random.choice(["GET", "POST", "PUT", "DELETE"]) + path = random.choice( + [ + "/", + "/about", + "/contact", + "/blog", + "/post", + "/user", + "/login", + "/logout", + "/signup", + ] + ) + protocol = random.choice(["HTTP/1.1", "HTTP/2.0"]) + status_code = random.choice([200, 201, 404, 500]) + body_size = random.randint(100, 1000) + user_agent = random.choice( + [ + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60", + "Opera/8.0 (Windows NT 5.1; U; en)", + "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50", + "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50", + "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11", + "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11", + "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10", + "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0", + "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv,2.0.1) Gecko/20100101 Firefox/4.0.1", + "Mozilla/5.0 (Windows NT 6.1; rv,2.0.1) Gecko/20100101 Firefox/4.0.1", + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2", + "MAC:Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36", + "Windows:Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50", + "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 
Mobile/8J2 Safari/6533.18.5", + "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5", + "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5", + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11", + "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11", + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36", + "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)", + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11", + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER", + "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)", + "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)", + "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)", + "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)", + "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) 
Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)", + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11", + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4094.1 Safari/537.36", + "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5", + "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5", + "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5", + "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5", + "Mozilla/5.0 (Linux; U; Android 2.2.1; zh-cn; HTC_Wildfire_A3333 Build/FRG83D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", + "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", + "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", + "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) 
Presto/2.8.149 Version/11.10", + "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13", + "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+", + "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0", + "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)", + "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)", + "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)", + "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)", + "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", + "Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124", + "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)", + "UCWEB7.0.2.37/28/999", + "NOKIA5700/ UCWEB7.0.2.37/28/999", + "Openwave/ UCWEB7.0.2.37/28/999", + "Openwave/ UCWEB7.0.2.37/28/999", + ] + ) + log = f'{ip} - - [{date_time}] "{method} {path} {protocol}" {status_code} {body_size} "-" "{user_agent}"' + return str.encode(log) + +def main(): + try: + print(f"Generating nginx logs to {STORED_LOGS_FILE} for every {GENERATE_INTERVAL} seconds", flush=True) + while True: + log = nginx_log_generator() + # Write the log to the file. 
+ with open(STORED_LOGS_FILE, "a") as f: + f.write(log.decode("utf-8") + "\n") + time.sleep(GENERATE_INTERVAL) + except KeyboardInterrupt: + print("Shutting down...", flush=True) + +if __name__ == "__main__": + main() diff --git a/greptimedb-common.yml b/greptimedb-common.yml index 8e1c143..b9eacfd 100644 --- a/greptimedb-common.yml +++ b/greptimedb-common.yml @@ -1,6 +1,6 @@ services: greptimedb: - image: docker.io/greptime/greptimedb:v0.11.2 + image: docker.io/greptime/greptimedb:v0.12.0 command: standalone start --http-addr=0.0.0.0:4000 --rpc-addr=0.0.0.0:4001 --mysql-addr=0.0.0.0:4002 --postgres-addr 0.0.0.0:4003 ports: - 4000:4000