Implement database operations

Vladan Popovic 2020-09-05 02:38:01 +02:00
parent b23bf037e0
commit 0189f66cec
2 changed files with 113 additions and 14 deletions


@@ -7,6 +7,7 @@ import logging
 import logging.config
 from logging import Logger
 from typing import Tuple
+import os
 
 import yaml
 from chweb.collector import Collector, Producer
@@ -25,11 +26,36 @@ def configure(name) -> Tuple[Config, Logger]:
                         help=('The yaml config file. '
                               'Defaults to /etc/checker.yaml'))
     args = parser.parse_args()
+
+    kafka_servers_env = os.getenv('KAFKA_SERVERS')
+    if kafka_servers_env is not None:
+        kafka_servers = kafka_servers_env.split(',')
+
+    kafka_topic = os.getenv('KAFKA_TOPIC')
+    pg_db = os.getenv('POSTGRES_DB')
+    pg_host = os.getenv('POSTGRES_HOST')
+    pg_port = os.getenv('POSTGRES_PORT')
+    pg_user = os.getenv('POSTGRES_USER')
+    pg_pass = os.getenv('POSTGRES_PASS')
+
     with open(args.config, 'r') as conf_file:
         config = yaml.load(conf_file, Loader=yaml.FullLoader)
 
     logging.config.dictConfig(config['logging'])
+
+    config = Config(**config)
+    config.kafka.servers = (kafka_servers if kafka_servers_env
+                            else config.kafka.servers)
+    config.kafka.topic = kafka_topic or config.kafka.topic
+
+    config.postgres.dbhost = pg_host or config.postgres.dbhost
+    config.postgres.dbname = pg_db or config.postgres.dbname
+    config.postgres.dbport = pg_port or config.postgres.dbport
+    config.postgres.dbuser = pg_user or config.postgres.dbuser
+    config.postgres.dbpass = pg_pass or config.postgres.dbpass
+
     logger = logging.getLogger("chweb.{}".format(name))
-    return (Config(**config), logger)
+    print(config)
+    return (config, logger)
 
 
 def collect():
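
The override logic above prefers environment variables and falls back to the values parsed from the YAML file. A minimal standalone sketch of the same pattern (the names and defaults here are hypothetical, not chweb's real Config model):

import os

# Hypothetical defaults standing in for values parsed from the YAML file.
DEFAULTS = {"POSTGRES_HOST": "localhost", "POSTGRES_PORT": "5432"}

def resolve(env_var: str) -> str:
    """Prefer the environment variable; fall back to the config-file value."""
    # Mirrors the `x or config.y` lines above: an unset *or empty*
    # environment variable falls back to the file value.
    return os.getenv(env_var) or DEFAULTS[env_var]

print(resolve("POSTGRES_HOST"))  # env value when set, else "localhost"
print(resolve("POSTGRES_PORT"))  # note: env values are strings, not ints

Two caveats carried over from the diff: `or` discards empty-string values, and everything read from the environment is a string, so the Config model is assumed to coerce e.g. the Postgres port to an int.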


@@ -4,7 +4,8 @@ Sample consumer.
 import asyncio
 import json
 import logging
-from typing import Any, Dict
+import time
+from typing import Any, Dict, Optional
 
 import aiokafka  # type: ignore
 import asyncpg  # type: ignore
@@ -14,6 +15,12 @@ from chweb.models import Check
 
 
 class Consumer(Service):
+    @property
+    def db(self):
+        return Db(self.loop, self.config.postgres.dbuser,
+                  self.config.postgres.dbpass, self.config.postgres.dbhost,
+                  self.config.postgres.dbport, self.config.postgres.dbname)
+
     async def consume(self):
         """
         Consumes messages from a kafka topic and writes them in the database.
@@ -25,11 +32,13 @@
         await consumer.start()
         try:
-            # Consume messages from the kafka topic.
-            async for msg in consumer:
-                check_info = Check(**json.loads(msg.value))
-                self.queue.put_nowait(check_info)
-                self.logger.info(check_info)
+            async with self.db as db:
+                await db.setup()
+                # Consume messages from the kafka topic.
+                async for msg in consumer:
+                    check = Check(**json.loads(msg.value))
+                    self.queue.put_nowait(check)
+                    await db.save(check)
         finally:
             # Will leave consumer group; perform autocommit if enabled.
             await consumer.stop()
 
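For context, `consumer` is created earlier in `consume()`, outside this hunk, as an aiokafka consumer. A minimal sketch of that lifecycle, with placeholder topic and broker names (aiokafka's circa-2020 API, which still accepted a `loop` argument):

import asyncio
import json

import aiokafka  # type: ignore

async def consume_once(loop: asyncio.AbstractEventLoop) -> None:
    consumer = aiokafka.AIOKafkaConsumer(
        "checks",                            # placeholder topic
        loop=loop,
        bootstrap_servers="localhost:9092",  # placeholder broker
    )
    await consumer.start()
    try:
        async for msg in consumer:
            print(json.loads(msg.value))     # msg.value is raw bytes
    finally:
        await consumer.stop()                # leave the consumer group cleanly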
@@ -39,10 +48,74 @@
 
 
 class Db:
-    async def consume_and_save(self):
-        try:
-            while True:
-                status = await self.queue.get()
-                yield status
-        finally:
-            self.logger.info("Queue reader stopped.")
+    """
+    Database operations and handy helpers.
+    """
+    def __init__(self, loop: asyncio.AbstractEventLoop, user: str, passwd: str,
+                 host: str, port: int, dbname: str):
+        self.loop = loop
+        # Do a side effect here since without this there isn't any point in
+        # starting the application. Applies to tests as well.
+        self.conn: Optional[asyncpg.Connection] = None
+        self.user = user
+        self.passwd = passwd
+        self.host = host
+        self.port = port
+        self.dbname = dbname
+
+    async def __aenter__(self):
+        self.conn = await asyncpg.connect(host=self.host, port=self.port,
+                                          user=self.user, password=self.passwd,
+                                          database=self.dbname, loop=self.loop,
+                                          timeout=60)
+        return self
+
+    async def __aexit__(self, type_, value, traceback):
+        await self.conn.close()
+
+    async def setup(self):
+        """
+        Set up the database, i.e. create the table and the indexes.
+        """
+        await self.conn.execute('''
+            CREATE TABLE IF NOT EXISTS statuses(
+                id SERIAL PRIMARY KEY,
+                domain TEXT NOT NULL,
+                regex TEXT NULL,
+                regex_matches BOOLEAN NULL,
+                request_time TIMESTAMP NOT NULL,
+                response_time INTEGER NOT NULL,
+                status INTEGER NOT NULL,
+                url TEXT NOT NULL
+            );
+            CREATE INDEX IF NOT EXISTS
+                statuses_domain ON statuses(domain);
+            CREATE INDEX IF NOT EXISTS
+                statuses_status ON statuses(status);
+            CREATE INDEX IF NOT EXISTS
+                statuses_request_time ON statuses(request_time);
+            CREATE INDEX IF NOT EXISTS
+                statuses_response_time ON statuses(response_time);
+            CREATE INDEX IF NOT EXISTS
+                statuses_regex_matches ON statuses(regex_matches);
+        ''')
+
+    async def save(self, data: Check):
+        """
+        Writes a single record to the database. This is not optimal; a
+        better way would be to write a batch of status checks at once.
+        """
+        tstamp = time.mktime(data.request_time.timetuple())
+        if self.conn is not None:
+            try:
+                await self.conn.execute('''
+                    INSERT INTO statuses (domain, regex, regex_matches,
+                                          request_time, response_time,
+                                          status, url)
+                    VALUES($1, $2, $3, $4, $5, $6, $7)
+                ''', data.domain, data.regex, data.regex_matches,
+                    data.request_time, data.response_time, data.status,
+                    data.url)
+            except asyncpg.PostgresError as exc:
+                logging.error("error in query %s", exc.query)
+                raise
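
Because `Db` implements `__aenter__`/`__aexit__`, a connection lives exactly as long as the enclosing `async with` block, which is how `consume()` uses it above. A minimal usage sketch (connection parameters are placeholders; in chweb they come from Config):

import asyncio

async def main() -> None:
    loop = asyncio.get_event_loop()
    # Placeholder credentials and database name.
    db = Db(loop, "user", "secret", "localhost", 5432, "checks")
    async with db:
        await db.setup()        # idempotent thanks to CREATE ... IF NOT EXISTS
        # await db.save(check)  # persist a single chweb.models.Check

asyncio.get_event_loop().run_until_complete(main())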
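
The `save()` docstring itself notes that batching writes would be better. One way to do that with asyncpg is `Connection.executemany`, sketched here as a standalone helper (hypothetical, not part of this commit):

from typing import List

import asyncpg  # type: ignore

from chweb.models import Check

async def save_batch(conn: asyncpg.Connection, checks: List[Check]) -> None:
    """Hypothetical batched insert; one round-trip for many rows."""
    await conn.executemany('''
        INSERT INTO statuses (domain, regex, regex_matches, request_time,
                              response_time, status, url)
        VALUES($1, $2, $3, $4, $5, $6, $7)
    ''', [(c.domain, c.regex, c.regex_matches, c.request_time,
           c.response_time, c.status, c.url) for c in checks])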