First raw version of the stats collector
This commit is contained in:
parent
83985be518
commit
cc2c0e492b
12 changed files with 343 additions and 0 deletions
54
.gitignore
vendored
Normal file
54
.gitignore
vendored
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
*.cover
|
||||||
|
*.py,cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
cover/
|
||||||
|
reports/
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
12
LICENCE
Normal file
12
LICENCE
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
Copyright (C) 2020 by Vladan <vladanovic at gmail dot com>
|
||||||
|
|
||||||
|
Permission to use, copy, modify, and/or distribute this software for any purpose
|
||||||
|
with or without fee is hereby granted.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
|
||||||
|
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||||
|
FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
|
||||||
|
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||||
|
OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||||
|
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
||||||
|
THIS SOFTWARE.
|
5
README.rst
Normal file
5
README.rst
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
============================
|
||||||
|
Website stats collector demo
|
||||||
|
============================
|
||||||
|
|
||||||
|
...
|
12
config.yaml
Normal file
12
config.yaml
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
kafka_servers:
|
||||||
|
- "localhost:9992"
|
||||||
|
kafka_topic: "sample"
|
||||||
|
sites:
|
||||||
|
- url: "https://example.com"
|
||||||
|
regex: "domain"
|
||||||
|
check_interval: 5
|
||||||
|
- url: "https://example.com"
|
||||||
|
regex: "aaaaaaaaaaaaa"
|
||||||
|
check_interval: 8
|
||||||
|
- url: "https://example.com/404"
|
||||||
|
check_interval: 13
|
0
reports/.keep
Normal file
0
reports/.keep
Normal file
1
requirements.txt
Normal file
1
requirements.txt
Normal file
|
@ -0,0 +1 @@
|
||||||
|
.
|
26
setup.cfg
Normal file
26
setup.cfg
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
[metadata]
name = webstat
# ``summary`` / ``description-file`` / ``author-email`` / ``classifier``
# are pbr-style keys; plain setuptools expects the names below.
description = Tool for collecting website stats.
long_description = file: README.rst
author = Vladan Popovic
author_email = vladanovic@gmail.com
classifiers =
    Environment :: Console
    Operating System :: POSIX :: Linux
    Programming Language :: Python :: 3.8
|
||||||
|
|
||||||
|
[options]
|
||||||
|
package_dir=
|
||||||
|
=src
|
||||||
|
packages=find:
|
||||||
|
|
||||||
|
[options.packages.find]
|
||||||
|
where=src
|
||||||
|
|
||||||
|
[options.entry_points]
|
||||||
|
console_scripts =
|
||||||
|
wstat_collect = webstat.cmd:collect
|
||||||
|
wstat_consume = webstat.cmd:consume
|
||||||
|
|
||||||
|
[bdist_wheel]
|
||||||
|
universal = 1
|
18
setup.py
Normal file
18
setup.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
"""
|
||||||
|
Setup file for the web stats collector.
|
||||||
|
"""
|
||||||
|
from setuptools import setup # type: ignore
|
||||||
|
|
||||||
|
|
||||||
|
setup(
|
||||||
|
use_scm_version=True,
|
||||||
|
setup_requires=['setuptools_scm'],
|
||||||
|
python_requires='>=3.8, <4',
|
||||||
|
install_requires=[
|
||||||
|
'aiokafka==0.6.0',
|
||||||
|
'asyncpg==0.21.0',
|
||||||
|
'PyYAML==5.3.1',
|
||||||
|
'requests==2.24.0',
|
||||||
|
],
|
||||||
|
include_package_data=True,
|
||||||
|
)
|
34
src/webstat/cmd.py
Normal file
34
src/webstat/cmd.py
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
"""
|
||||||
|
A module containing all console script functions.
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from webstat.collector import Collector
|
||||||
|
from webstat.consumer import Consumer
|
||||||
|
|
||||||
|
|
||||||
|
def run(Service):
    """
    Instantiate the given service from ``config.yaml`` and run its tasks
    in an event loop until they complete.

    :param Service: ``Collector`` or ``Consumer`` — any class whose
        constructor accepts ``(config, loop, queue)`` and exposes a
        ``tasks()`` method returning awaitables.
    """
    loop = asyncio.get_event_loop()
    queue = asyncio.Queue()
    # Explicit encoding avoids platform-dependent defaults; safe_load is
    # equivalent to FullLoader for this plain-data config but cannot be
    # tricked into constructing arbitrary Python objects.
    with open('config.yaml', 'r', encoding='utf-8') as conf_file:
        config = yaml.safe_load(conf_file)
    tasks = Service(config, loop, queue).tasks()
    loop.run_until_complete(asyncio.gather(*tasks))
|
||||||
|
|
||||||
|
|
||||||
|
def collect():
    """
    Console entry point (``wstat_collect``): start the producer side,
    which polls the configured sites and publishes statuses to Kafka.
    """
    run(Collector)
|
||||||
|
|
||||||
|
|
||||||
|
def consume():
    """
    Console entry point (``wstat_consume``): start the consumer side,
    which reads statuses from Kafka and hands them to the writer.
    """
    run(Consumer)
|
86
src/webstat/collector.py
Normal file
86
src/webstat/collector.py
Normal file
|
@ -0,0 +1,86 @@
|
||||||
|
"""
|
||||||
|
Checks status of web servers and sends them to a configured Kafka topic.
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
import aiokafka # type: ignore
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
class Collector:
    """
    A class that contains all methods needed to check the statuses of all
    websites present in the config and publish them to a Kafka topic.
    """
    def __init__(self, config: Dict[str, Any],
                 event_loop: asyncio.AbstractEventLoop,
                 queue: asyncio.Queue):
        # Parsed config.yaml: kafka_servers, kafka_topic and a list of sites.
        self.config = config
        self.loop = event_loop
        # Hand-off point between the per-site checker tasks and produce().
        self.queue = queue

    async def get_status(self, url: str, regex: Optional[str]) -> Dict[str, Any]:
        """
        Checks the status of a website and optionally matches a regex on the
        response body.

        :param url: The URL of the site that needs to be checked.
        :param regex: An optional regex to match on the response body.
        :returns: A dict ready to be sent to the queue for further processing.
        """
        # requests is blocking, so run it in the default thread-pool executor.
        # NOTE(review): no timeout is set, so a hung server blocks this task
        # forever — consider functools.partial(requests.get, timeout=...).
        res = await self.loop.run_in_executor(None, requests.get, url)
        # The matches value should be None (not False) since the regex can
        # be omitted from the config.
        matches = None
        if regex is not None:
            matches = re.search(regex, res.text) is not None
        return {
            'url': url,
            'regex': regex,
            'status': res.status_code,
            # timedelta.microseconds is only the sub-second component and
            # wraps for responses taking >= 1 s; report the full duration
            # in microseconds instead.
            'response_time': int(res.elapsed.total_seconds() * 1_000_000),
            'regex_matches': matches,
        }

    async def create_periodic_task(self, site: Dict[str, Any]) -> None:
        """
        A void function that gets the status of a site and sends it to an
        ``asyncio.Queue`` for further processing (sending to a Kafka topic).

        :param site: A site object from the config.
        """
        while True:
            # "regex" is optional per site, hence .get().
            data = await self.get_status(site["url"], site.get("regex"))
            self.queue.put_nowait(data)
            await asyncio.sleep(site["check_interval"])

    async def produce(self) -> None:
        """
        Creates and starts an ``aiokafka.AIOKafkaProducer`` and runs a loop that
        reads from the ``queue`` and sends the messages to the topic from the
        ``config``.
        """
        producer = aiokafka.AIOKafkaProducer(
            loop=self.loop,
            bootstrap_servers=self.config["kafka_servers"])

        await producer.start()
        try:
            while True:
                status = await self.queue.get()
                # str.encode already yields bytes; no extra bytes() wrapper.
                msg = json.dumps(status).encode("utf-8")
                await producer.send_and_wait(self.config["kafka_topic"], msg)
        finally:
            await producer.stop()

    def tasks(self) -> List[asyncio.Task]:
        """
        Creates a checker task for every site plus one Kafka producer task.
        """
        def create_task(site: Dict[str, Any]) -> asyncio.Task:
            return self.loop.create_task(self.create_periodic_task(site))

        tasks = list(map(create_task, self.config["sites"]))
        tasks.append(self.loop.create_task(self.produce()))
        return tasks
|
59
src/webstat/consumer.py
Normal file
59
src/webstat/consumer.py
Normal file
|
@ -0,0 +1,59 @@
|
||||||
|
"""
|
||||||
|
Sample consumer.
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
|
import aiokafka # type: ignore
|
||||||
|
import asyncpg # type: ignore
|
||||||
|
|
||||||
|
|
||||||
|
class Consumer:
    """
    Reads site-status messages from a Kafka topic and hands them to a
    writer task (currently a stdout placeholder for PostgreSQL).
    """
    def __init__(self, config: Dict[str, Any],
                 event_loop: asyncio.AbstractEventLoop,
                 queue: asyncio.Queue):
        # Parsed config.yaml: kafka_servers and kafka_topic are used here.
        self.config = config
        self.loop = event_loop
        # Hand-off point between consume() and write().
        self.queue = queue

    async def consume(self) -> None:
        """
        Consumes messages from a Kafka topic and puts the decoded payloads
        on the internal queue.
        """
        consumer = aiokafka.AIOKafkaConsumer(
            self.config['kafka_topic'],
            loop=self.loop,
            bootstrap_servers=self.config['kafka_servers'])

        await consumer.start()
        try:
            # Consume messages
            async for msg in consumer:
                self.queue.put_nowait(json.loads(msg.value))
        finally:
            # Will leave consumer group; perform autocommit if enabled.
            await consumer.stop()

    async def save(self, pool, data) -> None:
        """
        Placeholder for persisting one status record via an asyncpg pool.

        :param pool: An ``asyncpg`` connection pool.
        :param data: The status record to persist (currently unused).
        """
        async with pool.acquire() as conn:
            # asyncpg connections run queries directly and have no DB-API
            # style cursor context manager — the previous
            # ``async with conn.cursor()`` block raised at runtime.
            await conn.execute("SELECT 1")

    async def write(self) -> None:
        """
        Drains the queue; currently prints each status instead of writing
        it to PostgreSQL.
        """
        try:
            while True:
                status = await self.queue.get()
                print(status)
        finally:
            print("EXITED!")

    def tasks(self) -> List[asyncio.Task]:
        """
        Creates tasks for reading from the Kafka topic and writing in
        PostgreSQL.
        """
        kafka_consumer = self.loop.create_task(self.consume())
        psql_writer = self.loop.create_task(self.write())
        return [kafka_consumer, psql_writer]
|
36
tox.ini
Normal file
36
tox.ini
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
[tox]
|
||||||
|
envlist = clean,lint,py3,report
|
||||||
|
|
||||||
|
[testenv]
|
||||||
|
deps =
|
||||||
|
mock
|
||||||
|
pytest
|
||||||
|
pytest-cov
|
||||||
|
pytest-mock
|
||||||
|
commands =
|
||||||
|
pytest --cov=webstat --cov-append --cov-report=term-missing {posargs}
|
||||||
|
|
||||||
|
[testenv:lint]
|
||||||
|
deps = pylint
|
||||||
|
whitelist_externals = bash
|
||||||
|
commands =
|
||||||
|
bash -c "pylint --output-format=parseable src/ | tee reports/pylint.out"
|
||||||
|
|
||||||
|
[testenv:report]
|
||||||
|
deps = coverage
|
||||||
|
skip_install = true
|
||||||
|
commands =
|
||||||
|
coverage report
|
||||||
|
coverage html -d reports/htmlcov
|
||||||
|
coverage xml -o reports/coverage.xml
|
||||||
|
|
||||||
|
[testenv:docs]
|
||||||
|
changedir = docs
|
||||||
|
deps = sphinx
|
||||||
|
commands =
|
||||||
|
sphinx-build -W -b html -E ./source/ ./build/
|
||||||
|
|
||||||
|
[testenv:clean]
|
||||||
|
deps = coverage
|
||||||
|
skip_install = true
|
||||||
|
commands = coverage erase
|
Loading…
Add table
Reference in a new issue