First raw version of the stats collector
This commit is contained in:
		
							parent
							
								
									83985be518
								
							
						
					
					
						commit
						cc2c0e492b
					
				
					 12 changed files with 343 additions and 0 deletions
				
			
		
							
								
								
									
										54
									
								
								.gitignore
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								.gitignore
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,54 @@ | ||||||
|  | # Byte-compiled / optimized / DLL files | ||||||
|  | __pycache__/ | ||||||
|  | *.py[cod] | ||||||
|  | *$py.class | ||||||
|  | 
 | ||||||
|  | # Distribution / packaging | ||||||
|  | .Python | ||||||
|  | build/ | ||||||
|  | develop-eggs/ | ||||||
|  | dist/ | ||||||
|  | downloads/ | ||||||
|  | eggs/ | ||||||
|  | .eggs/ | ||||||
|  | lib/ | ||||||
|  | lib64/ | ||||||
|  | parts/ | ||||||
|  | sdist/ | ||||||
|  | var/ | ||||||
|  | wheels/ | ||||||
|  | share/python-wheels/ | ||||||
|  | *.egg-info/ | ||||||
|  | .installed.cfg | ||||||
|  | *.egg | ||||||
|  | MANIFEST | ||||||
|  | 
 | ||||||
|  | # Unit test / coverage reports | ||||||
|  | .tox/ | ||||||
|  | .nox/ | ||||||
|  | .coverage | ||||||
|  | .coverage.* | ||||||
|  | .cache | ||||||
|  | *.cover | ||||||
|  | *.py,cover | ||||||
|  | .hypothesis/ | ||||||
|  | .pytest_cache/ | ||||||
|  | cover/ | ||||||
|  | reports/ | ||||||
|  | 
 | ||||||
|  | # Sphinx documentation | ||||||
|  | docs/_build/ | ||||||
|  | 
 | ||||||
|  | # Environments | ||||||
|  | .env | ||||||
|  | .venv | ||||||
|  | env/ | ||||||
|  | venv/ | ||||||
|  | ENV/ | ||||||
|  | env.bak/ | ||||||
|  | venv.bak/ | ||||||
|  | 
 | ||||||
|  | # mypy | ||||||
|  | .mypy_cache/ | ||||||
|  | .dmypy.json | ||||||
|  | dmypy.json | ||||||
							
								
								
									
										12
									
								
								LICENCE
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								LICENCE
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,12 @@ | ||||||
|  | Copyright (C) 2020 by Vladan <vladanovic at gmail dot com> | ||||||
|  | 
 | ||||||
|  | Permission to use, copy, modify, and/or distribute this software for any purpose | ||||||
|  | with or without fee is hereby granted. | ||||||
|  | 
 | ||||||
|  | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH | ||||||
|  | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND | ||||||
|  | FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, | ||||||
|  | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS | ||||||
|  | OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER | ||||||
|  | TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF | ||||||
|  | THIS SOFTWARE. | ||||||
							
								
								
									
										5
									
								
								README.rst
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								README.rst
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,5 @@ | ||||||
|  | ============================ | ||||||
|  | Website stats collector demo | ||||||
|  | ============================ | ||||||
|  | 
 | ||||||
|  | ... | ||||||
							
								
								
									
										12
									
								
								config.yaml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								config.yaml
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,12 @@ | ||||||
|  | kafka_servers: | ||||||
|  | - "localhost:9992" | ||||||
|  | kafka_topic: "sample" | ||||||
|  | sites: | ||||||
|  | - url: "https://example.com" | ||||||
|  |   regex: "domain" | ||||||
|  |   check_interval: 5 | ||||||
|  | - url: "https://example.com" | ||||||
|  |   regex: "aaaaaaaaaaaaa" | ||||||
|  |   check_interval: 8 | ||||||
|  | - url: "https://example.com/404" | ||||||
|  |   check_interval: 13 | ||||||
							
								
								
									
										0
									
								
								reports/.keep
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								reports/.keep
									
										
									
									
									
										Normal file
									
								
							
							
								
								
									
										1
									
								
								requirements.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								requirements.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1 @@ | ||||||
|  | . | ||||||
							
								
								
									
										26
									
								
								setup.cfg
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								setup.cfg
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,26 @@ | ||||||
|  | [metadata] | ||||||
|  | name = webstat | ||||||
|  | summary = Tool for collecting website stats. | ||||||
|  | description-file = README.rst | ||||||
|  | author = Vladan Popovic | ||||||
|  | author-email = vladanovic@gmail.com | ||||||
|  | classifier = | ||||||
|  |     Environment :: Automation | ||||||
|  |     Operating System :: POSIX :: Linux | ||||||
|  |     Programming Language :: Python :: 3.8 | ||||||
|  | 
 | ||||||
|  | [options] | ||||||
|  | package_dir= | ||||||
|  |     =src | ||||||
|  | packages=find: | ||||||
|  | 
 | ||||||
|  | [options.packages.find] | ||||||
|  | where=src | ||||||
|  | 
 | ||||||
|  | [options.entry_points] | ||||||
|  | console_scripts = | ||||||
|  |     wstat_collect = webstat.cmd:collect | ||||||
|  |     wstat_consume = webstat.cmd:consume | ||||||
|  | 
 | ||||||
|  | [bdist_wheel] | ||||||
|  | universal = 1 | ||||||
							
								
								
									
										18
									
								
								setup.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								setup.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,18 @@ | ||||||
|  | """ | ||||||
|  | Setup file for the web stats collector. | ||||||
|  | """ | ||||||
|  | from setuptools import setup  # type: ignore | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | setup( | ||||||
|  |     use_scm_version=True, | ||||||
|  |     setup_requires=['setuptools_scm'], | ||||||
|  |     python_requires='>=3.8, <4', | ||||||
|  |     install_requires=[ | ||||||
|  |         'aiokafka==0.6.0', | ||||||
|  |         'asyncpg==0.21.0', | ||||||
|  |         'PyYAML==5.3.1', | ||||||
|  |         'requests==2.24.0', | ||||||
|  |     ], | ||||||
|  |     include_package_data=True, | ||||||
|  | ) | ||||||
							
								
								
									
										34
									
								
								src/webstat/cmd.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								src/webstat/cmd.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,34 @@ | ||||||
|  | """ | ||||||
|  | A module containing all console script functions. | ||||||
|  | """ | ||||||
|  | import asyncio | ||||||
|  | import yaml | ||||||
|  | 
 | ||||||
|  | from webstat.collector import Collector | ||||||
|  | from webstat.consumer import Consumer | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def run(Service): | ||||||
|  |     """ | ||||||
|  |     A factory kinda that runs both services in an event loop. | ||||||
|  |     """ | ||||||
|  |     loop = asyncio.get_event_loop() | ||||||
|  |     queue = asyncio.Queue() | ||||||
|  |     with open('config.yaml', 'r') as conf_file: | ||||||
|  |         config = yaml.load(conf_file, Loader=yaml.FullLoader) | ||||||
|  |     tasks = Service(config, loop, queue).tasks() | ||||||
|  |     loop.run_until_complete(asyncio.gather(*tasks)) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def collect(): | ||||||
|  |     """ | ||||||
|  |     Main producer event loop. | ||||||
|  |     """ | ||||||
|  |     run(Collector) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def consume(): | ||||||
|  |     """ | ||||||
|  |     Main consumer event loop. | ||||||
|  |     """ | ||||||
|  |     run(Consumer) | ||||||
							
								
								
									
										86
									
								
								src/webstat/collector.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										86
									
								
								src/webstat/collector.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,86 @@ | ||||||
|  | """ | ||||||
|  | Checks status of web servers and sends them to a configured Kafka topic. | ||||||
|  | """ | ||||||
|  | import asyncio | ||||||
|  | import json | ||||||
|  | import re | ||||||
|  | from typing import Any, Dict, List, Optional | ||||||
|  | 
 | ||||||
|  | import aiokafka #  type: ignore | ||||||
|  | import requests | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class Collector: | ||||||
|  |     """ | ||||||
|  |     A class that contains all methods needed to check the statuses of all | ||||||
|  |     websites present in the config. | ||||||
|  |     """ | ||||||
|  |     def __init__(self, config: Dict[str, Any], | ||||||
|  |                  event_loop: asyncio.AbstractEventLoop, | ||||||
|  |                  queue: asyncio.Queue): | ||||||
|  |         self.config = config | ||||||
|  |         self.loop = event_loop | ||||||
|  |         self.queue = queue | ||||||
|  | 
 | ||||||
|  |     async def get_status(self, url: str, regex: Optional[str]) -> Dict[str, Any]: | ||||||
|  |         """ | ||||||
|  |         Checks the status of a website and optionally matches a regex on the | ||||||
|  |         response body. | ||||||
|  | 
 | ||||||
|  |         :param url: The URL of the site that needs to be checked. | ||||||
|  |         :param regex: An optional regex to match on the response body. | ||||||
|  |         :returns: A dict ready to be sent to the queue for further processing. | ||||||
|  |         """ | ||||||
|  |         res = await self.loop.run_in_executor(None, requests.get, url) | ||||||
|  |         matches = None #  The matches value should be None since the regex can | ||||||
|  |                        #  be ommited from the config. | ||||||
|  |         if regex is not None: | ||||||
|  |             matches = re.search(regex, res.text) is not None | ||||||
|  |         return { | ||||||
|  |             'url': url, | ||||||
|  |             'regex': regex, | ||||||
|  |             'status': res.status_code, | ||||||
|  |             'response_time': res.elapsed.microseconds, | ||||||
|  |             'regex_matches': matches, | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |     async def create_periodic_task(self, site): | ||||||
|  |         """ | ||||||
|  |         A void function that gets the status of a site and sends it to an | ||||||
|  |         ``asyncio.Queue`` for further processing (sending to a Kafka topic). | ||||||
|  | 
 | ||||||
|  |         :param site: A site object from the config. | ||||||
|  |         """ | ||||||
|  |         while True: | ||||||
|  |             data = await self.get_status(site["url"], site.get("regex")) | ||||||
|  |             self.queue.put_nowait(data) | ||||||
|  |             await asyncio.sleep(site["check_interval"]) | ||||||
|  | 
 | ||||||
|  |     async def produce(self): | ||||||
|  |         """ | ||||||
|  |         Creates and starts an ``aiokafka.AIOKafkaProducer`` and runs a loop that | ||||||
|  |         reads from the ``queue`` and sends the messages to the topic from the | ||||||
|  |         ``config``. | ||||||
|  |         """ | ||||||
|  |         producer = aiokafka.AIOKafkaProducer( | ||||||
|  |             loop=self.loop, | ||||||
|  |             bootstrap_servers=self.config["kafka_servers"]) | ||||||
|  | 
 | ||||||
|  |         await producer.start() | ||||||
|  |         try: | ||||||
|  |             while True: | ||||||
|  |                 status = await self.queue.get() | ||||||
|  |                 msg = bytes(json.dumps(status).encode("utf-8")) | ||||||
|  |                 await producer.send_and_wait(self.config["kafka_topic"], msg) | ||||||
|  |         finally: | ||||||
|  |             await producer.stop() | ||||||
|  | 
 | ||||||
|  |     def tasks(self) -> List[asyncio.Task]: | ||||||
|  |         """ | ||||||
|  |         Creates a task for every site. | ||||||
|  |         """ | ||||||
|  |         def create_task(site) -> asyncio.Task: | ||||||
|  |             return self.loop.create_task(self.create_periodic_task(site)) | ||||||
|  |         tasks = list(map(create_task, self.config["sites"])) | ||||||
|  |         tasks.append(self.loop.create_task(self.produce())) | ||||||
|  |         return tasks | ||||||
							
								
								
									
										59
									
								
								src/webstat/consumer.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								src/webstat/consumer.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,59 @@ | ||||||
|  | """ | ||||||
|  | Sample consumer. | ||||||
|  | """ | ||||||
|  | import asyncio | ||||||
|  | import json | ||||||
|  | from typing import Any, Dict, List | ||||||
|  | 
 | ||||||
|  | import aiokafka #  type: ignore | ||||||
|  | import asyncpg #  type: ignore | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class Consumer: | ||||||
|  |     def __init__(self, config: Dict[str, Any], | ||||||
|  |                  event_loop: asyncio.AbstractEventLoop, | ||||||
|  |                  queue: asyncio.Queue): | ||||||
|  |         self.config = config | ||||||
|  |         self.loop = event_loop | ||||||
|  |         self.queue = queue | ||||||
|  | 
 | ||||||
|  |     async def consume(self): | ||||||
|  |         """ | ||||||
|  |         Consumes messages from a Kafka topic. | ||||||
|  |         """ | ||||||
|  |         consumer = aiokafka.AIOKafkaConsumer( | ||||||
|  |             self.config['kafka_topic'], | ||||||
|  |             loop=self.loop, | ||||||
|  |             bootstrap_servers=self.config['kafka_servers']) | ||||||
|  | 
 | ||||||
|  |         await consumer.start() | ||||||
|  |         try: | ||||||
|  |             # Consume messages | ||||||
|  |             async for msg in consumer: | ||||||
|  |                 self.queue.put_nowait(json.loads(msg.value)) | ||||||
|  |         finally: | ||||||
|  |             # Will leave consumer group; perform autocommit if enabled. | ||||||
|  |             await consumer.stop() | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  |     async def save(self, pool, data): | ||||||
|  |         async with pool.acquire() as conn: | ||||||
|  |             async with conn.cursor() as cur: | ||||||
|  |                 await cur.execute("SELECT 1") | ||||||
|  | 
 | ||||||
|  |     async def write(self): | ||||||
|  |         try: | ||||||
|  |             while True: | ||||||
|  |                 status = await self.queue.get() | ||||||
|  |                 print(status) | ||||||
|  |         finally: | ||||||
|  |             print("EXITED!") | ||||||
|  | 
 | ||||||
|  |     def tasks(self) -> List[asyncio.Task]: | ||||||
|  |         """ | ||||||
|  |         Creates tasks for reading from the Kafka topic and writing in | ||||||
|  |         PostgreSQL. | ||||||
|  |         """ | ||||||
|  |         kafka_consumer = self.loop.create_task(self.consume()) | ||||||
|  |         psql_writer = self.loop.create_task(self.write()) | ||||||
|  |         return [kafka_consumer, psql_writer] | ||||||
							
								
								
									
										36
									
								
								tox.ini
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								tox.ini
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,36 @@ | ||||||
|  | [tox] | ||||||
|  | envlist = clean,lint,py3,report | ||||||
|  | 
 | ||||||
|  | [testenv] | ||||||
|  | deps = | ||||||
|  |     mock | ||||||
|  |     pytest | ||||||
|  |     pytest-cov | ||||||
|  |     pytest-mock | ||||||
|  | commands = | ||||||
|  |     pytest --cov=webstat --cov-append --cov-report=term-missing {posargs} | ||||||
|  | 
 | ||||||
|  | [testenv:lint] | ||||||
|  | deps = pylint | ||||||
|  | whitelist_externals = bash | ||||||
|  | commands = | ||||||
|  |     bash -c "pylint --output-format=parseable src/ | tee reports/pylint.out" | ||||||
|  | 
 | ||||||
|  | [testenv:report] | ||||||
|  | deps = coverage | ||||||
|  | skip_install = true | ||||||
|  | commands = | ||||||
|  |     coverage report | ||||||
|  |     coverage html -d reports/htmlcov | ||||||
|  |     coverage xml -o reports/coverage.xml | ||||||
|  | 
 | ||||||
|  | [testenv:docs] | ||||||
|  | changedir = docs | ||||||
|  | deps = sphinx | ||||||
|  | commands = | ||||||
|  |     sphinx-build -W -b html -E ./source/ ./build/ | ||||||
|  | 
 | ||||||
|  | [testenv:clean] | ||||||
|  | deps = coverage | ||||||
|  | skip_install = true | ||||||
|  | commands = coverage erase | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue