From aefdb31631578d076a863d9f8f9d5c07bb24ef78 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Fri, 18 May 2018 13:46:59 +0000 Subject: [PATCH 01/22] pyDKB: introduce common logging system for the whole library. We have individual implementation of logging functionality for some classes in the library, but there are some disadvantages: * same functionality is already re-implemented number of times * ...and must be re-implemented for every module and base class in the library; * hardly extendable to provide convenient interface of 'info()', 'error()', etc. methods; * does not provide filtering by log level (and can hardly be extended to be applicable library-wide); * ... In other words, there are too many things to be done before current implementation can become a sound logging system; and as they are done, we might find ourselves ending up with something resembling the standard Python library 'logging'. However this library does not provide some functionality we would like to have in our logging system: say, avoiding bare lines (with no identifiers) -- to simplify parsing its log messages on the supervisor side -- or TRACE log level. So here is introduced a wrapper around standard 'logging' module, which allows us to implement what we miss in the original module -- and still use all its advantages. To have logging configuration in the main library file, we have to include 'common' submodule and configure logging before including any other module; and to cope with `pycodestyle` we should wrap imports into `try/except` clause (see E402, module level import not at top of file) Recommended way to use logging: 1) within the library: ``` from pyDKB.common import logging logger = logging.getLogger(__name__) ... logger.error("My error message.") ``` 2) outside the library: ``` from pyDKB.common import logging logger = logging.getLogger("myProgramName") ... 
logger.error("My error message.") ``` --- Utils/Dataflow/pyDKB/__init__.py | 22 +- Utils/Dataflow/pyDKB/common/__init__.py | 1 + Utils/Dataflow/pyDKB/common/_logging.py | 284 ++++++++++++++++++++++++ Utils/Dataflow/pyDKB/common/hdfs.py | 9 +- Utils/Dataflow/pyDKB/dataflow/cds.py | 13 +- 5 files changed, 319 insertions(+), 10 deletions(-) create mode 100644 Utils/Dataflow/pyDKB/common/_logging.py diff --git a/Utils/Dataflow/pyDKB/__init__.py b/Utils/Dataflow/pyDKB/__init__.py index a821c1740..32eb255a7 100644 --- a/Utils/Dataflow/pyDKB/__init__.py +++ b/Utils/Dataflow/pyDKB/__init__.py @@ -2,9 +2,27 @@ Common library for Data Knowledge Base development. """ -import dataflow -import common +import sys __version__ = "0.3-SNAPSHOT" __all__ = ["dataflow"] + +try: + import common + logMsgFormat = '%(asctime)s: (%(levelname)s) %(message)s' \ + ' (%(name)s)' +# Or, in case of multithreading: +# ' (%(name)s) (%(threadName)s)' + common.logging.configureRootLogger(msg_format=logMsgFormat, + level=common.logging.DEBUG) + logger = common.logging.getLogger(__name__) +except (SyntaxError, ImportError), err: + sys.stderr.write("(ERROR) Failed to import submodule 'common': %s.\n" + % err) + +try: + import dataflow +except (SyntaxError, ImportError), err: + logger.warn("Failed to import submodule 'dataflow'.") + logger.traceback() diff --git a/Utils/Dataflow/pyDKB/common/__init__.py b/Utils/Dataflow/pyDKB/common/__init__.py index 024e2871f..b0b0fe9b7 100644 --- a/Utils/Dataflow/pyDKB/common/__init__.py +++ b/Utils/Dataflow/pyDKB/common/__init__.py @@ -3,6 +3,7 @@ """ from exceptions import * +import _logging as logging import hdfs import json_utils as json from custom_readline import custom_readline diff --git a/Utils/Dataflow/pyDKB/common/_logging.py b/Utils/Dataflow/pyDKB/common/_logging.py new file mode 100644 index 000000000..5122acda5 --- /dev/null +++ b/Utils/Dataflow/pyDKB/common/_logging.py @@ -0,0 +1,284 @@ +""" +pyDKB.common._logging + +A wrapper for standard 'logging' 
module. +""" + +import logging +from logging import CRITICAL, FATAL, ERROR, WARNING, WARN, INFO, DEBUG, NOTSET +import sys + +# --------------------------------------------------- +# Module variables +# --------------------------------------------------- +# +# Some variables are initialized in 'init()' function. +# +# Logger instance to be used in the module +logger = None +# Root logger for the whole library +_rootLogger = None +# Additional log level +TRACE = DEBUG / 2 +logging.addLevelName(TRACE, 'TRACE') + + +# ------------------------------------------- +# Custom implementation for 'logging' classes +# ------------------------------------------- + + +class Logger(logging.Logger): + """ Logger implementation, aware of 'TRACE' log level. + + New methods: + * trace() -- log with TRACE level; + * traceback() -- log traceback with DEBUG level. + """ + + def trace(self, msg, *args, **kwargs): + """ Log 'msg % args' with severity 'TRACE'. + + To pass exception information, use the keyword argument exc_info with + a true value, e.g. + + logger.trace("Houston, we have a %s", "interesting problem", + exc_info=1) + """ + if self.isEnabledFor(TRACE): + self._log(TRACE, msg, args, **kwargs) + + def traceback(self, *args, **kwargs): + """ Log traceback without additionat messages with severity 'DEBUG'. + + logger.traceback() + """ + if self.isEnabledFor(DEBUG): + kwargs['exc_info'] = 1 + self.debug('Traceback info:', *args, **kwargs) + + +class MultilineFormatter(logging.Formatter): + """ Formatter for multiline messages. + + Every extra line (directly in the message body, or the traceback) + is: + * prefixed with ' (==)'; + * suffixed with the part of format string which goes after + '%(message)s' part. + """ + + def getSuffix(self): + """ Return format suffix (everything that goes after msg body). 
""" + suffix = '' + fmt = self._fmt + splitted = fmt.split("%(message)s") + if len(splitted) > 1: + suffix = splitted[-1] + return suffix + + def formatExtra(self, lines, suffix=None, prefix=" (==) "): + """ Format extra lines of the log message (traceback, ...). """ + if suffix is None: + suffix = self.getSuffix() + if isinstance(lines, list) and len(lines): + extra = prefix + lines[0] + suffix + for line in lines[1:]: + extra += "\n" + prefix + line + suffix + else: + extra = "" + return extra + + def formatException(self, ei): + """ Format traceback as extra lines. """ + s = super(MultilineFormatter, self).formatException(ei) + lines = s.splitlines() + exc_text = self.formatExtra(lines) + return exc_text + + def format(self, record): + """ Format multiline message. + + Second and further lines from initial message are formatted + 'extra' lines. + """ + lines = record.msg.splitlines() + msg = lines[0] if lines else '' + extra = self.formatExtra(lines[1:]) + if extra and extra[:1] != '\n': + extra = '\n' + extra + record.msg = msg + formatted = super(MultilineFormatter, self).format(record) + lines = formatted.splitlines() + msg = lines[0] if lines else '' + if len(lines) > 1: + extra += '\n' + # Need to expand suffixes of extra lines (missed parent`s + # 'format()' operation). + result = (msg + extra + '\n'.join(lines[1:])) % record.__dict__ + return result + + +# -------------------------- +# Module top-level functions +# -------------------------- + + +def isInitialized(): + """ Checks if the module (namely, root logger) is initialized. """ + return isinstance(_rootLogger, Logger) and _rootLogger.handlers + + +def getRootLogger(**kwargs): + """ Reconfigure and return library root logger. """ + if kwargs or not isInitialized(): + configureRootLogger(**kwargs) + return _rootLogger + + +def init(**kwargs): + """ Initialize module. + + If already initialized, do nothing. 
+ """ + global logger + if not isInitialized(): + initRootLogger(**kwargs) + logger = getLogger(__name__) + + +def initRootLogger(**kwargs): + """ Initialize root logger. + + If already initialized, do nothing. + """ + global _rootLogger + + if isInitialized(): + return _rootLogger + + name = kwargs.get('name') + if not name: + name = __name__.split('.')[0] + root = logging.getLogger(name) + # Cast new root to our custom class + root.__class__ = Logger + # Separate new 'root' from logging.root (to avoid possible interfere + # to/from any other usage of logging module): + # 1) prevent log record propagation to parent Handlers, if any; + root.propagate = 0 + # 2) create new manager (object, responsible for new loggers creation); + manager = logging.Manager(root) + manager.setLoggerClass(Logger) + Logger.manager = manager + root.manager = Logger.manager + # 3) reset root logger for our class. + Logger.root = root + + filename = kwargs.get('filename') + if filename: + mode = kwargs.get('mode', 'a') + hdlr_name = "file_%s_%s" % (filename, mode) + # Open file without delay, as presence of 'filename' + # keyword supposes that function was called during + # intended module initialization, not as a side effect + # of calling 'getLogger()' before initializtion. + hdlr = FileHandler(filename, mode) + else: + stream = kwargs.get('stream', sys.stderr) + hdlr_name = "stream_%s" % stream.fileno() + hdlr = logging.StreamHandler(stream) + + hdlr.set_name(hdlr_name) + + fs = kwargs.get('msg_format', logging.BASIC_FORMAT) + dfs = kwargs.get('datefmt', None) + fmt = MultilineFormatter(fs, dfs) + hdlr.setFormatter(fmt) + root.addHandler(hdlr) + + level = kwargs.get('level') + if level is not None: + root.setLevel(level) + _rootLogger = root + + return _rootLogger + + +def configureRootLogger(**kwargs): + """ (Re)configure root logger. 
""" + if not isInitialized(): + return init(**kwargs) + + root = _rootLogger + name = kwargs.get('name') + if name and name != root.name: + # Renaming is not allowed, + # as logger name is the hierarchy-forming parameter + logger.warning("Root logger ('%s') can not be renamed to '%s'.", + root.name, name) + + # Root logger is supposed to have exactly one handler, + # so we count on this fact here + old_hdlr = root.handlers[0] + old_fmt = old_hdlr.formatter + + filename = kwargs.get('filename') + stream = kwargs.get('stream', sys.stderr) + if filename: + mode = kwargs.get('mode', 'a') + hdlr_name = "file_%s_%s" % (filename, mode) + # Delay allows us not to open file right now, + # but only when it is actually required + hdlr = FileHandler(filename, mode, delay=True) + elif stream: + hdlr_name = "stream_%s" % stream.fileno() + hdlr = logging.StreamHandler(stream) + else: + hdlr = old_hdlr + + if not hdlr.get_name(): + hdlr.set_name(hdlr_name) + + # Remove old handler or go on with it instead of the newly created one + # (if it is configured just the same way). + if old_hdlr != hdlr: + if old_hdlr.get_name() != hdlr.get_name(): + old_hdlr.close() + root.removeHandler(old_hdlr) + root.addHandler(hdlr) + else: + hdlr.close() + hdlr = old_hdlr + + fs = kwargs.get("msg_format") + dfs = kwargs.get("datefmt") + + # Check if handler formatter was configured earlier + # and use old values if no new specified + if isinstance(old_fmt, logging.Formatter): + if not fs: + fs = old_fmt._fmt + if not dfs: + dfs = old_fmt.datefmt + elif not fs: + fs = logging.BASIC_FORMAT + + fmt = MultilineFormatter(fs, dfs) + hdlr.setFormatter(fmt) + + level = kwargs.get("level") + if level is not None: + root.setLevel(level) + + return root + + +def getLogger(name): + """ Return logger with given name. 
""" + if not isInitialized(): + init() + root = _rootLogger + if name == root.name: + return root + return root.getChild(name) diff --git a/Utils/Dataflow/pyDKB/common/hdfs.py b/Utils/Dataflow/pyDKB/common/hdfs.py index f8aa18eb2..74c8c4607 100644 --- a/Utils/Dataflow/pyDKB/common/hdfs.py +++ b/Utils/Dataflow/pyDKB/common/hdfs.py @@ -10,10 +10,13 @@ import tempfile from . import HDFSException +from . import logging DEVNULL = open(os.path.devnull, "w") DKB_HOME = "/user/DKB/" +logger = logging.getLogger(__name__) + def check_stderr(proc, timeout=None, max_lines=1): """ Wait till the end of the subprocess and send its STDERR to STDERR. @@ -33,7 +36,7 @@ def check_stderr(proc, timeout=None, max_lines=1): if err: n_lines += 1 if max_lines is None or n_lines <= max_lines: - sys.stderr.write("(INFO) (proc) %s\n" % err) + logger.info("(proc) %s" % err) if proc.poll(): raise subprocess.CalledProcessError(proc.returncode, None) return proc.poll() @@ -78,8 +81,8 @@ def movefile(fname, dest): try: os.remove(fname) except OSError, err: - sys.stderr.write("(WARN) Failed to remove local copy of HDFS file" - " (%s): %s" % (fname, err)) + logger.warn("Failed to remove local copy of HDFS file" + " (%s): %s", fname, err) def getfile(fname): diff --git a/Utils/Dataflow/pyDKB/dataflow/cds.py b/Utils/Dataflow/pyDKB/dataflow/cds.py index 342eae719..2611db73a 100644 --- a/Utils/Dataflow/pyDKB/dataflow/cds.py +++ b/Utils/Dataflow/pyDKB/dataflow/cds.py @@ -6,6 +6,9 @@ import signal import os +from pyDKB.common import logging + +logger = logging.getLogger(__name__) __all__ = ["CDSInvenioConnector", "KerberizedCDSInvenioConnector"] @@ -19,7 +22,7 @@ from invenio_client.contrib import cds import splinter except ImportError, e: - sys.stderr.write("(WARN) %s failed (%s)\n" % (__name__, e)) + logger.warn("%s failed (%s)", __name__, e) __all__ = [] else: @@ -80,9 +83,9 @@ def __init__(self, login="user", password="password"): try: kerberos except NameError: - sys.stderr.write("(ERROR) Kerberos 
Python package is not" - " installed. Can't proceed with Kerberos" - " authorization.\n") + logger.error("Kerberos Python package is not" + " installed. Can't proceed with Kerberos" + " authorization.") sys.exit(4) super(KerberizedCDSInvenioConnector, self).__init__("user", @@ -105,5 +108,5 @@ def _init_browser(self): self.browser.find_link_by_partial_text("Sign in").click() except kerberos.GSSError, e: - sys.stderr.write("(ERROR) %s\n" % str(e)) + logger.error(str(e)) sys.exit(3) From 4434508f6fc7e876693323c20060d632291de333 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Fri, 6 Jul 2018 12:04:44 +0000 Subject: [PATCH 02/22] pyDKB/logging: add suffix alignment for traceback messages. Looks this way: ``` 2018-07-06 11:52:58,540: (DEBUG) Traceback info: (pyDKB) (==) Traceback (most recent call last): (pyDKB) (==) File "pyDKB/__init__.py", line 24, in (pyDKB) (==) import dataflow (pyDKB) (==) File "pyDKB/dataflow/__init__.py", line 6, in (pyDKB) (==) from dkbID import dkbIDs (pyDKB) (==) ImportError: cannot import name dkbIDs (pyDKB) ``` ...instead of: ``` 2018-07-06 11:52:58,540: (DEBUG) Traceback info: (pyDKB) (==) Traceback (most recent call last): (pyDKB) (==) File "pyDKB/__init__.py", line 24, in (pyDKB) (==) import dataflow (pyDKB) (==) File "pyDKB/dataflow/__init__.py", line 6, in (pyDKB) (==) from dkbID import dkbIDs (pyDKB) (==) ImportError: cannot import name dkbIDs (pyDKB) ``` --- Utils/Dataflow/pyDKB/common/_logging.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/Utils/Dataflow/pyDKB/common/_logging.py b/Utils/Dataflow/pyDKB/common/_logging.py index 5122acda5..355410f77 100644 --- a/Utils/Dataflow/pyDKB/common/_logging.py +++ b/Utils/Dataflow/pyDKB/common/_logging.py @@ -77,14 +77,28 @@ def getSuffix(self): suffix = splitted[-1] return suffix - def formatExtra(self, lines, suffix=None, prefix=" (==) "): - """ Format extra lines of the log message (traceback, ...). 
""" + def formatExtra(self, lines, suffix=None, prefix=" (==) ", align=False): + """ Format extra lines of the log message (traceback, ...). + + Parameter 'align' shows whether the suffix should be aligned + to the right (by the longest line), or to the left (as for normal + log messages). + """ if suffix is None: suffix = self.getSuffix() if isinstance(lines, list) and len(lines): - extra = prefix + lines[0] + suffix + max_len = len(max(lines, key=len)) + if align: + suff = ' ' * (max_len - len(lines[0])) + suffix + else: + suff = suffix + extra = prefix + lines[0] + suff for line in lines[1:]: - extra += "\n" + prefix + line + suffix + if align: + suff = ' ' * (max_len - len(line)) + suffix + else: + suff = suffix + extra += "\n" + prefix + line + suff else: extra = "" return extra @@ -93,7 +107,7 @@ def formatException(self, ei): """ Format traceback as extra lines. """ s = super(MultilineFormatter, self).formatException(ei) lines = s.splitlines() - exc_text = self.formatExtra(lines) + exc_text = self.formatExtra(lines, align=True) return exc_text def format(self, record): From 073f80add93d44ebc6d879d5c8e03b040801a3a7 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Fri, 6 Jul 2018 12:08:22 +0000 Subject: [PATCH 03/22] pyDKB: fix logic of submodule import. For now we have two main submodules: 'common' and 'dataflow'. If we failed to import 'common', most likely 'dataflow' functionality is broken. If we failed to import 'dataflow', there`s very little can be done without it -- thus it is better to fail immediatly. If one day we need to use submodules independently, it is possible to: * not to import them in library init file OR * not to raise ImportError, leaving only warning message. 
--- Utils/Dataflow/pyDKB/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Utils/Dataflow/pyDKB/__init__.py b/Utils/Dataflow/pyDKB/__init__.py index 32eb255a7..9370a27ac 100644 --- a/Utils/Dataflow/pyDKB/__init__.py +++ b/Utils/Dataflow/pyDKB/__init__.py @@ -18,11 +18,11 @@ level=common.logging.DEBUG) logger = common.logging.getLogger(__name__) except (SyntaxError, ImportError), err: - sys.stderr.write("(ERROR) Failed to import submodule 'common': %s.\n" - % err) + raise ImportError("%s (in submodule 'common')" % err) try: import dataflow except (SyntaxError, ImportError), err: - logger.warn("Failed to import submodule 'dataflow'.") + logger.error("Failed to import submodule 'dataflow'") logger.traceback() + raise ImportError("%s (in submodule 'dataflow')" % err) From 27f893bed8efe996346925b35c0fa671fe7b6aac Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Fri, 6 Jul 2018 12:16:03 +0000 Subject: [PATCH 04/22] pyDKB: set default log level to WARN. --- Utils/Dataflow/pyDKB/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Utils/Dataflow/pyDKB/__init__.py b/Utils/Dataflow/pyDKB/__init__.py index 9370a27ac..e9d89225c 100644 --- a/Utils/Dataflow/pyDKB/__init__.py +++ b/Utils/Dataflow/pyDKB/__init__.py @@ -15,7 +15,7 @@ # Or, in case of multithreading: # ' (%(name)s) (%(threadName)s)' common.logging.configureRootLogger(msg_format=logMsgFormat, - level=common.logging.DEBUG) + level=common.logging.WARN) logger = common.logging.getLogger(__name__) except (SyntaxError, ImportError), err: raise ImportError("%s (in submodule 'common')" % err) From 83d3af1bcb01e84329c269ee8625cca12f7b9fdd Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Mon, 21 May 2018 08:26:39 +0000 Subject: [PATCH 05/22] pyDKB/*/AbstractStage: simplify 'argparse' import error handling. As we have `try/except` clause in the library init file, we need not check it whenever it is imported. 
--- Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py b/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py index 9815fd6e1..3e53df1fa 100644 --- a/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py +++ b/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py @@ -7,15 +7,10 @@ import ConfigParser from collections import defaultdict import textwrap +import argparse from . import logLevel -try: - import argparse -except ImportError, e: - sys.stderr.write("(ERROR) argparse package is not installed.\n") - raise e - class AbstractStage(object): """ From cfc98c39042325e6cfea5ce0354530f88fcb6d20 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Mon, 9 Jul 2018 09:14:14 +0000 Subject: [PATCH 06/22] pyDKB/logging: improve 'exc_info' usage in 'logger.traceback()'. As Logger methods can work with tuple value of exc_info, it sounds like a good idea to use this possibility through `traceback()` method as well. In previous version any passed value would be replaced with '1' and thus missed (with `sys.exc_info()` used instead). --- Utils/Dataflow/pyDKB/common/_logging.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Utils/Dataflow/pyDKB/common/_logging.py b/Utils/Dataflow/pyDKB/common/_logging.py index 355410f77..7e7bc626d 100644 --- a/Utils/Dataflow/pyDKB/common/_logging.py +++ b/Utils/Dataflow/pyDKB/common/_logging.py @@ -54,7 +54,8 @@ def traceback(self, *args, **kwargs): logger.traceback() """ if self.isEnabledFor(DEBUG): - kwargs['exc_info'] = 1 + if not (kwargs.get('exc_info')): + kwargs['exc_info'] = 1 self.debug('Traceback info:', *args, **kwargs) From 9706bf0298fa71a9c624bf2123dfe15428ed3c7b Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Mon, 9 Jul 2018 09:28:02 +0000 Subject: [PATCH 07/22] pyDKB/*/AbstracStage: fix/improve `set_error()` method description. 
--- Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py b/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py index 3e53df1fa..ba7cd4a37 100644 --- a/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py +++ b/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py @@ -199,7 +199,7 @@ def print_usage(self, fd=sys.stderr): self.__parser.print_usage(fd) def set_error(self, err_type, err_val, err_trace): - """ Set object `_err` variable from the last error info. """ + """ Set `_error` attribute from the passed error info. """ self._error = {'etype': err_type, 'exception': err_val, 'trace': err_trace} From c159e92a7e04b66bb07cca4f61b0a877a8bfcabd Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Mon, 9 Jul 2018 09:29:32 +0000 Subject: [PATCH 08/22] pyDKB/*/stage: embrace new logging functionality. Changes: * log messages are now marked with module name (not class name); * datetime in log messages; * inheriting class should rewrite `logger` property at init (to have proper module info in log messages). --- .../pyDKB/dataflow/stage/AbstractStage.py | 29 +++++-------------- .../pyDKB/dataflow/stage/ProcessorStage.py | 10 ++++--- .../Dataflow/pyDKB/dataflow/stage/__init__.py | 1 - 3 files changed, 14 insertions(+), 26 deletions(-) diff --git a/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py b/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py index ba7cd4a37..2a299ceda 100644 --- a/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py +++ b/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py @@ -9,7 +9,7 @@ import textwrap import argparse -from . 
import logLevel +from pyDKB.common import logging class AbstractStage(object): @@ -28,6 +28,8 @@ class AbstractStage(object): CONFIG """ + logger = logging.getLogger(__name__) + def __init__(self, description="DKB Dataflow stage"): """ Initialize the stage @@ -65,23 +67,9 @@ def __init__(self, description="DKB Dataflow stage"): self._error = None - def log(self, message, level=logLevel.INFO): + def log(self, message, level=logging.INFO): """ Output log message with given log level. """ - if not logLevel.hasMember(level): - self.log("Unknown log level: %s" % level, logLevel.WARN) - level = logLevel.INFO - if type(message) == list: - lines = message - else: - lines = message.splitlines() - if lines: - out_message = "(%s) (%s) %s" % (logLevel.memberName(level), - self.__class__.__name__, - lines[0]) - for l in lines[1:]: - out_message += "\n(==) %s" % l - out_message += "\n" - sys.stderr.write(out_message) + self.logger.log(level, message) def defaultArguments(self): """ Config argument parser with parameters common for all stages. """ @@ -132,7 +120,7 @@ def parse_args(self, args): if self.ARGS.eom is None: self.ARGS.eom = '\n' elif self.ARGS.eom == '': - self.log("Empty EOM marker specified!", logLevel.WARN) + self.log("Empty EOM marker specified!", logging.WARN) else: try: self.ARGS.eom = self.ARGS.eom.decode('string_escape') @@ -213,13 +201,12 @@ def output_error(self, message=None, exc_info=None): if not message and exc_info: message = str(exc_info[1]) if message: - self.log(message, logLevel.ERROR) + self.log(message, logging.ERROR) if exc_info: if exc_info[0] == KeyboardInterrupt: self.log("Interrupted by user.") else: - trace = traceback.format_exception(*exc_info) - self.log(''.join(trace), logLevel.DEBUG) + self.logger.traceback(exc_info=exc_info) def stop(self): """ Stop running processes and output error information. 
""" diff --git a/Utils/Dataflow/pyDKB/dataflow/stage/ProcessorStage.py b/Utils/Dataflow/pyDKB/dataflow/stage/ProcessorStage.py index bdd10c2e4..9c4086ce0 100644 --- a/Utils/Dataflow/pyDKB/dataflow/stage/ProcessorStage.py +++ b/Utils/Dataflow/pyDKB/dataflow/stage/ProcessorStage.py @@ -44,10 +44,10 @@ from . import AbstractStage from . import messageType -from . import logLevel from pyDKB.dataflow import DataflowException from pyDKB.common import hdfs from pyDKB.common import custom_readline +from pyDKB.common import logging from pyDKB.dataflow import communication from pyDKB.dataflow.communication import stream from pyDKB.dataflow.communication import consumer @@ -72,6 +72,8 @@ class ProcessorStage(AbstractStage): __stoppable """ + logger = logging.getLogger(__name__) + __input_message_type = None __output_message_type = None @@ -204,7 +206,7 @@ def get_out_stream(self): try: fd = self.__output.next() except DataflowException, err: - self.log(str(err), logLevel.ERROR) + self.log(str(err), logging.ERROR) raise DataflowException("Failed to configure output stream.") if not self._out_stream: self._out_stream = stream.StreamBuilder(fd, vars(self.ARGS)) \ @@ -257,13 +259,13 @@ def stop(self): p.close() except AttributeError, e: self.log("Close method is not defined for %s." % p, - logLevel.WARN) + logging.WARN) except Exception, e: failures.append((p, sys.exc_info())) if failures: for f in failures: self.log("Failed to stop %s: %s" % (f[0], f[1][1]), - logLevel.ERROR) + logging.ERROR) self.output_error(exc_info=f[1]) @staticmethod diff --git a/Utils/Dataflow/pyDKB/dataflow/stage/__init__.py b/Utils/Dataflow/pyDKB/dataflow/stage/__init__.py index 611ef0cd5..5c5c799f6 100644 --- a/Utils/Dataflow/pyDKB/dataflow/stage/__init__.py +++ b/Utils/Dataflow/pyDKB/dataflow/stage/__init__.py @@ -3,7 +3,6 @@ """ from .. import messageType -from .. 
import logLevel from AbstractStage import AbstractStage from ProcessorStage import ProcessorStage From b2a8a8851552c8bc4f162ce981e384b965a3276f Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Mon, 9 Jul 2018 10:29:41 +0000 Subject: [PATCH 09/22] pyDKB/*/communication: embrace new logging functionality (consumer). --- .../communication/consumer/Consumer.py | 23 +++++-------------- .../communication/consumer/FileConsumer.py | 7 ++++-- .../communication/consumer/HDFSConsumer.py | 5 +++- .../communication/consumer/StreamConsumer.py | 5 +++- .../communication/consumer/__init__.py | 5 +++- 5 files changed, 23 insertions(+), 22 deletions(-) diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/consumer/Consumer.py b/Utils/Dataflow/pyDKB/dataflow/communication/consumer/Consumer.py index 5abd3dcee..84c3bec21 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/consumer/Consumer.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/consumer/Consumer.py @@ -6,12 +6,13 @@ from collections import defaultdict from . import messageType -from . import logLevel from . import DataflowException from .. import Message from ..stream import StreamBuilder +from pyDKB.common import logging + class ConsumerException(DataflowException): """ Dataflow Consumer exception. """ @@ -21,6 +22,8 @@ class ConsumerException(DataflowException): class Consumer(object): """ Data consumer implementation. """ + logger = logging.getLogger(__name__) + config = None message_type = None @@ -32,23 +35,9 @@ def __init__(self, config={}): self.config = config self.reconfigure() - def log(self, message, level=logLevel.INFO): + def log(self, message, level=logging.INFO): """ Output log message with given log level. 
""" - if not logLevel.hasMember(level): - self.log("Unknown log level: %s" % level, logLevel.WARN) - level = logLevel.INFO - if type(message) == list: - lines = message - else: - lines = message.splitlines() - if lines: - out_message = "(%s) (%s) %s" % (logLevel.memberName(level), - self.__class__.__name__, - lines[0]) - for l in lines[1:]: - out_message += "\n(==) %s" % l - out_message += "\n" - sys.stderr.write(out_message) + self.logger.log(level, message) def __iter__(self): """ Initialize iteration. """ diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/consumer/FileConsumer.py b/Utils/Dataflow/pyDKB/dataflow/communication/consumer/FileConsumer.py index d014a9679..919205d1b 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/consumer/FileConsumer.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/consumer/FileConsumer.py @@ -15,13 +15,16 @@ import Consumer from . import DataflowException -from . import logLevel from .. import Message +from pyDKB.common import logging + class FileConsumer(Consumer.Consumer): """ Data consumer implementation for HDFS data source. """ + logger = logging.getLogger(__name__) + # Current file current_file = None @@ -93,7 +96,7 @@ def _filenames(self): files = self._filenames_from_dir(self.config['input_dir']) else: self.log("No input files configured; reading filenames from" - " STDIN.", logLevel.WARN) + " STDIN.", logging.WARN) files = self._filenames_from_stdin() return files diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/consumer/HDFSConsumer.py b/Utils/Dataflow/pyDKB/dataflow/communication/consumer/HDFSConsumer.py index f5e049d48..cb45fa490 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/consumer/HDFSConsumer.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/consumer/HDFSConsumer.py @@ -7,14 +7,17 @@ import FileConsumer import Consumer from . import DataflowException + from pyDKB.common import hdfs from pyDKB.common import HDFSException -from . 
import logLevel +from pyDKB.common import logging class HDFSConsumer(FileConsumer.FileConsumer): """ Data consumer implementation for HDFS data source. """ + logger = logging.getLogger(__name__) + # Override def reconfigure(self, config={}): """ Configure HDFS Consumer according to the config parameters. """ diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/consumer/StreamConsumer.py b/Utils/Dataflow/pyDKB/dataflow/communication/consumer/StreamConsumer.py index 752f576c2..c927021bf 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/consumer/StreamConsumer.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/consumer/StreamConsumer.py @@ -12,12 +12,15 @@ import Consumer from . import DataflowException -from . import logLevel + +from pyDKB.common import logging class StreamConsumer(Consumer.Consumer): """ Data consumer implementation for Stream data source. """ + logger = logging.getLogger(__name__) + fd = None # Override diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/consumer/__init__.py b/Utils/Dataflow/pyDKB/dataflow/communication/consumer/__init__.py index 4a19d219f..aa79c3851 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/consumer/__init__.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/consumer/__init__.py @@ -3,7 +3,6 @@ """ from .. import messageType -from .. import logLevel from .. import DataflowException from Consumer import Consumer @@ -11,12 +10,16 @@ from HDFSConsumer import HDFSConsumer from StreamConsumer import StreamConsumer +from pyDKB.common import logging + __all__ = ['ConsumerBuilder'] class ConsumerBuilder(object): """ Constructor for Consumer instance. """ + logger = logging.getLogger(__name__) + consumerClass = None def __init__(self, config={}): From 1d94478555426c614b9fe4f4d9e030a572abfbcc Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Mon, 9 Jul 2018 10:34:17 +0000 Subject: [PATCH 10/22] pyDKB/*/communication: embrace new logging functionality (producer). 
--- .../communication/producer/FileProducer.py | 7 ++++-- .../communication/producer/HDFSProducer.py | 5 ++++- .../communication/producer/Producer.py | 22 +++++-------------- .../communication/producer/StreamProducer.py | 5 ++++- .../communication/producer/__init__.py | 5 ++++- 5 files changed, 23 insertions(+), 21 deletions(-) diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/producer/FileProducer.py b/Utils/Dataflow/pyDKB/dataflow/communication/producer/FileProducer.py index c82d049c1..16a262b0f 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/producer/FileProducer.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/producer/FileProducer.py @@ -13,12 +13,15 @@ import time from Producer import Producer, ProducerException -from . import logLevel + +from pyDKB.common import logging class FileProducer(Producer): """ Data producer implementation for local file data dest. """ + logger = logging.getLogger(__name__) + _dir = None _default_dir = None current_file = None @@ -119,7 +122,7 @@ def ensure_dir(self): os.makedirs(path, 0770) except OSError, err: self.log("Failed to create output directory\n" - "Error message: %s\n" % err, logLevel.ERROR) + "Error message: %s\n" % err, logging.ERROR) raise ProducerException return path diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/producer/HDFSProducer.py b/Utils/Dataflow/pyDKB/dataflow/communication/producer/HDFSProducer.py index f0e7085e2..388d61214 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/producer/HDFSProducer.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/producer/HDFSProducer.py @@ -13,13 +13,16 @@ from Producer import ProducerException from FileProducer import FileProducer -from . import logLevel + from pyDKB.common import hdfs +from pyDKB.common import logging class HDFSProducer(FileProducer): """ Data producer implementation for HDFS data dest. """ + logger = logging.getLogger(__name__) + def config_dir(self, config={}): """ Configure output directory. 
""" if not config: diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/producer/Producer.py b/Utils/Dataflow/pyDKB/dataflow/communication/producer/Producer.py index bd1927a62..3bf951726 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/producer/Producer.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/producer/Producer.py @@ -5,12 +5,13 @@ import sys from . import messageType -from . import logLevel from . import DataflowException from .. import Message from ..stream import StreamBuilder +from pyDKB.common import logging + class ProducerException(DataflowException): """ Dataflow Producer exception. """ @@ -20,6 +21,8 @@ class ProducerException(DataflowException): class Producer(object): """ Data producer implementation. """ + logger = logging.getLogger(__name__) + config = None message_type = None @@ -31,22 +34,9 @@ def __init__(self, config={}): self.config = config self.reconfigure() - def log(self, message, level=logLevel.INFO): + def log(self, message, level=logging.INFO): """ Output log message with given log level. """ - if not logLevel.hasMember(level): - self.log("Unknown log level: %s" % level, logLevel.WARN) - level = logLevel.INFO - if type(message) == list: - lines = message - else: - lines = message.splitlines() - if lines: - out_message = "(%s) (%s) %s" % (logLevel.memberName(level), - self.__class__.__name__, lines[0]) - for l in lines[1:]: - out_message += "\n(==) %s" % l - out_message += "\n" - sys.stderr.write(out_message) + self.logger.log(level, message) def reconfigure(self, config={}): """ (Re)initialize producer with stage config arguments. 
""" diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/producer/StreamProducer.py b/Utils/Dataflow/pyDKB/dataflow/communication/producer/StreamProducer.py index dfb02baf5..e889a5c45 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/producer/StreamProducer.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/producer/StreamProducer.py @@ -11,12 +11,15 @@ from Producer import Producer from . import DataflowException -from . import logLevel + +from pyDKB.common import logging class StreamProducer(Producer): """ Data producer implementation for Stream data dest. """ + logger = logging.getLogger(__name__) + fd = None # Override diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/producer/__init__.py b/Utils/Dataflow/pyDKB/dataflow/communication/producer/__init__.py index ae404d333..a7d212063 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/producer/__init__.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/producer/__init__.py @@ -3,7 +3,6 @@ """ from .. import messageType -from .. import logLevel from .. import DataflowException from Producer import Producer @@ -11,12 +10,16 @@ from HDFSProducer import HDFSProducer from StreamProducer import StreamProducer +from pyDKB.common import logging + __all__ = ['ProducerBuilder'] class ProducerBuilder(object): """ Constructor for Producer instance. """ + logger = logging.getLogger(__name__) + producerClass = None message_type = None src_info = None From 38f421b959b3c30280639f078b31f9f139480c9d Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Mon, 9 Jul 2018 10:36:46 +0000 Subject: [PATCH 11/22] pyDKB/*/communication: embrace new logging functionality (stream). 
--- .../communication/stream/InputStream.py | 7 ++++-- .../communication/stream/OutputStream.py | 5 +++- .../dataflow/communication/stream/Stream.py | 23 +++++-------------- .../dataflow/communication/stream/__init__.py | 5 +++- 4 files changed, 19 insertions(+), 21 deletions(-) diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/stream/InputStream.py b/Utils/Dataflow/pyDKB/dataflow/communication/stream/InputStream.py index f6e4ee0bb..4912f199a 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/stream/InputStream.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/stream/InputStream.py @@ -4,14 +4,17 @@ from Stream import Stream from . import messageType -from . import logLevel from . import Message + from pyDKB.common import custom_readline +from pyDKB.common import logging class InputStream(Stream): """ Implementation of the input stream. """ + logger = logging.getLogger(__name__) + __iterator = None def __iter__(self): @@ -58,7 +61,7 @@ def parse_message(self, message): "Cause: %s\n" "Original message: '%s'" % (messageClass.typeName(), err, message), - logLevel.WARN) + logging.WARN) return False def get_message(self): diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/stream/OutputStream.py b/Utils/Dataflow/pyDKB/dataflow/communication/stream/OutputStream.py index 8411a5a39..03ab8ec21 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/stream/OutputStream.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/stream/OutputStream.py @@ -4,14 +4,17 @@ from Stream import Stream from . import messageType -from . import logLevel from . import Message + from pyDKB.common import custom_readline +from pyDKB.common import logging class OutputStream(Stream): """ Implementation of the output stream. 
""" + logger = logging.getLogger(__name__) + msg_buffer = [] def configure(self, config={}): diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/stream/Stream.py b/Utils/Dataflow/pyDKB/dataflow/communication/stream/Stream.py index 4ca501104..e6b94e19c 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/stream/Stream.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/stream/Stream.py @@ -5,13 +5,16 @@ import sys from . import messageType -from . import logLevel from . import Message +from pyDKB.common import logging + class Stream(object): """ Abstract class for input/output streams. """ + logger = logging.getLogger(__name__) + message_type = None _fd = None @@ -20,23 +23,9 @@ def __init__(self, fd=None, config={}): self.reset(fd) self.configure(config) - def log(self, message, level=logLevel.INFO): + def log(self, message, level=logging.INFO): """ Output log message with given log level. """ - if not logLevel.hasMember(level): - self.log("Unknown log level: %s" % level, logLevel.WARN) - level = logLevel.INFO - if type(message) == list: - lines = message - else: - lines = message.splitlines() - if lines: - out_message = "(%s) (%s) %s" % (logLevel.memberName(level), - self.__class__.__name__, - lines[0]) - for l in lines[1:]: - out_message += "\n(==) %s" % l - out_message += "\n" - sys.stderr.write(out_message) + self.logger.log(level, message) def configure(self, config): """ Stream configuration. """ diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/stream/__init__.py b/Utils/Dataflow/pyDKB/dataflow/communication/stream/__init__.py index f648d3184..2ef5ef1ce 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/stream/__init__.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/stream/__init__.py @@ -4,7 +4,6 @@ from .. import messageType from .. import codeType -from .. import logLevel from .. import DataflowException from .. 
import Message from InputStream import InputStream @@ -13,6 +12,8 @@ from InputStream import InputStream from OutputStream import OutputStream +from pyDKB.common import logging + __all__ = ['StreamBuilder', 'StreamException', 'Stream', 'InputStream', 'OutputStream'] @@ -25,6 +26,8 @@ class StreamException(DataflowException): class StreamBuilder(object): """ Constructor for Stream object. """ + logger = logging.getLogger(__name__) + message_type = None streamClass = None From b8d393d818ea12b02c9735c556ce7210000b0698 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Mon, 9 Jul 2018 10:40:27 +0000 Subject: [PATCH 12/22] pyDKB/*/communication: embrace new logging functionality (messages). As `logLevel` is no longer used, it is removed from the library. --- .../Dataflow/pyDKB/dataflow/communication/__init__.py | 1 - .../Dataflow/pyDKB/dataflow/communication/messages.py | 11 ++++++++--- Utils/Dataflow/pyDKB/dataflow/types.py | 3 +-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/__init__.py b/Utils/Dataflow/pyDKB/dataflow/communication/__init__.py index 014963cdc..ef2908394 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/__init__.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/__init__.py @@ -4,7 +4,6 @@ from .. import messageType from .. import codeType -from .. import logLevel from .. import DataflowException from messages import Message diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/messages.py b/Utils/Dataflow/pyDKB/dataflow/communication/messages.py index 860a94709..20ea0bf1f 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/messages.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/messages.py @@ -7,9 +7,13 @@ from . import messageType from . 
import codeType +from pyDKB.common import logging + import json import sys +logger = logging.getLogger(__name__) + __message_class = {} @@ -35,9 +39,8 @@ def Message(msg_type): raise ValueError("Message type must be a member of messageType") cls = __message_class.get(msg_type) if not cls: - sys.stderr.write( - "(WARN) Message class for type %s is not implemented. " - "Using AbstractMessage instead.") + logger.warn("Message class for type %s is not implemented. " + "Using AbstractMessage instead.") cls = AbstractMessage return cls @@ -46,6 +49,8 @@ def Message(msg_type): class AbstractMessage(object): """ Abstract message """ + logger = logging.getLogger("%s.AbstractMessage" % __name__) + msg_type = None native_types = [] diff --git a/Utils/Dataflow/pyDKB/dataflow/types.py b/Utils/Dataflow/pyDKB/dataflow/types.py index ecc1f790a..b21e0db9a 100644 --- a/Utils/Dataflow/pyDKB/dataflow/types.py +++ b/Utils/Dataflow/pyDKB/dataflow/types.py @@ -4,9 +4,8 @@ from ..common import Type -__all__ = ["dataType", "messageType", "codeType", "logLevel"] +__all__ = ["dataType", "messageType", "codeType"] dataType = Type("DOCUMENT", "AUTHOR", "DATASET") messageType = Type("STRING", "JSON", "TTL") codeType = Type("STRING") -logLevel = Type("TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL") From 9a0e504da6fac5713e6cc9f6fd8045387f9b2d25 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Mon, 9 Jul 2018 10:42:14 +0000 Subject: [PATCH 13/22] pyDKB/*/communication: fix log message (missed value for `'%s'`). 
--- Utils/Dataflow/pyDKB/dataflow/communication/messages.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/messages.py b/Utils/Dataflow/pyDKB/dataflow/communication/messages.py index 20ea0bf1f..e27b80a1e 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/messages.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/messages.py @@ -40,7 +40,8 @@ def Message(msg_type): cls = __message_class.get(msg_type) if not cls: logger.warn("Message class for type %s is not implemented. " - "Using AbstractMessage instead.") + "Using AbstractMessage instead." + % messageType.memberName(msg_type)) cls = AbstractMessage return cls From a5c118fb719cb49d7e49e551649ba08214879ef6 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Mon, 9 Jul 2018 10:47:01 +0000 Subject: [PATCH 14/22] kafka: fix indentation in ExternalProcessorLogger. --- .../kiae/dkb/kafka/common/external/ExternalProcessLogger.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Utils/Dataflow/000_kafka/ru/kiae/dkb/kafka/common/external/ExternalProcessLogger.java b/Utils/Dataflow/000_kafka/ru/kiae/dkb/kafka/common/external/ExternalProcessLogger.java index 009a15819..2f948a3ec 100644 --- a/Utils/Dataflow/000_kafka/ru/kiae/dkb/kafka/common/external/ExternalProcessLogger.java +++ b/Utils/Dataflow/000_kafka/ru/kiae/dkb/kafka/common/external/ExternalProcessLogger.java @@ -27,9 +27,9 @@ public class ExternalProcessLogger implements Runnable { private BufferedReader STDERR; + private Pattern lmt_p = Pattern.compile("\\(?(TRACE|DEBUG|INFO|" + + "WARN(?:ING)?|ERROR)\\)?"); - private Pattern lmt_p = Pattern.compile("\\(?(TRACE|DEBUG|INFO|" - + "WARN(?:ING)?|ERROR)\\)?"); public ExternalProcessLogger(Process process, String command) { this.process = process; From e0d1269878a03de692a332121e1c098080a0316d Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Mon, 9 Jul 2018 10:48:51 +0000 Subject: [PATCH 15/22] kafka: add functionality to 
handle `(==)` in ExternalProcessor logs. --- .../dkb/kafka/common/external/ExternalProcessLogger.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Utils/Dataflow/000_kafka/ru/kiae/dkb/kafka/common/external/ExternalProcessLogger.java b/Utils/Dataflow/000_kafka/ru/kiae/dkb/kafka/common/external/ExternalProcessLogger.java index 2f948a3ec..a6a792464 100644 --- a/Utils/Dataflow/000_kafka/ru/kiae/dkb/kafka/common/external/ExternalProcessLogger.java +++ b/Utils/Dataflow/000_kafka/ru/kiae/dkb/kafka/common/external/ExternalProcessLogger.java @@ -28,8 +28,9 @@ public class ExternalProcessLogger implements Runnable { private BufferedReader STDERR; private Pattern lmt_p = Pattern.compile("\\(?(TRACE|DEBUG|INFO|" - + "WARN(?:ING)?|ERROR)\\)?"); + + "WARN(?:ING)?|ERROR|==)\\)?"); + private String prev_type = ""; public ExternalProcessLogger(Process process, String command) { this.process = process; @@ -55,6 +56,9 @@ private void external_log(String line) { line = line.replaceFirst("\\(?" + type + "\\)?", ""); } line = "(" + this.command + ")" + line; + if (type == "==") { + type = prev_type; + } switch (type) { case "TRACE": log.trace(line); @@ -75,6 +79,6 @@ private void external_log(String line) { default: log.trace(line); } - + prev_type = type; } } From cc8ca3d6b2d00407d724ad8b3303b481a169dc14 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Mon, 9 Jul 2018 10:52:12 +0000 Subject: [PATCH 16/22] kafka: remove excessive timestamp from external process log messages. 
--- .../kiae/dkb/kafka/common/external/ExternalProcessLogger.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Utils/Dataflow/000_kafka/ru/kiae/dkb/kafka/common/external/ExternalProcessLogger.java b/Utils/Dataflow/000_kafka/ru/kiae/dkb/kafka/common/external/ExternalProcessLogger.java index a6a792464..bdd119e1c 100644 --- a/Utils/Dataflow/000_kafka/ru/kiae/dkb/kafka/common/external/ExternalProcessLogger.java +++ b/Utils/Dataflow/000_kafka/ru/kiae/dkb/kafka/common/external/ExternalProcessLogger.java @@ -53,7 +53,7 @@ private void external_log(String line) { String type = "TRACE"; if (m.lookingAt()) { type = m.group(1); - line = line.replaceFirst("\\(?" + type + "\\)?", ""); + line = line.replaceFirst("^(.*)?\\(?" + type + "\\)?", ""); } line = "(" + this.command + ")" + line; if (type == "==") { From b64700a2ad884131fe2f72a622a85b241bf2106a Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Mon, 9 Jul 2018 14:58:54 +0300 Subject: [PATCH 17/22] pyDKB/logging: Python 2.6, logging.__version__ < 0.5.x.x. In Python 2.6 available by default version of `logging` module is 0.4.x.x. In this version: * `Logger` and `Formatter` are old-style classes, so `super()` doesn't work (so `object` is added as a second parent class to our classes, to make them new-style)[1]; * no `Manager.setLoggerClass()` (so we have to set logger class for the whole module)[2]; * no `Handler.set|get_name()` (so we have to set/get `_name` attribute manually); * no `Logger.getChild()` (so for new logger we just directly ask `manager` of our `root` logger); [1] As original class was old-style, we can not set manually `__class__` to a new-style class, we have to re-implement `RootLogger` functionality from `logging` module and create logger hierarchy from scratch (not as a sub-hierarchy under `logging.root`). In fact, it feels a little better than creating sub-hierarchy and isolate it from the `logging.root` (except that, again, we have to re-implement the `RootLogger` class).
[2] As we use module-wide settings, any application which uses pyDKB, importing `logging` module, has same settings -- and if it changes default Logger class, it can affect pyDKB. However as loggers are initialized right when the `pyDKB` is imported, their behaviour will be left unchanged, and it is good. --- Utils/Dataflow/pyDKB/common/_logging.py | 52 +++++++++++++++++-------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/Utils/Dataflow/pyDKB/common/_logging.py b/Utils/Dataflow/pyDKB/common/_logging.py index 7e7bc626d..ef79a02a4 100644 --- a/Utils/Dataflow/pyDKB/common/_logging.py +++ b/Utils/Dataflow/pyDKB/common/_logging.py @@ -8,6 +8,8 @@ from logging import CRITICAL, FATAL, ERROR, WARNING, WARN, INFO, DEBUG, NOTSET import sys +__logging_version = int(logging.__version__.split('.')[1]) + # --------------------------------------------------- # Module variables # --------------------------------------------------- @@ -28,7 +30,7 @@ # ------------------------------------------- -class Logger(logging.Logger): +class Logger(logging.Logger, object): """ Logger implementation, aware of 'TRACE' log level. New methods: @@ -59,7 +61,14 @@ def traceback(self, *args, **kwargs): self.debug('Traceback info:', *args, **kwargs) -class MultilineFormatter(logging.Formatter): +class RootLogger(Logger): + """ Same as Logger, but must have `Level` and be the only one. """ + def __init__(self, level, name='root'): + """ Initialize new root logger. """ + Logger.__init__(self, name, level) + + +class MultilineFormatter(logging.Formatter, object): """ Formatter for multiline messages.
Every extra line (directly in the message body, or the traceback) @@ -175,16 +184,16 @@ def initRootLogger(**kwargs): name = kwargs.get('name') if not name: name = __name__.split('.')[0] - root = logging.getLogger(name) - # Cast new root to our custom class - root.__class__ = Logger - # Separate new 'root' from logging.root (to avoid possible interfere - # to/from any other usage of logging module): - # 1) prevent log record propagation to parent Handlers, if any; - root.propagate = 0 - # 2) create new manager (object, responsible for new loggers creation); + # Create new root logger + root = RootLogger(WARNING, name) + # Set Logger class 'root' to the new root + Logger.root = root + # Create new manager (object, responsible for new loggers creation) manager = logging.Manager(root) - manager.setLoggerClass(Logger) + if __logging_version < 5: + logging.setLoggerClass(Logger) + else: + manager.setLoggerClass(Logger) Logger.manager = manager root.manager = Logger.manager # 3) reset root logger for our class. @@ -204,7 +213,10 @@ def initRootLogger(**kwargs): hdlr_name = "stream_%s" % stream.fileno() hdlr = logging.StreamHandler(stream) - hdlr.set_name(hdlr_name) + if __logging_version < 5: + hdlr._name = hdlr_name + else: + hdlr.set_name(hdlr_name) fs = kwargs.get('msg_format', logging.BASIC_FORMAT) dfs = kwargs.get('datefmt', None) @@ -252,13 +264,21 @@ def configureRootLogger(**kwargs): else: hdlr = old_hdlr - if not hdlr.get_name(): - hdlr.set_name(hdlr_name) + if __logging_version < 5: + if not getattr(hdlr, '_name', None): + hdlr._name = hdlr_name + else: + if not hdlr.get_name(): + hdlr.set_name(hdlr_name) # Remove old handler or go on with it instead of the newly created one # (if it is configured just the same way). 
if old_hdlr != hdlr: - if old_hdlr.get_name() != hdlr.get_name(): + if __logging_version < 5 \ + and (getattr(old_hdlr, '_name', None) + != getattr(hdlr, '_name', '')) \ + or __logging_version >= 5 \ + and old_hdlr.get_name() != hdlr.get_name(): old_hdlr.close() root.removeHandler(old_hdlr) root.addHandler(hdlr) @@ -296,4 +316,4 @@ def getLogger(name): root = _rootLogger if name == root.name: return root - return root.getChild(name) + return root.manager.getLogger(name) From 2fa44737207612058c76c0fd0b902a99ab905200 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Tue, 10 Jul 2018 12:58:25 +0300 Subject: [PATCH 18/22] pyDKB/logging: cleanup `*args` from `traceback()` method. `*args` are used in other log methods to expand some user-defined format sequences in the message string. As it supposed to be no message in `traceback()`, `*args` are also excessive. --- Utils/Dataflow/pyDKB/common/_logging.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Utils/Dataflow/pyDKB/common/_logging.py b/Utils/Dataflow/pyDKB/common/_logging.py index ef79a02a4..1324322ca 100644 --- a/Utils/Dataflow/pyDKB/common/_logging.py +++ b/Utils/Dataflow/pyDKB/common/_logging.py @@ -50,7 +50,7 @@ def trace(self, msg, *args, **kwargs): if self.isEnabledFor(TRACE): self._log(TRACE, msg, args, **kwargs) - def traceback(self, *args, **kwargs): + def traceback(self, **kwargs): """ Log traceback without additionat messages with severity 'DEBUG'. logger.traceback() @@ -58,7 +58,7 @@ def traceback(self, *args, **kwargs): if self.isEnabledFor(DEBUG): if not (kwargs.get('exc_info')): kwargs['exc_info'] = 1 - self.debug('Traceback info:', *args, **kwargs) + self.debug('Traceback info:', **kwargs) class RootLogger(Logger): From 0d5f5b94a2cf18db7530e7bb04734ab0ae4b816c Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Tue, 10 Jul 2018 16:06:04 +0300 Subject: [PATCH 19/22] pyDKB/logging: change format suffix handling. 
Instead of determining the suffix every now and then from the format, we can store it in object attribute variable (`_suffix`) and reuse wherever it is needed. As `Formatter` does not support format change on the fly, it is safe enough (suffix won't last longer than the format it was taken from). --- Utils/Dataflow/pyDKB/common/_logging.py | 31 ++++++++++++++++++------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/Utils/Dataflow/pyDKB/common/_logging.py b/Utils/Dataflow/pyDKB/common/_logging.py index 1324322ca..1e2da59ca 100644 --- a/Utils/Dataflow/pyDKB/common/_logging.py +++ b/Utils/Dataflow/pyDKB/common/_logging.py @@ -78,14 +78,29 @@ class MultilineFormatter(logging.Formatter, object): '%(message)s' part. """ - def getSuffix(self): - """ Return format suffix (everything that goes after msg body). """ - suffix = '' - fmt = self._fmt + _suffix = None + + def __init__(self, *args): + """ Split format string into message format and suffix. """ + new_args = list(args) + if len(args): + fmt = args[0] + new_args[0], self._suffix = self.splitFormat(fmt) + super(MultilineFormatter, self).__init__(*new_args) + + def splitFormat(self, fmt): + """ Split format string into msg format and suffix. + + Suffix is everything that goes after the message body. + """ + format, suffix = ('', '') splitted = fmt.split("%(message)s") if len(splitted) > 1: + format = "%(message)s".join(splitted[:-1]) + "%(message)s" suffix = splitted[-1] - return suffix + else: + format = fmt + return format, suffix def formatExtra(self, lines, suffix=None, prefix=" (==) ", align=False): """ Format extra lines of the log message (traceback, ...). @@ -95,7 +110,7 @@ def formatExtra(self, lines, suffix=None, prefix=" (==) ", align=False): log messages).
""" if suffix is None: - suffix = self.getSuffix() + suffix = self._suffix if isinstance(lines, list) and len(lines): max_len = len(max(lines, key=len)) if align: @@ -134,10 +149,10 @@ def format(self, record): record.msg = msg formatted = super(MultilineFormatter, self).format(record) lines = formatted.splitlines() - msg = lines[0] if lines else '' + msg = (lines[0] + self._suffix) if lines else '' if len(lines) > 1: extra += '\n' - # Need to expand suffixes of extra lines (missed parent`s + # Need to expand suffixes (as they are added after parent`s # 'format()' operation). result = (msg + extra + '\n'.join(lines[1:])) % record.__dict__ return result From c7df13288e83c31ef8f8527c6599301de3cd6df1 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Tue, 10 Jul 2018 16:15:52 +0300 Subject: [PATCH 20/22] pyDKB/logging: simplify extra line formatting. Instead of determining suffix every now and again, we can use string formatting and extend every line content to the length of the longest line. Then we simply add suffix to the end of this content. 
--- Utils/Dataflow/pyDKB/common/_logging.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/Utils/Dataflow/pyDKB/common/_logging.py b/Utils/Dataflow/pyDKB/common/_logging.py index 1e2da59ca..9adeb695b 100644 --- a/Utils/Dataflow/pyDKB/common/_logging.py +++ b/Utils/Dataflow/pyDKB/common/_logging.py @@ -114,16 +114,12 @@ def formatExtra(self, lines, suffix=None, prefix=" (==) ", align=False): if isinstance(lines, list) and len(lines): max_len = len(max(lines, key=len)) if align: - suff = ' ' * (max_len - len(lines[0])) + suffix + line_fmt = "%%(line)-%ds" % max_len else: - suff = suffix - extra = prefix + lines[0] + suff + line_fmt = "%(line)s" + extra = prefix + line_fmt % {'line': lines[0]} + suffix for line in lines[1:]: - if align: - suff = ' ' * (max_len - len(line)) + suffix - else: - suff = suffix - extra += "\n" + prefix + line + suff + extra += "\n" + prefix + line_fmt % {'line': line} + suffix else: extra = "" return extra From 890a41da6b42b9f544ee2dc32a499875cb4e199e Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Tue, 10 Jul 2018 16:26:51 +0300 Subject: [PATCH 21/22] pyDKB/logging: improve extra line formatting function. We do not need to align lines when there's no suffix at all. 
--- Utils/Dataflow/pyDKB/common/_logging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Utils/Dataflow/pyDKB/common/_logging.py b/Utils/Dataflow/pyDKB/common/_logging.py index 9adeb695b..8abb372a5 100644 --- a/Utils/Dataflow/pyDKB/common/_logging.py +++ b/Utils/Dataflow/pyDKB/common/_logging.py @@ -113,7 +113,7 @@ def formatExtra(self, lines, suffix=None, prefix=" (==) ", align=False): suffix = self._suffix if isinstance(lines, list) and len(lines): max_len = len(max(lines, key=len)) - if align: + if suffix and align: line_fmt = "%%(line)-%ds" % max_len else: line_fmt = "%(line)s" From 4880b651c8f5f0c4dcb7af624ef6a696d15857ef Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Tue, 10 Jul 2018 16:51:13 +0300 Subject: [PATCH 22/22] pyDKB/logging: fix issue with multiline log message with arguments... ...and simplify logic of multiline formatting. Previously multiline message with arguments would fail with error: ``` >>> pyDKB.logger.error("%(line1)s\n%(line2)s", {"line1": "First line", ... "line2": "Second line"}) Traceback (most recent call last): File "/usr/lib/python2.6/site-packages/logging/__init__.py", line 723, in emit msg = self.format(record) File "/usr/lib/python2.6/site-packages/logging/__init__.py", line 609, in format return fmt.format(record) File "/home/mgolosova/dkb/Utils/Dataflow/pyDKB/common/_logging.py", line 162, in format result = (msg + extra + '\n'.join(lines[1:])) % record.__dict__ KeyError: 'line2' ``` Now the whole message goes to the `logger.Formatter.format()` function, thus message with arguments works perfectly fine. Then we format the suffix (as it is definitely the same for every line), and then `formatExtra()` just adds prefixes for extra lines and suffixes to all lines. It also saves from specific `formatException()` method, called from `logger.Formatter.format()`, as we do not need to think about prefixes and suffixes before the message is fully formatted and appended with traceback info.
It also affects alignment: now suffix is aligned for all lines, not only traceback ones. --- Utils/Dataflow/pyDKB/common/_logging.py | 36 +++++++++---------------- 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/Utils/Dataflow/pyDKB/common/_logging.py b/Utils/Dataflow/pyDKB/common/_logging.py index 8abb372a5..240fce093 100644 --- a/Utils/Dataflow/pyDKB/common/_logging.py +++ b/Utils/Dataflow/pyDKB/common/_logging.py @@ -102,55 +102,43 @@ def splitFormat(self, fmt): format = fmt return format, suffix - def formatExtra(self, lines, suffix=None, prefix=" (==) ", align=False): + def formatSuffix(self, record): + """ Return formatted suffix. """ + return self._suffix % record.__dict__ + + def formatExtra(self, lines, prefix=" (==) ", suffix='', align=False): """ Format extra lines of the log message (traceback, ...). Parameter 'align' shows whether the suffix should be aligned to the right (by the longest line), or to the left (as for normal log messages). """ - if suffix is None: - suffix = self._suffix if isinstance(lines, list) and len(lines): max_len = len(max(lines, key=len)) + # Need to add prefix len (in case that the longest line is + # among those that will be prefixed). + max_len += len(prefix) if suffix and align: line_fmt = "%%(line)-%ds" % max_len else: line_fmt = "%(line)s" - extra = prefix + line_fmt % {'line': lines[0]} + suffix + extra = line_fmt % {'line': lines[0]} + suffix for line in lines[1:]: - extra += "\n" + prefix + line_fmt % {'line': line} + suffix + extra += "\n" + line_fmt % {'line': prefix + line} + suffix else: extra = "" return extra - def formatException(self, ei): - """ Format traceback as extra lines. """ - s = super(MultilineFormatter, self).formatException(ei) - lines = s.splitlines() - exc_text = self.formatExtra(lines, align=True) - return exc_text - def format(self, record): """ Format multiline message. Second and further lines from initial message are formatted 'extra' lines. 
""" - lines = record.msg.splitlines() - msg = lines[0] if lines else '' - extra = self.formatExtra(lines[1:]) - if extra and extra[:1] != '\n': - extra = '\n' + extra - record.msg = msg formatted = super(MultilineFormatter, self).format(record) lines = formatted.splitlines() - msg = (lines[0] + self._suffix) if lines else '' - if len(lines) > 1: - extra += '\n' - # Need to expand suffixes (as they are added after parent`s - # 'format()' operation). - result = (msg + extra + '\n'.join(lines[1:])) % record.__dict__ + suffix = self.formatSuffix(record) + result = self.formatExtra(lines, suffix=suffix, align=True) return result