Top

pyMez.Code.Analysis.GeneralAnalysis module

GeneralAnalysis is a module containing classes and functions for general analytic tasks.

Examples

Requirements

Help

pyMez.Code.Analysis

Documentation Home | API Documentation Home | Examples Home | Index

#-----------------------------------------------------------------------------
# Name:        GeneralAnalysis
# Purpose:    To hold functions and classes for general analysis tasks
# Author:      Aric Sanders
# Created:     7/27/2018
# License:     MIT License
#-----------------------------------------------------------------------------
""" GeneralAnalysis is a module containing classes and functions for general analytic tasks.

 Examples
--------


Requirements
------------
+ [sys](https://docs.python.org/2/library/sys.html)
+ [os](https://docs.python.org/2/library/os.html)
+ [re](https://docs.python.org/2/library/re.html)
+ [types](https://docs.python.org/2/library/types.html)
+ [numpy](https://docs.scipy.org/doc/)
+ [scipy](https://docs.scipy.org/doc/)
+ [sympy](http://www.sympy.org/en/index.html)

Help
---------------
<a href="./index.html">`pyMez.Code.Analysis`</a>
<div>
<a href="../../../pyMez_Documentation.html">Documentation Home</a> |
<a href="../../index.html">API Documentation Home</a> |
<a href="../../../Examples/html/Examples_Home.html">Examples Home</a> |
<a href="../../../Reference_Index.html">Index</a>
</div>
"""
#-----------------------------------------------------------------------------
# Standard Imports
import os
import re
import datetime
import sys
import cmath
import math
#-----------------------------------------------------------------------------
# Third Party Imports
sys.path.append(os.path.join(os.path.dirname( __file__ ), '..','..'))
try:
    import numpy as np
except:
    np.ndarray='np.ndarray'
    print("Numpy was not imported")
    pass
try:
    import pandas
except:
    print("Pandas was not imported")
    pass
try:
    from scipy.stats.mstats import gmean
except:
    print("The function gmean from the package scipy.stats.mstats did not import correctly ")
try:
    from statsmodels.robust.scale import mad
except:
    print("The function mad from the package statsmodels.robust.scale did not import correctly ")
try:
    #Todo: this could lead to a cyclic dependency, it really should import only the models it analyzes
    #Todo: If analysis is to be in the top import, none of the models should rely on it
    #import pyMez.Code.DataHandlers.NISTModels
    from Code.DataHandlers.NISTModels import *
    from Code.DataHandlers.GeneralModels import *
    from Code.DataHandlers.Translations import *
    #from pyMez import *
except:
    print("The subpackage pyMez.Code.DataHandlers did not import properly,"
          "please check that it is on the python path and that unit tests passed")
    raise
try:
    import matplotlib.pyplot as plt
except:
    print("The module matplotlib was not found,"
          "please put it on the python path")
#-----------------------------------------------------------------------------
# Module Constants

#-----------------------------------------------------------------------------
# Module Functions
def independent_variable_model_collapse(model,independent_column_name="Frequency", **options):
    """Returns a model with a single set of independent variables. Default is to average values together
    but geometric mean, std, variance, rss, mad and median are options.
    Geometric means of odd number of negative values fails"""
    if isinstance(model,pandas.DataFrame):
        model_1 = DataFrame_to_AsciiDataTable(model)
    defaults = {"method": "mean"}
    # load other options from model
    for option, value in model.options.items():
        if not re.search('begin_line|end_line', option):
            defaults[option] = value
    for element in model.elements:
        if model.__dict__[element]:
            if re.search("meta", element, re.IGNORECASE):
                defaults["metadata"] = model.metadata.copy()
            else:
                defaults[element] = model.__dict__[element][:]
    # We need to preserve the frequency column some how
    collapse_options = {}
    for key, value in defaults.items():
        collapse_options[key] = value
    for key, value in options.items():
        collapse_options[key] = value
    unique_independent_variable_list = sorted(list(set(model[independent_column_name])))
    independent_variable_selector = model.column_names.index(independent_column_name)
    out_data = []
    for index, independent_variable in enumerate(unique_independent_variable_list):
        data_row = [x for x in model.data[:] if x[independent_variable_selector] == independent_variable]
        if re.search('mean|av', collapse_options["method"], re.IGNORECASE):
            new_row = np.mean(np.array(data_row), axis=0).tolist()
        elif re.search('median', collapse_options["method"], re.IGNORECASE):
            new_row = np.median(np.array(data_row), axis=0).tolist()
        elif re.search('geometric', collapse_options["method"], re.IGNORECASE):
            new_row = gmean(np.array(data_row), axis=0).tolist()
        elif re.search('st', collapse_options["method"], re.IGNORECASE):
            new_row = np.std(np.array(data_row), axis=0).tolist()
        elif re.search('var', collapse_options["method"], re.IGNORECASE):
            new_row = np.var(np.array(data_row), axis=0, dtype=np.float64).tolist()
        elif re.search('rms', collapse_options["method"], re.IGNORECASE):
            new_row = np.sqrt(np.mean(np.square(np.array(data_row)), axis=0, dtype=np.float64)).tolist()
        elif re.search('rss', collapse_options["method"], re.IGNORECASE):
            new_row = np.sqrt(np.sum(np.square(np.array(data_row)), axis=0, dtype=np.float64)).tolist()
        elif re.search('mad', collapse_options["method"], re.IGNORECASE):
            new_row = mad(np.array(data_row), axis=0).tolist()
        new_row[independent_variable_selector]=independent_variable
        out_data.append(new_row)

    collapse_options["data"] = out_data

    if collapse_options["specific_descriptor"]:
        collapse_options["specific_descriptor"] = collapse_options["method"] + "_" + \
                                                  collapse_options["specific_descriptor"]
    resulting_model = AsciiDataTable(None, **collapse_options)
    return resulting_model


def independent_variable_model_difference(model_1, model_2, independent_column_name="Frequency", **options):
    """Takes the difference of two models that both have frequency and a similar set of columns. Returns an object that is
    a list of [[independent_variable,column_1,..column_n],...] where columns are the same in the models. If  a particular subset of
    columns is desired use columns=[independent_variable,column_name_1,..column_name_n] models
    can be any subclass of AsciiDataTable, SNP, or
    pandas.DataFrame, if a column is a non-numeric type it drops it. """
    # Set up defaults and pass options
    defaults = {"columns": "all", "interpolate": False, "average": True}
    difference_options = {}
    for key, value in defaults.items():
        difference_options[key] = value
    for key, value in options.items():
        difference_options[key] = value

    # first check type, if it is a panadas data frame a little conversion is needed, else is for all other models
    if isinstance(model_1,pandas.DataFrame):
        model_1 = DataFrame_to_AsciiDataTable(model_1)
    if isinstance(model_2,pandas.DataFrame):
        model_2 = DataFrame_to_AsciiDataTable(model_2)
    # now start with a set of frequencies (unique values from both)
    independent_variable_set_1 = set(model_1[independent_column_name])
    independent_variable_set_2 = set(model_2[independent_column_name])
    model_2_independent_variable_selector = model_2.column_names.index(independent_column_name)
    column_names_set_1 = set(model_1.column_names)
    column_names_set_2 = set(model_2.column_names)

    # All points must be in the intersection to be used
    independent_variable_intersection = list(independent_variable_set_1.intersection(independent_variable_set_2))
    column_names_intersection = list(column_names_set_1.intersection(column_names_set_2))

    if not independent_variable_intersection:
        print(("The models do not have any {0} points in common".format(independent_column_name)))
        return None
    new_column_names = [independent_column_name]
    column_types = ['float']
    for column_index, column in enumerate(model_1.column_names):
        if column in column_names_intersection and column not in [independent_column_name]:
            new_column_names.append(column)
            column_types.append(model_1.options["column_types"][column_index])

    difference_data = []
    for row_index, independent_variable in enumerate(model_1[independent_column_name]):
        new_row = [independent_variable]
        if independent_variable in independent_variable_intersection:
            model_2_independent_variable_row = \
            filter(lambda x: x[model_2_independent_variable_selector] == independent_variable,
                   model_2.data)[0]

            for column_index, column in enumerate(model_1.column_names):
                if column in column_names_intersection and column not in [independent_column_name]:
                    model_2_column_selector = model_2.column_names.index(column)
                    if re.search('int|float',
                                 model_1.options["column_types"][column_index],
                                 re.IGNORECASE) and re.search('int|float',
                                                              model_2.options["column_types"][model_2_column_selector],
                                                              re.IGNORECASE):

                        new_row.append(
                            model_1.data[row_index][column_index] - model_2_independent_variable_row[
                                model_2_column_selector])

                        # Print("New Column Names are {0}".format(new_column_names))
                    elif difference_options["columns"] in ["all"]:
                        new_row.append(model_1.data[row_index][column_index])
            difference_data.append(new_row)
    difference_options["column_names"] = new_column_names
    # print("New Column Names are {0}".format(new_column_names))
    difference_options["data"] = difference_data
    difference_options["column_types"] = column_types
    # print column_types
    result = AsciiDataTable(None, **difference_options)
    return result
#-----------------------------------------------------------------------------
# Module Classes

#-----------------------------------------------------------------------------
# Module Scripts

#-----------------------------------------------------------------------------
# Module Runner
if __name__ == '__main__':
    pass
    

Functions

def independent_variable_model_collapse(

model, independent_column_name='Frequency', **options)

Returns a model with a single set of independent variables. Default is to average values together but geometric mean, std, variance, rss, mad and median are options. Geometric means of odd number of negative values fails

def independent_variable_model_collapse(model,independent_column_name="Frequency", **options):
    """Returns a model with a single set of independent variables. Default is to average values together
    but geometric mean, std, variance, rss, mad and median are options.
    Geometric means of odd number of negative values fails"""
    if isinstance(model,pandas.DataFrame):
        model_1 = DataFrame_to_AsciiDataTable(model)
    defaults = {"method": "mean"}
    # load other options from model
    for option, value in model.options.items():
        if not re.search('begin_line|end_line', option):
            defaults[option] = value
    for element in model.elements:
        if model.__dict__[element]:
            if re.search("meta", element, re.IGNORECASE):
                defaults["metadata"] = model.metadata.copy()
            else:
                defaults[element] = model.__dict__[element][:]
    # We need to preserve the frequency column some how
    collapse_options = {}
    for key, value in defaults.items():
        collapse_options[key] = value
    for key, value in options.items():
        collapse_options[key] = value
    unique_independent_variable_list = sorted(list(set(model[independent_column_name])))
    independent_variable_selector = model.column_names.index(independent_column_name)
    out_data = []
    for index, independent_variable in enumerate(unique_independent_variable_list):
        data_row = [x for x in model.data[:] if x[independent_variable_selector] == independent_variable]
        if re.search('mean|av', collapse_options["method"], re.IGNORECASE):
            new_row = np.mean(np.array(data_row), axis=0).tolist()
        elif re.search('median', collapse_options["method"], re.IGNORECASE):
            new_row = np.median(np.array(data_row), axis=0).tolist()
        elif re.search('geometric', collapse_options["method"], re.IGNORECASE):
            new_row = gmean(np.array(data_row), axis=0).tolist()
        elif re.search('st', collapse_options["method"], re.IGNORECASE):
            new_row = np.std(np.array(data_row), axis=0).tolist()
        elif re.search('var', collapse_options["method"], re.IGNORECASE):
            new_row = np.var(np.array(data_row), axis=0, dtype=np.float64).tolist()
        elif re.search('rms', collapse_options["method"], re.IGNORECASE):
            new_row = np.sqrt(np.mean(np.square(np.array(data_row)), axis=0, dtype=np.float64)).tolist()
        elif re.search('rss', collapse_options["method"], re.IGNORECASE):
            new_row = np.sqrt(np.sum(np.square(np.array(data_row)), axis=0, dtype=np.float64)).tolist()
        elif re.search('mad', collapse_options["method"], re.IGNORECASE):
            new_row = mad(np.array(data_row), axis=0).tolist()
        new_row[independent_variable_selector]=independent_variable
        out_data.append(new_row)

    collapse_options["data"] = out_data

    if collapse_options["specific_descriptor"]:
        collapse_options["specific_descriptor"] = collapse_options["method"] + "_" + \
                                                  collapse_options["specific_descriptor"]
    resulting_model = AsciiDataTable(None, **collapse_options)
    return resulting_model

def independent_variable_model_difference(

model_1, model_2, independent_column_name='Frequency', **options)

Takes the difference of two models that both have frequency and a similar set of columns. Returns an object that is a list of [[independent_variable,column_1,..column_n],...] where columns are the same in the models. If a particular subset of columns is desired use columns=[independent_variable,column_name_1,..column_name_n] models can be any subclass of AsciiDataTable, SNP, or pandas.DataFrame, if a column is a non-numeric type it drops it.

def independent_variable_model_difference(model_1, model_2, independent_column_name="Frequency", **options):
    """Takes the difference of two models that both have frequency and a similar set of columns. Returns an object that is
    a list of [[independent_variable,column_1,..column_n],...] where columns are the same in the models. If  a particular subset of
    columns is desired use columns=[independent_variable,column_name_1,..column_name_n] models
    can be any subclass of AsciiDataTable, SNP, or
    pandas.DataFrame, if a column is a non-numeric type it drops it. """
    # Set up defaults and pass options
    defaults = {"columns": "all", "interpolate": False, "average": True}
    difference_options = {}
    for key, value in defaults.items():
        difference_options[key] = value
    for key, value in options.items():
        difference_options[key] = value

    # first check type, if it is a panadas data frame a little conversion is needed, else is for all other models
    if isinstance(model_1,pandas.DataFrame):
        model_1 = DataFrame_to_AsciiDataTable(model_1)
    if isinstance(model_2,pandas.DataFrame):
        model_2 = DataFrame_to_AsciiDataTable(model_2)
    # now start with a set of frequencies (unique values from both)
    independent_variable_set_1 = set(model_1[independent_column_name])
    independent_variable_set_2 = set(model_2[independent_column_name])
    model_2_independent_variable_selector = model_2.column_names.index(independent_column_name)
    column_names_set_1 = set(model_1.column_names)
    column_names_set_2 = set(model_2.column_names)

    # All points must be in the intersection to be used
    independent_variable_intersection = list(independent_variable_set_1.intersection(independent_variable_set_2))
    column_names_intersection = list(column_names_set_1.intersection(column_names_set_2))

    if not independent_variable_intersection:
        print(("The models do not have any {0} points in common".format(independent_column_name)))
        return None
    new_column_names = [independent_column_name]
    column_types = ['float']
    for column_index, column in enumerate(model_1.column_names):
        if column in column_names_intersection and column not in [independent_column_name]:
            new_column_names.append(column)
            column_types.append(model_1.options["column_types"][column_index])

    difference_data = []
    for row_index, independent_variable in enumerate(model_1[independent_column_name]):
        new_row = [independent_variable]
        if independent_variable in independent_variable_intersection:
            model_2_independent_variable_row = \
            filter(lambda x: x[model_2_independent_variable_selector] == independent_variable,
                   model_2.data)[0]

            for column_index, column in enumerate(model_1.column_names):
                if column in column_names_intersection and column not in [independent_column_name]:
                    model_2_column_selector = model_2.column_names.index(column)
                    if re.search('int|float',
                                 model_1.options["column_types"][column_index],
                                 re.IGNORECASE) and re.search('int|float',
                                                              model_2.options["column_types"][model_2_column_selector],
                                                              re.IGNORECASE):

                        new_row.append(
                            model_1.data[row_index][column_index] - model_2_independent_variable_row[
                                model_2_column_selector])

                        # Print("New Column Names are {0}".format(new_column_names))
                    elif difference_options["columns"] in ["all"]:
                        new_row.append(model_1.data[row_index][column_index])
            difference_data.append(new_row)
    difference_options["column_names"] = new_column_names
    # print("New Column Names are {0}".format(new_column_names))
    difference_options["data"] = difference_data
    difference_options["column_types"] = column_types
    # print column_types
    result = AsciiDataTable(None, **difference_options)
    return result

Module variables

var COMBINE_S11_S22

var COMMENT_PATTERN

var CONVERT_S21

var DEFAULT_FILE_NAME

var DEFAULT_INSTRUMENT_SHEET_STYLE

var DEFAULT_INSTRUMENT_STATE_STYLE

var DEFAULT_LOG_STYLE

var DEFAULT_MEASUREMENT_STYLE

var DEFAULT_METADATA_STYLE

var DEFAULT_REGISTER_STYLE

var DEFAULT_STYLE

var DRIVER_FILE_EXTENSIONS

var DUT_COLUMN_NAMES

var EXIF_AVAILABLE

var EXTENSION_PATTERN

var FORMATS

var FREQUENCY_UNITS

var GENERAL_DESCRIPTORS

var GET_STATS_FIELDS

var HTML_TEMPLATE_DIRECTORY

var IMAGE_FILE_EXTENSIONS

var INKSCAPE_PATH

var INSTRUMENT_SHEETS

var METHOD_ALIASES

var MINIMUM_DB_ARG_VALUE

var MINIMUM_DB_VALUE

var NODE_TYPE_DICTIONARY

var NUMBER_MATCH_STRING

var ONE_PORT_COLUMN_NAMES

var OPTION_LINE_PATTERN

var OS_STAT_FIELDS

var PARAMETERS

var PDF_CONVERT

var PIL_AVAILABLE

var POWER_3TERM_COLUMN_DESCRIPTIONS

var POWER_3TERM_COLUMN_NAMES

var POWER_4TERM_COLUMN_DESCRIPTIONS

var POWER_4TERM_COLUMN_NAMES

var PYMEASURE_ROOT

var RESULTS_FILE_ONE_PORT_COLUMN_NAMES

var RESULTS_FILE_POWER_COLUMN_NAMES

var RESULTS_FILE_TWO_PORT_COLUMN_NAMES

var S1P_DB_COLUMN_NAMES

var S1P_MA_COLUMN_NAMES

var S1P_RI_COLUMN_NAMES

var S2P_COMPLEX_COLUMN_NAMES

var S2P_DB_COLUMN_DESCRIPTION

var S2P_DB_COLUMN_NAMES

var S2P_MA_COLUMN_DESCRIPTION

var S2P_MA_COLUMN_NAMES

var S2P_NOISE_PARAMETER_COLUMN_NAMES

var S2P_RI_COLUMN_DESCRIPTION

var S2P_RI_COLUMN_NAMES

var SMITHPLOT

var SOLUTION_VECTOR_COLUMN_NAMES

var StringTypes

var TESTS_DIRECTORY

var TOUCHSTONE_KEYWORDS

var TWELVE_TERM_ERROR_COLUMN_NAMES

var WINDOWS_COM

var WINDOWS_WRAPPER

var WKHTML_PATH

var XSLT_CAPABLE

var XSLT_REPOSITORY

var type_names