"""Pandas DataFrame manipulation."""
import re
import pandas as pd
from . import cleaning


def match_columns(trajectory, streams, keep_time=True):
    """Find columns of a dataframe that match regular expressions.

    Parameters
    ----------
    trajectory : DataFrame
        A collection of time series data

    streams : list of str
        Regular expressions that will be used to find matching streams in
        the columns of 'trajectory'. Patterns are anchored at the beginning
        of the column name (``re.match``). If None is given, we take all
        streams except 'Time'.

    keep_time : bool, optional (default: True)
        Keep the column with the name 'Time' if the dataframe has one

    Returns
    -------
    columns : list of str
        Columns that match given regular expressions (+ 'Time'). Columns are
        ordered first by given stream order and then by the dataframe's order
        of columns. Each column is listed only once; 'Time' is appended at
        the end unless one of the patterns already selected it.

    Raises
    ------
    ValueError
        If no column is selected.
    """
    if streams is None:
        patterns = None
        # 'Time' is handled separately below; a dataframe without a 'Time'
        # column is fine here.
        columns = [c for c in trajectory.columns if c != "Time"]
    else:
        patterns = list(streams)
        columns = []
        seen = set()  # constant-time duplicate check, 'columns' keeps order
        for stream_re in [re.compile(p) for p in patterns]:
            for c in trajectory.columns:
                if c not in seen and stream_re.match(c):
                    seen.add(c)
                    columns.append(c)

    if not columns:
        requested = "None" if patterns is None else ", ".join(patterns)
        raise ValueError(
            "No streams match the given patterns: %s.\n"
            "Available streams are: %s"
            % (requested, ", ".join(map(str, trajectory.columns))))

    # A pattern such as '.*' may already have selected 'Time'; do not list
    # it twice because that would duplicate the column when indexing.
    if keep_time and "Time" in trajectory and "Time" not in columns:
        columns.append("Time")

    return columns


def rename_stream_groups(trajectory, streams, inplace=False):
    """Rename a group of columns that matches a pattern.

    Parameters
    ----------
    trajectory : DataFrame
        A collection of time series data

    streams : dict mapping from str to str
        The key of the dict is a regular expression that will be used to
        find a group of matching streams in the columns of 'trajectory'
        (``re.search``). The value is the new name of the stream group. Note
        that the part that should be replaced has to be enclosed in brackets
        `( )`, i.e., it has to be the first capturing group of the pattern.
        Only the text captured by that group is replaced. Patterns are
        applied one after another in the order of the dict, each one to the
        column names produced by the previous ones.

    inplace : bool, optional (default: False)
        If True, the columns of 'trajectory' itself are renamed and the same
        object is returned. Otherwise a renamed copy is returned and
        'trajectory' is left untouched.

    Returns
    -------
    trajectory : DataFrame
        A collection of time series data with renamed columns
    """
    result = trajectory if inplace else trajectory.copy()
    for pattern, new_group_name in streams.items():
        regex = re.compile(pattern)
        renamed = {}
        # Match against the current names of 'result' so that copy mode and
        # in-place mode behave identically when several patterns are given.
        for col in result.columns:
            match = regex.search(col)
            # Skip columns where the pattern matched without its first group
            # taking part (e.g. an optional group).
            if match is None or match.group(1) is None:
                continue
            # Replace exactly the captured span. str.replace would also
            # touch other occurrences of the same text in the column name.
            start, end = match.span(1)
            renamed[col] = col[:start] + new_group_name + col[end:]
        if renamed:
            result.rename(columns=renamed, inplace=True)
    return result


def extract_segment(trajectory, streams, start_index, end_index,
                    keep_time=True):
    """Cut a range of rows out of the streams that match given patterns.

    Parameters
    ----------
    trajectory : DataFrame
        A collection of time series data

    streams : list of str
        Regular expressions that select the streams (columns) of
        'trajectory' that will be part of the segment

    start_index : int
        Position of the first row of the segment

    end_index : int
        Position at which the segment ends. Rows are selected with a
        positional slice, so the row at this position is not included.

    keep_time : bool, optional (default: True)
        Keep the column with the name 'Time'

    Returns
    -------
    segment : DataFrame
        Rows 'start_index' to 'end_index' of the selected streams
    """
    selected_columns = match_columns(trajectory, streams, keep_time)
    row_range = slice(start_index, end_index)
    selected_streams = trajectory[selected_columns]
    return selected_streams.iloc[row_range]


def extract_markers(trajectory, markers, keep_time=True):
    """Extract 3D marker streams (specific for Qualisys streams).

    Parameters
    ----------
    trajectory : DataFrame
        A collection of time series data

    markers : list of str
        Name of the Qualisys markers that will be used to find matching
        streams in the columns of 'trajectory'. We assume that each
        marker has three associated streams with ' X', ' Y', and ' Z' at
        the end of their names respectively. A column is selected when its
        name without the last two characters is one of the given markers.

    keep_time : bool, optional (default: True)
        Keep the column with the name 'Time' if the dataframe has one

    Returns
    -------
    trajectory : DataFrame
        A collection of time series data with only the given markers. If
        'Time' is kept, it is the first column; marker streams follow in
        the dataframe's order of columns.
    """
    columns = [c for c in trajectory.columns if c[:-2] in markers]
    # Only prepend 'Time' when it exists, like match_columns does; indexing
    # with a missing 'Time' label would raise a KeyError otherwise.
    if keep_time and "Time" in trajectory:
        columns = ["Time"] + columns
    return trajectory[columns]


def get_all_markers(trajectory):
    """List the names of all 3D markers (specific for Qualisys streams).

    Parameters
    ----------
    trajectory : DataFrame
        A collection of time series data

    Returns
    -------
    markers : list of str
        Name of the Qualisys markers in the dataframe's order of columns.
        A marker is detected through its ' X' stream: every column whose
        name ends with ' X' contributes its name without that suffix. We
        assume that the corresponding ' Y' and ' Z' streams exist as well.
    """
    x_suffix = " X"
    marker_names = []
    for column_name in trajectory.columns:
        if not column_name.endswith(x_suffix):
            continue
        marker_names.append(column_name[:-len(x_suffix)])
    return marker_names


def concat_trajectories(trajectories):
    """Stack several trajectories into a single dataframe.

    This might be useful to perform pandas operations on multiple
    trajectories at once.

    Parameters
    ----------
    trajectories : list of DataFrame
        A list of collections of time series data

    Returns
    -------
    trajectories : DataFrame
        Resulting concatenation of trajectories. The position of each
        trajectory in the given list is passed as its key to the
        concatenation, so the rows of trajectory 'i' can be found under
        the key 'i'.
    """
    n_trajectories = len(trajectories)
    trajectory_ids = range(n_trajectories)
    return pd.concat(trajectories, keys=trajectory_ids)


def merge_trajectories(trajectories, start_time="first", end_time="last"):
    """Merge multiple time series to one.

    Performs a full outer join on the 'Time' stream. Resulting NaN cells
    will be interpolated. The given trajectories are not modified.

    Parameters
    ----------
    trajectories : list of pandas.DataFrame
        Time series data, will be joined on 'Time' stream. The first and
        the last row of each trajectory are taken as its start and end
        time.

    start_time : str, optional (default: 'first')
        Start time will be either the first time step of all trajectories
        ('first') or the first time step of the trajectory that starts
        latest ('last').

    end_time : str, optional (default: 'last')
        End time will be either the last time step of all trajectories ('last')
        or the last time step of the trajectory that ends first ('first').

    Returns
    -------
    trajectory : pandas.DataFrame
        Merged time series

    Raises
    ------
    ValueError
        If 'start_time' or 'end_time' is neither 'first' nor 'last'.
    """
    # Reject invalid options before doing the (potentially expensive) merge.
    if start_time not in ("first", "last"):
        raise ValueError("Start time must be 'first' or 'last'.")
    if end_time not in ("first", "last"):
        raise ValueError("End time must be 'first' or 'last'.")

    df = trajectories[0]
    for trajectory in trajectories[1:]:
        df = pd.merge_ordered(
            left=df, right=trajectory, how="outer", left_on="Time",
            right_on="Time")

    start_times = [trajectory["Time"].iloc[0] for trajectory in trajectories]
    end_times = [trajectory["Time"].iloc[-1] for trajectory in trajectories]
    lower = min(start_times) if start_time == "first" else max(start_times)
    upper = min(end_times) if end_time == "first" else max(end_times)

    # Select the rows inside [lower, upper] with a boolean mask and copy the
    # result. With a single trajectory 'df' is still the caller's object, so
    # dropping rows in place would modify the input; dropping by index label
    # would also remove unrelated rows if labels are not unique.
    times = df["Time"]
    outside = (times < lower) | (times > upper)
    df = df.loc[~outside].copy()

    df = cleaning.interpolate_nan(df)
    return df
