Source code for ego.tools.mv_cluster

# -*- coding: utf-8 -*-
# Copyright 2016-2018 Europa-Universität Flensburg,
# Flensburg University of Applied Sciences,
# Centre for Sustainable Energy Systems
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation; either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# File description
"""
This file contains all functions regarding the clustering of MV grids
"""
__copyright__ = ("Flensburg University of Applied Sciences, "
                 "Europa-Universität Flensburg, "
                 "Centre for Sustainable Energy Systems")
__license__ = "GNU Affero General Public License Version 3 (AGPL-3.0)"
__author__ = "wolf_bunke, maltesc"

# Import
#from __future__ import print_function
import os
import logging

if 'READTHEDOCS' not in os.environ:
    import pickle
    
    import pandas as pd
    
    from sklearn.cluster import KMeans
    import numpy as np
    
logger = logging.getLogger(__name__)

def analyze_attributes(ding0_files):
    """
    Calculates the attributes wind and solar capacity and farthest node
    for all files in ding0_files. Results are written as CSV files to the
    ding0_files directory.

    Parameters
    ----------
    ding0_files : :obj:`str`
        Path to ding0 files
    """
    from ding0.core.network.stations import LVStationDing0

    base_path = ding0_files

    not_found = []
    tccs = []  # Total Cumulative Capacity of Solar
    tccw = []  # Total Cumulative Capacity of Wind
    fnlvmv = []  # the Farthest Node in both networks (LV and MV)
    MV_id_list = []  # District id list

    for district_number in range(1, 4000):

        try:
            pickle_name = 'ding0_grids__{}.pkl'.format(district_number)
            nd = pickle.load(
                open(os.path.join(base_path, pickle_name), 'rb'))
            print('District no.', district_number, 'found!')
        except FileNotFoundError:
            not_found.append(district_number)
            continue

        MV_id = nd._mv_grid_districts[0].id_db

        mv_cum_solar_MV = 0  # Solar cumulative capacity in MV
        mv_cum_wind_MV = 0  # Wind cumulative capacity in MV

        # cumulative capacity of solar and wind in MV
        for geno in nd._mv_grid_districts[0].mv_grid.generators():
            if geno.type == 'solar':
                mv_cum_solar_MV += geno.capacity
            if geno.type == 'wind':
                mv_cum_wind_MV += geno.capacity

        lvg = 0
        mv_cum_solar_LV = 0
        mv_cum_wind_LV = 0

        # cumulative capacity of solar and wind in LV
        for lvgs in nd._mv_grid_districts[0].lv_load_areas():
            for lvgs1 in lvgs.lv_grid_districts():
                lvg += len(list(lvgs1.lv_grid.generators()))
                for deno in lvgs1.lv_grid.generators():
                    if deno.type == 'solar':
                        mv_cum_solar_LV += deno.capacity
                    if deno.type == 'wind':
                        mv_cum_wind_LV += deno.capacity

        # total solar cumulative capacity in LV and MV
        total_cum_solar = mv_cum_solar_MV + mv_cum_solar_LV
        # total wind cumulative capacity in LV and MV
        total_cum_wind = mv_cum_wind_MV + mv_cum_wind_LV

        # append to lists
        tccs.append(total_cum_solar)
        tccw.append(total_cum_wind)

        # the farthest node length from the MV substation
        tot_dist = []
        max_length = 0
        max_length_list = []
        max_of_max = 0

        # open circuit breakers (normal operation case)
        nd.control_circuit_breakers(mode='open')
        # set the root from which paths are measured
        root_mv = nd._mv_grid_districts[0].mv_grid.station()

        # 1st: from MV substation to LV station node
        # iteration through nodes
        for node2 in nd._mv_grid_districts[0].mv_grid._graph.nodes():
            # select only LV station nodes
            if isinstance(node2, LVStationDing0) \
                    and not node2.lv_load_area.is_aggregated:

                # distance from MV substation to LV station node
                length_from_MV_to_LV_station = nd._mv_grid_districts[
                    0].mv_grid.graph_path_length(
                        node_source=node2, node_target=root_mv) / 1000

                # iteration through LV load areas
                for lvgs in nd._mv_grid_districts[0].lv_load_areas():
                    for lvgs1 in lvgs.lv_grid_districts():
                        if lvgs1.lv_grid._station == node2:
                            root_lv = node2  # set a new root
                            for node1 in lvgs1.lv_grid._graph.nodes():

                                # distance from LV station to LV node
                                length_from_LV_station_to_LV_node = (
                                    lvgs1.lv_grid.graph_path_length(
                                        node_source=node1,
                                        node_target=root_lv) / 1000)

                                # total distance over both grids (MV and LV)
                                length_from_LV_node_to_MV_substation = (
                                    length_from_MV_to_LV_station
                                    + length_from_LV_station_to_LV_node)

                                # append the total distance to a list
                                tot_dist.append(
                                    length_from_LV_node_to_MV_substation)

                if tot_dist:
                    max_length = max(tot_dist)
                    # append the max length of each grid to a list
                    max_length_list.append(max_length)

        if max_length_list:
            # pick the max of max
            max_of_max = max(max_length_list)

        fnlvmv.append(max_of_max)  # append to a new list
        MV_id_list.append(MV_id)  # append the network id to a new list

    # export results to dataframes
    d = {'id': MV_id_list,
         'Solar_cumulative_capacity': tccs,
         'Wind_cumulative_capacity': tccw,
         'The_Farthest_node': fnlvmv}  # assign lists to columns

    # districts whose pickle files were not found
    are_not_found = {'District_files_that_are_not_found': not_found}

    df = pd.DataFrame(d)  # dataframe for results
    # dataframe for the ids of files that were not found
    df_are_not_found = pd.DataFrame(are_not_found)

    # export dataframes to CSV files
    df.to_csv(os.path.join(base_path, 'attributes.csv'), sep=',')
    df_are_not_found.to_csv(
        os.path.join(base_path, 'Not_found_grids.csv'), sep=',')
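
# Usage sketch for analyze_attributes (the path below is a placeholder, not
# an eGo default): one call scans a folder of ding0 pickles and writes
# 'attributes.csv' (columns: id, Solar_cumulative_capacity,
# Wind_cumulative_capacity, The_Farthest_node) plus 'Not_found_grids.csv'
# into that same folder, e.g.:
#
#     analyze_attributes('/path/to/ding0_pickles')
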
def cluster_mv_grids(no_grids, cluster_base):
    """
    Clusters the MV grids based on their attributes, for a given number
    of MV grids.

    Parameters
    ----------
    no_grids : int
        Desired number of clusters (of MV grids)
    cluster_base : :pandas:`pandas.DataFrame<dataframe>`
        Dataframe with one row per MV grid (indexed by grid id) and the
        clustering attributes as columns

    Returns
    -------
    :pandas:`pandas.DataFrame<dataframe>`
        Dataframe containing the clustered MV grids and their weightings
    """
    # normalize every attribute to per-unit values (maximum = 1)
    cluster_base_pu = pd.DataFrame()
    for attribute in cluster_base:
        attribute_max = cluster_base[attribute].max()
        cluster_base_pu[attribute] = cluster_base[attribute] / attribute_max

    # collect the grid ids and build the feature matrix
    id_ = []
    m = []
    for idx, row in cluster_base_pu.iterrows():
        id_.append(idx)
        f = []
        for attribute in row:
            f.append(attribute)
        m.append(f)
    X = np.array(m)

    logger.info(
        'Used Clustering Attributes: \n {}'.format(
            list(cluster_base.columns)))

    no_clusters = no_grids
    ran_state = 1808

    # start KMeans clustering
    kmeans = KMeans(n_clusters=no_clusters, random_state=ran_state)
    # return a label for each point
    cluster_labels = kmeans.fit_predict(X)
    # centers of the clusters
    centroids = kmeans.cluster_centers_

    id_clus_dist = {}

    # iterate through each point in the dataset array X
    for i in range(len(X)):
        clus = cluster_labels[i]  # the point's cluster id
        cent = centroids[clus]  # the cluster's center coordinates
        # Euclidean distance from the point to its cluster's center
        dist = np.linalg.norm(X[i] - cent)
        id_clus_dist.setdefault(clus, []).append({id_[i]: dist})

    cluster_df = pd.DataFrame(
        columns=[
            'no_of_points_per_cluster',
            'cluster_percentage',
            'the_selected_network_id',
            'represented_grids'])
    cluster_df.index.name = 'cluster_id'

    for key, value in id_clus_dist.items():
        no_points_clus = len(value)
        # percentage of points per cluster
        clus_perc = (no_points_clus / len(X)) * 100

        id_dist = {}
        for value_1 in value:
            id_dist.update(value_1)

        # the point closest to the center is the selected
        # (representative) network
        short_dist_net_id_dist = min(id_dist.items(), key=lambda x: x[1])

        cluster_df.loc[key] = [
            no_points_clus,
            round(clus_perc, 2),
            short_dist_net_id_dist[0],
            list(id_dist.keys())]

    return cluster_df
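
# A minimal end-to-end sketch, assuming 'attributes.csv' was produced by
# analyze_attributes() above; the path and the number of clusters below are
# placeholders, not eGo defaults.
if __name__ == '__main__':
    base_path = '/path/to/ding0_pickles'  # hypothetical directory
    attributes = pd.read_csv(
        os.path.join(base_path, 'attributes.csv'), index_col='id')
    # keep only the three clustering attributes (to_csv also wrote the
    # positional index as an unnamed column)
    cluster_base = attributes[['Solar_cumulative_capacity',
                               'Wind_cumulative_capacity',
                               'The_Farthest_node']]
    clusters = cluster_mv_grids(no_grids=5, cluster_base=cluster_base)
    print(clusters)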