| | import pandas as pd |
| |
|
# Category groupings handed to generateOneHot: a bare string gets its own
# indicator column, while a nested list shares a single indicator column.
enterpriseGroups = [
    'facialRecognition',
    ['safetySecurity', 'recruitment', 'biometricData'],
]
societyGroups = [
    ['policing', 'controlAccessToServices'],
]
dataTypeGroups = [
    ['dataTypePersonal', 'dataTypeSensistivePersonal'],
    ['dataTypeRestricted'],
]
capabilitiesGroups = [
    'decisionSupportSystems',
]

# Risk category names; riskVerticals lists 'Overall' first, then the
# governance categories, then the technical ones.
technicalRisks = [
    'Robustness',
    'Efficacy',
    'Privacy',
    'Bias',
    'Explainability',
]
governanceRisks = [
    'Financial',
    'Reputational',
    'Ethics',
    'Regulation',
]
riskVerticals = ['Overall', *governanceRisks, *technicalRisks]
| |
|
| |
|
def mergeCostColumns(home, commisioned, licensed):
    """Collapse three cost values into a single 0/1 'insignificant' flag.

    Args:
        home: First cost value to inspect.
        commisioned: Second cost value to inspect.
        licensed: Third cost value to inspect.

    Returns:
        1 if at least one argument equals the string 'insignificant',
        otherwise 0.
    """
    costs = (home, commisioned, licensed)
    return 1 if any(cost == 'insignificant' for cost in costs) else 0
| |
|
| |
|
def generateUniqueEntries(targetColumn):
    """Return the distinct comma-separated tokens found in a column.

    Args:
        targetColumn: Object exposing ``.values`` (e.g. a pandas Series)
            whose elements are strings; each one is split on ','.

    Returns:
        A list holding every distinct token once. The list is built from a
        set, so its order is not defined.
    """
    uniqueEntries = {
        entry
        for cell in targetColumn.values
        for entry in cell.split(',')
    }
    return list(uniqueEntries)
| |
|
| |
|
def generateOneHot(dataframe, targetColumn, groups):
    """Replace a comma-separated category column with 0/1 indicator columns.

    For every entry of ``groups`` one integer column is created and moved to
    the front of ``dataframe``. A row gets 1 when any member of the group
    appears as an exact token of that row's ``targetColumn`` value (split on
    ','), otherwise 0. After all groups are processed ``targetColumn`` itself
    is removed.

    Rows are matched by position, so the result is correct for any index,
    not only the default 0..n-1 range.

    Args:
        dataframe: pandas DataFrame, modified in place.
        targetColumn: Name of the column holding comma-separated strings.
            Every cell must be a string, because ``.split`` is called on it.
        groups: Iterable whose items are either a single category name
            (str) or an iterable of category names sharing one indicator
            column. Column names are ``targetColumn`` and the member names
            joined with '_'.

    Returns:
        None.
    """
    tokensPerRow = None

    for group in groups:
        # Split each cell once and reuse the tokens for every group.
        if tokensPerRow is None:
            tokensPerRow = [cell.split(',') for cell in dataframe[targetColumn].values]

        members = [group] if isinstance(group, str) else list(group)
        # An empty group keeps its historical (empty-string) column name.
        groupColumnName = '_'.join([targetColumn] + members) if members else ''

        flags = [
            int(any(member in tokens for member in members))
            for tokens in tokensPerRow
        ]
        # Sharing the frame's own index makes the assignment positional, so
        # no row can be mis-addressed or appended by label lookup.
        dataframe[groupColumnName] = pd.Series(flags, index=dataframe.index, dtype='int64')
        dataframe.insert(0, groupColumnName, dataframe.pop(groupColumnName))

    dataframe.pop(targetColumn)
| |
|
| |
|
def convertToBinaryColumn(dataframe, targetColumn, positiveGroup):
    """Overwrite a column with 1 where its value is in ``positiveGroup``, else 0.

    The column is replaced in one positional assignment, so the result is
    correct for any index (not only the default 0..n-1 range) and the column
    ends up with an integer dtype.

    Args:
        dataframe: pandas DataFrame, modified in place.
        targetColumn: Name of the column to convert.
        positiveGroup: Container of the values that map to 1; membership is
            tested with ``in``.

    Returns:
        None.
    """
    flags = [
        1 if value in positiveGroup else 0
        for value in dataframe[targetColumn].values
    ]
    # Reusing the frame's own index keeps the assignment positional instead
    # of addressing rows by label.
    dataframe[targetColumn] = pd.Series(flags, index=dataframe.index, dtype='int64')
| |
|
| |
|
def mainDataWrangling(data):
    """Turn the raw table into a numeric feature table plus risk targets.

    Steps, in order:
      1. Keep a fixed set of columns (by position) and collapse the three
         cost columns into one 0/1 'insignificant' flag.
      2. Promote row 0 to the header (keeping the existing names of the ten
         risk columns), then drop that row.
      3. Drop rows with a missing value in any of the ten risk columns and
         default a missing 'howEssentialHumanInTheLoop' to 'low'.
      4. Encode the categorical feature columns as numbers / indicators.
      5. Add '<risk>_binaryLow' and '<risk>_binaryHigh' target columns.

    NOTE(review): assumes the input keeps its survey layout: columns named
    'homeBuiltAmount', 'commisionedAmount' and 'licensedAmount', with row 0
    (index label 0) holding the machine-readable names of the remaining
    columns. Confirm against the loader.

    Args:
        data: pandas DataFrame with the raw table. It is not modified; all
            work happens on a copy of the selected columns.

    Returns:
        The wrangled pandas DataFrame.
    """
    columnsToKeep = [1, 4, 5, 6, 7, 8, 10, 22, 24, 34, 35, 36, 37, 39, 45, 48, 49, 50, 51, 52, 53, 54, 55, 56,
                     57]
    # Explicit copy: columns are added below, and writing into a bare
    # selection triggers pandas' SettingWithCopy machinery.
    data = data.iloc[:, columnsToKeep].copy()

    # Merge the three cost columns into one flag, then drop the columns at
    # positions 1-3 (presumably those same cost columns; verify).
    data['insignificant'] = data.apply(
        lambda x: mergeCostColumns(x['homeBuiltAmount'], x['commisionedAmount'], x['licensedAmount']), axis=1)
    data = data.drop(columns=data.columns[1:4])

    # Build the new header as a plain list instead of writing labels into
    # row 0, so no string is written into the integer 'insignificant' column.
    header = list(data.iloc[0, :].values)
    header[-1] = 'insignificant'
    header[0] = 'projectName'
    header[-11:-1] = list(data.columns[-11:-1])  # risk columns keep their names
    data.columns = header
    data = data.drop([0]).reset_index(drop=True)
    data.insert(0, 'insignificant', data.pop('insignificant'))

    # Rows without a value in every one of the last ten (risk) columns are
    # unusable as training targets.
    data = data.dropna(subset=data.columns[-10:].values).reset_index(drop=True)
    # Assign the result back: a chained inplace fillna works on a temporary
    # and does not update the frame under Copy-on-Write.
    data['howEssentialHumanInTheLoop'] = data['howEssentialHumanInTheLoop'].fillna('low')

    # Encode the feature columns.
    generateOneHot(data, 'enterpriseUseCases', enterpriseGroups)
    generateOneHot(data, 'soceityLevel', societyGroups)
    convertToBinaryColumn(data, 'externalParties', ['yes'])
    data['howWidelyDeployed'] = data['howWidelyDeployed'].map(
        {'controlledEnvironment': 0, 'local': 0.2, 'multipleJurisdictions': 0.5, 'global': 1})
    generateOneHot(data, 'dataType', dataTypeGroups)
    data['autonomy'] = data['autonomy'].map({'humanInTheLoop': 0, 'autonomous': 1})
    data['howEssentialHumanInTheLoop'] = data['howEssentialHumanInTheLoop'].map({'low': 0, 'medium': 0.5, 'high': 1})
    damageScale = {'none': 0, 'minor': 0.3, 'major': 0.7, 'critical': 1}
    data['damageCausedIfSubstantialFailure'] = data['damageCausedIfSubstantialFailure'].map(damageScale)
    data['damageCausedIfMarginalFailure'] = data['damageCausedIfMarginalFailure'].map(damageScale)
    generateOneHot(data, 'capabilities', capabilitiesGroups)
    data['selfAdapting'] = data['selfAdapting'].map({'no': 0, 'yesWhenUpdatedMade': 0.5, 'yesRealTime': 1})

    # Capture the ten risk columns once; all '_binaryLow' columns are
    # appended first, then all '_binaryHigh' columns.
    riskColumns = list(data.columns[-10:])
    for riskColumn in riskColumns:
        data[riskColumn + '_binaryLow'] = data[riskColumn].map({'Low': 1, 'Medium': 0, 'High': 0})
    for riskColumn in riskColumns:
        data[riskColumn + '_binaryHigh'] = data[riskColumn].map({'Low': 0, 'Medium': 0, 'High': 1})

    data.insert(0, 'projectName', data.pop('projectName'))
    data['insignificant'] = pd.to_numeric(data['insignificant'])
    data['externalParties'] = pd.to_numeric(data['externalParties'])

    return data