A4 - pyIBA and Tensorflow: Machine Learning demo#

[1]:
# if pyIBA has been installed with pip3,
# the above 4 lines can be removed
import sys
from os.path import abspath
path_pyIBA = abspath('../../../../..')
sys.path.insert(0, path_pyIBA)

# import pyIBA
from pyIBA import IDF
from pyIBA.codes.NDF import run_ndf, read_str_file

IDFViewer = sys.path[0] + 'NDF_gui.py'

sys.path.insert(1, 'path_to_RBSpy/')
from RBSpy import rbsAux

from os import mkdir
from os.path import dirname

import numpy as np
import matplotlib.pyplot as plt

# %matplotlib notebook
%matplotlib inline

Create the IDF file#

[2]:
save_path = '../Example_a4/original_file.xml'
[3]:
idf_file = IDF(save_path)

Define molecules#

[4]:
molecules = {'nelements': '3',
             0: {'name': 'Co 0.45 Pt 0.55',
              'density': '',
              'concentration': ['0', '5000'],
              'depth': ['0','1']},
             1: {'name': 'Si 1 O 2',
              'density': '',
              'concentration': ['0','5000'],
              'depth': ['0','1']},
             2: {'name': 'Si',
              'density': '',
              'concentration': ['100','10000000'],
              'depth':[ '0', '1']}}

idf_file.set_elements(molecules)
[5]:
idf_file.get_elements()
[5]:
{'nelements': 3,
 0: {'name': 'Co 0.45 Pt 0.55',
  'density': '',
  'concentration': ['0', '5000'],
  'depth': ['0', '1']},
 1: {'name': 'Si 1 O 2',
  'density': '',
  'concentration': ['0', '5000'],
  'depth': ['0', '1']},
 2: {'name': 'Si',
  'density': '',
  'concentration': ['100', '10000000'],
  'depth': ['0', '1']}}

Define profile#

[6]:
profile = {'nlayers': '3',
         0: {'thickness': 1500, 'concentrations': [100.0, 0.0, 0.0]},
         1: {'thickness': 3000, 'concentrations': [0.0, 100.0, 0.0]},
         2: {'thickness': 5e6, 'concentrations': [0.0, 0.0, 100.0]},
         'names': ['Co 0.45 Pt 0.55', 'Si 1 O 2', 'Si']}

idf_file.set_profile(profile)

Set NDF run options#

[7]:
# options = ['fitmethod','channelcompreesion','convolute','distribution','smooth','normalisation']

idf_file.set_NDF_run_option('channelcompreesion', '0 - No compression')
idf_file.set_NDF_run_option('convolute', '1 - Convolute FWHM')
idf_file.set_NDF_run_option('distribution', '0 - Don\'t use isotropic distribution')
idf_file.set_NDF_run_option('smooth', '1 - Smooth data')
idf_file.set_NDF_run_option('normalisation', '1 - Normalise profile')
idf_file.set_NDF_run_option('fitmethod', '0 - Simulate one spectrum from ndf.prf, no fit')

Save file#

[8]:
idf_file.save_idf(save_path);

Open the file in IDFVIewer to check and add missing data#

[10]:
!python3 $IDFViewer $save_path
Opening file...
Path: ../Example_a4/
File: original_file.xml

Run NDF#

[ ]:
run_ndf(idf_file)
[ ]:
idf_file.set_spectra_result(spectra_id = 0)
channel, counts = idf_file.get_dataxy_fit()

# create the figure
fig, ax = rbsAux.create_figure(
    xlim = [0, 1050], ylim=[-50, 4500], title = '', fig_size=[6, 4], depth_scale = False)

ax.plot(channel, counts)

fig.tight_layout()

Run NDF simulations for multiple concentrations of Co\(_x\)Pt\(_{1-x}\)#

[14]:
def create_tree_folder(molecules):
    # organize new files with new folders and names
    path_dir = 'ML_data/' #idf_file.path_dir

    name_file = molecules[0]['name'].replace(' ', '_').replace('.','')
    new_dir = path_dir + name_file + '/'
    new_name = name_file + '.xml'

    try: mkdir(new_dir)
    except: pass

    return new_dir + new_name



concentrations = np.linspace(0, 1, 11)
list_idfs = []

for c in concentrations:
    molecules[0]['name'] = 'Co %0.2f Pt %0.2f'%(c, 1-c)
    profile['names'][0] = molecules[0]['name']

    idf_file.set_elements(molecules)
    idf_file.set_profile(profile)

    path_new_idf = create_tree_folder(molecules)


    idf_file.save_idf(path_new_idf)

    list_idfs.append(idf_file.copy())

for idf_new in list_idfs:
    print(idf_new.file_name)

#     print(molecules[0]['name'])
Co_000_Pt_100.xml
Co_010_Pt_090.xml
Co_020_Pt_080.xml
Co_030_Pt_070.xml
Co_040_Pt_060.xml
Co_050_Pt_050.xml
Co_060_Pt_040.xml
Co_070_Pt_030.xml
Co_080_Pt_020.xml
Co_090_Pt_010.xml
Co_100_Pt_000.xml
[ ]:
for idf in list_idfs:
    run_ndf(idf)
[ ]:
idf_file = IDF(save_path)
[ ]:
idf_file.append_simulation_entry(len(list_idfs))

fig, ax = rbsAux.create_figure(
    xlim = [0, 1050], ylim=[-50, 6500], title = '', fig_size=[6, 4], depth_scale = False)

for i,idf in enumerate(list_idfs):
    idf.set_spectra_result()
    idf.set_elements_result()
    channel, counts = idf.get_dataxy_fit()

    idf_file.set_spectrum_data_fit_result(channel, counts, simulation_id=i)

    ax.plot(channel, counts, label = idf.file_name)

ax.legend(frameon = False)
fig.tight_layout()
[20]:
idf_file.save_idf(save_path)
[20]:
<pyIBA.IDF.IDF at 0x7f4bb4ee3940>

Machine Learning#

[11]:
#load modules
from tensorflow import keras
from tensorflow.keras import layers
import datetime

#this part of the code is only needed to reset the learning session on the Jupyter Kernel.
import tensorflow as tf


tf.keras.backend.clear_session()

Define Neural Network#

[15]:
## NN deffinition
#del model
num_neurons = 32
input_len = len(list_idfs[0].get_dataxy_fit()[0])
output_len = 1

activation='tanh'

model = keras.Sequential()
# Add layers:
model.add(layers.GaussianNoise(0.0001, input_shape=(input_len,)))
model.add(layers.Dense(45,input_dim=input_len, activation=activation))
# model.add(layers.Dense(2, activation=activation))
model.add(layers.Dense(num_neurons, activation=activation))
model.add(layers.Dense(num_neurons, activation=activation))
model.add(layers.Dense(output_len, activation='relu'))

# Compile
model.compile(optimizer=keras.optimizers.Adam(0.0003),
              loss='mse',
              metrics=['mse'])

Organize inputs/outputs#

[16]:
inputs_real = []
for idf in list_idfs:
    channel, counts = idf.get_dataxy_fit()

    inputs_real.append(counts)

inputs_real = np.array(inputs_real)
outputs_real = np.array(concentrations)

print(inputs_real.shape)
print(outputs_real.shape)
(11, 1023)
(11,)

Create more spectra by adding random noise#

[17]:
samples_to_add = 5

inputs = []
outputs = []

for inp, outp in zip(inputs_real, outputs_real):
    for _ in range(samples_to_add):
        input_noise = inp + np.random.normal(0, 25, size=input_len)
        inputs.append(input_noise)

        outputs.append(outp)

inputs = np.array(inputs)
outputs= np.array(outputs)


#randomize inputs and outputs
index_random = np.random.permutation(len(outputs))
inputs = inputs[index_random,:]
outputs = outputs[index_random]
[18]:
fig, ax = rbsAux.create_figure(
    xlim = [0, 1050], ylim=[-50, 6500], title = '', fig_size=[6, 4], depth_scale = False)

for inp, outp in zip(inputs, outputs):
    ax.plot(channel, inp)

fig.tight_layout()
../../../_images/jupyter_notebooks_advanced_examples_Example_a4_ML_pyIBA_33_0.png

Train the network#

[19]:
%%time

log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

model.load_weights('ML_data/final_model_state')
# hist = model.fit(inputs, outputs, epochs=600, batch_size=11, callbacks=[tensorboard_callback])
CPU times: user 101 ms, sys: 1.67 ms, total: 103 ms
Wall time: 124 ms
[19]:
<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fb66ce3f0d0>
[24]:
try:
    plt.figure(figsize=(6,4))
    plt.plot(hist.history['mse'])
    plt.xlabel('Epoch')
    plt.ylabel('Mean Squared Error')
    model.summary()
except:
    pass
<Figure size 432x288 with 0 Axes>

Verify the training#

[21]:
idf_file = IDF(save_path)
concentrations_compare = [0.15, 0.65, 0.43, 0.81, 0.94]
list_idfs_compare = []

for c in concentrations_compare:
    molecules[0]['name'] = 'Co %0.2f Pt %0.2f'%(c, 1-c)
    profile['names'][0] = molecules[0]['name']

    idf_file.set_elements(molecules)
    idf_file.set_profile(profile)

    path_new_idf = create_tree_folder(molecules)


    idf_file.save_idf(path_new_idf)
    list_idfs_compare.append(idf_file.copy())

for idf in list_idfs_compare:
    run_ndf(idf)
Opening NDF...
wine /home/msequeira/Dropbox/CTN/Radiate/IDF_python/GUI/test/pyIBA/codes/NDF_11_MS/NDF.exe Co_015_Pt_085.xml 0 0 1 0 1 1
Opening NDF...
wine /home/msequeira/Dropbox/CTN/Radiate/IDF_python/GUI/test/pyIBA/codes/NDF_11_MS/NDF.exe Co_065_Pt_035.xml 0 0 1 0 1 1
Opening NDF...
wine /home/msequeira/Dropbox/CTN/Radiate/IDF_python/GUI/test/pyIBA/codes/NDF_11_MS/NDF.exe Co_043_Pt_057.xml 0 0 1 0 1 1
Opening NDF...
wine /home/msequeira/Dropbox/CTN/Radiate/IDF_python/GUI/test/pyIBA/codes/NDF_11_MS/NDF.exe Co_081_Pt_019.xml 0 0 1 0 1 1
Opening NDF...
wine /home/msequeira/Dropbox/CTN/Radiate/IDF_python/GUI/test/pyIBA/codes/NDF_11_MS/NDF.exe Co_094_Pt_006.xml 0 0 1 0 1 1
[22]:
for idf in list_idfs_compare:
    idf.set_spectra_result()

[23]:
print('True --> Predicted')

fig, ax = rbsAux.create_figure(
    xlim = [0, 1050], ylim=[-50, 6500], title = '', fig_size=[6, 4], depth_scale = False)

for idf, conc in zip(list_idfs_compare, concentrations_compare):
    channel, data = idf.get_dataxy_fit()
    prediction = model.predict(np.array([data]))

    label = r'%0.2f $\longrightarrow$ %0.3f (%0.2f%%)' %(conc, prediction[0][0], 100 - conc/prediction[0][0]*100)
    plt.plot(channel, data, label = label)

ax.legend(frameon=False)
fig.tight_layout()
True --> Predicted
../../../_images/jupyter_notebooks_advanced_examples_Example_a4_ML_pyIBA_40_1.png

Save model training weights#

[39]:
# model.save_weights('ML_data/final_model_state')