Commit f706632b authored by Christian Weymann's avatar Christian Weymann
Browse files

Create the workflow

parent d1407ec8
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"General purpose imports"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import multiscale as ms\n",
"import h5py\n",
"\n",
"import os\n",
"from glob import glob\n",
"\n",
"from IPython.display import clear_output"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we convert all the raw data found in `raw_data_path` (including its subfolders) to the `.hdf5` format and store the result in `data_path`. We make one `.hdf5` file per sample, assuming that every raw data filename starts with the sample name followed by an underscore (e.g. `m21003b_topo_MC_009.sxm` belongs to sample `m21003b`)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Walk the raw-data folder and derive, for every raw file, the sample it\n",
"# belongs to and the path of that sample's .hdf5 file. The conversion steps\n",
"# themselves are not implemented yet (see the placeholder comments below).\n",
"raw_data_path = os.path.join('data', 'to_convert')\n",
"data_path = 'data'\n",
"\n",
"# '**' has to be part of the pattern handed to glob (not a second argument);\n",
"# together with recursive=True it matches entries at any depth.\n",
"for fn in glob(os.path.join(raw_data_path, '**'), recursive=True):\n",
"    # the recursive pattern also yields directories; only files are raw data\n",
"    if not os.path.isfile(fn):\n",
"        continue\n",
"    sample_name = os.path.basename(fn).split('_')[0]\n",
"    #skip empty filenames\n",
"    if sample_name == '':\n",
"        continue\n",
"    #what to do when we can't convert the file at hand?\n",
"    sample_path = os.path.join(data_path, sample_name+'.hdf5')\n",
"    #check if the sample file exists and create it if not\n",
"    if not os.path.isfile(sample_path):\n",
"        #create sample file\n",
"        pass  # TODO: creation not implemented yet; placeholder keeps the cell runnable\n",
"    #check if sample thickness is given\n",
"    #check if the measurement is present in the sample file"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'text.txt'"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.path.basename('/foo/bar/text.txt').split('_')[0]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['data\\\\',\n",
" 'data\\\\Celine',\n",
" 'data\\\\Celine\\\\20201123 Zoom Meeting Ankit.pdf',\n",
" 'data\\\\Celine\\\\m20053',\n",
" 'data\\\\Celine\\\\m20053\\\\SSPFM53_01.ARDF',\n",
" 'data\\\\Celine\\\\m20053\\\\SSPFM53_01.hdf5',\n",
" 'data\\\\Celine\\\\m20053\\\\SSPFM_00.ARDF',\n",
" 'data\\\\Celine\\\\m20053\\\\SSPFM_00.hdf5',\n",
" 'data\\\\Celine\\\\m20053.zip',\n",
" 'data\\\\Celine\\\\m20054',\n",
" 'data\\\\Celine\\\\m20054\\\\SSPFM_00.ARDF',\n",
" 'data\\\\Celine\\\\m20054\\\\SSPFM_00.hdf5',\n",
" 'data\\\\Celine\\\\m20054.zip',\n",
" 'data\\\\Celine\\\\m20055',\n",
" 'data\\\\Celine\\\\m20055\\\\SSPFM55_00.ARDF',\n",
" 'data\\\\Celine\\\\m20055\\\\SSPFM55_00.hdf5',\n",
" 'data\\\\Celine\\\\m20055\\\\SSPFM55_01.ARDF',\n",
" 'data\\\\Celine\\\\m20055\\\\SSPFM55_01.hdf5',\n",
" 'data\\\\Celine\\\\m20055\\\\SSPFM_02.ARDF',\n",
" 'data\\\\Celine\\\\m20055\\\\SSPFM_02.hdf5',\n",
" 'data\\\\Celine\\\\m20055.zip',\n",
" 'data\\\\Celine\\\\m20056',\n",
" 'data\\\\Celine\\\\m20056\\\\SSPFM56_00.ARDF',\n",
" 'data\\\\Celine\\\\m20056\\\\SSPFM56_00.hdf5',\n",
" 'data\\\\Celine\\\\m20057',\n",
" 'data\\\\Celine\\\\m20057\\\\SSPFM57_00.ARDF',\n",
" 'data\\\\Celine\\\\m20057\\\\SSPFM57_00.hdf5',\n",
" 'data\\\\Celine\\\\m20058',\n",
" 'data\\\\Celine\\\\m20058\\\\SSPFM_00.ARDF',\n",
" 'data\\\\Celine\\\\m20058\\\\SSPFM_00.hdf5',\n",
" 'data\\\\Celine\\\\m20058\\\\SSPFM_01.ARDF',\n",
" 'data\\\\Celine\\\\m20058\\\\SSPFM_01.hdf5',\n",
" 'data\\\\Celine\\\\m20058.zip',\n",
" 'data\\\\Celine\\\\m20059',\n",
" 'data\\\\Celine\\\\m20059\\\\SSPFM59_00.ARDF',\n",
" 'data\\\\Celine\\\\m20059\\\\SSPFM59_00.hdf5',\n",
" 'data\\\\Celine\\\\m20059\\\\SSPFM59_01.ARDF',\n",
" 'data\\\\Celine\\\\m20059\\\\SSPFM59_01.hdf5',\n",
" 'data\\\\Celine\\\\m20059.zip',\n",
" 'data\\\\LoopsCeline-color.png',\n",
" 'data\\\\test.txt',\n",
" 'data\\\\to_convert',\n",
" 'data\\\\to_convert\\\\m21003b_topo_MC_009.sxm',\n",
" 'data\\\\to_convert\\\\m21004DSOJ4_topo_MC_005.sxm',\n",
" 'data\\\\to_convert\\\\m21004DSOJ4_topo_MC_008.sxm',\n",
" 'data\\\\to_convert\\\\m21005b_topo_MC_006.sxm',\n",
" 'data\\\\to_convert\\\\m21005b_topo_MC_009.sxm',\n",
" 'data\\\\to_convert\\\\m21006DSOI2_topo_MC_005.sxm',\n",
" 'data\\\\to_convert\\\\m21007b_topo_MC_003.sxm',\n",
" 'data\\\\to_convert\\\\old',\n",
" 'data\\\\to_convert\\\\old\\\\m20036_topo_MC_006.sxm',\n",
" 'data\\\\to_convert\\\\old\\\\m20036_topo_MC_007.sxm',\n",
" 'data\\\\to_convert\\\\old\\\\m20036_topo_MC_008.sxm',\n",
" 'data\\\\to_convert\\\\old\\\\m20037_topo_MC_003.sxm',\n",
" 'data\\\\to_convert\\\\old\\\\m20037_topo_MC_004.sxm',\n",
" 'data\\\\to_convert\\\\old\\\\m20038_topo_MC_006.sxm',\n",
" 'data\\\\to_convert\\\\old\\\\m20038_topo_MC_007.sxm',\n",
" 'data\\\\to_convert\\\\old\\\\m20038_topo_MC_010.sxm',\n",
" 'data\\\\to_convert\\\\old\\\\m20039_topo_MC_010.sxm',\n",
" 'data\\\\to_convert\\\\old\\\\m20039_topo_MC_011.sxm',\n",
" 'data\\\\to_convert\\\\old\\\\m20041_topo_MC_006.sxm',\n",
" 'data\\\\to_convert\\\\old\\\\m20041_topo_MC_007.sxm',\n",
" 'data\\\\to_convert\\\\old\\\\m21003DSOJ2_topo_MC_006.sxm',\n",
" 'data\\\\to_convert\\\\old\\\\m21003DSOJ2_topo_MC_009.sxm',\n",
" 'data\\\\to_convert\\\\old\\\\m21005DSOF2_topo_MC_005.sxm',\n",
" 'data\\\\to_convert\\\\old\\\\m21005DSOF2_topo_MC_008.sxm',\n",
" 'data\\\\to_convert\\\\old\\\\m21007DSOC3_topo_MC_005.sxm']"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"glob('data/**', recursive=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
%% Cell type:markdown id: tags:
General purpose imports
%% Cell type:code id: tags:
``` python
import numpy as np
import matplotlib.pyplot as plt
import multiscale as ms
import h5py
import os
from glob import glob
from IPython.display import clear_output
```
%% Cell type:markdown id: tags:
Next, we convert all the raw data found in `raw_data_path` (including its subfolders) to the `.hdf5` format and store the result in `data_path`. We make one `.hdf5` file per sample, assuming that every raw data filename starts with the sample name followed by an underscore (e.g. `m21003b_topo_MC_009.sxm` belongs to sample `m21003b`).
%% Cell type:code id: tags:
``` python
# Walk the raw-data folder and derive, for every raw file, the sample it
# belongs to and the path of that sample's .hdf5 file. The conversion steps
# themselves are not implemented yet (see the placeholder comments below).
raw_data_path = os.path.join('data', 'to_convert')
data_path = 'data'

# '**' has to be part of the pattern handed to glob (not a second argument);
# together with recursive=True it matches entries at any depth.
for fn in glob(os.path.join(raw_data_path, '**'), recursive=True):
    # the recursive pattern also yields directories; only files are raw data
    if not os.path.isfile(fn):
        continue
    sample_name = os.path.basename(fn).split('_')[0]
    #skip empty filenames
    if sample_name == '':
        continue
    #what to do when we can't convert the file at hand?
    sample_path = os.path.join(data_path, sample_name+'.hdf5')
    #check if the sample file exists and create it if not
    if not os.path.isfile(sample_path):
        #create sample file
        pass  # TODO: creation not implemented yet; placeholder keeps the cell runnable
    #check if sample thickness is given
    #check if the measurement is present in the sample file
```
%% Cell type:code id: tags:
``` python
# Quick check of the sample-name extraction: a filename without an underscore
# comes back whole, extension included (here 'text.txt').
os.path.basename('/foo/bar/text.txt').split('_')[0]
```
%%%% Output: execute_result
'text.txt'
%% Cell type:code id: tags:
``` python
# List everything below data/ recursively; the result contains directories
# as well as files.
glob('data/**', recursive=True)
```
%%%% Output: execute_result
['data\\',
'data\\Celine',
'data\\Celine\\20201123 Zoom Meeting Ankit.pdf',
'data\\Celine\\m20053',
'data\\Celine\\m20053\\SSPFM53_01.ARDF',
'data\\Celine\\m20053\\SSPFM53_01.hdf5',
'data\\Celine\\m20053\\SSPFM_00.ARDF',
'data\\Celine\\m20053\\SSPFM_00.hdf5',
'data\\Celine\\m20053.zip',
'data\\Celine\\m20054',
'data\\Celine\\m20054\\SSPFM_00.ARDF',
'data\\Celine\\m20054\\SSPFM_00.hdf5',
'data\\Celine\\m20054.zip',
'data\\Celine\\m20055',
'data\\Celine\\m20055\\SSPFM55_00.ARDF',
'data\\Celine\\m20055\\SSPFM55_00.hdf5',
'data\\Celine\\m20055\\SSPFM55_01.ARDF',
'data\\Celine\\m20055\\SSPFM55_01.hdf5',
'data\\Celine\\m20055\\SSPFM_02.ARDF',
'data\\Celine\\m20055\\SSPFM_02.hdf5',
'data\\Celine\\m20055.zip',
'data\\Celine\\m20056',
'data\\Celine\\m20056\\SSPFM56_00.ARDF',
'data\\Celine\\m20056\\SSPFM56_00.hdf5',
'data\\Celine\\m20057',
'data\\Celine\\m20057\\SSPFM57_00.ARDF',
'data\\Celine\\m20057\\SSPFM57_00.hdf5',
'data\\Celine\\m20058',
'data\\Celine\\m20058\\SSPFM_00.ARDF',
'data\\Celine\\m20058\\SSPFM_00.hdf5',
'data\\Celine\\m20058\\SSPFM_01.ARDF',
'data\\Celine\\m20058\\SSPFM_01.hdf5',
'data\\Celine\\m20058.zip',
'data\\Celine\\m20059',
'data\\Celine\\m20059\\SSPFM59_00.ARDF',
'data\\Celine\\m20059\\SSPFM59_00.hdf5',
'data\\Celine\\m20059\\SSPFM59_01.ARDF',
'data\\Celine\\m20059\\SSPFM59_01.hdf5',
'data\\Celine\\m20059.zip',
'data\\LoopsCeline-color.png',
'data\\test.txt',
'data\\to_convert',
'data\\to_convert\\m21003b_topo_MC_009.sxm',
'data\\to_convert\\m21004DSOJ4_topo_MC_005.sxm',
'data\\to_convert\\m21004DSOJ4_topo_MC_008.sxm',
'data\\to_convert\\m21005b_topo_MC_006.sxm',
'data\\to_convert\\m21005b_topo_MC_009.sxm',
'data\\to_convert\\m21006DSOI2_topo_MC_005.sxm',
'data\\to_convert\\m21007b_topo_MC_003.sxm',
'data\\to_convert\\old',
'data\\to_convert\\old\\m20036_topo_MC_006.sxm',
'data\\to_convert\\old\\m20036_topo_MC_007.sxm',
'data\\to_convert\\old\\m20036_topo_MC_008.sxm',
'data\\to_convert\\old\\m20037_topo_MC_003.sxm',
'data\\to_convert\\old\\m20037_topo_MC_004.sxm',
'data\\to_convert\\old\\m20038_topo_MC_006.sxm',
'data\\to_convert\\old\\m20038_topo_MC_007.sxm',
'data\\to_convert\\old\\m20038_topo_MC_010.sxm',
'data\\to_convert\\old\\m20039_topo_MC_010.sxm',
'data\\to_convert\\old\\m20039_topo_MC_011.sxm',
'data\\to_convert\\old\\m20041_topo_MC_006.sxm',
'data\\to_convert\\old\\m20041_topo_MC_007.sxm',
'data\\to_convert\\old\\m21003DSOJ2_topo_MC_006.sxm',
'data\\to_convert\\old\\m21003DSOJ2_topo_MC_009.sxm',
'data\\to_convert\\old\\m21005DSOF2_topo_MC_005.sxm',
'data\\to_convert\\old\\m21005DSOF2_topo_MC_008.sxm',
'data\\to_convert\\old\\m21007DSOC3_topo_MC_005.sxm']
%% Cell type:code id: tags:
``` python
```
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment