# .github/workflows/cache_pst_data.yml
# Manually triggered workflow that downloads selected parts of the Enron
# (EDRM) data set and stores each part as a GitHub Actions artifact.
name: Cache PST Data

# Only runs when started by hand (Actions tab / API); never on push or PR.
on: workflow_dispatch

jobs:
  cache_enron_data:
    name: Cache Enron data
    runs-on: ubuntu-latest
    env:
      # Directory that is listed and uploaded below.
      # NOTE(review): presumably the download script writes here — confirm
      # against libratom/scripts/download_edrm_zipped_files.py.
      EDRM_DIR: /tmp/libratom/test_data/RevisedEDRMv1_Complete
    strategy:
      # Each part is an independent download; do not cancel the remaining
      # parts when a single one fails.
      fail-fast: false
      matrix:
        # One job per data-set part number passed to the script via `-n`.
        edrm-part: [1, 2, 3, 4, 12, 44, 129]
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          # Quoted so the version is never re-typed as a float (3.10 -> 3.1).
          python-version: "3.9"

      - name: Install libratom
        run: |
          python -m pip install --upgrade pip
          pip install -r ./requirements-dev.txt
          pip install -e ./

      - name: Download Enron data from the web
        run: python ./libratom/scripts/download_edrm_zipped_files.py -n ${{ matrix.edrm-part }}

      # Lists every downloaded file in the job log; `find` exits non-zero
      # (failing the job) if the directory does not exist.
      - name: Check local data
        run: find "$EDRM_DIR" -type f

      - name: Upload Enron data into github storage
        # v3 of this action is deprecated and no longer accepted on github.com.
        # v4 requires unique artifact names per run, which the matrix suffix
        # below already guarantees.
        uses: actions/upload-artifact@v4
        with:
          name: EDRM_part_${{ matrix.edrm-part }}
          path: ${{ env.EDRM_DIR }}
          # Fail instead of silently producing no artifact.
          if-no-files-found: error
          retention-days: 90