remap
Browse files- .gitignore +5 -1
- README.md +23 -3
- dpacman/data/README.md +24 -1
- dpacman/data/remap/analyze.py +46 -0
- dpacman/data/tfclust/analyze.py +77 -0
- dpacman/data/tfclust/combine.py +54 -0
- dpacman/data/tfclust/download.py +1 -1
- environment.yaml +1 -1
.gitignore
CHANGED
|
@@ -1,3 +1,7 @@
|
|
| 1 |
dpacman/data_files
|
| 2 |
dpacman/data/tfclust/*.log
|
| 3 |
-
dpacman/data/tfclust/temp.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
dpacman/data_files
|
| 2 |
dpacman/data/tfclust/*.log
|
| 3 |
+
dpacman/data/tfclust/temp.py
|
| 4 |
+
bigBedToBed
|
| 5 |
+
dpacman/data/remap/*.log
|
| 6 |
+
dpacman/data/remap/temp.py
|
| 7 |
+
dpacman/data/tfclust/figures
|
README.md
CHANGED
|
@@ -13,12 +13,23 @@ license: cc-by-nc-nd-4.0
|
|
| 13 |
│ │ ├── chip_atlas
|
| 14 |
│ │ │ ├── full_data_loading.py
|
| 15 |
│ │ │ └── smaller_data_loading.py
|
|
|
|
|
|
|
| 16 |
│ │ └── tfclust
|
|
|
|
| 17 |
│ │ ├── api_download.py
|
| 18 |
-
│ │ ├──
|
| 19 |
│ │ ├── download.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
│ │ ├── hg38_success_download.log
|
| 21 |
-
│ │ └── temp.py
|
| 22 |
│ └── data_files
|
| 23 |
│ ├── processed
|
| 24 |
│ │ └── tfclust
|
|
@@ -42,6 +53,11 @@ license: cc-by-nc-nd-4.0
|
|
| 42 |
│ │ │ ├── hg19_chr1.json
|
| 43 |
│ │ └── hg38
|
| 44 |
│ │ ├── hg38_chr1.json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
│ └── tfclust
|
| 46 |
│ ├── encRegTfbsClusteredWithCells.hg19.bed
|
| 47 |
│ ├── encRegTfbsClusteredWithCells.hg38.bed
|
|
@@ -56,4 +72,8 @@ license: cc-by-nc-nd-4.0
|
|
| 56 |
```
|
| 57 |
20 directories, 3089 files
|
| 58 |
|
| 59 |
-
In `data_files` subfolders, only representative files for certain chromosomes are shown. In reality, any file that contains the substring "_chr" exists for every chromosome in that genome. Genome hg38 has 711 chromosomes. Genome hg19 has 298 chromosomes.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
│ │ ├── chip_atlas
|
| 14 |
│ │ │ ├── full_data_loading.py
|
| 15 |
│ │ │ └── smaller_data_loading.py
|
| 16 |
+
│ │ ├── remap
|
| 17 |
+
│ │ │ ├── analyze.py
|
| 18 |
│ │ └── tfclust
|
| 19 |
+
│ │ ├── analyze.py
|
| 20 |
│ │ ├── api_download.py
|
| 21 |
+
│ │ ├── combine.py
|
| 22 |
│ │ ├── download.py
|
| 23 |
+
│ │ ├── figures
|
| 24 |
+
│ │ │ ├── seq_lengths_box.png
|
| 25 |
+
│ │ │ ├── seq_lengths_flanked_box.png
|
| 26 |
+
│ │ │ ├── seq_lengths_flanked_hist.png
|
| 27 |
+
│ │ │ ├── seq_lengths_flanked_xlog_box.png
|
| 28 |
+
│ │ │ ├── seq_lengths_flanked_xlog_hist.png
|
| 29 |
+
│ │ │ ├── seq_lengths_hist.png
|
| 30 |
+
│ │ │ ├── seq_lengths_xlog_box.png
|
| 31 |
+
│ │ │ └── seq_lengths_xlog_hist.png
|
| 32 |
│ │ ├── hg38_success_download.log
|
|
|
|
| 33 |
│ └── data_files
|
| 34 |
│ ├── processed
|
| 35 |
│ │ └── tfclust
|
|
|
|
| 53 |
│ │ │ ├── hg19_chr1.json
|
| 54 |
│ │ └── hg38
|
| 55 |
│ │ ├── hg38_chr1.json
|
| 56 |
+
│ ├── remap
|
| 57 |
+
│ │ ├── reMap2022.bb
|
| 58 |
+
│ │ ├── reMap2022.bed
|
| 59 |
+
│ │ ├── remap2022_all_macs2_hg38_v1_0.bed.gz
|
| 60 |
+
│ │ └── remap2022_crm_macs2_hg38_v1_0.bed
|
| 61 |
│ └── tfclust
|
| 62 |
│ ├── encRegTfbsClusteredWithCells.hg19.bed
|
| 63 |
│ ├── encRegTfbsClusteredWithCells.hg38.bed
|
|
|
|
| 72 |
```
|
| 73 |
20 directories, 3089 files
|
| 74 |
|
| 75 |
+
In `data_files` subfolders, only representative files for certain chromosomes are shown. In reality, any file that contains the substring "_chr" exists for every chromosome in that genome. Genome hg38 has 711 chromosomes. Genome hg19 has 298 chromosomes. To reconstruct the full directory structure, run the following from the `DPACMAN` root directory:
|
| 76 |
+
|
| 77 |
+
```
|
| 78 |
+
tree -I '__pycache__|*.egg-info|*.git' > tree.txt
|
| 79 |
+
```
|
dpacman/data/README.md
CHANGED
|
@@ -15,4 +15,27 @@ gunzip encRegTfbsClusteredWithCells.hg38.bed.gz
|
|
| 15 |
```
|
| 16 |
wget https://hgdownload.soe.ucsc.edu/goldenPath/hg19/encRegTfbsClustered/encRegTfbsClusteredWithCells.hg19.bed.gz
|
| 17 |
gunzip encRegTfbsClusteredWithCells.hg19.bed.gz
|
| 18 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
```
|
| 16 |
wget https://hgdownload.soe.ucsc.edu/goldenPath/hg19/encRegTfbsClustered/encRegTfbsClusteredWithCells.hg19.bed.gz
|
| 17 |
gunzip encRegTfbsClusteredWithCells.hg19.bed.gz
|
| 18 |
+
```
|
| 19 |
+
|
| 20 |
+
3. ReMap big bed file
|
| 21 |
+
```
|
| 22 |
+
wget https://hgdownload.soe.ucsc.edu/gbdb/hg38/reMap/reMap2022.bb
|
| 23 |
+
wget http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/bigBedToBed
|
| 24 |
+
chmod +x bigBedToBed
|
| 25 |
+
./bigBedToBed /home/a03-svincoff/DPACMAN/dpacman/data_files/raw/remap/reMap2022.bb /home/a03-svincoff/DPACMAN/dpacman/data_files/raw/remap/reMap2022.bed
|
| 26 |
+
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
4. ReMap CRM file from the official ReMap website
|
| 30 |
+
```
|
| 31 |
+
wget https://remap.univ-amu.fr/storage/remap2022/hg38/MACS2/remap2022_crm_macs2_hg38_v1_0.bed.gz
|
| 32 |
+
gunzip remap2022_crm_macs2_hg38_v1_0.bed.gz
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
5. Run `download.py` to download:
|
| 36 |
+
- Full sequences of each chromosome for genomes hg38 and hg19
|
| 37 |
+
- encRegTfbsClusteredWithCells, a table of clustered transcription factors by their binding sites, for hg38 and hg19
|
| 38 |
+
- processed databases per genome per chromosome with the following columns: "bin","chrom","chromStart","chromEnd","name","score","scoreCount","sourceIds","sourceScores","seq","seq_flanked","chromStart_flanked","chromEnd_flanked","flank5","flank3"
|
| 39 |
+
|
| 40 |
+
### Data Processing
|
| 41 |
+
1. Run `combine.py` to combine these individual files into one large DataFrame
|
dpacman/data/remap/analyze.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import pandas as pd
|
| 3 |
+
|
| 4 |
+
def main(logger=None):
|
| 5 |
+
if logger is None:
|
| 6 |
+
logger = logging.getLogger(__name__)
|
| 7 |
+
|
| 8 |
+
# Read the BED file
|
| 9 |
+
bed_file_path = "../../data_files/raw/remap/reMap2022.bed"
|
| 10 |
+
df = pd.read_csv(bed_file_path, sep="\t", header=None)
|
| 11 |
+
df.columns = ["#chrom", "chromStart", "chromEnd", "name", "score", "strand", "thickStart", "thickEnd", "reserved", "TF", "Biotypes"]
|
| 12 |
+
print(f"{len(df):,}")
|
| 13 |
+
print(df.head(50))
|
| 14 |
+
|
| 15 |
+
crm_bed_file_path = "../../data_files/raw/remap/remap2022_crm_macs2_hg38_v1_0.bed"
|
| 16 |
+
crm = pd.read_csv(crm_bed_file_path, sep="\t", header=None)
|
| 17 |
+
crm.columns = ["#chrom", "chromStart", "chromEnd", "name", "score", "strand", "thickStart", "thickEnd", "reserved"]
|
| 18 |
+
crm["chromLen"] = crm["chromEnd"] - crm["chromStart"]
|
| 19 |
+
crm["thickLen"] = crm["thickEnd"] - crm["thickStart"]
|
| 20 |
+
print(f"{len(crm):,}")
|
| 21 |
+
print(f"thick length statistics:")
|
| 22 |
+
print(crm["thickLen"].describe())
|
| 23 |
+
print(f"chrom length statistics:")
|
| 24 |
+
print(crm["chromLen"].describe())
|
| 25 |
+
print(crm[["#chrom", "chromStart", "chromEnd", "name", "score", "strand", "thickStart", "thickEnd", "reserved"]].head(50))
|
| 26 |
+
crm.head(50).to_csv("crm_example.csv",index=False)
|
| 27 |
+
|
| 28 |
+
crm["name"] = crm["name"].apply(lambda x: x.split(","))
|
| 29 |
+
crm = crm.explode("name").reset_index(drop=True)
|
| 30 |
+
crm.loc[crm["name"]=="ERG"].reset_index(drop=True).head(50).to_csv("crm_example_ERG.csv",index=False)
|
| 31 |
+
|
| 32 |
+
if __name__ == "__main__":
|
| 33 |
+
log_path = "analyze.log"
|
| 34 |
+
|
| 35 |
+
logger = logging.getLogger(__name__)
|
| 36 |
+
logger.setLevel(logging.DEBUG)
|
| 37 |
+
|
| 38 |
+
# Create file handler
|
| 39 |
+
file_handler = logging.FileHandler(log_path, mode="w", encoding="utf-8")
|
| 40 |
+
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
|
| 41 |
+
file_handler.setFormatter(formatter)
|
| 42 |
+
|
| 43 |
+
# Attach handlers
|
| 44 |
+
logger.addHandler(file_handler)
|
| 45 |
+
|
| 46 |
+
main(logger)
|
dpacman/data/tfclust/analyze.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import logging
|
| 3 |
+
import os
|
| 4 |
+
import dask.dataframe as dd
|
| 5 |
+
import matplotlib.pyplot as plt
|
| 6 |
+
|
| 7 |
+
def plot_sequence_lengths_box(lengths, xlog=False, title="Sequence Lengths", out_dir="figures", fname="sequence_lengths_box.png"):
|
| 8 |
+
"""
|
| 9 |
+
Plot sequence lengths. Can be used with original sequence or flank sequence.
|
| 10 |
+
"""
|
| 11 |
+
os.makedirs(out_dir, exist_ok=True)
|
| 12 |
+
out_path = os.path.join(out_dir, fname)
|
| 13 |
+
|
| 14 |
+
plt.figure(figsize=(10, 4))
|
| 15 |
+
plt.boxplot(lengths, vert=False)
|
| 16 |
+
if xlog:
|
| 17 |
+
plt.xscale('log')
|
| 18 |
+
plt.xlabel("Sequence Length")
|
| 19 |
+
plt.title(title)
|
| 20 |
+
plt.grid(True, axis='y', linestyle='--', alpha=0.6)
|
| 21 |
+
plt.tight_layout()
|
| 22 |
+
|
| 23 |
+
plt.savefig(out_path, dpi=300)
|
| 24 |
+
|
| 25 |
+
def plot_sequence_lengths_hist(lengths, xlog=False, title="Sequence Lengths", out_dir="figures", fname="sequence_lengths_hist.png"):
|
| 26 |
+
"""
|
| 27 |
+
Plot sequence lengths. Can be used with original sequence or flank sequence.
|
| 28 |
+
"""
|
| 29 |
+
os.makedirs(out_dir, exist_ok=True)
|
| 30 |
+
out_path = os.path.join(out_dir, fname)
|
| 31 |
+
|
| 32 |
+
plt.figure(figsize=(10, 4))
|
| 33 |
+
plt.hist(lengths, bins=100, density=True, alpha=0.75)
|
| 34 |
+
if xlog:
|
| 35 |
+
plt.xscale('log')
|
| 36 |
+
# percentage format
|
| 37 |
+
plt.gca().yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: '{:.2%}'.format(100*y)))
|
| 38 |
+
plt.xlabel("Sequence Length")
|
| 39 |
+
plt.ylabel("Frequency")
|
| 40 |
+
plt.title(title)
|
| 41 |
+
plt.grid(True, axis='y', linestyle='--', alpha=0.6)
|
| 42 |
+
plt.tight_layout()
|
| 43 |
+
|
| 44 |
+
plt.savefig(out_path, dpi=300)
|
| 45 |
+
|
| 46 |
+
def main(logger):
|
| 47 |
+
df_dir = "../../data_files/processed/tfclust/combined"
|
| 48 |
+
df_savepath = os.path.join(df_dir, "encRegTfbsClustered_hg38_hg19.parquet")
|
| 49 |
+
logger.info("Starting to load data file from parquet")
|
| 50 |
+
df = pd.read_parquet(df_savepath, engine="auto")
|
| 51 |
+
logger.info(df.head())
|
| 52 |
+
|
| 53 |
+
plot_sequence_lengths_hist(df["seq_len"], title="TF Binding Sites",fname="seq_lengths_hist.png")
|
| 54 |
+
plot_sequence_lengths_hist(df["seq_flanked_len"], title="TF Binding Sites with 1000nt Flanks", fname="seq_lengths_flanked_hist.png")
|
| 55 |
+
plot_sequence_lengths_box(df["seq_len"], title="TF Binding Sites",fname="seq_lengths_box.png")
|
| 56 |
+
plot_sequence_lengths_box(df["seq_flanked_len"], title="TF Binding Sites with 1000nt Flanks", fname="seq_lengths_flanked_box.png")
|
| 57 |
+
|
| 58 |
+
plot_sequence_lengths_hist(df["seq_len"], xlog=True, title="TF Binding Sites",fname="seq_lengths_xlog_hist.png")
|
| 59 |
+
plot_sequence_lengths_hist(df["seq_flanked_len"], xlog=True, title="TF Binding Sites with 1000nt Flanks", fname="seq_lengths_flanked_xlog_hist.png")
|
| 60 |
+
plot_sequence_lengths_box(df["seq_len"], xlog=True, title="TF Binding Sites",fname="seq_lengths_xlog_box.png")
|
| 61 |
+
plot_sequence_lengths_box(df["seq_flanked_len"],xlog=True, title="TF Binding Sites with 1000nt Flanks", fname="seq_lengths_flanked_xlog_box.png")
|
| 62 |
+
|
| 63 |
+
if __name__ == "__main__":
|
| 64 |
+
log_path = "analyze.log"
|
| 65 |
+
|
| 66 |
+
logger = logging.getLogger(__name__)
|
| 67 |
+
logger.setLevel(logging.DEBUG)
|
| 68 |
+
|
| 69 |
+
# Create file handler
|
| 70 |
+
file_handler = logging.FileHandler(log_path, mode="w", encoding="utf-8")
|
| 71 |
+
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
|
| 72 |
+
file_handler.setFormatter(formatter)
|
| 73 |
+
|
| 74 |
+
# Attach handlers
|
| 75 |
+
logger.addHandler(file_handler)
|
| 76 |
+
|
| 77 |
+
main(logger)
|
dpacman/data/tfclust/combine.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import logging
|
| 3 |
+
import os
|
| 4 |
+
import dask.dataframe as dd
|
| 5 |
+
|
| 6 |
+
def main():
|
| 7 |
+
hg38_dir = "../../data_files/processed/tfclust/hg38"
|
| 8 |
+
hg19_dir = "../../data_files/processed/tfclust/hg19"
|
| 9 |
+
|
| 10 |
+
# See how many files there are
|
| 11 |
+
hg38_files = [os.path.join(hg38_dir,x) for x in os.listdir(hg38_dir) if os.path.isfile(os.path.join(hg38_dir,x))]
|
| 12 |
+
hg19_files = [os.path.join(hg19_dir,x) for x in os.listdir(hg19_dir) if os.path.isfile(os.path.join(hg19_dir,x))]
|
| 13 |
+
|
| 14 |
+
logging.info(f"Total hg38 files: {len(hg38_files)}")
|
| 15 |
+
logging.info(f"Total hg19 files: {len(hg19_files)}")
|
| 16 |
+
|
| 17 |
+
# See how many datapoints there are
|
| 18 |
+
hg38_complete = pd.read_csv(os.path.join(hg38_dir,"logs/completed.txt"), sep="\t")
|
| 19 |
+
hg19_complete = pd.read_csv(os.path.join(hg19_dir,"logs/completed.txt"), sep="\t")
|
| 20 |
+
|
| 21 |
+
logging.info(f"Total hg38 rows: {sum(hg38_complete['row_count']):,}")
|
| 22 |
+
logging.info(f"Total hg19 rows: {sum(hg19_complete['row_count']):,}")
|
| 23 |
+
logging.info(f"Total: {sum(hg38_complete['row_count']) + sum(hg19_complete['row_count']) :,}")
|
| 24 |
+
|
| 25 |
+
# Now try to combine all the files into one
|
| 26 |
+
|
| 27 |
+
# Read all CSVs in the folder as a single Dask dataframe
|
| 28 |
+
# Read each genome separately
|
| 29 |
+
full_df_hg38 = dd.read_csv(hg38_files)
|
| 30 |
+
full_df_hg38 = full_df_hg38.assign(genome="hg38") # ✅ Dask-safe assignment
|
| 31 |
+
|
| 32 |
+
full_df_hg19 = dd.read_csv(hg19_files)
|
| 33 |
+
full_df_hg19 = full_df_hg19.assign(genome="hg19")
|
| 34 |
+
|
| 35 |
+
# Concatenate both into one Dask DataFrame
|
| 36 |
+
full_df = dd.concat([full_df_hg38, full_df_hg19])
|
| 37 |
+
|
| 38 |
+
logging.info(f"Added all files to ccombined DataFrame. Total rows: {len(full_df)}")
|
| 39 |
+
|
| 40 |
+
full_df["seq_len"] = full_df["seq"].str.len()
|
| 41 |
+
full_df["seq_flanked_len"] = full_df["seq_flanked"].str.len()
|
| 42 |
+
logging.info(f"Added sequence length column.")
|
| 43 |
+
|
| 44 |
+
full_df_dir = "../../data_files/processed/tfclust/combined"
|
| 45 |
+
full_df_savepath = os.path.join(full_df_dir, "encRegTfbsClustered_hg38_hg19.parquet")
|
| 46 |
+
os.makedirs(full_df_dir, exist_ok=True)
|
| 47 |
+
full_df.to_parquet(full_df_savepath) # much faster and more compact
|
| 48 |
+
logging.info(f"Saved combined DataFrame to {full_df_savepath}.")
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
if __name__ == "__main__":
|
| 52 |
+
logging.basicConfig(filename="combine.log", encoding="utf-8", level=logging.DEBUG, filemode="w")
|
| 53 |
+
|
| 54 |
+
main()
|
dpacman/data/tfclust/download.py
CHANGED
|
@@ -170,7 +170,7 @@ def get_sequence(
|
|
| 170 |
results_dict = {
|
| 171 |
"chromStart": new_start,
|
| 172 |
"chromEnd": new_end,
|
| 173 |
-
"seq": chrom_seq[new_start:new_end
|
| 174 |
}
|
| 175 |
return results_dict
|
| 176 |
|
|
|
|
| 170 |
results_dict = {
|
| 171 |
"chromStart": new_start,
|
| 172 |
"chromEnd": new_end,
|
| 173 |
+
"seq": chrom_seq[new_start:new_end]# end is NOT inclusive!!
|
| 174 |
}
|
| 175 |
return results_dict
|
| 176 |
|
environment.yaml
CHANGED
|
@@ -22,7 +22,7 @@ channels:
|
|
| 22 |
|
| 23 |
dependencies:
|
| 24 |
- python=3.10
|
| 25 |
-
|
| 26 |
- pip>=23
|
| 27 |
- pip:
|
| 28 |
- rootutils
|
|
|
|
| 22 |
|
| 23 |
dependencies:
|
| 24 |
- python=3.10
|
| 25 |
+
- dask[complete]
|
| 26 |
- pip>=23
|
| 27 |
- pip:
|
| 28 |
- rootutils
|