{ "cells": [ { "cell_type": "markdown", "id": "82a7f2d0", "metadata": {}, "source": [ "# Scrap notebook for figuring out how to make better splits" ] }, { "cell_type": "code", "execution_count": 38, "id": "9004776a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | ID | \n", "tr_seqid | \n", "dna_seqid | \n", "peak_seqid | \n", "chrpeak_id | \n", "tr_name | \n", "chipscore | \n", "total_jaspar_hits | \n", "dna_sequence | \n", "tr_sequence | \n", "scores | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "trseq26_dnaseq49877 | \n", "trseq26 | \n", "dnaseq49877 | \n", "peakseq24686 | \n", "chr12_peak1150 | \n", "NFYB | \n", "6.0 | \n", "1 | \n", "GCTCTTAAAGATGGTGTGTCCAGAGTTTGTTCCTTCAGATGTTCAG... | \n", "MTMDGDSSTTDASQLGISADYIGGSHYVIQPHDDTEDSMNDHEDTN... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "
| 1 | \n", "trseq26_dnaseq75052 | \n", "trseq26 | \n", "dnaseq75052 | \n", "peakseq12129 | \n", "chr9_peak512 | \n", "NFYB | \n", "5.0 | \n", "1 | \n", "TGTTGGTCTCGCTGACCTCAAGAACGGAGCCGTGGACCCTCGCGGT... | \n", "MTMDGDSSTTDASQLGISADYIGGSHYVIQPHDDTEDSMNDHEDTN... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "
| 2 | \n", "trseq26_dnaseq14843 | \n", "trseq26 | \n", "dnaseq14843 | \n", "peakseq12863 | \n", "chr1_peak1335 | \n", "NFYB | \n", "6.0 | \n", "3 | \n", "AGTTTGGGTACTCAAATATGGTACCAGCAACCAGATGGTGAGTTGC... | \n", "MTMDGDSSTTDASQLGISADYIGGSHYVIQPHDDTEDSMNDHEDTN... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "
| 3 | \n", "trseq26_dnaseq39522 | \n", "trseq26 | \n", "dnaseq39522 | \n", "peakseq5250 | \n", "chr5_peak280 | \n", "NFYB | \n", "4.0 | \n", "1 | \n", "CTTGGAGAACCTTTATGTCTAGCTAAGGGATTGTAAATACACCAAT... | \n", "MTMDGDSSTTDASQLGISADYIGGSHYVIQPHDDTEDSMNDHEDTN... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "
| 4 | \n", "trseq26_dnaseq49215 | \n", "trseq26 | \n", "dnaseq49215 | \n", "peakseq4451 | \n", "chr4_peak201 | \n", "NFYB | \n", "138.0 | \n", "2 | \n", "GCGGTGACTGTTACAGTTCTTAAAGGCGGCGTGTCTGGAGTTTGTT... | \n", "MTMDGDSSTTDASQLGISADYIGGSHYVIQPHDDTEDSMNDHEDTN... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 71152 | \n", "trseq9_dnaseq4066 | \n", "trseq9 | \n", "dnaseq4066 | \n", "peakseq12188 | \n", "chr18_peak213 | \n", "CTCF | \n", "1000.0 | \n", "1 | \n", "AATAATATATCTATTTCTTTATCTTTGTCTTCCCTACTGGACTAGC... | \n", "MEGDAVEAIVEESETFIKGKERKTYQRRREGGQEEDACHLPQNQTD... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "
| 71153 | \n", "trseq9_dnaseq62118 | \n", "trseq9 | \n", "dnaseq62118 | \n", "peakseq46829 | \n", "chr18_peak812 | \n", "CTCF | \n", "1000.0 | \n", "1 | \n", "TAAATATGTATTTTAGTAAAGTGTTATGATACACTGTGATGGGGGT... | \n", "MEGDAVEAIVEESETFIKGKERKTYQRRREGGQEEDACHLPQNQTD... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "
| 71154 | \n", "trseq9_dnaseq41538 | \n", "trseq9 | \n", "dnaseq41538 | \n", "peakseq3957 | \n", "chrY_peak4 | \n", "CTCF | \n", "267.0 | \n", "1 | \n", "GACAGGAGTTGTGTACGAATGTGTGTGAATGTGGGAGCCTAACTAG... | \n", "MEGDAVEAIVEESETFIKGKERKTYQRRREGGQEEDACHLPQNQTD... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "
| 71155 | \n", "trseq9_dnaseq38134 | \n", "trseq9 | \n", "dnaseq38134 | \n", "peakseq40502 | \n", "chr5_peak1955 | \n", "CTCF | \n", "1000.0 | \n", "2 | \n", "CTGGGCGGGTAGGTGAGAGGACAGGAGGGCGAAGTGGAGAGGAGGG... | \n", "MEGDAVEAIVEESETFIKGKERKTYQRRREGGQEEDACHLPQNQTD... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "
| 71156 | \n", "trseq9_dnaseq5106 | \n", "trseq9 | \n", "dnaseq5106 | \n", "peakseq37888 | \n", "chr1_peak4019 | \n", "CTCF | \n", "14.0 | \n", "2 | \n", "ACAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAA... | \n", "MEGDAVEAIVEESETFIKGKERKTYQRRREGGQEEDACHLPQNQTD... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "
71157 rows × 11 columns
\n", "| \n", " | ID | \n", "tr_seqid | \n", "dna_seqid | \n", "peak_seqid | \n", "chrpeak_id | \n", "tr_name | \n", "chipscore | \n", "total_jaspar_hits | \n", "dna_sequence | \n", "tr_sequence | \n", "scores | \n", "tr_cluster_rep | \n", "dna_cluster_rep | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "trseq26_dnaseq49877 | \n", "trseq26 | \n", "dnaseq49877 | \n", "peakseq24686 | \n", "chr12_peak1150 | \n", "NFYB | \n", "6.0 | \n", "1 | \n", "GCTCTTAAAGATGGTGTGTCCAGAGTTTGTTCCTTCAGATGTTCAG... | \n", "MTMDGDSSTTDASQLGISADYIGGSHYVIQPHDDTEDSMNDHEDTN... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq26 | \n", "dnaseq58018 | \n", "
| 1 | \n", "trseq26_dnaseq75052 | \n", "trseq26 | \n", "dnaseq75052 | \n", "peakseq12129 | \n", "chr9_peak512 | \n", "NFYB | \n", "5.0 | \n", "1 | \n", "TGTTGGTCTCGCTGACCTCAAGAACGGAGCCGTGGACCCTCGCGGT... | \n", "MTMDGDSSTTDASQLGISADYIGGSHYVIQPHDDTEDSMNDHEDTN... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq26 | \n", "dnaseq75052 | \n", "
| 2 | \n", "trseq26_dnaseq14843 | \n", "trseq26 | \n", "dnaseq14843 | \n", "peakseq12863 | \n", "chr1_peak1335 | \n", "NFYB | \n", "6.0 | \n", "3 | \n", "AGTTTGGGTACTCAAATATGGTACCAGCAACCAGATGGTGAGTTGC... | \n", "MTMDGDSSTTDASQLGISADYIGGSHYVIQPHDDTEDSMNDHEDTN... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq26 | \n", "dnaseq14843 | \n", "
| 3 | \n", "trseq26_dnaseq39522 | \n", "trseq26 | \n", "dnaseq39522 | \n", "peakseq5250 | \n", "chr5_peak280 | \n", "NFYB | \n", "4.0 | \n", "1 | \n", "CTTGGAGAACCTTTATGTCTAGCTAAGGGATTGTAAATACACCAAT... | \n", "MTMDGDSSTTDASQLGISADYIGGSHYVIQPHDDTEDSMNDHEDTN... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq26 | \n", "dnaseq3280 | \n", "
| 4 | \n", "trseq26_dnaseq49215 | \n", "trseq26 | \n", "dnaseq49215 | \n", "peakseq4451 | \n", "chr4_peak201 | \n", "NFYB | \n", "138.0 | \n", "2 | \n", "GCGGTGACTGTTACAGTTCTTAAAGGCGGCGTGTCTGGAGTTTGTT... | \n", "MTMDGDSSTTDASQLGISADYIGGSHYVIQPHDDTEDSMNDHEDTN... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq26 | \n", "dnaseq57257 | \n", "
| \n", " | Unnamed: 0 | \n", "From | \n", "Entry | \n", "Reviewed | \n", "Entry Name | \n", "Protein names | \n", "Gene Names | \n", "Organism | \n", "Length | \n", "Sequence | \n", "InterPro | \n", "Pfam | \n", "Motif | \n", "Zinc finger | \n", "Protein families | \n", "Binding site | \n", "Site | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0 | \n", "E2F8 | \n", "A0AVK6 | \n", "reviewed | \n", "E2F8_HUMAN | \n", "Transcription factor E2F8 (E2F-8) | \n", "E2F8 | \n", "Homo sapiens (Human) | \n", "867 | \n", "MENEKENLFCEPHKRGLMKTPLKESTTANIVLAEIQPDFGPLTTPT... | \n", "IPR015633;IPR003316;IPR036388;IPR036390; | \n", "PF02319; | \n", "NaN | \n", "NaN | \n", "E2F/DP family | \n", "NaN | \n", "NaN | \n", "
| 1 | \n", "1 | \n", "FEZF1 | \n", "A0PJY2 | \n", "reviewed | \n", "FEZF1_HUMAN | \n", "Fez family zinc finger protein 1 (Zinc finger ... | \n", "FEZF1 FEZ ZNF312B | \n", "Homo sapiens (Human) | \n", "475 | \n", "MDSSCHNATTKMLATAPARGNMMSTSKPLAFSIERIMARTPEPKAL... | \n", "IPR036236;IPR013087; | \n", "PF00096;PF13912; | \n", "MOTIF 28..43; /note=\"Engrailed homology 1 repr... | \n", "ZN_FING 260..282; /note=\"C2H2-type 1\"; /eviden... | \n", "Krueppel C2H2-type zinc-finger protein family | \n", "NaN | \n", "NaN | \n", "
| 2 | \n", "2 | \n", "ZNF320 | \n", "A2RRD8 | \n", "reviewed | \n", "ZN320_HUMAN | \n", "Zinc finger protein 320 | \n", "ZNF320 | \n", "Homo sapiens (Human) | \n", "509 | \n", "MALSQGLLTFRDVAIEFSQEEWKCLDPAQRTLYRDVMLENYRNLVS... | \n", "IPR050589;IPR001909;IPR036051;IPR036236;IPR013... | \n", "PF01352;PF00096; | \n", "NaN | \n", "ZN_FING 161..183; /note=\"C2H2-type 1\"; /eviden... | \n", "Krueppel C2H2-type zinc-finger protein family | \n", "NaN | \n", "NaN | \n", "
| 3 | \n", "3 | \n", "BDP1 | \n", "A6H8Y1 | \n", "reviewed | \n", "BDP1_HUMAN | \n", "Transcription factor TFIIIB component B'' homo... | \n", "BDP1 KIAA1241 KIAA1689 TFNR | \n", "Homo sapiens (Human) | \n", "2624 | \n", "MFRRARLSVKPNVRPGVGARGSTASNPQRGRESPRPPDPATDSASK... | \n", "IPR009057;IPR001005;IPR039467; | \n", "PF15963; | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 4 | \n", "4 | \n", "ZNF316 | \n", "A6NFI3 | \n", "reviewed | \n", "ZN316_HUMAN | \n", "Zinc finger protein 316 | \n", "ZNF316 | \n", "Homo sapiens (Human) | \n", "1004 | \n", "MAALHTTPDSPAAQLERAEDGSECDPDQEEEEEEEEKGEEVQEVEE... | \n", "IPR001909;IPR036051;IPR036236;IPR013087; | \n", "PF01352;PF00096; | \n", "NaN | \n", "ZN_FING 345..367; /note=\"C2H2-type 1\"; /eviden... | \n", "Krueppel C2H2-type zinc-finger protein family | \n", "NaN | \n", "NaN | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 1177 | \n", "1177 | \n", "GMEB1 | \n", "Q9Y692 | \n", "reviewed | \n", "GMEB1_HUMAN | \n", "Glucocorticoid modulatory element-binding prot... | \n", "GMEB1 | \n", "Homo sapiens (Human) | \n", "573 | \n", "MANAEVSVPVGDVVVVPTEGNEGENPEDTKTQVILQLQPVQQGLFI... | \n", "IPR010919;IPR000770; | \n", "PF01342; | \n", "NaN | \n", "NaN | \n", "NaN | \n", "BINDING 113; /ligand=\"Zn(2+)\"; /ligand_id=\"ChE... | \n", "NaN | \n", "
| 1178 | \n", "1178 | \n", "ZFP37 | \n", "Q9Y6Q3 | \n", "reviewed | \n", "ZFP37_HUMAN | \n", "Zinc finger protein 37 homolog (Zfp-37) | \n", "ZFP37 | \n", "Homo sapiens (Human) | \n", "630 | \n", "MSVSSGVQILTKPETVDRRRSAETTKEAGRPLEMAVSEPEASAAEW... | \n", "IPR001909;IPR036051;IPR050826;IPR056436;IPR036... | \n", "PF01352;PF00096;PF23561; | \n", "NaN | \n", "ZN_FING 293..315; /note=\"C2H2-type 1\"; /eviden... | \n", "Krueppel C2H2-type zinc-finger protein family | \n", "NaN | \n", "NaN | \n", "
| 1179 | \n", "1179 | \n", "NCOA3 | \n", "Q9Y6Q9 | \n", "reviewed | \n", "NCOA3_HUMAN | \n", "Nuclear receptor coactivator 3 (NCoA-3) (EC 2.... | \n", "NCOA3 AIB1 BHLHE42 RAC3 TRAM1 | \n", "Homo sapiens (Human) | \n", "1424 | \n", "MSGLGENLDPLASDSRKRKLPCDTPGQGLTCSGEKRRREQESKYIE... | \n", "IPR011598;IPR056193;IPR036638;IPR010011;IPR032... | \n", "PF23172;PF07469;PF16279;PF16665;PF08815;PF0098... | \n", "MOTIF 685..689; /note=\"LXXLL motif 1\"; MOTIF 7... | \n", "NaN | \n", "SRC/p160 nuclear receptor coactivator family | \n", "NaN | \n", "NaN | \n", "
| 1180 | \n", "1180 | \n", "ZHX2 | \n", "Q9Y6X8 | \n", "reviewed | \n", "ZHX2_HUMAN | \n", "Zinc fingers and homeoboxes protein 2 (Alpha-f... | \n", "ZHX2 AFR1 KIAA0854 RAF | \n", "Homo sapiens (Human) | \n", "837 | \n", "MASKRKSTTPCMVRTSQVVEQDVPEEVDRAKEKGIGTPQPDVAKDS... | \n", "IPR001356;IPR009057;IPR041057;IPR036236;IPR013... | \n", "PF00046;PF18387; | \n", "NaN | \n", "ZN_FING 78..101; /note=\"C2H2-type 1\"; /evidenc... | \n", "ZHX family | \n", "NaN | \n", "NaN | \n", "
| 1181 | \n", "1181 | \n", "MORC2 | \n", "Q9Y6X9 | \n", "reviewed | \n", "MORC2_HUMAN | \n", "ATPase MORC2 (EC 3.6.1.-) (MORC family CW-type... | \n", "MORC2 KIAA0852 ZCWCC1 | \n", "Homo sapiens (Human) | \n", "1032 | \n", "MAFTNYSSLNRAQLTFEYLHTNSTTHEFLFGALAELVDNARDADAT... | \n", "IPR056360;IPR036890;IPR041006;IPR011124; | \n", "PF23327;PF13589;PF17942;PF07496; | \n", "NaN | \n", "ZN_FING 490..544; /note=\"CW-type\"; /evidence=\"... | \n", "NaN | \n", "BINDING 39; /ligand=\"ATP\"; /ligand_id=\"ChEBI:C... | \n", "NaN | \n", "
1182 rows × 17 columns
\n", "| \n", " | ID | \n", "tr_seqid | \n", "dna_seqid | \n", "peak_seqid | \n", "chrpeak_id | \n", "tr_name | \n", "chipscore | \n", "total_jaspar_hits | \n", "dna_sequence | \n", "tr_sequence | \n", "scores | \n", "tr_cluster_rep | \n", "dna_cluster_rep | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "trseq26_dnaseq49877 | \n", "trseq26 | \n", "dnaseq49877 | \n", "peakseq24686 | \n", "chr12_peak1150 | \n", "NFYB | \n", "6.0 | \n", "1 | \n", "GCTCTTAAAGATGGTGTGTCCAGAGTTTGTTCCTTCAGATGTTCAG... | \n", "MTMDGDSSTTDASQLGISADYIGGSHYVIQPHDDTEDSMNDHEDTN... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq26 | \n", "dnaseq58018 | \n", "
| 1 | \n", "trseq26_dnaseq75052 | \n", "trseq26 | \n", "dnaseq75052 | \n", "peakseq12129 | \n", "chr9_peak512 | \n", "NFYB | \n", "5.0 | \n", "1 | \n", "TGTTGGTCTCGCTGACCTCAAGAACGGAGCCGTGGACCCTCGCGGT... | \n", "MTMDGDSSTTDASQLGISADYIGGSHYVIQPHDDTEDSMNDHEDTN... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq26 | \n", "dnaseq75052 | \n", "
| 2 | \n", "trseq26_dnaseq14843 | \n", "trseq26 | \n", "dnaseq14843 | \n", "peakseq12863 | \n", "chr1_peak1335 | \n", "NFYB | \n", "6.0 | \n", "3 | \n", "AGTTTGGGTACTCAAATATGGTACCAGCAACCAGATGGTGAGTTGC... | \n", "MTMDGDSSTTDASQLGISADYIGGSHYVIQPHDDTEDSMNDHEDTN... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq26 | \n", "dnaseq14843 | \n", "
| 3 | \n", "trseq26_dnaseq39522 | \n", "trseq26 | \n", "dnaseq39522 | \n", "peakseq5250 | \n", "chr5_peak280 | \n", "NFYB | \n", "4.0 | \n", "1 | \n", "CTTGGAGAACCTTTATGTCTAGCTAAGGGATTGTAAATACACCAAT... | \n", "MTMDGDSSTTDASQLGISADYIGGSHYVIQPHDDTEDSMNDHEDTN... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq26 | \n", "dnaseq3280 | \n", "
| 4 | \n", "trseq26_dnaseq49215 | \n", "trseq26 | \n", "dnaseq49215 | \n", "peakseq4451 | \n", "chr4_peak201 | \n", "NFYB | \n", "138.0 | \n", "2 | \n", "GCGGTGACTGTTACAGTTCTTAAAGGCGGCGTGTCTGGAGTTTGTT... | \n", "MTMDGDSSTTDASQLGISADYIGGSHYVIQPHDDTEDSMNDHEDTN... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq26 | \n", "dnaseq57257 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 71152 | \n", "trseq9_dnaseq4066 | \n", "trseq9 | \n", "dnaseq4066 | \n", "peakseq12188 | \n", "chr18_peak213 | \n", "CTCF | \n", "1000.0 | \n", "1 | \n", "AATAATATATCTATTTCTTTATCTTTGTCTTCCCTACTGGACTAGC... | \n", "MEGDAVEAIVEESETFIKGKERKTYQRRREGGQEEDACHLPQNQTD... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq9 | \n", "dnaseq48336 | \n", "
| 71153 | \n", "trseq9_dnaseq62118 | \n", "trseq9 | \n", "dnaseq62118 | \n", "peakseq46829 | \n", "chr18_peak812 | \n", "CTCF | \n", "1000.0 | \n", "1 | \n", "TAAATATGTATTTTAGTAAAGTGTTATGATACACTGTGATGGGGGT... | \n", "MEGDAVEAIVEESETFIKGKERKTYQRRREGGQEEDACHLPQNQTD... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq9 | \n", "dnaseq62118 | \n", "
| 71154 | \n", "trseq9_dnaseq41538 | \n", "trseq9 | \n", "dnaseq41538 | \n", "peakseq3957 | \n", "chrY_peak4 | \n", "CTCF | \n", "267.0 | \n", "1 | \n", "GACAGGAGTTGTGTACGAATGTGTGTGAATGTGGGAGCCTAACTAG... | \n", "MEGDAVEAIVEESETFIKGKERKTYQRRREGGQEEDACHLPQNQTD... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq9 | \n", "dnaseq41538 | \n", "
| 71155 | \n", "trseq9_dnaseq38134 | \n", "trseq9 | \n", "dnaseq38134 | \n", "peakseq40502 | \n", "chr5_peak1955 | \n", "CTCF | \n", "1000.0 | \n", "2 | \n", "CTGGGCGGGTAGGTGAGAGGACAGGAGGGCGAAGTGGAGAGGAGGG... | \n", "MEGDAVEAIVEESETFIKGKERKTYQRRREGGQEEDACHLPQNQTD... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq9 | \n", "dnaseq73435 | \n", "
| 71156 | \n", "trseq9_dnaseq5106 | \n", "trseq9 | \n", "dnaseq5106 | \n", "peakseq37888 | \n", "chr1_peak4019 | \n", "CTCF | \n", "14.0 | \n", "2 | \n", "ACAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAA... | \n", "MEGDAVEAIVEESETFIKGKERKTYQRRREGGQEEDACHLPQNQTD... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq9 | \n", "dnaseq5106 | \n", "
71157 rows × 13 columns
\n", "| \n", " | tr_cluster_rep | \n", "count | \n", "Protein names | \n", "Protein families | \n", "
|---|---|---|---|---|
| 0 | \n", "trseq4 | \n", "46445 | \n", "Zinc finger protein 263 (Zinc finger protein F... | \n", "Krueppel C2H2-type zinc-finger protein family | \n", "
| 1 | \n", "trseq1 | \n", "8962 | \n", "Early growth response protein 1 (EGR-1) (AT225... | \n", "EGR C2H2-type zinc-finger protein family | \n", "
| 2 | \n", "trseq23 | \n", "6407 | \n", "Transcription factor Sp2 | \n", "Sp1 C2H2-type zinc-finger protein family | \n", "
| 3 | \n", "trseq9 | \n", "2641 | \n", "Transcriptional repressor CTCF (11-zinc finger... | \n", "CTCF zinc-finger protein family | \n", "
| 4 | \n", "trseq5 | \n", "1839 | \n", "RE1-silencing transcription factor (Neural-res... | \n", "NaN | \n", "
| 5 | \n", "trseq19 | \n", "1820 | \n", "Interferon regulatory factor 1 (IRF-1) | \n", "IRF family | \n", "
| 6 | \n", "trseq26 | \n", "873 | \n", "Nuclear transcription factor Y subunit beta (C... | \n", "NFYB/HAP3 subunit family | \n", "
| 7 | \n", "trseq24 | \n", "873 | \n", "Transcription factor Sp1 | \n", "Sp1 C2H2-type zinc-finger protein family | \n", "
| 8 | \n", "trseq10 | \n", "528 | \n", "Nuclear transcription factor Y subunit alpha (... | \n", "NFYA/HAP2 subunit family | \n", "
| 9 | \n", "trseq16 | \n", "487 | \n", "Forkhead box protein P1 (Mac-1-regulated forkh... | \n", "NaN | \n", "
| 10 | \n", "trseq29 | \n", "71 | \n", "Ras-responsive element-binding protein 1 (RREB... | \n", "Krueppel C2H2-type zinc-finger protein family | \n", "
| 11 | \n", "trseq14 | \n", "62 | \n", "Serum response factor (SRF) | \n", "NaN | \n", "
| 12 | \n", "trseq21 | \n", "34 | \n", "DNA-binding protein RFX2 (Regulatory factor X 2) | \n", "RFX family | \n", "
| 13 | \n", "trseq17 | \n", "30 | \n", "Tumor protein 63 (p63) (Chronic ulcerative sto... | \n", "P53 family | \n", "
| 14 | \n", "trseq28 | \n", "29 | \n", "Nuclear receptor subfamily 2 group C member 2 ... | \n", "Nuclear hormone receptor family, NR2 subfamily | \n", "
| 15 | \n", "trseq22 | \n", "16 | \n", "Transcription factor E2F3 (E2F-3) | \n", "E2F/DP family | \n", "
| 16 | \n", "trseq15 | \n", "9 | \n", "Transcription factor AP-2 gamma (AP2-gamma) (A... | \n", "AP-2 family | \n", "
| 17 | \n", "trseq2 | \n", "9 | \n", "ETS-related transcription factor Elf-1 (E74-li... | \n", "ETS family | \n", "
| 18 | \n", "trseq20 | \n", "3 | \n", "Interferon regulatory factor 2 (IRF-2) | \n", "IRF family | \n", "
| 19 | \n", "trseq8 | \n", "3 | \n", "Cellular tumor antigen p53 (Antigen NY-CO-13) ... | \n", "P53 family | \n", "
| 20 | \n", "trseq27 | \n", "3 | \n", "Estrogen receptor (ER) (ER-alpha) (Estradiol r... | \n", "Nuclear hormone receptor family, NR3 subfamily | \n", "
| 21 | \n", "trseq11 | \n", "2 | \n", "Transcriptional regulator Kaiso (Zinc finger a... | \n", "NaN | \n", "
| 22 | \n", "trseq25 | \n", "2 | \n", "Transcription factor MafF (U-Maf) (V-maf muscu... | \n", "BZIP family, Maf subfamily | \n", "
| 23 | \n", "trseq6 | \n", "2 | \n", "Heat shock factor protein 1 (HSF 1) (Heat shoc... | \n", "HSF family | \n", "
| 24 | \n", "trseq3 | \n", "2 | \n", "Transcriptional repressor protein YY1 (Delta t... | \n", "YY transcription factor family | \n", "
| 25 | \n", "trseq18 | \n", "2 | \n", "Hepatocyte nuclear factor 4-gamma (HNF-4-gamma... | \n", "Nuclear hormone receptor family, NR2 subfamily | \n", "
| 26 | \n", "trseq13 | \n", "1 | \n", "Myocyte-specific enhancer factor 2A (Serum res... | \n", "MEF2 family | \n", "
| 27 | \n", "trseq7 | \n", "1 | \n", "Glucocorticoid receptor (GR) (Nuclear receptor... | \n", "Nuclear hormone receptor family, NR3 subfamily | \n", "
| 28 | \n", "trseq12 | \n", "1 | \n", "Androgen receptor (Dihydrotestosterone recepto... | \n", "Nuclear hormone receptor family, NR3 subfamily | \n", "
| \n", " | ID | \n", "tr_seqid | \n", "dna_seqid | \n", "peak_seqid | \n", "chrpeak_id | \n", "tr_name | \n", "chipscore | \n", "total_jaspar_hits | \n", "dna_sequence | \n", "tr_sequence | \n", "scores | \n", "tr_cluster_rep | \n", "dna_cluster_rep | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 28719 | \n", "trseq4_dnaseq38058 | \n", "trseq4 | \n", "dnaseq38058 | \n", "peakseq59667 | \n", "chr8_peak2619 | \n", "ZNF263 | \n", "5.0 | \n", "2 | \n", "CTGGGAGTCTCCCAGTGAATCCTCTCCCTCCAGGAAGCATTCAGGG... | \n", "MASGPGSQEREGLLIVKLEEDCAWSQELPPPDPGPSPEASHLRFRR... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq4 | \n", "dnaseq38058 | \n", "
| 30710 | \n", "trseq4_dnaseq58107 | \n", "trseq4 | \n", "dnaseq58107 | \n", "peakseq38026 | \n", "chr22_peak949 | \n", "ZNF263 | \n", "909.0 | \n", "1 | \n", "GTAACGATGCCTTCCTAGGCACTGGCGTTACCGCCTGACCAAGGAG... | \n", "MASGPGSQEREGLLIVKLEEDCAWSQELPPPDPGPSPEASHLRFRR... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq4 | \n", "dnaseq58107 | \n", "
| 8736 | \n", "trseq4_dnaseq18629 | \n", "trseq4 | \n", "dnaseq18629 | \n", "peakseq39291 | \n", "chr6_peak2197 | \n", "ZNF263 | \n", "61.0 | \n", "1 | \n", "CAAAGAAAGAAAATCTACTTTATTACAAGGAACAAAAACATAATAG... | \n", "MASGPGSQEREGLLIVKLEEDCAWSQELPPPDPGPSPEASHLRFRR... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq4 | \n", "dnaseq18629 | \n", "
| 5747 | \n", "trseq4_dnaseq4928 | \n", "trseq4 | \n", "dnaseq4928 | \n", "peakseq39282 | \n", "chr1_peak4157 | \n", "ZNF263 | \n", "394.0 | \n", "1 | \n", "AATTTCTATCAACTGAGGCAAAAGTCTTAAGTTCCCCCAAACCAAT... | \n", "MASGPGSQEREGLLIVKLEEDCAWSQELPPPDPGPSPEASHLRFRR... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq4 | \n", "dnaseq4928 | \n", "
| 28098 | \n", "trseq4_dnaseq17297 | \n", "trseq4 | \n", "dnaseq17297 | \n", "peakseq46179 | \n", "chr1_peak4932 | \n", "ZNF263 | \n", "10.0 | \n", "1 | \n", "ATGTGGGAGTAGAGATAAAGAAATCAGTGCAGTTAAGGAGGGTAGA... | \n", "MASGPGSQEREGLLIVKLEEDCAWSQELPPPDPGPSPEASHLRFRR... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq4 | \n", "dnaseq17297 | \n", "
| 6990 | \n", "trseq4_dnaseq45969 | \n", "trseq4 | \n", "dnaseq45969 | \n", "peakseq3403 | \n", "chr12_peak164 | \n", "ZNF263 | \n", "1000.0 | \n", "1 | \n", "GCAGGAGAGGTCACAGACCCACAGAATCGTCCAATCCCTGCCCCAG... | \n", "MASGPGSQEREGLLIVKLEEDCAWSQELPPPDPGPSPEASHLRFRR... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq4 | \n", "dnaseq45969 | \n", "
| 29914 | \n", "trseq4_dnaseq70215 | \n", "trseq4 | \n", "dnaseq70215 | \n", "peakseq29873 | \n", "chr10_peak1270 | \n", "ZNF263 | \n", "99.0 | \n", "3 | \n", "TGAGAAAAATAAACCCTGGGATATACAAAGGGACATCTGTCCACAG... | \n", "MASGPGSQEREGLLIVKLEEDCAWSQELPPPDPGPSPEASHLRFRR... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,99,99,99,99,99,9... | \n", "trseq4 | \n", "dnaseq70215 | \n", "
| 52579 | \n", "trseq4_dnaseq3768 | \n", "trseq4 | \n", "dnaseq3768 | \n", "peakseq43489 | \n", "chr22_peak1089 | \n", "ZNF263 | \n", "65.0 | \n", "9 | \n", "AAGTAGCTGGGATTACAGACGTACACCACCACGCCTGGCTAATTTT... | \n", "MASGPGSQEREGLLIVKLEEDCAWSQELPPPDPGPSPEASHLRFRR... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq4 | \n", "dnaseq3768 | \n", "
| 6423 | \n", "trseq4_dnaseq19395 | \n", "trseq4 | \n", "dnaseq19395 | \n", "peakseq60837 | \n", "chr20_peak1976 | \n", "ZNF263 | \n", "76.0 | \n", "2 | \n", "CAATACTCAATTTCCCCCTATTTATTTCTAGTGCAGGTTTCACAGC... | \n", "MASGPGSQEREGLLIVKLEEDCAWSQELPPPDPGPSPEASHLRFRR... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq4 | \n", "dnaseq19395 | \n", "
| 49313 | \n", "trseq1_dnaseq5894 | \n", "trseq1 | \n", "dnaseq5894 | \n", "peakseq60338 | \n", "chr17_peak3283 | \n", "EGR1 | \n", "1000.0 | \n", "1 | \n", "ACAGGGCAACAGCGGGATTGAGAGATGGAGGGATCCCCGCATCTGA... | \n", "MAAAKAEMQLMSPLQISDPFGSFPHSPTMDNYPKLEEMMLLSNGAP... | \n", "0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... | \n", "trseq1 | \n", "dnaseq5894 | \n", "
| \n", " | tr_cluster_rep | \n", "count | \n", "sequence | \n", "Unnamed: 0 | \n", "From | \n", "Entry | \n", "Reviewed | \n", "Entry Name | \n", "Protein names | \n", "Gene Names | \n", "Organism | \n", "Length | \n", "InterPro | \n", "Pfam | \n", "Motif | \n", "Zinc finger | \n", "Protein families | \n", "Binding site | \n", "Site | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | \n", "trseq23 | \n", "6407 | \n", "MSDPQTSMAATAAVSPSDYLQPAASTTQDSQPSPLALLAATCSKIG... | \n", "443 | \n", "SP2 | \n", "Q02086 | \n", "reviewed | \n", "SP2_HUMAN | \n", "Transcription factor Sp2 | \n", "SP2 KIAA0048 | \n", "Homo sapiens (Human) | \n", "613 | \n", "IPR036236;IPR013087; | \n", "PF00096; | \n", "MOTIF 361..369; /note=\"9aaTAD; inactive\"; /evi... | \n", "ZN_FING 525..549; /note=\"C2H2-type 1\"; /eviden... | \n", "Sp1 C2H2-type zinc-finger protein family | \n", "NaN | \n", "NaN | \n", "
| 6 | \n", "trseq26 | \n", "873 | \n", "MTMDGDSSTTDASQLGISADYIGGSHYVIQPHDDTEDSMNDHEDTN... | \n", "273 | \n", "NFYB | \n", "P25208 | \n", "reviewed | \n", "NFYB_HUMAN | \n", "Nuclear transcription factor Y subunit beta (C... | \n", "NFYB HAP3 | \n", "Homo sapiens (Human) | \n", "207 | \n", "IPR003958;IPR009072;IPR027113;IPR003956; | \n", "PF00808; | \n", "NaN | \n", "NaN | \n", "NFYB/HAP3 subunit family | \n", "NaN | \n", "NaN | \n", "
| 13 | \n", "trseq17 | \n", "30 | \n", "MNFETSRCATLQYCPDPYIQRFVETPAHFSWKESYYRSTMSQSTQT... | \n", "1004 | \n", "TP63 | \n", "Q9H3D4 | \n", "reviewed | \n", "P63_HUMAN | \n", "Tumor protein 63 (p63) (Chronic ulcerative sto... | \n", "TP63 KET P63 P73H P73L TP73L | \n", "Homo sapiens (Human) | \n", "680 | \n", "IPR008967;IPR012346;IPR011615;IPR036674;IPR010... | \n", "PF00870;PF07710;PF07647; | \n", "NaN | \n", "NaN | \n", "P53 family | \n", "BINDING 244; /ligand=\"Zn(2+)\"; /ligand_id=\"ChE... | \n", "NaN | \n", "