assembly_id	genome_id	genome_def	crispr_array_locus_merge	crispr_array_location_merge	crispr_locus_id	crispr_pred_method	array_in_prot	prot_within_array_20000	prot_in_genome	crispr_type_by_cas_prot	consensus_repeat	repeat_length	self-targeting_spacer_number	self-targeting_target_number	spacer_location	protospacer_location	repeat_type	spacer_locus_num	spacer_num	correct_crispr_type	genome_cas_prots	unknown_protein_around_crispr	L10	L10_domain	L9	L9_domain	L8	L8_domain	L7	L7_domain	L6	L6_domain	L5	L5_domain	L4	L4_domain	L3	L3_domain	L2	L2_domain	L1	L1_domain	R1	R1_domain	R2	R2_domain	R3	R3_domain	R4	R4_domain	R5	R5_domain	R6	R6_domain	R7	R7_domain	R8	R8_domain	R9	R9_domain	R10	R10_domain
GCF_000328545.1_ASM32854v1	NC_019970	Thermoanaerobacterium thermosaccharolyticum M0795, complete genome	1	266775-269560	1,1,1,2	PILER-CR,CRISPRCasFinder,CRT,PILER-CR	no		cas3,RT,cas4,csa3,WYL,DinG,csx1,cmr4gr7,cmr5gr11,cmr6gr7,cmr1gr7,cas10,cmr3gr5,csm6,DEDDh,Cas9_archaeal,cas2,cas1,cas5,cas7,cas8b1,cas6,csm2gr11,csm3gr7,csx10gr5,csx20	Orphan	GTTTTTAGCCTACCTATGAGGAATTGAAAC,GTTTTTAGCCTACCTATGAGGAATTGAAAC,GTTTTTAGCCTACCTATNAGGAATTGAAAC,GTTTTTAGCCTACCTATGAGGAATTGAAAC	30,30,30,30	2	2	269428-269465|269433-269470	NC_019970.1_2383518-2383481|NC_019970.1_2383518-2383481	NA:NA:NA:NA	40,41,41,40	41	Orphan	cas3,RT,cas4,csa3,WYL,DinG,csx1,cmr4gr7,cmr5gr11,cmr6gr7,cmr1gr7,cas10,cmr3gr5,csm6,DEDDh,Cas9_archaeal,cas2,cas1,cas5,cas7,cas8b1,cas6,csm2gr11,csm3gr7,csx10gr5,csx20,csf2gr7,csf1gr8	NA|57aa|up_3|NC_019970.1_263676_263847_+,NA|61aa|up_0|NC_019970.1_266233_266416_+,NA|186aa|down_8|NC_019970.1_280702_281260_+	NA|540aa|up_9|NC_019970.1_257755_259375_-	PRK14869, PRK14869, putative manganese-dependent inorganic diphosphatase	NA|159aa|up_8|NC_019970.1_259535_260012_+	TIGR02893, Spore_protein_YabQ, spore cortex biosynthesis protein YabQ	NA|100aa|up_7|NC_019970.1_260089_260389_+	pfam04977, DivIC, Septum formation initiator	NA|141aa|up_6|NC_019970.1_260485_260908_+	PRK05807, PRK05807, RNA-binding protein S1	NA|307aa|up_5|NC_019970.1_260952_261873_+	COG0248, GppA, Exopolyphosphatase [Nucleotide transport and metabolism / Inorganic ion transport and metabolism]	NA|82aa|up_4|NC_019970.1_262883_263129_-	pfam07508, Recombinase, Recombinase	NA|57aa|up_3|NC_019970.1_263676_263847_+	NA	NA|278aa|up_2|NC_019970.1_264119_264953_+	smart00283, MA, Methyl-accepting chemotaxis-like domains (chemotaxis sensory transducer)	NA|192aa|up_1|NC_019970.1_265580_266156_+	cd06260, DUF820, Domain of unknown function (DUF820)	NA|61aa|up_0|NC_019970.1_266233_266416_+	NA	NA|390aa|down_0|NC_019970.1_269912_271082_+	cd17333, MFS_FucP_MFSD4_like, Bacterial fucose permease, eukaryotic Major facilitator superfamily domain-containing protein 4, and similar proteins	NA|430aa|down_1|NC_019970.1_271435_272725_-	pfam02810, SEC-C, SEC-C motif	NA|571aa|down_2|NC_019970.1_273002_274715_-	COG5421, COG5421, Transposase [DNA replication, recombination, and repair]	NA|367aa|down_3|NC_019970.1_275174_276275_+	pfam02317, Octopine_DH, NAD/NADP octopine/nopaline dehydrogenase, alpha-helical domain	NA|551aa|down_4|NC_019970.1_276271_277924_+	pfam02310, B12-binding, B12 binding domain	NA|218aa|down_5|NC_019970.1_278065_278719_+	cd02696, MurNAc-LAA, N-acetylmuramoyl-L-alanine amidase or MurNAc-LAA (also known as peptidoglycan aminohydrolase, NAMLA amidase, NAMLAA, Amidase 3, and peptidoglycan amidase; EC 3	NA|112aa|down_6|NC_019970.1_278723_279059_+	TIGR01673, putative_holin, phage holin, LL-H family	NA|426aa|down_7|NC_019970.1_279120_280398_-	PRK10720, PRK10720, uracil transporter; Provisional	NA|186aa|down_8|NC_019970.1_280702_281260_+	NA	NA|375aa|down_9|NC_019970.1_281274_282399_-	pfam12698, ABC2_membrane_3, ABC-2 family transporter protein
GCF_000328545.1_ASM32854v1	NC_019970	Thermoanaerobacterium thermosaccharolyticum M0795, complete genome	2	2643367-2661244	3,2,2	PILER-CR,CRISPRCasFinder,CRT	no	cas2,cas1,cas4,cas3,cas5,cas7,cas8b1,cas6,csm2gr11,csm3gr7,csx10gr5	cas3,RT,cas4,csa3,WYL,DinG,csx1,cmr4gr7,cmr5gr11,cmr6gr7,cmr1gr7,cas10,cmr3gr5,csm6,DEDDh,Cas9_archaeal,cas2,cas1,cas5,cas7,cas8b1,cas6,csm2gr11,csm3gr7,csx10gr5,csx20	Type I-B	GTTTTTAGCCTACCTATGAGGAATTGAAAC,GTTTCAATTCCTCATAGGTAGGCTAAAAAC,GTTTCAATTCCTCATAGGTAGGCTAAAAAC	30,30,30	0	0	NA	NA	NA:NA:NA	265,266,266	266	TypeI-B	cas3,RT,cas4,csa3,WYL,DinG,csx1,cmr4gr7,cmr5gr11,cmr6gr7,cmr1gr7,cas10,cmr3gr5,csm6,DEDDh,Cas9_archaeal,cas2,cas1,cas5,cas7,cas8b1,cas6,csm2gr11,csm3gr7,csx10gr5,csx20,csf2gr7,csf1gr8	NA,csm2gr11|122aa|down_5|NC_019970.1_2675933_2676299_-,csm2gr11|150aa|down_9|NC_019970.1_2678702_2679152_-	NA|742aa|up_9|NC_019970.1_2633038_2635264_-	COG1132, MdlB, ABC-type multidrug transport system, ATPase and permease components [Defense mechanisms]	NA|148aa|up_8|NC_019970.1_2635296_2635740_-	smart00347, HTH_MARR, helix_turn_helix multiple antibiotic resistance protein	NA|522aa|up_7|NC_019970.1_2635873_2637439_-	cd01031, EriC, ClC chloride channel EriC	NA|207aa|up_6|NC_019970.1_2637645_2638266_+	COG0490, COG0490, Putative regulatory, ligand-binding protein related to C-terminal domains of K+ channels [Inorganic ion transport and metabolism]	NA|370aa|up_5|NC_019970.1_2638386_2639496_+	COG1125, OpuBA, ABC-type proline/glycine betaine transport systems, ATPase components [Amino acid transport and metabolism]	NA|213aa|up_4|NC_019970.1_2639508_2640147_+	COG1174, OpuBB, ABC-type proline/glycine betaine transport systems, permease component [Amino acid transport and metabolism]	NA|299aa|up_3|NC_019970.1_2640155_2641052_+	cd13609, PBP2_Opu_like_1, Substrate-binding domain of putative ABC-type osmoprotectant uptake system; the type 2 periplasmic-binding protein fold	cas2|88aa|up_2|NC_019970.1_2641082_2641346_-	cd09725, Cas2_I_II_III, CRISPR/Cas system-associated protein Cas2	cas1|331aa|up_1|NC_019970.1_2641358_2642351_-	TIGR03641, cas1_HMARI, CRISPR-associated endonuclease Cas1, subtype I-B/HMARI/TNEAP	cas4|166aa|up_0|NC_019970.1_2642347_2642845_-	pfam01930, Cas_Cas4, Domain of unknown function DUF83	cas3|789aa|down_0|NC_019970.1_2669159_2671526_-	cd17930, DEXHc_cas3, DEXH/Q-box helicase domain of Cas3	cas5|238aa|down_1|NC_019970.1_2671500_2672214_-	TIGR02592, hypothetical_protein_CTC_01466, CRISPR-associated protein Cas5, subtype I-B/HMARI	cas7|304aa|down_2|NC_019970.1_2672289_2673201_-	TIGR02590, hypothetical_protein_MM_0563, CRISPR-associated protein Cas7/Csh2, subtype I-B/HMARI	cas8b1|587aa|down_3|NC_019970.1_2673197_2674958_-	pfam09484, Cas_TM1802, CRISPR-associated protein TM1802 (cas_TM1802)	cas6|247aa|down_4|NC_019970.1_2674973_2675714_-	TIGR01877, CRISPR-associated_endoribonuclease_Cas6_1, CRISPR-associated endoribonuclease Cas6	csm2gr11|122aa|down_5|NC_019970.1_2675933_2676299_-	NA	csm3gr7|285aa|down_6|NC_019970.1_2676312_2677167_-	TIGR03986, CRISPR-associated_protein, CRISPR-associated protein	csm3gr7|260aa|down_7|NC_019970.1_2677153_2677933_-	cd09683, Csm3_III-A, CRISPR/Cas system-associated RAMP superfamily protein Csm3	csm3gr7|250aa|down_8|NC_019970.1_2677925_2678675_-	cd09683, Csm3_III-A, CRISPR/Cas system-associated RAMP superfamily protein Csm3	csm2gr11|150aa|down_9|NC_019970.1_2678702_2679152_-	NA
GCF_000328545.1_ASM32854v1	NC_019970	Thermoanaerobacterium thermosaccharolyticum M0795, complete genome	3	2661344-2668991	4,3,3	PILER-CR,CRISPRCasFinder,CRT	no	cas1,cas4,cas3,cas5,cas7,cas8b1,cas6,csm2gr11,csm3gr7,csx10gr5,cas10,csx20	cas3,RT,cas4,csa3,WYL,DinG,csx1,cmr4gr7,cmr5gr11,cmr6gr7,cmr1gr7,cas10,cmr3gr5,csm6,DEDDh,Cas9_archaeal,cas2,cas1,cas5,cas7,cas8b1,cas6,csm2gr11,csm3gr7,csx10gr5,csx20	Type I-B,Type III-D,Type III-C,Type III-A,Type III-B	GTTTTTAGCCTACCTATCAGGAATTGAAAC,GTTTCAATTCCTCATAGGTAGGCTAAAAAC,GTTTCAATTCCTGATAGGTAGGCTAAAAAC	30,30,30	0	0	NA	NA	NA:NA:NA	114,114,114	114	TypeI-B,TypeIII-D,TypeIII-C,TypeIII-A,TypeIII-B	cas3,RT,cas4,csa3,WYL,DinG,csx1,cmr4gr7,cmr5gr11,cmr6gr7,cmr1gr7,cas10,cmr3gr5,csm6,DEDDh,Cas9_archaeal,cas2,cas1,cas5,cas7,cas8b1,cas6,csm2gr11,csm3gr7,csx10gr5,csx20,csf2gr7,csf1gr8	NA,csm2gr11|122aa|down_5|NC_019970.1_2675933_2676299_-,csm2gr11|150aa|down_9|NC_019970.1_2678702_2679152_-	NA|742aa|up_9|NC_019970.1_2633038_2635264_-	COG1132, MdlB, ABC-type multidrug transport system, ATPase and permease components [Defense mechanisms]	NA|148aa|up_8|NC_019970.1_2635296_2635740_-	smart00347, HTH_MARR, helix_turn_helix multiple antibiotic resistance protein	NA|522aa|up_7|NC_019970.1_2635873_2637439_-	cd01031, EriC, ClC chloride channel EriC	NA|207aa|up_6|NC_019970.1_2637645_2638266_+	COG0490, COG0490, Putative regulatory, ligand-binding protein related to C-terminal domains of K+ channels [Inorganic ion transport and metabolism]	NA|370aa|up_5|NC_019970.1_2638386_2639496_+	COG1125, OpuBA, ABC-type proline/glycine betaine transport systems, ATPase components [Amino acid transport and metabolism]	NA|213aa|up_4|NC_019970.1_2639508_2640147_+	COG1174, OpuBB, ABC-type proline/glycine betaine transport systems, permease component [Amino acid transport and metabolism]	NA|299aa|up_3|NC_019970.1_2640155_2641052_+	cd13609, PBP2_Opu_like_1, Substrate-binding domain of putative ABC-type osmoprotectant uptake system; the type 2 periplasmic-binding protein fold	cas2|88aa|up_2|NC_019970.1_2641082_2641346_-	cd09725, Cas2_I_II_III, CRISPR/Cas system-associated protein Cas2	cas1|331aa|up_1|NC_019970.1_2641358_2642351_-	TIGR03641, cas1_HMARI, CRISPR-associated endonuclease Cas1, subtype I-B/HMARI/TNEAP	cas4|166aa|up_0|NC_019970.1_2642347_2642845_-	pfam01930, Cas_Cas4, Domain of unknown function DUF83	cas3|789aa|down_0|NC_019970.1_2669159_2671526_-	cd17930, DEXHc_cas3, DEXH/Q-box helicase domain of Cas3	cas5|238aa|down_1|NC_019970.1_2671500_2672214_-	TIGR02592, hypothetical_protein_CTC_01466, CRISPR-associated protein Cas5, subtype I-B/HMARI	cas7|304aa|down_2|NC_019970.1_2672289_2673201_-	TIGR02590, hypothetical_protein_MM_0563, CRISPR-associated protein Cas7/Csh2, subtype I-B/HMARI	cas8b1|587aa|down_3|NC_019970.1_2673197_2674958_-	pfam09484, Cas_TM1802, CRISPR-associated protein TM1802 (cas_TM1802)	cas6|247aa|down_4|NC_019970.1_2674973_2675714_-	TIGR01877, CRISPR-associated_endoribonuclease_Cas6_1, CRISPR-associated endoribonuclease Cas6	csm2gr11|122aa|down_5|NC_019970.1_2675933_2676299_-	NA	csm3gr7|285aa|down_6|NC_019970.1_2676312_2677167_-	TIGR03986, CRISPR-associated_protein, CRISPR-associated protein	csm3gr7|260aa|down_7|NC_019970.1_2677153_2677933_-	cd09683, Csm3_III-A, CRISPR/Cas system-associated RAMP superfamily protein Csm3	csm3gr7|250aa|down_8|NC_019970.1_2677925_2678675_-	cd09683, Csm3_III-A, CRISPR/Cas system-associated RAMP superfamily protein Csm3	csm2gr11|150aa|down_9|NC_019970.1_2678702_2679152_-	NA
GCF_000328545.1_ASM32854v1	NC_019956	Thermoanaerobacterium thermosaccharolyticum M0795 plasmid pTHETHE01, complete sequence	1	91693-91983	1,1	PILER-CR,CRISPRCasFinder	no	cas6,csf2gr7,csf1gr8	cas3,cas6,csf2gr7,csf1gr8	Type IV-A	CTTCTAAACCCACATAGGA,CCCATATAGGAAGATAAAAACTCA	19,24	0	0	NA	NA	NA:NA	4,2	4	TypeIV-A	cas3,RT,cas4,csa3,WYL,DinG,csx1,cmr4gr7,cmr5gr11,cmr6gr7,cmr1gr7,cas10,cmr3gr5,csm6,DEDDh,Cas9_archaeal,cas2,cas1,cas5,cas7,cas8b1,cas6,csm2gr11,csm3gr7,csx10gr5,csx20,csf2gr7,csf1gr8	NA|125aa|up_7|NC_019956.1_86701_87076_-,NA|182aa|up_5|NC_019956.1_87760_88306_-,NA|104aa|up_4|NC_019956.1_88320_88632_-,NA|86aa|up_1|NC_019956.1_89965_90223_-,NA|130aa|up_0|NC_019956.1_90246_90636_-,csf1gr8|227aa|down_4|NC_019956.1_97795_98476_-,NA|137aa|down_5|NC_019956.1_98480_98891_-,NA|93aa|down_7|NC_019956.1_99707_99986_-	NA|127aa|up_9|NC_019956.1_85372_85753_-	pfam11554, DUF3232, Protein of unknown function (DUF3232)	NA|252aa|up_8|NC_019956.1_85940_86696_-	cd01483, E1_enzyme_family, Superfamily of activating enzymes (E1) of the ubiquitin-like proteins	NA|125aa|up_7|NC_019956.1_86701_87076_-	NA	NA|225aa|up_6|NC_019956.1_87096_87771_-	pfam14460, Prok-E2_D, Prokaryotic E2 family D	NA|182aa|up_5|NC_019956.1_87760_88306_-	NA	NA|104aa|up_4|NC_019956.1_88320_88632_-	NA	NA|77aa|up_3|NC_019956.1_88685_88916_-	cd05625, STKc_LATS1, Catalytic domain of the Serine/Threonine Kinase, Large Tumor Suppressor 1	NA|271aa|up_2|NC_019956.1_89085_89898_-	COG4227, COG4227, Antirestriction protein [DNA replication, recombination, and repair]	NA|86aa|up_1|NC_019956.1_89965_90223_-	NA	NA|130aa|up_0|NC_019956.1_90246_90636_-	NA	NA|770aa|down_0|NC_019956.1_92089_94399_-	TIGR01448, recD_rel, helicase, putative, RecD/TraA family	cas6|243aa|down_1|NC_019956.1_94388_95117_-	pfam10040, CRISPR_Cas6, CRISPR-associated endoribonuclease Cas6	NA|246aa|down_2|NC_019956.1_95116_95854_-	TIGR03116, cas5_csf3, CRISPR type IV/AFERR-associated protein Csf3	csf2gr7|335aa|down_3|NC_019956.1_96769_97774_-	TIGR03115, cas7_csf2, CRISPR type IV/AFERR-associated protein Csf2	csf1gr8|227aa|down_4|NC_019956.1_97795_98476_-	NA	NA|137aa|down_5|NC_019956.1_98480_98891_-	NA	NA|133aa|down_6|NC_019956.1_99035_99434_-	pfam05534, HicB, HicB family	NA|93aa|down_7|NC_019956.1_99707_99986_-	NA	NA|271aa|down_8|NC_019956.1_100024_100837_-	TIGR02225, Tyrosine_recombinase_XerD, tyrosine recombinase XerD	NA|310aa|down_9|NC_019956.1_100924_101854_-	COG1468, COG1468, CRISPR-associated protein Cas4 (RecB family exonuclease) [Defense    mechanisms]
GCF_000328545.1_ASM32854v1	NC_019956	Thermoanaerobacterium thermosaccharolyticum M0795 plasmid pTHETHE01, complete sequence	2	96038-96656	2,1,2	CRISPRCasFinder,CRT,PILER-CR	no	cas6,csf2gr7,csf1gr8	cas3,cas6,csf2gr7,csf1gr8	Type IV-A	CTTCTAAACCTACATAGGATATTTCAAAC,CTTCTAAACCTACATAGGATATTTCAAAC,CTTCTAAACCTACATAGGATATTTCAAAC	29,29,29	0	0	NA	NA	NA:NA:NA	9,7,6	9	TypeIV-A	cas3,RT,cas4,csa3,WYL,DinG,csx1,cmr4gr7,cmr5gr11,cmr6gr7,cmr1gr7,cas10,cmr3gr5,csm6,DEDDh,Cas9_archaeal,cas2,cas1,cas5,cas7,cas8b1,cas6,csm2gr11,csm3gr7,csx10gr5,csx20,csf2gr7,csf1gr8	NA|182aa|up_8|NC_019956.1_87760_88306_-,NA|104aa|up_7|NC_019956.1_88320_88632_-,NA|86aa|up_4|NC_019956.1_89965_90223_-,NA|130aa|up_3|NC_019956.1_90246_90636_-,csf1gr8|227aa|down_1|NC_019956.1_97795_98476_-,NA|137aa|down_2|NC_019956.1_98480_98891_-,NA|93aa|down_4|NC_019956.1_99707_99986_-,NA|173aa|down_7|NC_019956.1_101898_102417_-,NA|204aa|down_8|NC_019956.1_102596_103208_-,NA|176aa|down_9|NC_019956.1_103238_103766_-	NA|225aa|up_9|NC_019956.1_87096_87771_-	pfam14460, Prok-E2_D, Prokaryotic E2 family D	NA|182aa|up_8|NC_019956.1_87760_88306_-	NA	NA|104aa|up_7|NC_019956.1_88320_88632_-	NA	NA|77aa|up_6|NC_019956.1_88685_88916_-	cd05625, STKc_LATS1, Catalytic domain of the Serine/Threonine Kinase, Large Tumor Suppressor 1	NA|271aa|up_5|NC_019956.1_89085_89898_-	COG4227, COG4227, Antirestriction protein [DNA replication, recombination, and repair]	NA|86aa|up_4|NC_019956.1_89965_90223_-	NA	NA|130aa|up_3|NC_019956.1_90246_90636_-	NA	NA|770aa|up_2|NC_019956.1_92089_94399_-	TIGR01448, recD_rel, helicase, putative, RecD/TraA family	cas6|243aa|up_1|NC_019956.1_94388_95117_-	pfam10040, CRISPR_Cas6, CRISPR-associated endoribonuclease Cas6	NA|246aa|up_0|NC_019956.1_95116_95854_-	TIGR03116, cas5_csf3, CRISPR type IV/AFERR-associated protein Csf3	csf2gr7|335aa|down_0|NC_019956.1_96769_97774_-	TIGR03115, cas7_csf2, CRISPR type IV/AFERR-associated protein Csf2	csf1gr8|227aa|down_1|NC_019956.1_97795_98476_-	NA	NA|137aa|down_2|NC_019956.1_98480_98891_-	NA	NA|133aa|down_3|NC_019956.1_99035_99434_-	pfam05534, HicB, HicB family	NA|93aa|down_4|NC_019956.1_99707_99986_-	NA	NA|271aa|down_5|NC_019956.1_100024_100837_-	TIGR02225, Tyrosine_recombinase_XerD, tyrosine recombinase XerD	NA|310aa|down_6|NC_019956.1_100924_101854_-	COG1468, COG1468, CRISPR-associated protein Cas4 (RecB family exonuclease) [Defense    mechanisms]	NA|173aa|down_7|NC_019956.1_101898_102417_-	NA	NA|204aa|down_8|NC_019956.1_102596_103208_-	NA	NA|176aa|down_9|NC_019956.1_103238_103766_-	NA
