assembly_id	genome_id	genome_def	crispr_array_locus_merge	crispr_array_location_merge	crispr_locus_id	crispr_pred_method	array_in_prot	prot_within_array_20000	prot_in_genome	crispr_type_by_cas_prot	consensus_repeat	repeat_length	self-targeting_spacer_number	self-targeting_target_number	spacer_location	protospacer_location	repeat_type	spacer_locus_num	spacer_num	correct_crispr_type	genome_cas_prots	unknown_protein_around_crispr	L10	L10_domain	L9	L9_domain	L8	L8_domain	L7	L7_domain	L6	L6_domain	L5	L5_domain	L4	L4_domain	L3	L3_domain	L2	L2_domain	L1	L1_domain	R1	R1_domain	R2	R2_domain	R3	R3_domain	R4	R4_domain	R5	R5_domain	R6	R6_domain	R7	R7_domain	R8	R8_domain	R9	R9_domain	R10	R10_domain
GCF_902381805.1_UHGG_MGYG-HGUT-01704	NZ_LR698970	Lachnospiraceae bacterium isolate MGYG-HGUT-01704 chromosome 1	1	997140-997466	1	CRT	no		cas3,DEDDh,csa3,RT,DinG,cas14j,c2c9_V-U4,PD-DExK,cas2,cas1,cas4,cas5,cas7,cas8b2,cas6	Orphan	TGGGGAGCAATGTGCACCGGCTGGGTA	27	3	5	997167-997199|997227-997259|997227-997259|997227-997259|997347-997379	NZ_LR698970.1_997107-997139|NZ_LR698970.1_2597572-2597540|NZ_LR698970.1_2597272-2597240|NZ_LR698970.1_2597392-2597360|NZ_LR698970.1_997107-997139	NA	5	5	Orphan	cas3,DEDDh,csa3,RT,DinG,cas14j,c2c9_V-U4,PD-DExK,cas2,cas1,cas4,cas5,cas7,cas8b2,cas6	NA,NA|98aa|down_1|NZ_LR698970.1_999483_999777_+	NA|42aa|up_9|NZ_LR698970.1_981090_981216_+	pfam09339, HTH_IclR, IclR helix-turn-helix domain	NA|141aa|up_8|NZ_LR698970.1_981212_981635_+	pfam02028, BCCT, BCCT, betaine/carnitine/choline family transporter	NA|214aa|up_7|NZ_LR698970.1_981722_982364_+	TIGR01367, Orotate_phosphoribosyltransferase, orotate phosphoribosyltransferase, Thermus family	NA|456aa|up_6|NZ_LR698970.1_982497_983865_-	COG5263, COG5263, FOG: Glucan-binding domain (YG repeat) [General function prediction only]	NA|383aa|up_5|NZ_LR698970.1_984069_985218_+	pfam13785, DUF4178, Domain of unknown function (DUF4178)	NA|141aa|up_4|NZ_LR698970.1_985245_985668_+	pfam03994, DUF350, Domain of Unknown Function (DUF350)	NA|509aa|up_3|NZ_LR698970.1_985667_987194_+	PRK03612, PRK03612, polyamine aminopropyltransferase	NA|973aa|up_2|NZ_LR698970.1_987285_990204_+	COG1026, COG1026, Predicted Zn-dependent peptidases, insulinase-like [General function prediction only]	NA|302aa|up_1|NZ_LR698970.1_990215_991121_+	PRK00089, era, GTPase Era; Reviewed	NA|248aa|up_0|NZ_LR698970.1_991120_991864_+	PRK00085, recO, DNA repair protein RecO; Reviewed	NA|465aa|down_0|NZ_LR698970.1_997686_999081_+	PRK04173, PRK04173, glycyl-tRNA synthetase; Provisional	NA|98aa|down_1|NZ_LR698970.1_999483_999777_+	NA	NA|89aa|down_2|NZ_LR698970.1_999754_1000021_+	cd01948, EAL, EAL domain	NA|428aa|down_3|NZ_LR698970.1_1000040_1001324_+	pfam09587, PGA_cap, Bacterial capsule synthesis protein PGA_cap	NA|416aa|down_4|NZ_LR698970.1_1001464_1002712_+	PRK14072, PRK14072, diphosphate--fructose-6-phosphate 1-phosphotransferase	NA|308aa|down_5|NZ_LR698970.1_1002861_1003785_+	cd04181, NTP_transferase, NTP_transferases catalyze the transfer of nucleotides onto phosphosugars	NA|449aa|down_6|NZ_LR698970.1_1003870_1005217_+	cd05802, GlmM, GlmM is a bacterial phosphoglucosamine mutase (PNGM) that belongs to the alpha-D-phosphohexomutase superfamily	NA|333aa|down_7|NZ_LR698970.1_1005271_1006270_+	sd00006, TPR, Tetratricopeptide repeat	NA|368aa|down_8|NZ_LR698970.1_1006312_1007416_+	pfam11258, DUF3048, Protein of unknown function (DUF3048) N-terminal domain	NA|298aa|down_9|NZ_LR698970.1_1007553_1008447_+	TIGR00762, DegV, EDD domain protein, DegV family
GCF_902381805.1_UHGG_MGYG-HGUT-01704	NZ_LR698970	Lachnospiraceae bacterium isolate MGYG-HGUT-01704 chromosome 1	2	1173624-1173721	1	CRISPRCasFinder	no		cas3,DEDDh,csa3,RT,DinG,cas14j,c2c9_V-U4,PD-DExK,cas2,cas1,cas4,cas5,cas7,cas8b2,cas6	Orphan	AACCGACCAAACAACCGTTTTAA	23	0	0	NA	NA	NA	1	1	Orphan	cas3,DEDDh,csa3,RT,DinG,cas14j,c2c9_V-U4,PD-DExK,cas2,cas1,cas4,cas5,cas7,cas8b2,cas6	NA,NA	NA|705aa|up_9|NZ_LR698970.1_1161500_1163615_+	cd07731, ComA-like_MBL-fold, Competence protein ComA, ComEC and related proteins; MBL-fold metallo hydrolase domain	NA|279aa|up_8|NZ_LR698970.1_1163723_1164560_-	COG0613, COG0613, Predicted metal-dependent phosphoesterases (PHP family) [General function prediction only]	NA|182aa|up_7|NZ_LR698970.1_1164937_1165483_+	cd03392, PAP2_like_2, PAP2_like_2 proteins	NA|641aa|up_6|NZ_LR698970.1_1165526_1167449_+	PRK05644, gyrB, DNA gyrase subunit B; Validated	NA|749aa|up_5|NZ_LR698970.1_1167459_1169706_+	PRK05560, PRK05560, DNA gyrase subunit A; Validated	NA|389aa|up_4|NZ_LR698970.1_1169726_1170893_+	COG5279, CYK3, Uncharacterized protein involved in cytokinesis, contains TGc (transglutaminase/protease-like) domain [Cell division and chromosome partitioning]	NA|155aa|up_3|NZ_LR698970.1_1171026_1171491_+	PRK00092, PRK00092, ribosome maturation protein RimP; Reviewed	NA|385aa|up_2|NZ_LR698970.1_1171511_1172666_+	PRK12327, nusA, transcription elongation factor NusA; Provisional	NA|93aa|up_1|NZ_LR698970.1_1172675_1172954_+	pfam04296, DUF448, Protein of unknown function (DUF448)	NA|105aa|up_0|NZ_LR698970.1_1172940_1173255_+	PRK07714, PRK07714, YlxQ family RNA-binding protein	NA|129aa|down_0|NZ_LR698970.1_1176061_1176448_+	PRK00521, rbfA, 30S ribosome-binding factor RbfA	NA|321aa|down_1|NZ_LR698970.1_1176428_1177391_+	COG0618, COG0618, Exopolyphosphatase-related proteins [General function prediction only]	NA|299aa|down_2|NZ_LR698970.1_1177393_1178290_+	PRK00130, truB, tRNA pseudouridine synthase B; Provisional	NA|317aa|down_3|NZ_LR698970.1_1178305_1179256_+	PRK05627, PRK05627, bifunctional riboflavin kinase/FAD synthetase	NA|545aa|down_4|NZ_LR698970.1_1179300_1180935_+	COG1283, NptA, Na+/phosphate symporter [Inorganic ion transport and metabolism]	NA|255aa|down_5|NZ_LR698970.1_1181031_1181796_+	COG0791, Spr, Cell wall-associated hydrolases (invasion-associated proteins) [Cell envelope biogenesis, outer membrane]	NA|89aa|down_6|NZ_LR698970.1_1181924_1182191_+	PRK05626, rpsO, 30S ribosomal protein S15; Reviewed	NA|698aa|down_7|NZ_LR698970.1_1182384_1184478_+	PRK11824, PRK11824, polynucleotide phosphorylase/polyadenylase; Provisional	NA|163aa|down_8|NZ_LR698970.1_1184561_1185050_+	pfam13353, Fer4_12, 4Fe-4S single cluster domain	NA|172aa|down_9|NZ_LR698970.1_1185058_1185574_+	COG0756, Dut, dUTPase [Nucleotide transport and metabolism]
GCF_902381805.1_UHGG_MGYG-HGUT-01704	NZ_LR698970	Lachnospiraceae bacterium isolate MGYG-HGUT-01704 chromosome 1	3	1920871-1921034	2	CRISPRCasFinder	no	RT,cas14j,c2c9_V-U4	cas3,DEDDh,csa3,RT,DinG,cas14j,c2c9_V-U4,PD-DExK,cas2,cas1,cas4,cas5,cas7,cas8b2,cas6	Unclear	GATGCATTTTATATATATAAGTGGATTGAAATC	33	0	0	NA	NA	NA	2	2	TypeV	cas3,DEDDh,csa3,RT,DinG,cas14j,c2c9_V-U4,PD-DExK,cas2,cas1,cas4,cas5,cas7,cas8b2,cas6	NA|82aa|up_8|NZ_LR698970.1_1916853_1917099_+,NA|91aa|up_7|NZ_LR698970.1_1917085_1917358_+,NA|95aa|up_6|NZ_LR698970.1_1917344_1917629_+,NA|108aa|up_4|NZ_LR698970.1_1918316_1918640_+,NA|116aa|up_3|NZ_LR698970.1_1918659_1919007_+,NA|52aa|up_2|NZ_LR698970.1_1919022_1919178_+,NA|67aa|down_1|NZ_LR698970.1_1922226_1922427_+,NA|188aa|down_2|NZ_LR698970.1_1922447_1923011_+,NA|65aa|down_4|NZ_LR698970.1_1923722_1923917_+,NA|227aa|down_5|NZ_LR698970.1_1923923_1924604_+,NA|247aa|down_7|NZ_LR698970.1_1926158_1926899_+,NA|224aa|down_8|NZ_LR698970.1_1926886_1927558_+,NA|58aa|down_9|NZ_LR698970.1_1927606_1927780_+	NA|397aa|up_9|NZ_LR698970.1_1915684_1916875_+	pfam00004, AAA, ATPase family associated with various cellular activities (AAA)	NA|82aa|up_8|NZ_LR698970.1_1916853_1917099_+	NA	NA|91aa|up_7|NZ_LR698970.1_1917085_1917358_+	NA	NA|95aa|up_6|NZ_LR698970.1_1917344_1917629_+	NA	NA|229aa|up_5|NZ_LR698970.1_1917628_1918315_+	cd00844, MPP_Dbr1_N, Dbr1 RNA lariat debranching enzyme, N-terminal metallophosphatase domain	NA|108aa|up_4|NZ_LR698970.1_1918316_1918640_+	NA	NA|116aa|up_3|NZ_LR698970.1_1918659_1919007_+	NA	NA|52aa|up_2|NZ_LR698970.1_1919022_1919178_+	NA	cas14j|374aa|up_1|NZ_LR698970.1_1919200_1920322_+	pfam01385, OrfB_IS605, Probable transposase	c2c9_V-U4|99aa|up_0|NZ_LR698970.1_1920293_1920590_+	pfam07282, OrfB_Zn_ribbon, Putative transposase DNA-binding domain	NA|151aa|down_0|NZ_LR698970.1_1921636_1922089_+	pfam09424, YqeY, Yqey-like protein	NA|67aa|down_1|NZ_LR698970.1_1922226_1922427_+	NA	NA|188aa|down_2|NZ_LR698970.1_1922447_1923011_+	NA	NA|241aa|down_3|NZ_LR698970.1_1923000_1923723_+	pfam18346, SH3_15, Mind bomb SH3 repeat domain	NA|65aa|down_4|NZ_LR698970.1_1923722_1923917_+	NA	NA|227aa|down_5|NZ_LR698970.1_1923923_1924604_+	NA	NA|413aa|down_6|NZ_LR698970.1_1924587_1925826_+	smart00487, DEXDc, DEAD-like helicases superfamily	NA|247aa|down_7|NZ_LR698970.1_1926158_1926899_+	NA	NA|224aa|down_8|NZ_LR698970.1_1926886_1927558_+	NA	NA|58aa|down_9|NZ_LR698970.1_1927606_1927780_+	NA
GCF_902381805.1_UHGG_MGYG-HGUT-01704	NZ_LR698970	Lachnospiraceae bacterium isolate MGYG-HGUT-01704 chromosome 1	4	1921365-1921597	1,2,3	PILER-CR,CRT,CRISPRCasFinder	no	RT,cas14j,c2c9_V-U4	cas3,DEDDh,csa3,RT,DinG,cas14j,c2c9_V-U4,PD-DExK,cas2,cas1,cas4,cas5,cas7,cas8b2,cas6	Unclear	TCTGATGCATTTTATATATATAAGTGGATTGAAATC,TGATGCATTTTATATATATAAGTGGATTGAAATC,GATGCATTTTATATATATAAGTGGATTGAAATC	36,34,33	0	0	NA	NA	NA:NA:NA	2,3,3	3	TypeV	cas3,DEDDh,csa3,RT,DinG,cas14j,c2c9_V-U4,PD-DExK,cas2,cas1,cas4,cas5,cas7,cas8b2,cas6	NA|82aa|up_8|NZ_LR698970.1_1916853_1917099_+,NA|91aa|up_7|NZ_LR698970.1_1917085_1917358_+,NA|95aa|up_6|NZ_LR698970.1_1917344_1917629_+,NA|108aa|up_4|NZ_LR698970.1_1918316_1918640_+,NA|116aa|up_3|NZ_LR698970.1_1918659_1919007_+,NA|52aa|up_2|NZ_LR698970.1_1919022_1919178_+,NA|67aa|down_1|NZ_LR698970.1_1922226_1922427_+,NA|188aa|down_2|NZ_LR698970.1_1922447_1923011_+,NA|65aa|down_4|NZ_LR698970.1_1923722_1923917_+,NA|227aa|down_5|NZ_LR698970.1_1923923_1924604_+,NA|247aa|down_7|NZ_LR698970.1_1926158_1926899_+,NA|224aa|down_8|NZ_LR698970.1_1926886_1927558_+,NA|58aa|down_9|NZ_LR698970.1_1927606_1927780_+	NA|397aa|up_9|NZ_LR698970.1_1915684_1916875_+	pfam00004, AAA, ATPase family associated with various cellular activities (AAA)	NA|82aa|up_8|NZ_LR698970.1_1916853_1917099_+	NA	NA|91aa|up_7|NZ_LR698970.1_1917085_1917358_+	NA	NA|95aa|up_6|NZ_LR698970.1_1917344_1917629_+	NA	NA|229aa|up_5|NZ_LR698970.1_1917628_1918315_+	cd00844, MPP_Dbr1_N, Dbr1 RNA lariat debranching enzyme, N-terminal metallophosphatase domain	NA|108aa|up_4|NZ_LR698970.1_1918316_1918640_+	NA	NA|116aa|up_3|NZ_LR698970.1_1918659_1919007_+	NA	NA|52aa|up_2|NZ_LR698970.1_1919022_1919178_+	NA	cas14j|374aa|up_1|NZ_LR698970.1_1919200_1920322_+	pfam01385, OrfB_IS605, Probable transposase	c2c9_V-U4|99aa|up_0|NZ_LR698970.1_1920293_1920590_+	pfam07282, OrfB_Zn_ribbon, Putative transposase DNA-binding domain	NA|151aa|down_0|NZ_LR698970.1_1921636_1922089_+	pfam09424, YqeY, Yqey-like protein	NA|67aa|down_1|NZ_LR698970.1_1922226_1922427_+	NA	NA|188aa|down_2|NZ_LR698970.1_1922447_1923011_+	NA	NA|241aa|down_3|NZ_LR698970.1_1923000_1923723_+	pfam18346, SH3_15, Mind bomb SH3 repeat domain	NA|65aa|down_4|NZ_LR698970.1_1923722_1923917_+	NA	NA|227aa|down_5|NZ_LR698970.1_1923923_1924604_+	NA	NA|413aa|down_6|NZ_LR698970.1_1924587_1925826_+	smart00487, DEXDc, DEAD-like helicases superfamily	NA|247aa|down_7|NZ_LR698970.1_1926158_1926899_+	NA	NA|224aa|down_8|NZ_LR698970.1_1926886_1927558_+	NA	NA|58aa|down_9|NZ_LR698970.1_1927606_1927780_+	NA
GCF_902381805.1_UHGG_MGYG-HGUT-01704	NZ_LR698970	Lachnospiraceae bacterium isolate MGYG-HGUT-01704 chromosome 1	5	2645610-2647950	3,2,4	CRT,PILER-CR,CRISPRCasFinder	no	cas2,cas1,cas4,cas3,cas5,cas7,cas8b2,cas6	cas3,DEDDh,csa3,RT,DinG,cas14j,c2c9_V-U4,PD-DExK,cas2,cas1,cas4,cas5,cas7,cas8b2,cas6	Unclear	ATTTACATTCCAATATGTTTCTATTAATAC,ATTTACATTCCAATATGTTTCTATTAATAC,ATTTACATTCCAATATGTTTCTATTAATAC	30,30,30	0	0	NA	NA	NA:NA:NA	35,34,34	35	Unclear	cas3,DEDDh,csa3,RT,DinG,cas14j,c2c9_V-U4,PD-DExK,cas2,cas1,cas4,cas5,cas7,cas8b2,cas6	NA|302aa|up_4|NZ_LR698970.1_2638166_2639072_-,NA|475aa|up_2|NZ_LR698970.1_2640062_2641487_-,NA|122aa|up_1|NZ_LR698970.1_2641934_2642300_+,NA	NA|429aa|up_9|NZ_LR698970.1_2633088_2634375_-	pfam06824, Glyco_hydro_125, Metal-independent alpha-mannosidase (GH125)	NA|120aa|up_8|NZ_LR698970.1_2634396_2634756_-	PRK11770, PRK11770, YccF domain-containing protein	NA|249aa|up_7|NZ_LR698970.1_2634901_2635648_+	PRK05557, fabG, 3-ketoacyl-(acyl-carrier-protein) reductase; Validated	NA|295aa|up_6|NZ_LR698970.1_2635672_2636557_-	COG0583, LysR, Transcriptional regulator [Transcription]	NA|477aa|up_5|NZ_LR698970.1_2636686_2638117_+	cd06450, DOPA_deC_like, DOPA decarboxylase family	NA|302aa|up_4|NZ_LR698970.1_2638166_2639072_-	NA	NA|280aa|up_3|NZ_LR698970.1_2639068_2639908_-	pfam13676, TIR_2, TIR domain	NA|475aa|up_2|NZ_LR698970.1_2640062_2641487_-	NA	NA|122aa|up_1|NZ_LR698970.1_2641934_2642300_+	NA	NA|121aa|up_0|NZ_LR698970.1_2642289_2642652_+	pfam05717, TnpB_IS66, IS66 Orf2 like protein	cas2|93aa|down_0|NZ_LR698970.1_2648115_2648394_-	cd09725, Cas2_I_II_III, CRISPR/Cas system-associated protein Cas2	cas1|331aa|down_1|NZ_LR698970.1_2648395_2649388_-	cd09722, Cas1_I-B, CRISPR/Cas system-associated protein Cas1	cas4|165aa|down_2|NZ_LR698970.1_2649400_2649895_-	pfam01930, Cas_Cas4, Domain of unknown function DUF83	cas3|794aa|down_3|NZ_LR698970.1_2649913_2652295_-	TIGR01587, CRISPR-associated_endonuclease/helicase_Cas3, CRISPR-associated helicase Cas3	cas5|242aa|down_4|NZ_LR698970.1_2652340_2653066_-	cd09658, Cas5_I-B, CRISPR/Cas system-associated RAMP superfamily protein Cas5	cas7|292aa|down_5|NZ_LR698970.1_2653049_2653925_-	cd09687, Cas7_I-C, CRISPR/Cas system-associated RAMP superfamily protein Cas7	cas8b2|561aa|down_6|NZ_LR698970.1_2653927_2655610_-	cd09754, Cas8a1_I-A, CRISPR/Cas system-associated protein Cas8a1	cas6|241aa|down_7|NZ_LR698970.1_2655625_2656348_-	TIGR01877, CRISPR-associated_endoribonuclease_Cas6_1, CRISPR-associated endoribonuclease Cas6	NA|550aa|down_8|NZ_LR698970.1_2657511_2659161_-	COG1595, RpoE, DNA-directed RNA polymerase specialized sigma subunit, sigma24 homolog [Transcription]	NA|121aa|down_9|NZ_LR698970.1_2659532_2659895_-	cd00093, HTH_XRE, Helix-turn-helix XRE-family like proteins
