assembly_id	genome_id	genome_def	crispr_array_locus_merge	crispr_array_location_merge	crispr_locus_id	crispr_pred_method	array_in_prot	prot_within_array_20000	prot_in_genome	crispr_type_by_cas_prot	consensus_repeat	repeat_length	self-targeting_spacer_number	self-targeting_target_number	spacer_location	protospacer_location	repeat_type	spacer_locus_num	spacer_num	correct_crispr_type	genome_cas_prots	unknown_protein_around_crispr	L10	L10_domain	L9	L9_domain	L8	L8_domain	L7	L7_domain	L6	L6_domain	L5	L5_domain	L4	L4_domain	L3	L3_domain	L2	L2_domain	L1	L1_domain	R1	R1_domain	R2	R2_domain	R3	R3_domain	R4	R4_domain	R5	R5_domain	R6	R6_domain	R7	R7_domain	R8	R8_domain	R9	R9_domain	R10	R10_domain
GCA_003990705.1_ASM399070v1	AP018318	Nostoc sp. HK-01 DNA, complete genome	3	924795-925195	2,1,3	PILER-CR,CRT,CRISPRCasFinder	no	c2c5_V-U5	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1	Type V-U5	CTTTCAACCCACCCCTAGCCGGGATGGTTGTTGAAAC,CTTTCAACCCACCCCTAGCCGGGATGGTTGTTGAAACNNNNN,CTTTCAACCCACCCCTAGCCGGGATGGTTGTTGAAAC	37,42,37	0	0	NA	NA	V-U5:V-U5:V-U5	5,5,3	5	TypeV-U5	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1,Cas9_archaeal,cas14j,Cas14c_CAS-V-F	NA|59aa|up_8|AP018318.1_912514_912691_-,NA|54aa|down_1|AP018318.1_927774_927936_+,NA|90aa|down_3|AP018318.1_929991_930261_-	NA|468aa|up_9|AP018318.1_911025_912429_-	cd14014, STKc_PknB_like, Catalytic domain of bacterial Serine/Threonine kinases, PknB and similar proteins	NA|59aa|up_8|AP018318.1_912514_912691_-	NA	NA|416aa|up_7|AP018318.1_912735_913983_+	cd02511, Beta4Glucosyltransferase, UDP-glucose LOS-beta-1,4 glucosyltransferase is required for biosynthesis of lipooligosaccharide	NA|396aa|up_6|AP018318.1_914374_915562_+	COG0003, ArsA, Predicted ATPase involved in chromosome partitioning [Cell division and chromosome partitioning]	NA|1101aa|up_5|AP018318.1_915884_919187_+	COG0642, BaeS, Signal transduction histidine kinase [Signal transduction mechanisms]	NA|202aa|up_4|AP018318.1_919247_919853_+	pfam02245, Pur_DNA_glyco, Methylpurine-DNA glycosylase (MPG)	NA|197aa|up_3|AP018318.1_920392_920983_+	COG1695, COG1695, Predicted transcriptional regulators [Transcription]	NA|394aa|up_2|AP018318.1_921209_922391_-	pfam01139, RtcB, tRNA-splicing ligase RtcB	NA|303aa|up_1|AP018318.1_922653_923562_+	pfam13649, Methyltransf_25, Methyltransferase domain	NA|243aa|up_0|AP018318.1_923558_924287_+	COG0637, COG0637, Predicted phosphatase/phosphohexomutase [General function prediction only]	c2c5_V-U5|637aa|down_0|AP018318.1_925689_927600_-	TIGR01766, Putative_transposase_MJ0751, transposase, IS605 OrfB family, central region	NA|54aa|down_1|AP018318.1_927774_927936_+	NA	NA|622aa|down_2|AP018318.1_928009_929875_-	COG3472, COG3472, Uncharacterized conserved protein [Function unknown]	NA|90aa|down_3|AP018318.1_929991_930261_-	NA	NA|458aa|down_4|AP018318.1_930810_932184_+	pfam01609, DDE_Tnp_1, Transposase DDE domain	NA|946aa|down_5|AP018318.1_932752_935590_+	cd18011, DEXDc_RapA, DEXH-box helicase domain of RapA	NA|1290aa|down_6|AP018318.1_935652_939522_+	TIGR02987, m6_adenine_and_m5_cytosine_DNA_methyltransferase, type II restriction m6 adenine DNA methyltransferase, Alw26I/Eco31I/Esp3I family	NA|1783aa|down_7|AP018318.1_939569_944918_+	cd17923, DEXHc_Hrq1-like, DEAH-box helicase domain of Hrq1 and similar proteins	NA|169aa|down_8|AP018318.1_944996_945503_-	cd00592, HTH_MerR-like, Helix-Turn-Helix DNA binding domain of MerR-like transcription regulators	NA|921aa|down_9|AP018318.1_945951_948714_+	pfam02384, N6_Mtase, N-6 DNA Methylase
GCA_003990705.1_ASM399070v1	AP018318	Nostoc sp. HK-01 DNA, complete genome	4	1222484-1222731	4,2,3	CRISPRCasFinder,CRT,PILER-CR	no		c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1	Orphan	TTGAGCAACGCCTAACGGCATCAAGCCACGAATCAC,TTGAGCAACGCCTAACGGCATCAAGCCACGAATCAC,TTGAGCAACGCCTAACGGCATCAAGCCACGAATCAC	36,36,36	0	0	NA	NA	NA:NA:NA	3,3,2	3	Orphan	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1,Cas9_archaeal,cas14j,Cas14c_CAS-V-F	NA|279aa|up_8|AP018318.1_1213463_1214300_+,NA|30aa|up_4|AP018318.1_1217350_1217440_-,NA|129aa|up_1|AP018318.1_1221819_1222206_+,NA|38aa|up_0|AP018318.1_1222285_1222399_-,NA	NA|219aa|up_9|AP018318.1_1212552_1213209_+	TIGR04282, hypothetical_protein, transferase 1, rSAM/selenodomain-associated	NA|279aa|up_8|AP018318.1_1213463_1214300_+	NA	NA|297aa|up_7|AP018318.1_1214399_1215290_+	cd02511, Beta4Glucosyltransferase, UDP-glucose LOS-beta-1,4 glucosyltransferase is required for biosynthesis of lipooligosaccharide	NA|241aa|up_6|AP018318.1_1215813_1216536_+	COG2928, COG2928, Uncharacterized conserved protein [Function unknown]	NA|214aa|up_5|AP018318.1_1216640_1217282_+	PRK09634, nusB, transcription antitermination protein NusB; Provisional	NA|30aa|up_4|AP018318.1_1217350_1217440_-	NA	NA|525aa|up_3|AP018318.1_1217494_1219069_+	PRK10416, PRK10416, signal recognition particle-docking protein FtsY; Provisional	NA|464aa|up_2|AP018318.1_1219941_1221333_+	COG2208, RsbU, Serine phosphatase RsbU, regulator of sigma subunit [Signal transduction mechanisms / Transcription]	NA|129aa|up_1|AP018318.1_1221819_1222206_+	NA	NA|38aa|up_0|AP018318.1_1222285_1222399_-	NA	NA|458aa|down_0|AP018318.1_1222841_1224215_-	pfam01609, DDE_Tnp_1, Transposase DDE domain	NA|209aa|down_1|AP018318.1_1224850_1225477_-	pfam05685, Uma2, Putative restriction endonuclease	NA|391aa|down_2|AP018318.1_1225650_1226823_+	TIGR02937, RNA_polymerase_sigma_factor, RNA polymerase sigma factor, sigma-70 family	NA|381aa|down_3|AP018318.1_1226917_1228060_+	pfam08852, DUF1822, Protein of unknown function (DUF1822)	NA|156aa|down_4|AP018318.1_1228134_1228602_+	COG3296, COG3296, Uncharacterized protein conserved in bacteria [Function unknown]	NA|536aa|down_5|AP018318.1_1228671_1230279_-	COG4191, COG4191, Signal transduction histidine kinase regulating C4-dicarboxylate transport system [Signal transduction mechanisms]	NA|370aa|down_6|AP018318.1_1231047_1232157_+	COG0673, MviM, Predicted dehydrogenases and related proteins [General function prediction only]	NA|654aa|down_7|AP018318.1_1232240_1234202_+	COG4251, COG4251, Bacteriophytochrome (light-regulated signal transduction histidine kinase) [Signal transduction mechanisms]	NA|153aa|down_8|AP018318.1_1234478_1234937_+	cd17557, REC_Rcp-like, phosphoacceptor receiver (REC) domain of cyanobacterial phytochrome response regulator Rcp and similar domains	NA|764aa|down_9|AP018318.1_1234958_1237250_+	TIGR02956, sensor_protein_TorS, TMAO reductase sytem sensor TorS
GCA_003990705.1_ASM399070v1	AP018318	Nostoc sp. HK-01 DNA, complete genome	5	1499973-1500069	5	CRISPRCasFinder	no		c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1	Orphan	ATGATTTGGGATAATTATCTGCGT	24	0	0	NA	NA	NA	1	1	Orphan	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1,Cas9_archaeal,cas14j,Cas14c_CAS-V-F	NA|119aa|up_8|AP018318.1_1490173_1490530_-,NA|138aa|up_1|AP018318.1_1498125_1498539_-,NA	NA|214aa|up_9|AP018318.1_1489328_1489970_-	cd07051, BMC_like_1_repeat1, Bacterial Micro-Compartment (BMC)-like domain 1 repeat 1	NA|119aa|up_8|AP018318.1_1490173_1490530_-	NA	NA|589aa|up_7|AP018318.1_1490872_1492639_-	PRK09319, PRK09319, bifunctional 3,4-dihydroxy-2-butanone-4-phosphate synthase RibB/GTP cyclohydrolase II RibA	NA|353aa|up_6|AP018318.1_1493059_1494118_+	PRK00436, argC, N-acetyl-gamma-glutamyl-phosphate reductase; Validated	NA|430aa|up_5|AP018318.1_1494172_1495462_-	PRK00077, eno, enolase; Provisional	NA|229aa|up_4|AP018318.1_1495788_1496475_+	PRK07580, PRK07580, Mg-protoporphyrin IX methyl transferase; Validated	NA|323aa|up_3|AP018318.1_1496621_1497590_+	PRK07399, PRK07399, DNA polymerase III subunit delta'; Validated	NA|135aa|up_2|AP018318.1_1497595_1498000_-	pfam03928, Haem_degrading, Haem-degrading	NA|138aa|up_1|AP018318.1_1498125_1498539_-	NA	NA|288aa|up_0|AP018318.1_1498961_1499825_+	pfam06485, DUF1092, Protein of unknown function (DUF1092)	NA|438aa|down_0|AP018318.1_1500113_1501427_+	COG0312, TldD, Predicted Zn-dependent proteases and their inactivated homologs [General function prediction only]	NA|238aa|down_1|AP018318.1_1501436_1502150_+	pfam08241, Methyltransf_11, Methyltransferase domain	NA|898aa|down_2|AP018318.1_1502234_1504928_+	cd01031, EriC, ClC chloride channel EriC	NA|181aa|down_3|AP018318.1_1505193_1505736_+	COG3685, COG3685, Uncharacterized protein conserved in bacteria [Function unknown]	NA|672aa|down_4|AP018318.1_1505779_1507795_-	cd00200, WD40, WD40 domain, found in a number of eukaryotic proteins that cover a wide variety of functions including adaptor/regulatory modules in signal transduction, pre-mRNA processing and cytoskeleton assembly; typically contains a GH dipeptide 11-24 residues from its N-terminus and the WD dipeptide at its C-terminus and is 40 residues long, hence the name WD40; between GH and WD lies a conserved core; serves as a stable propeller-like platform to which proteins can bind either stably or reversibly; forms a propeller-like structure with several blades where each blade is composed of a four-stranded anti-parallel b-sheet; instances with few detectable copies are hypothesized to form larger structures by dimerization; each WD40 sequence repeat forms the first three strands of one blade and the last strand in the next blade; the last C-terminal WD40 repeat completes the blade structure of the first WD40 repeat to create the closed ring propeller-structure; residues on the top and bottom surface of the propeller are proposed to coordinate interactions with other proteins and/or small ligands; 7 copies of the repeat are present in this alignment	NA|434aa|down_5|AP018318.1_1508135_1509437_+	PRK07764, PRK07764, DNA polymerase III subunits gamma and tau; Validated	NA|269aa|down_6|AP018318.1_1510191_1510998_+	COG1682, TagG, ABC-type polysaccharide/polyol phosphate export systems, permease component [Carbohydrate transport and metabolism / Cell envelope biogenesis, outer membrane]	NA|247aa|down_7|AP018318.1_1510998_1511739_+	COG1134, TagH, ABC-type polysaccharide/polyol phosphate transport system, ATPase component [Carbohydrate transport and metabolism / Cell envelope biogenesis, outer membrane]	NA|434aa|down_8|AP018318.1_1511813_1513115_+	cd03809, GT4_MtfB-like, glycosyltransferases MtfB, WbpX, and similar proteins	NA|349aa|down_9|AP018318.1_1513199_1514246_+	cd03801, GT4_PimA-like, phosphatidyl-myo-inositol mannosyltransferase
GCA_003990705.1_ASM399070v1	AP018318	Nostoc sp. HK-01 DNA, complete genome	11	2534312-2535233	11,3,4	CRISPRCasFinder,CRT,PILER-CR	no		c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1	Orphan	GTTTCAATCCCTAATAGGGATTAGTTGAAATTGCAAT,GTTTCAATCCCTAATAGGGATTAGTTGAAATTGCAAT,GTTTC----AATCCCTAATAGGGATTAGTTGAAATTGCAAT	37,37,41	0	0	NA	NA	I-D,II-B:I-D,II-B:I-D,II-B	12,12,12	12	Orphan	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1,Cas9_archaeal,cas14j,Cas14c_CAS-V-F	NA|83aa|up_7|AP018318.1_2526749_2526998_-,NA|42aa|up_6|AP018318.1_2527165_2527291_-,NA|79aa|up_5|AP018318.1_2528116_2528353_+,NA|92aa|up_4|AP018318.1_2528375_2528651_+,NA|80aa|down_3|AP018318.1_2539324_2539564_-	NA|376aa|up_9|AP018318.1_2524088_2525216_-	COG4299, COG4299, Uncharacterized protein conserved in bacteria [Function unknown]	NA|299aa|up_8|AP018318.1_2525691_2526588_+	COG1210, GalU, UDP-glucose pyrophosphorylase [Cell envelope biogenesis, outer membrane]	NA|83aa|up_7|AP018318.1_2526749_2526998_-	NA	NA|42aa|up_6|AP018318.1_2527165_2527291_-	NA	NA|79aa|up_5|AP018318.1_2528116_2528353_+	NA	NA|92aa|up_4|AP018318.1_2528375_2528651_+	NA	NA|186aa|up_3|AP018318.1_2528711_2529269_-	cd06260, DUF820, Domain of unknown function (DUF820)	NA|552aa|up_2|AP018318.1_2529534_2531190_+	pfam04966, OprB, Carbohydrate-selective porin, OprB family	NA|439aa|up_1|AP018318.1_2531309_2532626_-	COG3839, MalK, ABC-type sugar transport systems, ATPase components [Carbohydrate transport and metabolism]	NA|423aa|up_0|AP018318.1_2532811_2534080_+	PRK06185, PRK06185, FAD-dependent oxidoreductase	NA|439aa|down_0|AP018318.1_2535481_2536798_-	smart00563, PlsC, Phosphate acyltransferases	NA|240aa|down_1|AP018318.1_2537049_2537769_+	pfam08241, Methyltransf_11, Methyltransferase domain	NA|456aa|down_2|AP018318.1_2537936_2539304_-	COG1252, Ndh, NADH dehydrogenase, FAD-containing subunit [Energy production and conversion]	NA|80aa|down_3|AP018318.1_2539324_2539564_-	NA	NA|230aa|down_4|AP018318.1_2539611_2540301_-	COG1845, CyoC, Heme/copper-type cytochrome/quinol oxidase, subunit 3 [Energy production and conversion]	NA|555aa|down_5|AP018318.1_2540307_2541972_-	TIGR02891, Probable_cytochrome_c_oxidase_subunit_1-beta, cytochrome c oxidase, subunit I	NA|317aa|down_6|AP018318.1_2541958_2542909_-	COG1622, CyoA, Heme/copper-type cytochrome/quinol oxidases, subunit 2 [Energy production and conversion]	NA|202aa|down_7|AP018318.1_2542929_2543535_-	COG4244, COG4244, Predicted membrane protein [Function unknown]	NA|165aa|down_8|AP018318.1_2543531_2544026_-	COG4244, COG4244, Predicted membrane protein [Function unknown]	NA|271aa|down_9|AP018318.1_2544598_2545411_+	cd05358, GlcDH_SDR_c, glucose 1 dehydrogenase (GlcDH), classical (c) SDRs
GCA_003990705.1_ASM399070v1	AP018318	Nostoc sp. HK-01 DNA, complete genome	13	3214788-3215259	5,13,4	PILER-CR,CRISPRCasFinder,CRT	no		c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1	Orphan	ATTGCAATTTCAACTAATCCCTATTAGGG----------ATTGAAAC,ATTGCAATTTCAACTAATCCCTATTAGGGATTGAAAC,ATTGCAATTTCAACTAATCCCTATTAGGGATTGAAAC	47,37,37	0	0	NA	NA	I-D,II-B:I-D,II-B:I-D,II-B	6,6,6	6	Orphan	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1,Cas9_archaeal,cas14j,Cas14c_CAS-V-F	NA|32aa|up_9|AP018318.1_3205229_3205325_-,NA	NA|32aa|up_9|AP018318.1_3205229_3205325_-	NA	NA|250aa|up_8|AP018318.1_3205684_3206434_+	cd08934, CAD_SDR_c, clavulanic acid dehydrogenase (CAD), classical (c) SDR	NA|407aa|up_7|AP018318.1_3206468_3207689_+	COG0625, Gst, Glutathione S-transferase [Posttranslational modification, protein turnover, chaperones]	NA|228aa|up_6|AP018318.1_3207898_3208582_-	COG0625, Gst, Glutathione S-transferase [Posttranslational modification, protein turnover, chaperones]	NA|277aa|up_5|AP018318.1_3208877_3209708_+	COG0412, COG0412, Dienelactone hydrolase and related enzymes [Secondary metabolites biosynthesis, transport, and catabolism]	NA|263aa|up_4|AP018318.1_3209769_3210558_+	pfam08241, Methyltransf_11, Methyltransferase domain	NA|230aa|up_3|AP018318.1_3210638_3211328_+	cd05373, SDR_c10, classical (c) SDR, subgroup  10	NA|318aa|up_2|AP018318.1_3211409_3212363_-	COG1300, SpoIIM, Uncharacterized membrane protein [Function unknown]	NA|285aa|up_1|AP018318.1_3212888_3213743_+	PRK00068, PRK00068, hypothetical protein; Validated	NA|261aa|up_0|AP018318.1_3213763_3214546_+	COG1714, COG1714, Predicted membrane protein/domain [Function unknown]	NA|594aa|down_0|AP018318.1_3215826_3217608_-	COG1132, MdlB, ABC-type multidrug transport system, ATPase and permease components [Defense mechanisms]	NA|594aa|down_1|AP018318.1_3217821_3219603_-	COG1132, MdlB, ABC-type multidrug transport system, ATPase and permease components [Defense mechanisms]	NA|425aa|down_2|AP018318.1_3219734_3221009_-	PRK11360, PRK11360, two-component system sensor histidine kinase AtoS	NA|600aa|down_3|AP018318.1_3221912_3223712_+	COG2831, FhaC, Hemolysin activation/secretion protein [Intracellular trafficking and secretion]	NA|821aa|down_4|AP018318.1_3224075_3226538_+	pfam05860, Haemagg_act, haemagglutination activity domain	NA|812aa|down_5|AP018318.1_3226605_3229041_+	pfam05860, Haemagg_act, haemagglutination activity domain	NA|791aa|down_6|AP018318.1_3229310_3231683_+	pfam16734, Pilin_GH, Type IV pilin-like G and H, putative	NA|71aa|down_7|AP018318.1_3231790_3232003_+	COG3210, FhaB, Large exoproteins involved in heme utilization or adhesion [Intracellular trafficking and secretion]	NA|856aa|down_8|AP018318.1_3232118_3234686_+	COG4995, COG4995, Uncharacterized protein conserved in bacteria [Function unknown]	NA|306aa|down_9|AP018318.1_3234876_3235794_+	pfam06051, DUF928, Domain of Unknown Function (DUF928)
GCA_003990705.1_ASM399070v1	AP018318	Nostoc sp. HK-01 DNA, complete genome	14	3435556-3435656	14	CRISPRCasFinder	no		c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1	Orphan	TTTCGATCCCCTCTAACCCCCCTTAAAAAG	30	0	0	NA	NA	NA	1	1	Orphan	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1,Cas9_archaeal,cas14j,Cas14c_CAS-V-F	NA|143aa|up_8|AP018318.1_3424531_3424960_-,NA|74aa|up_1|AP018318.1_3433939_3434161_+,NA|203aa|down_0|AP018318.1_3435821_3436430_-,NA|98aa|down_5|AP018318.1_3441971_3442265_+,NA|98aa|down_6|AP018318.1_3442377_3442671_-	NA|430aa|up_9|AP018318.1_3422862_3424152_-	pfam13354, Beta-lactamase2, Beta-lactamase enzyme family	NA|143aa|up_8|AP018318.1_3424531_3424960_-	NA	NA|216aa|up_7|AP018318.1_3425543_3426191_+	COG0811, TolQ, Biopolymer transport proteins [Intracellular trafficking and secretion]	NA|135aa|up_6|AP018318.1_3426174_3426579_+	COG0848, ExbD, Biopolymer transport protein [Intracellular trafficking and secretion]	NA|682aa|up_5|AP018318.1_3426869_3428915_+	cd00200, WD40, WD40 domain, found in a number of eukaryotic proteins that cover a wide variety of functions including adaptor/regulatory modules in signal transduction, pre-mRNA processing and cytoskeleton assembly; typically contains a GH dipeptide 11-24 residues from its N-terminus and the WD dipeptide at its C-terminus and is 40 residues long, hence the name WD40; between GH and WD lies a conserved core; serves as a stable propeller-like platform to which proteins can bind either stably or reversibly; forms a propeller-like structure with several blades where each blade is composed of a four-stranded anti-parallel b-sheet; instances with few detectable copies are hypothesized to form larger structures by dimerization; each WD40 sequence repeat forms the first three strands of one blade and the last strand in the next blade; the last C-terminal WD40 repeat completes the blade structure of the first WD40 repeat to create the closed ring propeller-structure; residues on the top and bottom surface of the propeller are proposed to coordinate interactions with other proteins and/or small ligands; 7 copies of the repeat are present in this alignment	NA|188aa|up_4|AP018318.1_3429317_3429881_-	cd06260, DUF820, Domain of unknown function (DUF820)	NA|193aa|up_3|AP018318.1_3430193_3430772_+	pfam05685, Uma2, Putative restriction endonuclease	NA|963aa|up_2|AP018318.1_3430917_3433806_-	TIGR02917, TPR_domain_protein, putative PEP-CTERM system TPR-repeat lipoprotein	NA|74aa|up_1|AP018318.1_3433939_3434161_+	NA	NA|400aa|up_0|AP018318.1_3434338_3435538_+	PRK00509, PRK00509, argininosuccinate synthase; Provisional	NA|203aa|down_0|AP018318.1_3435821_3436430_-	NA	NA|573aa|down_1|AP018318.1_3436837_3438556_+	COG0426, FpaA, Uncharacterized flavoproteins [Energy production and conversion]	NA|600aa|down_2|AP018318.1_3438721_3440521_+	COG0426, FpaA, Uncharacterized flavoproteins [Energy production and conversion]	NA|78aa|down_3|AP018318.1_3440729_3440963_-	cd05959, BCL_4HBCL, Benzoate CoA ligase (BCL) and 4-Hydroxybenzoate-Coenzyme A Ligase (4-HBA-CoA ligase)	NA|222aa|down_4|AP018318.1_3441167_3441833_-	pfam05685, Uma2, Putative restriction endonuclease	NA|98aa|down_5|AP018318.1_3441971_3442265_+	NA	NA|98aa|down_6|AP018318.1_3442377_3442671_-	NA	NA|601aa|down_7|AP018318.1_3442792_3444595_-	cd00200, WD40, WD40 domain, found in a number of eukaryotic proteins that cover a wide variety of functions including adaptor/regulatory modules in signal transduction, pre-mRNA processing and cytoskeleton assembly; typically contains a GH dipeptide 11-24 residues from its N-terminus and the WD dipeptide at its C-terminus and is 40 residues long, hence the name WD40; between GH and WD lies a conserved core; serves as a stable propeller-like platform to which proteins can bind either stably or reversibly; forms a propeller-like structure with several blades where each blade is composed of a four-stranded anti-parallel b-sheet; instances with few detectable copies are hypothesized to form larger structures by dimerization; each WD40 sequence repeat forms the first three strands of one blade and the last strand in the next blade; the last C-terminal WD40 repeat completes the blade structure of the first WD40 repeat to create the closed ring propeller-structure; residues on the top and bottom surface of the propeller are proposed to coordinate interactions with other proteins and/or small ligands; 7 copies of the repeat are present in this alignment	NA|113aa|down_8|AP018318.1_3445107_3445446_-	TIGR02978, Phage_shock_protein_C	NA|233aa|down_9|AP018318.1_3445724_3446423_+	cd05243, SDR_a5, atypical (a) SDRs, subgroup 5
GCA_003990705.1_ASM399070v1	AP018318	Nostoc sp. HK-01 DNA, complete genome	17	4391468-4391801	17,5,6	CRISPRCasFinder,CRT,PILER-CR	no		c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1	Orphan	GTTTCAATCCCTAATAGGGATTAGTTGAAATTGCAAT,GTTTCAATCCCTAATAGGGATTAGTTGAAATTGCAAT,GTTTC----AATCCCTAATAGGGATTAGTTGAAATTGCAAT	37,37,41	0	0	NA	NA	I-D,II-B:I-D,II-B:I-D,II-B	4,4,4	4	Orphan	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1,Cas9_archaeal,cas14j,Cas14c_CAS-V-F	NA|64aa|up_8|AP018318.1_4378068_4378260_-,NA|476aa|up_1|AP018318.1_4388896_4390324_+,NA|93aa|down_0|AP018318.1_4392093_4392372_-,NA|183aa|down_7|AP018318.1_4401260_4401809_-,NA|285aa|down_8|AP018318.1_4402575_4403430_+,NA|458aa|down_9|AP018318.1_4403624_4404998_+	NA|628aa|up_9|AP018318.1_4375767_4377651_+	pfam04966, OprB, Carbohydrate-selective porin, OprB family	NA|64aa|up_8|AP018318.1_4378068_4378260_-	NA	NA|364aa|up_7|AP018318.1_4378683_4379775_+	PRK07409, PRK07409, threonine synthase; Validated	NA|845aa|up_6|AP018318.1_4379928_4382463_+	PRK13560, PRK13560, hypothetical protein; Provisional	NA|458aa|up_5|AP018318.1_4382654_4384028_+	pfam01609, DDE_Tnp_1, Transposase DDE domain	NA|670aa|up_4|AP018318.1_4384099_4386109_+	PRK13560, PRK13560, hypothetical protein; Provisional	NA|559aa|up_3|AP018318.1_4386111_4387788_+	cd17534, REC_DC-like, phosphoacceptor receiver (REC) domain of modulated diguanylate cyclase and similar domains	NA|292aa|up_2|AP018318.1_4387779_4388655_-	PRK00258, aroE, shikimate 5-dehydrogenase; Reviewed	NA|476aa|up_1|AP018318.1_4388896_4390324_+	NA	NA|312aa|up_0|AP018318.1_4390460_4391396_+	PLN02578, PLN02578, hydrolase	NA|93aa|down_0|AP018318.1_4392093_4392372_-	NA	NA|540aa|down_1|AP018318.1_4392604_4394224_-	sd00006, TPR, Tetratricopeptide repeat	NA|607aa|down_2|AP018318.1_4394549_4396370_+	TIGR03423, pbp2_mrdA, penicillin-binding protein 2	NA|530aa|down_3|AP018318.1_4396832_4398422_+	COG1032, COG1032, Fe-S oxidoreductase [Energy production and conversion]	NA|228aa|down_4|AP018318.1_4398623_4399307_-	smart00421, HTH_LUXR, helix_turn_helix, Lux Regulon	NA|283aa|down_5|AP018318.1_4399409_4400258_+	COG2897, SseA, Rhodanese-related sulfurtransferase [Inorganic ion transport and metabolism]	NA|228aa|down_6|AP018318.1_4400329_4401013_+	COG2020, STE14, Putative protein-S-isoprenylcysteine methyltransferase [Posttranslational modification, protein turnover, chaperones]	NA|183aa|down_7|AP018318.1_4401260_4401809_-	NA	NA|285aa|down_8|AP018318.1_4402575_4403430_+	NA	NA|458aa|down_9|AP018318.1_4403624_4404998_+	NA
GCA_003990705.1_ASM399070v1	AP018318	Nostoc sp. HK-01 DNA, complete genome	18	4515175-4515274	18	CRISPRCasFinder	no		c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1	Orphan	CAGTAAATAGATTTAACAAAATTTCAC	27	0	0	NA	NA	NA	1	1	Orphan	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1,Cas9_archaeal,cas14j,Cas14c_CAS-V-F	NA|159aa|up_9|AP018318.1_4501364_4501841_-,NA|91aa|down_2|AP018318.1_4518491_4518764_-	NA|159aa|up_9|AP018318.1_4501364_4501841_-	NA	NA|348aa|up_8|AP018318.1_4502058_4503102_+	cd07987, LPLAT_MGAT-like, Lysophospholipid Acyltransferases (LPLATs) of Glycerophospholipid Biosynthesis: MGAT-like	NA|233aa|up_7|AP018318.1_4503119_4503818_-	COG0745, OmpR, Response regulators consisting of a CheY-like receiver domain and a winged-helix DNA-binding domain [Signal transduction mechanisms / Transcription]	NA|215aa|up_6|AP018318.1_4503894_4504539_+	PRK01686, hisG, ATP phosphoribosyltransferase catalytic subunit; Reviewed	NA|745aa|up_5|AP018318.1_4504785_4507020_+	COG4251, COG4251, Bacteriophytochrome (light-regulated signal transduction histidine kinase) [Signal transduction mechanisms]	NA|142aa|up_4|AP018318.1_4507037_4507463_+	cd17557, REC_Rcp-like, phosphoacceptor receiver (REC) domain of cyanobacterial phytochrome response regulator Rcp and similar domains	NA|756aa|up_3|AP018318.1_4507468_4509736_+	PRK13557, PRK13557, histidine kinase; Provisional	NA|354aa|up_2|AP018318.1_4509808_4510870_-	PRK00856, pyrB, aspartate carbamoyltransferase catalytic subunit	NA|188aa|up_1|AP018318.1_4511240_4511804_+	COG1434, COG1434, Uncharacterized conserved protein [Function unknown]	NA|792aa|up_0|AP018318.1_4511995_4514371_-	pfam06537, DHOR, Di-haem oxidoreductase, putative peroxidase	NA|451aa|down_0|AP018318.1_4516218_4517571_-	COG2239, MgtE, Mg/Co/Ni transporter MgtE (contains CBS domain) [Inorganic ion transport and metabolism]	NA|119aa|down_1|AP018318.1_4518135_4518492_-	pfam02452, PemK_toxin, PemK-like, MazF-like toxin of type II toxin-antitoxin system	NA|91aa|down_2|AP018318.1_4518491_4518764_-	NA	NA|1087aa|down_3|AP018318.1_4519412_4522673_+	pfam00723, Glyco_hydro_15, Glycosyl hydrolases family 15	NA|212aa|down_4|AP018318.1_4522751_4523387_-	pfam13649, Methyltransf_25, Methyltransferase domain	NA|245aa|down_5|AP018318.1_4523429_4524164_-	TIGR00911, High-affinity_methionine_permease, L-type amino acid transporter	NA|204aa|down_6|AP018318.1_4524173_4524785_-	TIGR00911, High-affinity_methionine_permease, L-type amino acid transporter	NA|426aa|down_7|AP018318.1_4524972_4526250_-	TIGR02966, Phosphate_regulon_sensor_protein_PhoR, phosphate regulon sensor kinase PhoR	NA|227aa|down_8|AP018318.1_4526269_4526950_-	COG0745, OmpR, Response regulators consisting of a CheY-like receiver domain and a winged-helix DNA-binding domain [Signal transduction mechanisms / Transcription]	NA|109aa|down_9|AP018318.1_4527246_4527573_-	COG3339, COG3339, Uncharacterized conserved protein [Function unknown]
GCA_003990705.1_ASM399070v1	AP018318	Nostoc sp. HK-01 DNA, complete genome	19	4580504-4580613	19	CRISPRCasFinder	no		c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1	Orphan	CTTATAAAGGGGGGAAACTGAGTAATCTAATTCCCTCC	38	0	0	NA	NA	NA	1	1	Orphan	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1,Cas9_archaeal,cas14j,Cas14c_CAS-V-F	NA|292aa|up_9|AP018318.1_4568926_4569802_-,NA|59aa|up_4|AP018318.1_4577559_4577736_-,NA	NA|292aa|up_9|AP018318.1_4568926_4569802_-	NA	NA|483aa|up_8|AP018318.1_4570307_4571756_+	TIGR01730, COG0845:_Membrane-fusion_protein, RND family efflux transporter, MFP subunit	NA|1057aa|up_7|AP018318.1_4571889_4575060_+	COG0841, AcrB, Cation/multidrug efflux pump [Defense mechanisms]	NA|183aa|up_6|AP018318.1_4575200_4575749_-	cd06260, DUF820, Domain of unknown function (DUF820)	NA|528aa|up_5|AP018318.1_4575850_4577434_-	cd17515, RMtype1_S_MjaORF132P_Sau1132ORF3780P-TRD1-CR1_like, Type I restriction-modification system specificity (S) subunit Target Recognition Domain-ConseRved domain (TRD-CR), similar to MjaXIP/S	NA|59aa|up_4|AP018318.1_4577559_4577736_-	NA	NA|136aa|up_3|AP018318.1_4577744_4578152_-	cd09881, PIN_VapC4-5_FitB-like, VapC-like PIN domain of Mycobacterium tuberculosis VapC4 and VapC5, and Neisseria gonorrhoeae FitB and related proteins	NA|84aa|up_2|AP018318.1_4578148_4578400_-	pfam10047, DUF2281, Protein of unknown function (DUF2281)	NA|537aa|up_1|AP018318.1_4578430_4580041_-	pfam02384, N6_Mtase, N-6 DNA Methylase	NA|127aa|up_0|AP018318.1_4580010_4580391_-	cd01038, Endonuclease_DUF559, Domain of unknown function, appears to be related to a diverse group of endonucleases	NA|432aa|down_0|AP018318.1_4584185_4585481_+	cd14748, PBP2_UgpB, The periplasmic-binding component of ABC transport system specific for sn-glycerol-3-phosphate; possesses type 2 periplasmic binding fold	NA|367aa|down_1|AP018318.1_4585500_4586601_-	cd17602, REC_PatA-like, phosphoacceptor receiver (REC) domain of PatA and similar domains	NA|313aa|down_2|AP018318.1_4587804_4588743_+	TIGR00005, Ribosomal_large_subunit_pseudouridine_synthase_D, pseudouridine synthase, RluA family	NA|173aa|down_3|AP018318.1_4589524_4590043_+	cd14768, PC_PEC_beta, Beta subunits of phycoerythrin and phycoerythrocyanin; phycobilisome rod components	NA|163aa|down_4|AP018318.1_4590116_4590605_+	cd14770, PC-PEC_alpha, Alpha subunits of phycoerythrin and phycoerythrocyanin; phycobilisome rod components	NA|279aa|down_5|AP018318.1_4590742_4591579_+	pfam00427, PBS_linker_poly, Phycobilisome Linker polypeptide	NA|258aa|down_6|AP018318.1_4591699_4592473_+	COG1413, COG1413, FOG: HEAT repeat [Energy production and conversion]	NA|209aa|down_7|AP018318.1_4592602_4593229_+	COG1413, COG1413, FOG: HEAT repeat [Energy production and conversion]	NA|174aa|down_8|AP018318.1_4593791_4594313_+	cd14768, PC_PEC_beta, Beta subunits of phycoerythrin and phycoerythrocyanin; phycobilisome rod components	NA|164aa|down_9|AP018318.1_4594390_4594882_+	cd14770, PC-PEC_alpha, Alpha subunits of phycoerythrin and phycoerythrocyanin; phycobilisome rod components
GCA_003990705.1_ASM399070v1	AP018318	Nostoc sp. HK-01 DNA, complete genome	20	4727622-4727803	7	PILER-CR	no		c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1	Orphan	TTTTGCAGGTTCAAATCCTGTCAGGAGTAC	30	0	0	NA	NA	NA	2	2	Orphan	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1,Cas9_archaeal,cas14j,Cas14c_CAS-V-F	NA|56aa|up_5|AP018318.1_4722591_4722759_-,NA|112aa|up_4|AP018318.1_4723096_4723432_-,NA|130aa|up_2|AP018318.1_4724368_4724758_+,NA|65aa|up_0|AP018318.1_4725870_4726065_+,NA|169aa|down_4|AP018318.1_4735569_4736076_-	NA|210aa|up_9|AP018318.1_4716520_4717150_+	COG1290, QcrB, Cytochrome b subunit of the bc complex [Energy production and conversion]	NA|641aa|up_8|AP018318.1_4717116_4719039_-	COG0025, NhaP, NhaP-type Na+/H+ and K+/H+ antiporters [Inorganic ion transport and metabolism]	NA|196aa|up_7|AP018318.1_4719714_4720302_-	cd08866, SRPBCC_11, Ligand-binding SRPBCC domain of an uncharacterized subfamily of proteins	NA|685aa|up_6|AP018318.1_4720310_4722365_-	COG0642, BaeS, Signal transduction histidine kinase [Signal transduction mechanisms]	NA|56aa|up_5|AP018318.1_4722591_4722759_-	NA	NA|112aa|up_4|AP018318.1_4723096_4723432_-	NA	NA|173aa|up_3|AP018318.1_4723833_4724352_+	COG1403, McrA, Restriction endonuclease [Defense mechanisms]	NA|130aa|up_2|AP018318.1_4724368_4724758_+	NA	NA|281aa|up_1|AP018318.1_4724829_4725672_-	cd10917, CE4_NodB_like_6s_7s, Catalytic NodB homology domain of rhizobial NodB-like proteins	NA|65aa|up_0|AP018318.1_4725870_4726065_+	NA	NA|341aa|down_0|AP018318.1_4728960_4729983_-	sd00006, TPR, Tetratricopeptide repeat	NA|196aa|down_1|AP018318.1_4730700_4731288_-	COG4636, Uma2, Endonuclease, Uma2 family (restriction endonuclease fold) [General function prediction only]	NA|535aa|down_2|AP018318.1_4731589_4733194_-	pfam13304, AAA_21, AAA domain, putative AbiEii toxin, Type IV TA system	NA|597aa|down_3|AP018318.1_4733461_4735252_-	COG1217, TypA, Predicted membrane GTPase involved in stress response [Signal transduction mechanisms]	NA|169aa|down_4|AP018318.1_4735569_4736076_-	NA	NA|147aa|down_5|AP018318.1_4736150_4736591_+	cd16383, GUN4, porphyrin-binding protein domain GUN4	NA|339aa|down_6|AP018318.1_4736827_4737844_-	PRK02812, PRK02812, ribose-phosphate pyrophosphokinase; Provisional	NA|565aa|down_7|AP018318.1_4738699_4740394_+	cd14014, STKc_PknB_like, Catalytic domain of bacterial Serine/Threonine kinases, PknB and similar proteins	NA|226aa|down_8|AP018318.1_4740563_4741241_+	PRK00090, bioD, ATP-dependent dethiobiotin synthetase BioD	NA|406aa|down_9|AP018318.1_4741526_4742744_+	cd08021, M20_Acy1_YhaA-like, M20 Peptidase aminoacylase 1 subfamily, includes Bacillus subtilis YhaA and Staphylococcus aureus amidohydrolase, SACOL0085
GCA_003990705.1_ASM399070v1	AP018318	Nostoc sp. HK-01 DNA, complete genome	21	5026118-5026351	20,8	CRISPRCasFinder,PILER-CR	no		c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1	Orphan	TGAGCTTTGTGAATGAGTCTTTGAGCTTCATTAAT,GAATCTTTGAGCTTTGTGAATGAGTCTTTGAGCTTCATTAATGAGTATCTGA	35,52	0	0	NA	NA	NA:NA	3,2	3	Orphan	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1,Cas9_archaeal,cas14j,Cas14c_CAS-V-F	NA|99aa|up_5|AP018318.1_5020591_5020888_-,NA|161aa|up_0|AP018318.1_5025421_5025904_-,NA|152aa|down_2|AP018318.1_5030512_5030968_+,NA|146aa|down_5|AP018318.1_5036266_5036704_+	NA|374aa|up_9|AP018318.1_5016455_5017577_-	cd14014, STKc_PknB_like, Catalytic domain of bacterial Serine/Threonine kinases, PknB and similar proteins	NA|365aa|up_8|AP018318.1_5017682_5018777_+	cd13653, PBP2_phosphate_like_1, Substrate binding domain of putative ABC-type phosphate transporter, a member of the type 2 periplasmic binding fold superfamily	NA|140aa|up_7|AP018318.1_5018914_5019334_-	pfam07736, CM_1, Chorismate mutase type I	NA|274aa|up_6|AP018318.1_5019421_5020243_-	TIGR00706, Putative_signal_peptide_peptidase_SppA, signal peptide peptidase SppA, 36K type	NA|99aa|up_5|AP018318.1_5020591_5020888_-	NA	NA|266aa|up_4|AP018318.1_5021295_5022093_+	PLN03100, PLN03100, Permease subunit of ER-derived-lipid transporter; Provisional	NA|161aa|up_3|AP018318.1_5022123_5022606_+	pfam11317, DUF3119, Protein of unknown function (DUF3119)	NA|418aa|up_2|AP018318.1_5022732_5023986_+	pfam11285, DUF3086, Protein of unknown function (DUF3086)	NA|412aa|up_1|AP018318.1_5024098_5025334_-	COG2124, CypX, Cytochrome P450 [Secondary metabolites biosynthesis, transport, and catabolism]	NA|161aa|up_0|AP018318.1_5025421_5025904_-	NA	NA|877aa|down_0|AP018318.1_5026404_5029035_-	PRK09532, PRK09532, DNA polymerase III subunit alpha; Reviewed	NA|430aa|down_1|AP018318.1_5029218_5030508_+	COG1301, GltP, Na+/H+-dicarboxylate symporters [Energy production and conversion]	NA|152aa|down_2|AP018318.1_5030512_5030968_+	NA	NA|129aa|down_3|AP018318.1_5031272_5031659_-	cd01038, Endonuclease_DUF559, Domain of unknown function, appears to be related to a diverse group of endonucleases	NA|1226aa|down_4|AP018318.1_5031868_5035546_-	pfam12770, CHAT, CHAT domain	NA|146aa|down_5|AP018318.1_5036266_5036704_+	NA	NA|129aa|down_6|AP018318.1_5036730_5037117_-	cd17552, REC_RR468-like, phosphoacceptor receiver (REC) domain of Thermotoga maritima response regulator RR468 and similar domains	NA|1610aa|down_7|AP018318.1_5037449_5042279_-	COG0745, OmpR, Response regulators consisting of a CheY-like receiver domain and a winged-helix DNA-binding domain [Signal transduction mechanisms / Transcription]	NA|180aa|down_8|AP018318.1_5042723_5043263_+	pfam00582, Usp, Universal stress protein family	NA|171aa|down_9|AP018318.1_5043309_5043822_+	PRK07571, PRK07571, bidirectional hydrogenase complex protein HoxE; Reviewed
GCA_003990705.1_ASM399070v1	AP018318	Nostoc sp. HK-01 DNA, complete genome	22	5031057-5031154	21	CRISPRCasFinder	no		c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1	Orphan	GACTGCGGTGTATACATTCATTTGATCC	28	0	0	NA	NA	NA	1	1	Orphan	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1,Cas9_archaeal,cas14j,Cas14c_CAS-V-F	NA|99aa|up_8|AP018318.1_5020591_5020888_-,NA|161aa|up_3|AP018318.1_5025421_5025904_-,NA|152aa|up_0|AP018318.1_5030512_5030968_+,NA|146aa|down_2|AP018318.1_5036266_5036704_+	NA|274aa|up_9|AP018318.1_5019421_5020243_-	TIGR00706, Putative_signal_peptide_peptidase_SppA, signal peptide peptidase SppA, 36K type	NA|99aa|up_8|AP018318.1_5020591_5020888_-	NA	NA|266aa|up_7|AP018318.1_5021295_5022093_+	PLN03100, PLN03100, Permease subunit of ER-derived-lipid transporter; Provisional	NA|161aa|up_6|AP018318.1_5022123_5022606_+	pfam11317, DUF3119, Protein of unknown function (DUF3119)	NA|418aa|up_5|AP018318.1_5022732_5023986_+	pfam11285, DUF3086, Protein of unknown function (DUF3086)	NA|412aa|up_4|AP018318.1_5024098_5025334_-	COG2124, CypX, Cytochrome P450 [Secondary metabolites biosynthesis, transport, and catabolism]	NA|161aa|up_3|AP018318.1_5025421_5025904_-	NA	NA|877aa|up_2|AP018318.1_5026404_5029035_-	PRK09532, PRK09532, DNA polymerase III subunit alpha; Reviewed	NA|430aa|up_1|AP018318.1_5029218_5030508_+	COG1301, GltP, Na+/H+-dicarboxylate symporters [Energy production and conversion]	NA|152aa|up_0|AP018318.1_5030512_5030968_+	NA	NA|129aa|down_0|AP018318.1_5031272_5031659_-	cd01038, Endonuclease_DUF559, Domain of unknown function, appears to be related to a diverse group of endonucleases	NA|1226aa|down_1|AP018318.1_5031868_5035546_-	pfam12770, CHAT, CHAT domain	NA|146aa|down_2|AP018318.1_5036266_5036704_+	NA	NA|129aa|down_3|AP018318.1_5036730_5037117_-	cd17552, REC_RR468-like, phosphoacceptor receiver (REC) domain of Thermotoga maritima response regulator RR468 and similar domains	NA|1610aa|down_4|AP018318.1_5037449_5042279_-	COG0745, OmpR, Response regulators consisting of a CheY-like receiver domain and a winged-helix DNA-binding domain [Signal transduction mechanisms / Transcription]	NA|180aa|down_5|AP018318.1_5042723_5043263_+	pfam00582, Usp, Universal stress protein family	NA|171aa|down_6|AP018318.1_5043309_5043822_+	PRK07571, PRK07571, bidirectional hydrogenase complex protein HoxE; Reviewed	NA|532aa|down_7|AP018318.1_5043808_5045404_+	COG1894, NuoF, NADH:ubiquinone oxidoreductase, NADH-binding (51 kD) subunit [Energy production and conversion]	NA|239aa|down_8|AP018318.1_5045712_5046429_+	PRK07569, PRK07569, bidirectional hydrogenase complex protein HoxU; Validated	NA|177aa|down_9|AP018318.1_5046447_5046978_+	pfam11320, DUF3122, Protein of unknown function (DUF3122)
GCA_003990705.1_ASM399070v1	AP018318	Nostoc sp. HK-01 DNA, complete genome	23	5079700-5080678	9,22,6	PILER-CR,CRISPRCasFinder,CRT	no		c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1	Orphan	GTTTAAATTTCACCTAATCCCTATTAGGG----------ATTGAAAC,GTTTAAATTTCACCTAATCCCTATTAGGGATTGAAAC,NNNNNNNNNNGTTTAAATTTCACCTAATCCCTATTAGGGATTGAAAC	47,37,47	0	0	NA	NA	I-D,II-B:I-D,II-B:I-D,II-B	13,13,13	13	Orphan	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1,Cas9_archaeal,cas14j,Cas14c_CAS-V-F	NA|64aa|up_5|AP018318.1_5073777_5073969_-,NA|83aa|down_1|AP018318.1_5082049_5082298_+,NA|86aa|down_2|AP018318.1_5082407_5082665_+,NA|86aa|down_3|AP018318.1_5082774_5083032_+,NA|81aa|down_4|AP018318.1_5083144_5083387_+,NA|86aa|down_5|AP018318.1_5083586_5083844_+,NA|94aa|down_6|AP018318.1_5083921_5084203_+	NA|166aa|up_9|AP018318.1_5068386_5068884_+	TIGR04110, hypothetical_protein_VSWAT3_12502, heme utilization protein HutZ	NA|296aa|up_8|AP018318.1_5069001_5069889_+	COG0596, MhpC, Predicted hydrolases or acyltransferases (alpha/beta hydrolase superfamily) [General function prediction only]	NA|430aa|up_7|AP018318.1_5069961_5071251_-	cd19920, REC_PA4781-like, phosphoacceptor receiver (REC) domain of cyclic di-GMP phosphodiesterase PA4781 and similar domains	NA|810aa|up_6|AP018318.1_5071262_5073692_-	PRK11091, PRK11091, aerobic respiration control sensor protein ArcB; Provisional	NA|64aa|up_5|AP018318.1_5073777_5073969_-	NA	NA|194aa|up_4|AP018318.1_5074043_5074625_+	TIGR04376, conserved_hypothetical_protein, TIGR04376 family protein	NA|181aa|up_3|AP018318.1_5074815_5075358_+	pfam14229, DUF4332, Domain of unknown function (DUF4332)	NA|205aa|up_2|AP018318.1_5075374_5075989_-	pfam00440, TetR_N, Bacterial regulatory proteins, tetR family	NA|544aa|up_1|AP018318.1_5076216_5077848_+	COG0612, PqqL, Predicted Zn-dependent peptidases [General function prediction only]	NA|495aa|up_0|AP018318.1_5077912_5079397_+	COG0612, PqqL, Predicted Zn-dependent peptidases [General function prediction only]	NA|90aa|down_0|AP018318.1_5081114_5081384_+	COG1938, COG1938, Archaeal enzymes of ATP-grasp superfamily [General function prediction only]	NA|83aa|down_1|AP018318.1_5082049_5082298_+	NA	NA|86aa|down_2|AP018318.1_5082407_5082665_+	NA	NA|86aa|down_3|AP018318.1_5082774_5083032_+	NA	NA|81aa|down_4|AP018318.1_5083144_5083387_+	NA	NA|86aa|down_5|AP018318.1_5083586_5083844_+	NA	NA|94aa|down_6|AP018318.1_5083921_5084203_+	NA	NA|393aa|down_7|AP018318.1_5084270_5085449_+	pfam01636, APH, Phosphotransferase enzyme family	NA|367aa|down_8|AP018318.1_5085860_5086961_+	pfam17914, HopA1, HopA1 effector protein family	NA|160aa|down_9|AP018318.1_5087195_5087675_-	COG1225, Bcp, Peroxiredoxin [Posttranslational modification, protein turnover, chaperones]
GCA_003990705.1_ASM399070v1	AP018319	Nostoc sp. HK-01 plasmid plasmid1 DNA, complete genome	1	62251-62573	1,1,1	CRISPRCasFinder,CRT,PILER-CR	no	cas1	Cas9_archaeal,cas1,RT,cas14j,Cas14c_CAS-V-F,c2c9_V-U4,cas6	Unclear	GTTTCAATCCCTGATAGGGATTAAGAGAAATTGCAAT,GTTTCAATCCCTGATAGGGATTAAGAGAAATTGCAAT,GTTTC----AATCCCTGATAGGGATTAAGAGAAATTGCAAT	37,37,41	0	0	NA	NA	I-D,II-B:I-D,II-B:I-D,II-B	4,4,3	4	Unclear	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1,Cas9_archaeal,cas14j,Cas14c_CAS-V-F	NA|146aa|up_8|AP018319.1_55753_56191_-,NA|133aa|up_7|AP018319.1_56347_56746_-,NA|187aa|up_6|AP018319.1_56852_57413_-,NA|249aa|up_5|AP018319.1_57488_58235_-,NA|107aa|up_3|AP018319.1_58661_58982_-,NA|82aa|down_1|AP018319.1_64058_64304_+,NA|99aa|down_8|AP018319.1_73891_74188_-	NA|197aa|up_9|AP018319.1_55058_55649_-	pfam04471, Mrr_cat, Restriction endonuclease	NA|146aa|up_8|AP018319.1_55753_56191_-	NA	NA|133aa|up_7|AP018319.1_56347_56746_-	NA	NA|187aa|up_6|AP018319.1_56852_57413_-	NA	NA|249aa|up_5|AP018319.1_57488_58235_-	NA	NA|118aa|up_4|AP018319.1_58281_58635_-	pfam10551, MULE, MULE transposase domain	NA|107aa|up_3|AP018319.1_58661_58982_-	NA	NA|147aa|up_2|AP018319.1_59884_60325_+	cd03784, GT1_Gtf-like, UDP-glycosyltransferases and similar proteins	NA|223aa|up_1|AP018319.1_60480_61149_+	cd03784, GT1_Gtf-like, UDP-glycosyltransferases and similar proteins	cas1|350aa|up_0|AP018319.1_61175_62225_+	pfam01867, Cas_Cas1, CRISPR associated protein Cas1	NA|243aa|down_0|AP018319.1_63023_63752_+	CHL00148, orf27, Ycf27; Reviewed	NA|82aa|down_1|AP018319.1_64058_64304_+	NA	NA|1044aa|down_2|AP018319.1_64580_67712_-	PRK10060, PRK10060, cyclic di-GMP phosphodiesterase	NA|280aa|down_3|AP018319.1_68240_69080_-	cd19138, AKR_YeaE, Escherichia coli YeaE and similar proteins	NA|188aa|down_4|AP018319.1_69110_69674_-	cd03134, GATase1_PfpI_like, A type 1 glutamine amidotransferase (GATase1)-like domain found in PfpI from Pyrococcus furiosus	NA|393aa|down_5|AP018319.1_69728_70907_-	cd08283, FDH_like_1, Glutathione-dependent formaldehyde dehydrogenase related proteins, child 1	NA|390aa|down_6|AP018319.1_71031_72201_-	cd08283, FDH_like_1, Glutathione-dependent formaldehyde dehydrogenase related proteins, child 1	NA|168aa|down_7|AP018319.1_72286_72790_-	COG5637, COG5637, Predicted integral membrane protein [Function unknown]	NA|99aa|down_8|AP018319.1_73891_74188_-	NA	NA|173aa|down_9|AP018319.1_74236_74755_-	cd06554, ASCH_ASC-1_like, ASC-1 homology domain, ASC-1-like subfamily
GCA_003990705.1_ASM399070v1	AP018319	Nostoc sp. HK-01 plasmid plasmid1 DNA, complete genome	3	210450-210573	3	CRISPRCasFinder	no		Cas9_archaeal,cas1,RT,cas14j,Cas14c_CAS-V-F,c2c9_V-U4,cas6	Orphan	CTGGGTTTTTATCGTGTATAACCGGGTTTTTATCGGGT	38	0	0	NA	NA	NA	1	1	Orphan	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1,Cas9_archaeal,cas14j,Cas14c_CAS-V-F	NA|1733aa|up_9|AP018319.1_199014_204213_-,NA|181aa|up_8|AP018319.1_204259_204802_+,NA|211aa|up_7|AP018319.1_204798_205431_-,NA|90aa|up_2|AP018319.1_208438_208708_-,NA|152aa|up_0|AP018319.1_209945_210401_-,NA|204aa|down_3|AP018319.1_214684_215296_+,NA|78aa|down_4|AP018319.1_215310_215544_+,NA|272aa|down_5|AP018319.1_215540_216356_-,NA|145aa|down_7|AP018319.1_218969_219404_-	NA|1733aa|up_9|AP018319.1_199014_204213_-	NA	NA|181aa|up_8|AP018319.1_204259_204802_+	NA	NA|211aa|up_7|AP018319.1_204798_205431_-	NA	NA|230aa|up_6|AP018319.1_205466_206156_-	COG2197, CitB, Response regulator containing a CheY-like receiver domain and an HTH DNA-binding domain [Signal transduction mechanisms / Transcription]	NA|177aa|up_5|AP018319.1_206343_206874_-	pfam02643, DUF192, Uncharacterized ACR, COG1430	NA|190aa|up_4|AP018319.1_206876_207446_-	COG0464, SpoVK, ATPases of the AAA+ class [Posttranslational modification, protein turnover, chaperones]	NA|314aa|up_3|AP018319.1_207488_208430_-	pfam13737, DDE_Tnp_1_5, Transposase DDE domain	NA|90aa|up_2|AP018319.1_208438_208708_-	NA	NA|412aa|up_1|AP018319.1_208704_209940_-	cd10227, ParM_like, Plasmid segregation protein ParM and similar proteins	NA|152aa|up_0|AP018319.1_209945_210401_-	NA	NA|603aa|down_0|AP018319.1_210774_212583_+	pfam01076, Mob_Pre, Plasmid recombination enzyme	NA|302aa|down_1|AP018319.1_212795_213701_+	cd05386, TraL, transfer origin protein TraL	NA|250aa|down_2|AP018319.1_213675_214425_+	COG1341, COG1341, Predicted GTPase or GTP-binding protein [General function prediction only]	NA|204aa|down_3|AP018319.1_214684_215296_+	NA	NA|78aa|down_4|AP018319.1_215310_215544_+	NA	NA|272aa|down_5|AP018319.1_215540_216356_-	NA	NA|842aa|down_6|AP018319.1_216388_218914_-	pfam12965, DUF3854, Domain of unknown function (DUF3854)	NA|145aa|down_7|AP018319.1_218969_219404_-	NA	NA|369aa|down_8|AP018319.1_219533_220640_-	COG3505, VirD4, Type IV secretory pathway, VirD4 components [Intracellular trafficking and secretion]	NA|474aa|down_9|AP018319.1_220748_222170_+	pfam02281, Dimer_Tnp_Tn5, Transposase Tn5 dimerization domain
GCA_003990705.1_ASM399070v1	AP018319	Nostoc sp. HK-01 plasmid plasmid1 DNA, complete genome	4	481847-481931	4	CRISPRCasFinder	no		Cas9_archaeal,cas1,RT,cas14j,Cas14c_CAS-V-F,c2c9_V-U4,cas6	Orphan	TTTAGGGGTTATATGACCCCCATTT	25	0	0	NA	NA	NA	1	1	Orphan	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1,Cas9_archaeal,cas14j,Cas14c_CAS-V-F	NA|95aa|up_8|AP018319.1_473774_474059_-,NA|415aa|up_6|AP018319.1_476588_477833_-,NA|76aa|up_5|AP018319.1_477894_478122_-,NA|102aa|up_4|AP018319.1_478168_478474_-,NA|134aa|up_1|AP018319.1_480499_480901_+,NA|172aa|down_0|AP018319.1_482240_482756_-	NA|84aa|up_9|AP018319.1_473350_473602_+	cd00586, 4HBT, 4-hydroxybenzoyl-CoA thioesterase (4HBT)	NA|95aa|up_8|AP018319.1_473774_474059_-	NA	NA|749aa|up_7|AP018319.1_474085_476332_-	TIGR01448, recD_rel, helicase, putative, RecD/TraA family	NA|415aa|up_6|AP018319.1_476588_477833_-	NA	NA|76aa|up_5|AP018319.1_477894_478122_-	NA	NA|102aa|up_4|AP018319.1_478168_478474_-	NA	NA|130aa|up_3|AP018319.1_478494_478884_-	cd10227, ParM_like, Plasmid segregation protein ParM and similar proteins	NA|474aa|up_2|AP018319.1_478992_480414_+	pfam02281, Dimer_Tnp_Tn5, Transposase Tn5 dimerization domain	NA|134aa|up_1|AP018319.1_480499_480901_+	NA	NA|219aa|up_0|AP018319.1_480881_481538_-	cd10227, ParM_like, Plasmid segregation protein ParM and similar proteins	NA|172aa|down_0|AP018319.1_482240_482756_-	NA	NA|604aa|down_1|AP018319.1_482867_484679_+	COG5421, COG5421, Transposase [DNA replication, recombination, and repair]	NA|NA	NA	NA|NA	NA	NA|NA	NA	NA|NA	NA	NA|NA	NA	NA|NA	NA	NA|NA	NA	NA|NA	NA
GCA_003990705.1_ASM399070v1	AP018320	Nostoc sp. HK-01 plasmid plasmid2 DNA, complete genome	1	9345-9422	1	CRISPRCasFinder	no		RT,cas3,cas6,DEDDh	Orphan	AATCATCCTTTTGGCGTTTTTAGC	24	0	0	NA	NA	NA	1	1	Orphan	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1,Cas9_archaeal,cas14j,Cas14c_CAS-V-F	NA|268aa|up_5|AP018320.1_2115_2919_-,NA|374aa|up_4|AP018320.1_3237_4359_-,NA|239aa|up_3|AP018320.1_4360_5077_-,NA|118aa|up_1|AP018320.1_7907_8261_-,NA|197aa|up_0|AP018320.1_8360_8951_-,NA|270aa|down_1|AP018320.1_23358_24168_-,NA|357aa|down_2|AP018320.1_24465_25536_-,NA|182aa|down_6|AP018320.1_31733_32279_-,NA|45aa|down_7|AP018320.1_32924_33059_-,NA|59aa|down_8|AP018320.1_33033_33210_-	NA|NA	NA	NA|NA	NA	NA|NA	NA	NA|519aa|up_6|AP018320.1_562_2119_-	pfam03743, TrbI, Bacterial conjugation TrbI-like protein	NA|268aa|up_5|AP018320.1_2115_2919_-	NA	NA|374aa|up_4|AP018320.1_3237_4359_-	NA	NA|239aa|up_3|AP018320.1_4360_5077_-	NA	NA|925aa|up_2|AP018320.1_5076_7851_-	COG3451, VirB4, Type IV secretory pathway, VirB4 components [Intracellular trafficking and secretion]	NA|118aa|up_1|AP018320.1_7907_8261_-	NA	NA|197aa|up_0|AP018320.1_8360_8951_-	NA	NA|4203aa|down_0|AP018320.1_10244_22853_+	COG2931, COG2931, RTX toxins and related Ca2+-binding proteins [Secondary metabolites biosynthesis, transport, and catabolism]	NA|270aa|down_1|AP018320.1_23358_24168_-	NA	NA|357aa|down_2|AP018320.1_24465_25536_-	NA	NA|262aa|down_3|AP018320.1_25796_26582_-	cd17933, DEXSc_RecD-like, DEXS-box helicase domain of RecD and similar proteins	NA|304aa|down_4|AP018320.1_27079_27991_-	PRK00236, xerC, site-specific tyrosine recombinase XerC; Reviewed	NA|1156aa|down_5|AP018320.1_28266_31734_-	pfam12965, DUF3854, Domain of unknown function (DUF3854)	NA|182aa|down_6|AP018320.1_31733_32279_-	NA	NA|45aa|down_7|AP018320.1_32924_33059_-	NA	NA|59aa|down_8|AP018320.1_33033_33210_-	NA	NA|508aa|down_9|AP018320.1_33221_34745_-	pfam00931, NB-ARC, NB-ARC domain
GCA_003990705.1_ASM399070v1	AP018320	Nostoc sp. HK-01 plasmid plasmid2 DNA, complete genome	2	26643-26742	2	CRISPRCasFinder	no		RT,cas3,cas6,DEDDh	Orphan	GCTATCAATTTTGATATCAATTTC	24	0	0	NA	NA	NA	1	1	Orphan	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1,Cas9_archaeal,cas14j,Cas14c_CAS-V-F	NA|268aa|up_9|AP018320.1_2115_2919_-,NA|374aa|up_8|AP018320.1_3237_4359_-,NA|239aa|up_7|AP018320.1_4360_5077_-,NA|118aa|up_5|AP018320.1_7907_8261_-,NA|197aa|up_4|AP018320.1_8360_8951_-,NA|270aa|up_2|AP018320.1_23358_24168_-,NA|357aa|up_1|AP018320.1_24465_25536_-,NA|182aa|down_2|AP018320.1_31733_32279_-,NA|45aa|down_3|AP018320.1_32924_33059_-,NA|59aa|down_4|AP018320.1_33033_33210_-,NA|320aa|down_7|AP018320.1_37189_38149_+	NA|268aa|up_9|AP018320.1_2115_2919_-	NA	NA|374aa|up_8|AP018320.1_3237_4359_-	NA	NA|239aa|up_7|AP018320.1_4360_5077_-	NA	NA|925aa|up_6|AP018320.1_5076_7851_-	COG3451, VirB4, Type IV secretory pathway, VirB4 components [Intracellular trafficking and secretion]	NA|118aa|up_5|AP018320.1_7907_8261_-	NA	NA|197aa|up_4|AP018320.1_8360_8951_-	NA	NA|4203aa|up_3|AP018320.1_10244_22853_+	COG2931, COG2931, RTX toxins and related Ca2+-binding proteins [Secondary metabolites biosynthesis, transport, and catabolism]	NA|270aa|up_2|AP018320.1_23358_24168_-	NA	NA|357aa|up_1|AP018320.1_24465_25536_-	NA	NA|262aa|up_0|AP018320.1_25796_26582_-	cd17933, DEXSc_RecD-like, DEXS-box helicase domain of RecD and similar proteins	NA|304aa|down_0|AP018320.1_27079_27991_-	PRK00236, xerC, site-specific tyrosine recombinase XerC; Reviewed	NA|1156aa|down_1|AP018320.1_28266_31734_-	pfam12965, DUF3854, Domain of unknown function (DUF3854)	NA|182aa|down_2|AP018320.1_31733_32279_-	NA	NA|45aa|down_3|AP018320.1_32924_33059_-	NA	NA|59aa|down_4|AP018320.1_33033_33210_-	NA	NA|508aa|down_5|AP018320.1_33221_34745_-	pfam00931, NB-ARC, NB-ARC domain	NA|621aa|down_6|AP018320.1_35162_37025_+	cd14014, STKc_PknB_like, Catalytic domain of bacterial Serine/Threonine kinases, PknB and similar proteins	NA|320aa|down_7|AP018320.1_37189_38149_+	NA	NA|831aa|down_8|AP018320.1_38109_40602_-	COG4995, COG4995, Uncharacterized protein conserved in bacteria [Function unknown]	NA|866aa|down_9|AP018320.1_40604_43202_-	pfam05860, Haemagg_act, haemagglutination activity domain
GCA_003990705.1_ASM399070v1	AP018320	Nostoc sp. HK-01 plasmid plasmid2 DNA, complete genome	3	102249-103672	1,3,1	PILER-CR,CRISPRCasFinder,CRT	no	cas6,DEDDh	RT,cas3,cas6,DEDDh	Unclear	GTTTCCAAACTCGATTACCCCGCAAGGGGACTGAAAC,GTTTCCAAACTCGATTACCCCGCAAGGGGACTGAAAC,GTTTCCAAACTCGATTACCCCGCAAGGGGACTGAAAC	37,37,37	0	0	NA	NA	NA:NA:NA	16,19,19	19	Unclear	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1,Cas9_archaeal,cas14j,Cas14c_CAS-V-F	NA|415aa|up_9|AP018320.1_86364_87609_-,NA|117aa|up_3|AP018320.1_98934_99285_-,NA|130aa|down_4|AP018320.1_107367_107757_+	NA|415aa|up_9|AP018320.1_86364_87609_-	NA	NA|1282aa|up_8|AP018320.1_87630_91476_-	NF033451, BREX_2_MTaseX, BREX-2 system adenine-specific DNA-methyltransferase PglX	NA|950aa|up_7|AP018320.1_91500_94350_-	cd18011, DEXDc_RapA, DEXH-box helicase domain of RapA	NA|481aa|up_6|AP018320.1_94773_96216_-	pfam15611, EH_Signature, EH_Signature domain	NA|218aa|up_5|AP018320.1_96227_96881_-	cd07185, OmpA_C-like, Peptidoglycan binding domains similar to the C-terminal domain of outer-membrane protein OmpA	NA|672aa|up_4|AP018320.1_96902_98918_-	TIGR02168, Chromosome_partition_protein_Smc, chromosome segregation protein SMC, common bacterial type	NA|117aa|up_3|AP018320.1_98934_99285_-	NA	NA|77aa|up_2|AP018320.1_100080_100311_+	COG5126, FRQ1, Ca2+-binding protein (EF-Hand superfamily) [Signal transduction mechanisms / Cytoskeleton / Cell division and chromosome partitioning / General function prediction only]	cas6|326aa|up_1|AP018320.1_100376_101354_+	COG5551, COG5551, CRISPR system related protein, RAMP superfamily [Defense    mechanisms]	NA|152aa|up_0|AP018320.1_101519_101975_+	COG2947, COG2947, Uncharacterized conserved protein [Function unknown]	NA|413aa|down_0|AP018320.1_104035_105274_-	COG2124, CypX, Cytochrome P450 [Secondary metabolites biosynthesis, transport, and catabolism]	NA|128aa|down_1|AP018320.1_105425_105809_+	pfam08463, EcoEI_R_C, EcoEI R protein C-terminal	NA|240aa|down_2|AP018320.1_105903_106623_-	smart00421, HTH_LUXR, helix_turn_helix, Lux Regulon	NA|143aa|down_3|AP018320.1_106810_107239_+	COG5331, COG5331, Uncharacterized protein conserved in bacteria [Function unknown]	NA|130aa|down_4|AP018320.1_107367_107757_+	NA	NA|284aa|down_5|AP018320.1_108060_108912_-	TIGR00027, Hypothetical_protein_Rv0893c/MT0917/Mb0917c	NA|183aa|down_6|AP018320.1_109029_109578_-	pfam13358, DDE_3, DDE superfamily endonuclease	NA|166aa|down_7|AP018320.1_109589_110087_-	COG3415, COG3415, Transposase and inactivated derivatives [DNA replication, recombination, and repair]	NA|339aa|down_8|AP018320.1_110218_111235_-	COG1637, COG1637, Predicted nuclease of the RecB family [DNA replication, recombination, and repair]	DEDDh|800aa|down_9|AP018320.1_111479_113879_+	smart00479, EXOIII, exonuclease domain in DNA-polymerase alpha and epsilon chain, ribonuclease T and other exonucleases
GCA_003990705.1_ASM399070v1	AP018320	Nostoc sp. HK-01 plasmid plasmid2 DNA, complete genome	4	211033-211153	4	CRISPRCasFinder	no		RT,cas3,cas6,DEDDh	Orphan	TGCTGAAAACCTTTGCTATACACTTGTTTCG	31	0	0	NA	NA	NA	1	1	Orphan	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1,Cas9_archaeal,cas14j,Cas14c_CAS-V-F	NA|315aa|up_9|AP018320.1_204622_205567_+,NA|111aa|up_7|AP018320.1_206492_206825_-,NA|173aa|up_6|AP018320.1_206825_207344_-,NA|99aa|up_5|AP018320.1_207505_207802_+,NA|95aa|up_3|AP018320.1_209053_209338_+,NA|63aa|up_1|AP018320.1_210053_210242_+,NA|242aa|up_0|AP018320.1_210244_210970_+,NA|177aa|down_0|AP018320.1_211201_211732_+,NA|131aa|down_3|AP018320.1_214540_214933_+,NA|149aa|down_4|AP018320.1_214925_215372_+,NA|37aa|down_5|AP018320.1_215331_215442_+,NA|92aa|down_6|AP018320.1_215507_215783_+,NA|106aa|down_7|AP018320.1_215775_216093_+	NA|315aa|up_9|AP018320.1_204622_205567_+	NA	NA|230aa|up_8|AP018320.1_205663_206353_+	pfam08852, DUF1822, Protein of unknown function (DUF1822)	NA|111aa|up_7|AP018320.1_206492_206825_-	NA	NA|173aa|up_6|AP018320.1_206825_207344_-	NA	NA|99aa|up_5|AP018320.1_207505_207802_+	NA	NA|314aa|up_4|AP018320.1_207889_208831_+	pfam13737, DDE_Tnp_1_5, Transposase DDE domain	NA|95aa|up_3|AP018320.1_209053_209338_+	NA	NA|198aa|up_2|AP018320.1_209334_209928_+	NF012221, MARTX_Nterm, MARTX multifunctional-autoprocessing repeats-in-toxin holotoxin RtxA	NA|63aa|up_1|AP018320.1_210053_210242_+	NA	NA|242aa|up_0|AP018320.1_210244_210970_+	NA	NA|177aa|down_0|AP018320.1_211201_211732_+	NA	NA|496aa|down_1|AP018320.1_211919_213407_+	pfam13401, AAA_22, AAA domain	NA|314aa|down_2|AP018320.1_213447_214389_-	pfam13737, DDE_Tnp_1_5, Transposase DDE domain	NA|131aa|down_3|AP018320.1_214540_214933_+	NA	NA|149aa|down_4|AP018320.1_214925_215372_+	NA	NA|37aa|down_5|AP018320.1_215331_215442_+	NA	NA|92aa|down_6|AP018320.1_215507_215783_+	NA	NA|106aa|down_7|AP018320.1_215775_216093_+	NA	NA|726aa|down_8|AP018320.1_216422_218600_-	pfam00656, Peptidase_C14, Caspase domain	NA|818aa|down_9|AP018320.1_218750_221204_-	pfam12770, CHAT, CHAT domain
GCA_003990705.1_ASM399070v1	AP018325	Nostoc sp. HK-01 plasmid plasmid7 DNA, complete genome	1	17805-17908	1	CRISPRCasFinder	no			Orphan	AGGCACATTAAAACTACTGCTAATCCAATGTACAGGCAA	39	0	0	NA	NA	NA	1	1	Orphan	c2c9_V-U4,Cas14u_CAS-V,DEDDh,csa3,c2c5_V-U5,RT,PD-DExK,2OG_CAS,cas6,cas4,cas1,cas2,cas3,DinG,csx1,Cas9_archaeal,cas14j,Cas14c_CAS-V-F	NA|251aa|up_9|AP018325.1_9602_10355_-,NA|179aa|up_8|AP018325.1_10351_10888_-,NA|239aa|up_7|AP018325.1_10913_11630_-,NA|101aa|up_6|AP018325.1_11643_11946_-,NA|66aa|up_5|AP018325.1_12045_12243_-,NA|89aa|up_4|AP018325.1_12251_12518_-,NA|86aa|up_3|AP018325.1_12514_12772_-,NA|65aa|up_1|AP018325.1_14138_14333_+,NA|186aa|down_0|AP018325.1_18203_18761_-,NA|171aa|down_2|AP018325.1_21063_21576_-,NA|171aa|down_5|AP018325.1_25709_26222_+,NA|196aa|down_6|AP018325.1_26497_27085_+,NA|134aa|down_7|AP018325.1_27065_27467_-	NA|251aa|up_9|AP018325.1_9602_10355_-	NA	NA|179aa|up_8|AP018325.1_10351_10888_-	NA	NA|239aa|up_7|AP018325.1_10913_11630_-	NA	NA|101aa|up_6|AP018325.1_11643_11946_-	NA	NA|66aa|up_5|AP018325.1_12045_12243_-	NA	NA|89aa|up_4|AP018325.1_12251_12518_-	NA	NA|86aa|up_3|AP018325.1_12514_12772_-	NA	NA|372aa|up_2|AP018325.1_12776_13892_-	cd10227, ParM_like, Plasmid segregation protein ParM and similar proteins	NA|65aa|up_1|AP018325.1_14138_14333_+	NA	NA|884aa|up_0|AP018325.1_14933_17585_+	COG3378, COG3378, Phage associated DNA primase [General function prediction only]	NA|186aa|down_0|AP018325.1_18203_18761_-	NA	NA|538aa|down_1|AP018325.1_19198_20812_+	pfam16927, HisKA_7TM, N-terminal 7TM region of histidine kinase	NA|171aa|down_2|AP018325.1_21063_21576_-	NA	NA|594aa|down_3|AP018325.1_21814_23596_+	COG4191, COG4191, Signal transduction histidine kinase regulating C4-dicarboxylate transport system [Signal transduction mechanisms]	NA|586aa|down_4|AP018325.1_23610_25368_-	pfam01076, Mob_Pre, Plasmid recombination enzyme	NA|171aa|down_5|AP018325.1_25709_26222_+	NA	NA|196aa|down_6|AP018325.1_26497_27085_+	NA	NA|134aa|down_7|AP018325.1_27065_27467_-	NA	NA|474aa|down_8|AP018325.1_27552_28974_-	pfam02281, Dimer_Tnp_Tn5, Transposase Tn5 dimerization domain	NA|NA	NA
