!ls
README.md data docs index.qmd
_quarto.yml design.ipynb images seq_analysis.ipynb
conda
docker
colab
To use Google Drive as the Colab working directory:
from google.colab import drive
# Mount Google Drive inside the Colab runtime.
drive.mount('/content/drive')
import os
# Work from the project folder on the mounted drive.
os.chdir('/content/drive/MyDrive/design_build')
!pwd
!wget https://media.addgene.org/snapgene-media/v1.7.9-0-g88a3305/sequences/222046/51c2cfab-a3b4-4d62-98df-0c77ec21164e/addgene-plasmid-50005-sequence-222046.gbk
!ls
README.md data docs index.qmd
_quarto.yml design.ipynb images seq_analysis.ipynb
Biopython is a collection of freely available Python tools for computational biology and bioinformatics.
from Bio import SeqIO
from pandas import DataFrame
# Load the GenBank record of the Addgene plasmid.
records = SeqIO.read("data/parts/addgene-plasmid-50005-sequence-222046.gbk", "genbank")

# One row per annotated feature. The strand is read from the location object:
# SeqFeature.strand is deprecated (Biopython warns "Please use
# .location.strand rather than .strand").
features = [
    {
        "Label": feature.qualifiers.get("label", [""])[0],
        "Strand": feature.location.strand,
        "Start": feature.location.start,
        "End": feature.location.end,
        "Type": feature.type,
    }
    for feature in records.features
]
print(features)
DataFrame(features)
[{'Label': '', 'Strand': 1, 'Start': ExactPosition(0), 'End': ExactPosition(2686), 'Type': 'source'}, {'Label': 'pBR322ori-F', 'Strand': 1, 'Start': ExactPosition(117), 'End': ExactPosition(137), 'Type': 'primer_bind'}, {'Label': 'L4440', 'Strand': 1, 'Start': ExactPosition(370), 'End': ExactPosition(388), 'Type': 'primer_bind'}, {'Label': 'CAP binding site', 'Strand': 1, 'Start': ExactPosition(504), 'End': ExactPosition(526), 'Type': 'protein_bind'}, {'Label': 'lac promoter', 'Strand': 1, 'Start': ExactPosition(540), 'End': ExactPosition(571), 'Type': 'promoter'}, {'Label': 'lac operator', 'Strand': 1, 'Start': ExactPosition(578), 'End': ExactPosition(595), 'Type': 'protein_bind'}, {'Label': 'M13/pUC Reverse', 'Strand': 1, 'Start': ExactPosition(583), 'End': ExactPosition(606), 'Type': 'primer_bind'}, {'Label': 'M13 rev', 'Strand': 1, 'Start': ExactPosition(602), 'End': ExactPosition(619), 'Type': 'primer_bind'}, {'Label': 'M13 Reverse', 'Strand': 1, 'Start': ExactPosition(602), 'End': ExactPosition(619), 'Type': 'primer_bind'}, {'Label': 'lacZ-alpha', 'Strand': 1, 'Start': ExactPosition(614), 'End': ExactPosition(938), 'Type': 'CDS'}, {'Label': 'MCS', 'Strand': 1, 'Start': ExactPosition(631), 'End': ExactPosition(688), 'Type': 'misc_feature'}, {'Label': 'M13 Forward', 'Strand': -1, 'Start': ExactPosition(688), 'End': ExactPosition(706), 'Type': 'primer_bind'}, {'Label': 'M13 fwd', 'Strand': -1, 'Start': ExactPosition(688), 'End': ExactPosition(705), 'Type': 'primer_bind'}, {'Label': 'M13/pUC Forward', 'Strand': -1, 'Start': ExactPosition(697), 'End': ExactPosition(720), 'Type': 'primer_bind'}, {'Label': 'pRS-marker', 'Strand': -1, 'Start': ExactPosition(913), 'End': ExactPosition(933), 'Type': 'primer_bind'}, {'Label': "pGEX 3'", 'Strand': 1, 'Start': ExactPosition(1032), 'End': ExactPosition(1055), 'Type': 'primer_bind'}, {'Label': 'pBRforEco', 'Strand': -1, 'Start': ExactPosition(1092), 'End': ExactPosition(1111), 'Type': 'primer_bind'}, {'Label': 'AmpR 
promoter', 'Strand': 1, 'Start': ExactPosition(1178), 'End': ExactPosition(1283), 'Type': 'promoter'}, {'Label': 'AmpR', 'Strand': 1, 'Start': ExactPosition(1283), 'End': ExactPosition(2144), 'Type': 'CDS'}, {'Label': 'Amp-R', 'Strand': -1, 'Start': ExactPosition(1501), 'End': ExactPosition(1521), 'Type': 'primer_bind'}, {'Label': 'ori', 'Strand': 1, 'Start': ExactPosition(0), 'End': ExactPosition(2686), 'Type': 'rep_origin'}]
/home/haseong/anaconda3/envs/biopy/lib/python3.11/site-packages/Bio/SeqFeature.py:1040: BiopythonParserWarning: Attempting to fix invalid location '2315..217' as it looks like incorrect origin wrapping. Please fix input file, this could have unintended behavior.
warnings.warn(
/home/haseong/anaconda3/envs/biopy/lib/python3.11/site-packages/Bio/SeqFeature.py:230: BiopythonDeprecationWarning: Please use .location.strand rather than .strand
warnings.warn(
Label | Strand | Start | End | Type | |
---|---|---|---|---|---|
0 | 1 | 0 | 2686 | source | |
1 | pBR322ori-F | 1 | 117 | 137 | primer_bind |
2 | L4440 | 1 | 370 | 388 | primer_bind |
3 | CAP binding site | 1 | 504 | 526 | protein_bind |
4 | lac promoter | 1 | 540 | 571 | promoter |
5 | lac operator | 1 | 578 | 595 | protein_bind |
6 | M13/pUC Reverse | 1 | 583 | 606 | primer_bind |
7 | M13 rev | 1 | 602 | 619 | primer_bind |
8 | M13 Reverse | 1 | 602 | 619 | primer_bind |
9 | lacZ-alpha | 1 | 614 | 938 | CDS |
10 | MCS | 1 | 631 | 688 | misc_feature |
11 | M13 Forward | -1 | 688 | 706 | primer_bind |
12 | M13 fwd | -1 | 688 | 705 | primer_bind |
13 | M13/pUC Forward | -1 | 697 | 720 | primer_bind |
14 | pRS-marker | -1 | 913 | 933 | primer_bind |
15 | pGEX 3' | 1 | 1032 | 1055 | primer_bind |
16 | pBRforEco | -1 | 1092 | 1111 | primer_bind |
17 | AmpR promoter | 1 | 1178 | 1283 | promoter |
18 | AmpR | 1 | 1283 | 2144 | CDS |
19 | Amp-R | -1 | 1501 | 1521 | primer_bind |
20 | ori | 1 | 0 | 2686 | rep_origin |
primers: This package is uniquely focused on DNA assembly workflows such as Gibson Assembly and Golden Gate cloning. It lets you design primers while adding extra sequence to the 5' ends of the primers.
from primers import create, primers
from pandas import DataFrame
from random import sample, choices
# Build a random 100-nt template to design primers against.
myseq_list = choices(["A", "T", "G", "C"], k=100)
myseq = "".join(myseq_list)
print(myseq)

# Design a primer pair, prepending extra bases to the 5' end of each primer.
fwd, rev = create(myseq, add_fwd = "GGGG", add_rev = "TTTT")
# p1, p2 = primers(myseq, add_fwd = "GGGG", add_rev = "TTTT")
print(fwd)
print(rev)
## display table form
# Keys and values both come from the forward primer; the last field
# (the nested scoring record) is dropped.
fwd_fields = fwd.dict()
DataFrame(list(fwd_fields.values())[:-1], index = list(fwd_fields.keys())[:-1])
## default argument values
create
CGTGCCTCCATAAATAACTTGCAAGATTCTCACCATTCGAAGGTTCTCGACAAGGGGCGGGGGGTAAAAATAGCATTACTAGTTCGGATAAATCTGCCCT
Primer(seq='GGGGCGTGCCTCCATAAATAACTTG', len=25, tm=63.3, tm_total=70.6, gc=0.5, dg=0, fwd=True, off_target_count=0, scoring=Scoring(penalty=1.8, penalty_tm=1.3, penalty_tm_diff=0, penalty_gc=0.0, penalty_len=0.5, penalty_dg=0.0, penalty_off_target=0.0))
Primer(seq='TTTTAGGGCAGATTTATCCGAACTAGT', len=27, tm=63.4, tm_total=63.9, gc=0.4, dg=-0.37, fwd=False, off_target_count=0, scoring=Scoring(penalty=4.6, penalty_tm=1.4, penalty_tm_diff=0, penalty_gc=2.0, penalty_len=0.5, penalty_dg=0.7, penalty_off_target=0.0))
<function primers.primers.primers(seq: str, add_fwd: str = '', add_rev: str = '', add_fwd_len: Tuple[int, int] = (-1, -1), add_rev_len: Tuple[int, int] = (-1, -1), offtarget_check: str = '', optimal_tm: float = 62.0, optimal_gc: float = 0.5, optimal_len: int = 22, penalty_tm: float = 1.0, penalty_gc: float = 0.2, penalty_len: float = 0.5, penalty_tm_diff: float = 1.0, penalty_dg: float = 2.0, penalty_off_target: float = 20.0) -> Tuple[primers.primers.Primer, primers.primers.Primer]>
<function primers.primers.primers(seq: str, add_fwd: str = '', add_rev: str = '', add_fwd_len: Tuple[int, int] = (-1, -1), add_rev_len: Tuple[int, int] = (-1, -1), offtarget_check: str = '', optimal_tm: float = 62.0, optimal_gc: float = 0.5, optimal_len: int = 22, penalty_tm: float = 1.0, penalty_gc: float = 0.2, penalty_len: float = 0.5, penalty_tm_diff: float = 1.0, penalty_dg: float = 2.0, penalty_off_target: float = 20.0) -> Tuple[primers.primers.Primer, primers.primers.Primer]>
from primers import create
from random import choices
def print_primer_info(x):
    """Print the fields of a primer as a one-column table.

    Parameters
    ----------
    x : object exposing a ``dict()`` method that returns a mapping of
        field name -> value (e.g. a primer returned by ``create``).

    The last entry of that mapping is omitted from the table. Nothing is
    returned; the table is written to standard output.
    """
    from pandas import DataFrame

    # Call dict() once so keys and values are guaranteed to line up.
    fields = x.dict()
    df = DataFrame(list(fields.values())[:-1], index=list(fields.keys())[:-1])
    print(df)
primer_binding_seq = "GTCATATGCATTCGATGCGTTAGG"
rnd_seq1 = "".join(choices(["A", "T", "G", "C"], k=100))
rnd_seq2 = "".join(choices(["A", "T", "G", "C"], k=100))

# Template in which the primer-binding sequence occurs once.
myseq = primer_binding_seq+rnd_seq1
print(myseq)
len(myseq)
fwd, rev = create(myseq)

print_primer_info(fwd)
## primer considering offtargets
# The binding sequence now appears twice in the template.
myseq2 = primer_binding_seq+rnd_seq1+primer_binding_seq+rnd_seq2
fwd2, rev = create(myseq2)

print_primer_info(fwd2)
## optimal_tm = 62 is the same as the default (62.0), so the result does not change
fwd2, rev = create(myseq2, optimal_tm = 62)
print_primer_info(fwd2)
GTCATATGCATTCGATGCGTTAGGGACACCCATGGCAACATGTGGATATAACTCGGTGCTGAGGAAAACTTCATACGCTCTTGACGTTCTATGCATGAAGCCCTTTCAACGCACGCTTCATTCA
0
seq GTCATATGCATTCGATGCGT
len 20
tm 62.5
tm_total 62.5
gc 0.5
dg -0.56
fwd True
off_target_count 0
0
seq GTCATATGCATTCGATGCGTTAGGGA
len 26
tm 68.0
tm_total 68.0
gc 0.5
dg -0.56
fwd True
off_target_count 0
0
seq GTCATATGCATTCGATGCGTTAGGGA
len 26
tm 68.0
tm_total 68.0
gc 0.5
dg -0.56
fwd True
off_target_count 0
from pydna.dseqrecord import Dseqrecord
# Build a double-stranded record from a short sequence and draw it.
dsr = Dseqrecord("ATGCGTTGC")
dsr.figure()
Dseqrecord(-9) ATGCGTTGC TACGCAACG
from pydna.readers import read
# Read the plasmid with pydna and list its annotated features.
p = read("data/parts/addgene-plasmid-50005-sequence-222046.gbk")
p.list_features()
/home/haseong/anaconda3/envs/biopy/lib/python3.11/site-packages/Bio/SeqFeature.py:1040: BiopythonParserWarning: Attempting to fix invalid location '2315..217' as it looks like incorrect origin wrapping. Please fix input file, this could have unintended behavior.
warnings.warn(
Ft# | Label or Note | Dir | Sta | End | Len | type | orf? |
---|---|---|---|---|---|---|---|
0 | nd | --> | 0 | 2686 | 2686 | source | no |
1 | L:pBR322ori-F | --> | 117 | 137 | 20 | primer_bind | no |
2 | L:L4440 | --> | 370 | 388 | 18 | primer_bind | no |
3 | L:CAP binding si | --> | 504 | 526 | 22 | protein_bind | no |
4 | L:lac promoter | --> | 540 | 571 | 31 | promoter | no |
5 | L:lac operator | --> | 578 | 595 | 17 | protein_bind | no |
6 | L:M13/pUC Revers | --> | 583 | 606 | 23 | primer_bind | no |
7 | L:M13 rev | --> | 602 | 619 | 17 | primer_bind | no |
8 | L:M13 Reverse | --> | 602 | 619 | 17 | primer_bind | no |
9 | L:lacZ-alpha | --> | 614 | 938 | 324 | CDS | yes |
10 | L:MCS | --> | 631 | 688 | 57 | misc_feature | no |
11 | L:M13 Forward | <-- | 688 | 706 | 18 | primer_bind | no |
12 | L:M13 fwd | <-- | 688 | 705 | 17 | primer_bind | no |
13 | L:M13/pUC Forwar | <-- | 697 | 720 | 23 | primer_bind | no |
14 | L:pRS-marker | <-- | 913 | 933 | 20 | primer_bind | no |
15 | L:pGEX 3' | --> | 1032 | 1055 | 23 | primer_bind | no |
16 | L:pBRforEco | <-- | 1092 | 1111 | 19 | primer_bind | no |
17 | L:AmpR promoter | --> | 1178 | 1283 | 105 | promoter | no |
18 | L:AmpR | --> | 1283 | 2144 | 861 | CDS | yes |
19 | L:Amp-R | <-- | 1501 | 1521 | 20 | primer_bind | no |
20 | L:ori | --> | 0 | 2686 | 589 | rep_origin | no |
# Feature 10 is the MCS row in the feature listing for this plasmid.
extracted_site = p.extract_feature(10)
extracted_site.seq
Dseq(-57)
AAGC..ATTC
TTCG..TAAG
pUC19 from Addgene
Remove BsaI site
Insert a part into MCS
Hinz, Aaron J., Benjamin Stenzler, and Alexandre J. Poulain. “Golden gate assembly of aerobic and anaerobic microbial bioreporters.” Applied and environmental microbiology 88.1 (2022): e01485-21.
from pydna.readers import read
from primers import create
egfp = read("data/parts/pUC19-egfp.gb")

egfp_feature_list = egfp.list_features()

display(egfp_feature_list)
# Feature 13 is the 720-bp EGFP CDS row in the listing shown by display().
myseq = egfp.extract_feature(13)
str(myseq.seq)
# Design primers on the EGFP sequence with extra 5' bases on each primer.
fwd, rev = create(str(myseq.seq), add_fwd = "GGGG", add_rev = "TTTT")
print(fwd)
/home/haseong/anaconda3/envs/biopy/lib/python3.11/site-packages/Bio/GenBank/Scanner.py:1528: BiopythonParserWarning: Attempting to parse malformed locus line:
'LOCUS pUC19-egfp 3371 bp DNA circular SYN 18-MAY-2024\n'
Found locus 'pUC19-egfp' size '3371' residue_type 'DNA'
Some fields may be wrong.
warnings.warn(
Ft# | Label or Note | Dir | Sta | End | Len | type | orf? |
---|---|---|---|---|---|---|---|
0 | nd | --> | 0 | 3371 | 3371 | source | no |
1 | L:pBR322ori-F | --> | 117 | 137 | 20 | primer_bind | no |
2 | L:L4440 | --> | 370 | 388 | 18 | primer_bind | no |
3 | L:CAP binding si | --> | 504 | 526 | 22 | protein_bind | no |
4 | L:lac promoter | --> | 540 | 571 | 31 | promoter | no |
5 | L:lac operator | --> | 578 | 595 | 17 | protein_bind | no |
6 | L:M13/pUC Revers | --> | 583 | 606 | 23 | primer_bind | no |
7 | L:M13 rev | --> | 602 | 619 | 17 | primer_bind | no |
8 | L:M13 Reverse | --> | 602 | 619 | 17 | primer_bind | no |
9 | L:lacZ-alpha | --> | 614 | 1623 | 1009 | CDS | no |
10 | L:BsaI-F | --> | 631 | 638 | 7 | misc_feature | no |
11 | L:lacZ-alpha | --> | 638 | 642 | 4 | CDS | no |
12 | L:oh-f2-f3-F | --> | 638 | 642 | 4 | misc_feature | no |
13 | L:EGFP | --> | 642 | 1362 | 720 | CDS | yes |
14 | L:EGFP | --> | 642 | 1359 | 717 | CDS | no |
15 | L:EGFP-N | <-- | 687 | 709 | 22 | primer_bind | no |
16 | L:EXFP-R | <-- | 948 | 968 | 20 | primer_bind | no |
17 | L:EGFP-C | --> | 1295 | 1317 | 22 | primer_bind | no |
18 | L:oh-f3-f4-F | --> | 1362 | 1366 | 4 | misc_feature | no |
19 | L:BasI-R | --> | 1366 | 1373 | 7 | misc_feature | no |
20 | L:M13 Forward | <-- | 1373 | 1391 | 18 | primer_bind | no |
21 | L:M13 fwd | <-- | 1373 | 1390 | 17 | primer_bind | no |
22 | L:M13/pUC Forwar | <-- | 1382 | 1405 | 23 | primer_bind | no |
23 | L:pRS-marker | <-- | 1598 | 1618 | 20 | primer_bind | no |
24 | L:pGEX 3' | --> | 1717 | 1740 | 23 | primer_bind | no |
25 | L:pBRforEco | <-- | 1777 | 1796 | 19 | primer_bind | no |
26 | L:AmpR promoter | --> | 1863 | 1968 | 105 | promoter | no |
27 | L:AmpR | --> | 1968 | 2829 | 861 | CDS | yes |
28 | L:Amp-R | <-- | 2186 | 2206 | 20 | primer_bind | no |
29 | L:ori | --> | 0 | 3371 | 589 | rep_origin | no |
Primer(seq='GGGGATGGTGAGCAAGGGCG', len=20, tm=65.1, tm_total=72.1, gc=0.7, dg=0, fwd=True, off_target_count=0, scoring=Scoring(penalty=10.1, penalty_tm=3.1, penalty_tm_diff=0, penalty_gc=4.0, penalty_len=3.0, penalty_dg=0.0, penalty_off_target=0.0))
from pydna.all import pcr
# Simulate PCR of the EGFP feature with the designed primer pair.
pcr_product = pcr(fwd.seq, rev.seq, egfp.extract_feature(13))
pcr_product.figure()
5ATGGTGAGCAAGGGCG...CATGGACGAGCTGTACAAGTAA3
||||||||||||||||||||||
3GTACCTGCTCGACATGTTCATTTTTT5
5GGGGATGGTGAGCAAGGGCG3
||||||||||||||||
3TACCACTCGTTCCCGC...GTACCTGCTCGACATGTTCATT5
from primers import create
from pydna.all import pcr
# Pull out the individual features by their index in egfp.list_features():
# BsaI-F, oh-f2-f3-F, EGFP, oh-f3-f4-F and the reverse BsaI site.
frag1 = egfp.extract_feature(10)
frag2 = egfp.extract_feature(12)
frag3 = egfp.extract_feature(13)
frag4 = egfp.extract_feature(18)
frag5 = egfp.extract_feature(19)

# Concatenate them into one template and design primers on it.
targetseq = frag1+frag2+frag3+frag4+frag5
fwd, rev = create(str(targetseq.seq))

pcr_product = pcr(fwd.seq, rev.seq, targetseq)
pcr_product.figure()
5GGTCTCAGTCAATGGTGA...TACAAGTAAGGGATGAGACC3
||||||||||||||||||||
3ATGTTCATTCCCTACTCTGG5
5GGTCTCAGTCAATGGTGA3
||||||||||||||||||
3CCAGAGTCAGTTACCACT...ATGTTCATTCCCTACTCTGG5
from Bio.Restriction import BsaI
# Digest the PCR product with BsaI and draw each of the first three fragments.
cut_product = pcr_product.cut(BsaI)
print(len(cut_product))
display(cut_product[0].figure())
print()
display(cut_product[1].figure())
print()
display(cut_product[2].figure())
3
Dseqrecord(-11)
GGTCTCA
CCAGAGTCAGT
Dseqrecord(-728)
GTCAATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCCTGACCTACGGCGTGCAGTGCTTCAGCCGCTACCCCGACCACATGAAGCAGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCACCCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAGTAA
TACCACTCGTTCCCGCTCCTCGACAAGTGGCCCCACCACGGGTAGGACCAGCTCGACCTGCCGCTGCATTTGCCGGTGTTCAAGTCGCACAGGCCGCTCCCGCTCCCGCTACGGTGGATGCCGTTCGACTGGGACTTCAAGTAGACGTGGTGGCCGTTCGACGGGCACGGGACCGGGTGGGAGCACTGGTGGGACTGGATGCCGCACGTCACGAAGTCGGCGATGGGGCTGGTGTACTTCGTCGTGCTGAAGAAGTTCAGGCGGTACGGGCTTCCGATGCAGGTCCTCGCGTGGTAGAAGAAGTTCCTGCTGCCGTTGATGTTCTGGGCGCGGCTCCACTTCAAGCTCCCGCTGTGGGACCACTTGGCGTAGCTCGACTTCCCGTAGCTGAAGTTCCTCCTGCCGTTGTAGGACCCCGTGTTCGACCTCATGTTGATGTTGTCGGTGTTGCAGATATAGTACCGGCTGTTCGTCTTCTTGCCGTAGTTCCACTTGAAGTTCTAGGCGGTGTTGTAGCTCCTGCCGTCGCACGTCGAGCGGCTGGTGATGGTCGTCTTGTGGGGGTAGCCGCTGCCGGGGCACGACGACGGGCTGTTGGTGATGGACTCGTGGGTCAGGCGGGACTCGTTTCTGGGGTTGCTCTTCGCGCTAGTGTACCAGGACGACCTCAAGCACTGGCGGCGGCCCTAGTGAGAGCCGTACCTGCTCGACATGTTCATTCCCT
Dseqrecord(-11)
GGGATGAGACC
ACTCTGG
from pydna.assembly import Assembly
from pydna.common_sub_strings import terminal_overlap
# Join the three digestion fragments through their terminal overlaps
# (limit = 4 is passed as the overlap-length setting).
asm = Assembly((cut_product[0], cut_product[1], cut_product[2]), algorithm = terminal_overlap, limit = 4)

candidate = asm.assemble_linear()

print(candidate[0])
candidate[0].figure()
Dseqrecord
circular: False
size: 742
ID: id
Name: name
Description: description
Number of features: 13
/molecule_type=DNA
Dseq(-742)
GGTC..GACC
CCAG..CTGG
name| 4
\/
/\
4|name| 4
\/
/\
4|name
from pydna.readers import read
## read the parts
egfp = read("data/parts/pUC19-egfp.gb")
promoter = read("data/parts/pUC19-J23100.gb")
terminator = read("data/parts/pUC19-L2U3H03.gb")
rbs = read("data/parts/pUC19-RB0030.gb")

# Show the feature table of every part.
display(promoter.list_features())
display(rbs.list_features())
display(egfp.list_features())
display(terminator.list_features())
/home/haseong/anaconda3/envs/biopy/lib/python3.11/site-packages/Bio/GenBank/Scanner.py:1528: BiopythonParserWarning: Attempting to parse malformed locus line:
'LOCUS pUC19-egfp 3371 bp DNA circular SYN 18-MAY-2024\n'
Found locus 'pUC19-egfp' size '3371' residue_type 'DNA'
Some fields may be wrong.
warnings.warn(
/home/haseong/anaconda3/envs/biopy/lib/python3.11/site-packages/Bio/GenBank/Scanner.py:1528: BiopythonParserWarning: Attempting to parse malformed locus line:
'LOCUS pUC19-J23100 2686 bp DNA circular SYN 01-JUN-2024\n'
Found locus 'pUC19-J23100' size '2686' residue_type 'DNA'
Some fields may be wrong.
warnings.warn(
/home/haseong/anaconda3/envs/biopy/lib/python3.11/site-packages/Bio/GenBank/Scanner.py:1528: BiopythonParserWarning: Attempting to parse malformed locus line:
'LOCUS pUC19-L2U3H03 2688 bp DNA circular SYN 01-JUN-2024\n'
Found locus 'pUC19-L2U3H03' size '2688' residue_type 'DNA'
Some fields may be wrong.
warnings.warn(
/home/haseong/anaconda3/envs/biopy/lib/python3.11/site-packages/Bio/GenBank/Scanner.py:1528: BiopythonParserWarning: Attempting to parse malformed locus line:
'LOCUS pUC19-RB0030 2750 bp DNA circular SYN 01-JUN-2024\n'
Found locus 'pUC19-RB0030' size '2750' residue_type 'DNA'
Some fields may be wrong.
warnings.warn(
Ft# | Label or Note | Dir | Sta | End | Len | type | orf? |
---|---|---|---|---|---|---|---|
0 | nd | --> | 0 | 2686 | 2686 | source | no |
1 | L:pBR322ori-F | --> | 117 | 137 | 20 | primer_bind | no |
2 | L:L4440 | --> | 370 | 388 | 18 | primer_bind | no |
3 | L:CAP binding si | --> | 504 | 526 | 22 | protein_bind | no |
4 | L:lac promoter | --> | 540 | 571 | 31 | promoter | no |
5 | L:lac operator | --> | 578 | 595 | 17 | protein_bind | no |
6 | L:M13/pUC Revers | --> | 583 | 606 | 23 | primer_bind | no |
7 | L:M13 rev | --> | 602 | 619 | 17 | primer_bind | no |
8 | L:M13 Reverse | --> | 602 | 619 | 17 | primer_bind | no |
9 | L:lacZ-alpha | --> | 614 | 938 | 324 | CDS | no |
10 | L:BsaI-F | --> | 631 | 638 | 7 | misc_feature | no |
11 | L:oh-v-f1-F | --> | 638 | 642 | 4 | misc_feature | no |
12 | L:BBa_J23100 | --> | 642 | 677 | 35 | promoter | no |
13 | L:oh-f1-f2-F | --> | 677 | 681 | 4 | misc_feature | no |
14 | L:BsaI-R | --> | 681 | 688 | 7 | misc_feature | no |
15 | L:M13 Forward | <-- | 688 | 706 | 18 | primer_bind | no |
16 | L:M13 fwd | <-- | 688 | 705 | 17 | primer_bind | no |
17 | L:M13/pUC Forwar | <-- | 697 | 720 | 23 | primer_bind | no |
18 | L:pRS-marker | <-- | 913 | 933 | 20 | primer_bind | no |
19 | L:pGEX 3' | --> | 1032 | 1055 | 23 | primer_bind | no |
20 | L:pBRforEco | <-- | 1092 | 1111 | 19 | primer_bind | no |
21 | L:AmpR promoter | --> | 1178 | 1283 | 105 | promoter | no |
22 | L:AmpR | --> | 1283 | 2144 | 861 | CDS | yes |
23 | L:Amp-R | <-- | 1501 | 1521 | 20 | primer_bind | no |
24 | L:ori | --> | 0 | 2686 | 589 | rep_origin | no |
Ft# | Label or Note | Dir | Sta | End | Len | type | orf? |
---|---|---|---|---|---|---|---|
0 | nd | --> | 0 | 2750 | 2750 | source | no |
1 | L:pBR322ori-F | --> | 117 | 137 | 20 | primer_bind | no |
2 | L:L4440 | --> | 370 | 388 | 18 | primer_bind | no |
3 | L:CAP binding si | --> | 504 | 526 | 22 | protein_bind | no |
4 | L:lac promoter | --> | 540 | 571 | 31 | promoter | no |
5 | L:lac operator | --> | 578 | 595 | 17 | protein_bind | no |
6 | L:M13/pUC Revers | --> | 583 | 606 | 23 | primer_bind | no |
7 | L:M13 rev | --> | 602 | 619 | 17 | primer_bind | no |
8 | L:M13 Reverse | --> | 602 | 619 | 17 | primer_bind | no |
9 | L:BsaI-F | --> | 631 | 638 | 7 | misc_feature | no |
10 | L:oh-f1-f2-F | --> | 638 | 642 | 4 | misc_feature | no |
11 | L:R_BBa_B0030 | --> | 642 | 741 | 99 | misc_feature | no |
12 | L:sTRSV HHRz | --> | 644 | 694 | 50 | misc_RNA | no |
13 | N:strong bacteri | --> | 727 | 739 | 12 | RBS | no |
14 | L:oh-f2-f3-F | --> | 741 | 745 | 4 | misc_feature | no |
15 | L:BsaI-R | --> | 745 | 752 | 7 | misc_feature | no |
16 | L:M13 Forward | <-- | 752 | 770 | 18 | primer_bind | no |
17 | L:M13 fwd | <-- | 752 | 769 | 17 | primer_bind | no |
18 | L:M13/pUC Forwar | <-- | 761 | 784 | 23 | primer_bind | no |
19 | L:pRS-marker | <-- | 977 | 997 | 20 | primer_bind | no |
20 | L:pGEX 3' | --> | 1096 | 1119 | 23 | primer_bind | no |
21 | L:pBRforEco | <-- | 1156 | 1175 | 19 | primer_bind | no |
22 | L:AmpR promoter | --> | 1242 | 1347 | 105 | promoter | no |
23 | L:AmpR | --> | 1347 | 2208 | 861 | CDS | yes |
24 | L:Amp-R | <-- | 1565 | 1585 | 20 | primer_bind | no |
25 | L:ori | --> | 0 | 2750 | 589 | rep_origin | no |
Ft# | Label or Note | Dir | Sta | End | Len | type | orf? |
---|---|---|---|---|---|---|---|
0 | nd | --> | 0 | 3371 | 3371 | source | no |
1 | L:pBR322ori-F | --> | 117 | 137 | 20 | primer_bind | no |
2 | L:L4440 | --> | 370 | 388 | 18 | primer_bind | no |
3 | L:CAP binding si | --> | 504 | 526 | 22 | protein_bind | no |
4 | L:lac promoter | --> | 540 | 571 | 31 | promoter | no |
5 | L:lac operator | --> | 578 | 595 | 17 | protein_bind | no |
6 | L:M13/pUC Revers | --> | 583 | 606 | 23 | primer_bind | no |
7 | L:M13 rev | --> | 602 | 619 | 17 | primer_bind | no |
8 | L:M13 Reverse | --> | 602 | 619 | 17 | primer_bind | no |
9 | L:lacZ-alpha | --> | 614 | 1623 | 1009 | CDS | no |
10 | L:BsaI-F | --> | 631 | 638 | 7 | misc_feature | no |
11 | L:lacZ-alpha | --> | 638 | 642 | 4 | CDS | no |
12 | L:oh-f2-f3-F | --> | 638 | 642 | 4 | misc_feature | no |
13 | L:EGFP | --> | 642 | 1362 | 720 | CDS | yes |
14 | L:EGFP | --> | 642 | 1359 | 717 | CDS | no |
15 | L:EGFP-N | <-- | 687 | 709 | 22 | primer_bind | no |
16 | L:EXFP-R | <-- | 948 | 968 | 20 | primer_bind | no |
17 | L:EGFP-C | --> | 1295 | 1317 | 22 | primer_bind | no |
18 | L:oh-f3-f4-F | --> | 1362 | 1366 | 4 | misc_feature | no |
19 | L:BsaI-R | --> | 1366 | 1373 | 7 | misc_feature | no |
20 | L:M13 Forward | <-- | 1373 | 1391 | 18 | primer_bind | no |
21 | L:M13 fwd | <-- | 1373 | 1390 | 17 | primer_bind | no |
22 | L:M13/pUC Forwar | <-- | 1382 | 1405 | 23 | primer_bind | no |
23 | L:pRS-marker | <-- | 1598 | 1618 | 20 | primer_bind | no |
24 | L:pGEX 3' | --> | 1717 | 1740 | 23 | primer_bind | no |
25 | L:pBRforEco | <-- | 1777 | 1796 | 19 | primer_bind | no |
26 | L:AmpR promoter | --> | 1863 | 1968 | 105 | promoter | no |
27 | L:AmpR | --> | 1968 | 2829 | 861 | CDS | yes |
28 | L:Amp-R | <-- | 2186 | 2206 | 20 | primer_bind | no |
29 | L:ori | --> | 0 | 3371 | 589 | rep_origin | no |
Ft# | Label or Note | Dir | Sta | End | Len | type | orf? |
---|---|---|---|---|---|---|---|
0 | nd | --> | 0 | 2688 | 2688 | source | no |
1 | L:pBR322ori-F | --> | 117 | 137 | 20 | primer_bind | no |
2 | L:L4440 | --> | 370 | 388 | 18 | primer_bind | no |
3 | L:CAP binding si | --> | 504 | 526 | 22 | protein_bind | no |
4 | L:lac promoter | --> | 540 | 571 | 31 | promoter | no |
5 | L:lac operator | --> | 578 | 595 | 17 | protein_bind | no |
6 | L:M13/pUC Revers | --> | 583 | 606 | 23 | primer_bind | no |
7 | L:M13 rev | --> | 602 | 619 | 17 | primer_bind | no |
8 | L:M13 Reverse | --> | 602 | 619 | 17 | primer_bind | no |
9 | L:BsaI-F | --> | 631 | 638 | 7 | misc_feature | no |
10 | L:oh-f3-f4-F | --> | 638 | 642 | 4 | misc_feature | no |
11 | L:L2U3H03 | --> | 642 | 679 | 37 | misc_feature | no |
12 | L:oh-f7-v-F | --> | 679 | 683 | 4 | misc_feature | no |
13 | L:BsaI-R | --> | 683 | 690 | 7 | misc_feature | no |
14 | L:M13 Forward | <-- | 690 | 708 | 18 | primer_bind | no |
15 | L:M13 fwd | <-- | 690 | 707 | 17 | primer_bind | no |
16 | L:M13/pUC Forwar | <-- | 699 | 722 | 23 | primer_bind | no |
17 | L:pRS-marker | <-- | 915 | 935 | 20 | primer_bind | no |
18 | L:pGEX 3' | --> | 1034 | 1057 | 23 | primer_bind | no |
19 | L:pBRforEco | <-- | 1094 | 1113 | 19 | primer_bind | no |
20 | L:AmpR promoter | --> | 1180 | 1285 | 105 | promoter | no |
21 | L:AmpR | --> | 1285 | 2146 | 861 | CDS | yes |
22 | L:Amp-R | <-- | 1503 | 1523 | 20 | primer_bind | no |
23 | L:ori | --> | 0 | 2688 | 589 | rep_origin | no |
## search by label and return the location
def get_positions_by_label(record, label):
    """Return the coordinates of the feature whose label equals ``label``.

    Parameters
    ----------
    record : object with a ``features`` iterable; each feature exposes a
        ``qualifiers`` mapping and a ``location`` with ``start`` / ``end``.
    label : str
        Label to look for; a feature matches when its ``label`` qualifier
        is exactly ``[label]``.

    Returns
    -------
    dict
        ``{'start': int, 'end': int}`` for the matching feature. When several
        features carry the label, the last one wins. When none does, both
        values are ``None``.
    """
    pos = {'start': None, 'end': None}
    for feature in record.features:
        # .get() yields None for features without a label qualifier, which
        # never equals [label], so unlabeled features are skipped.
        if feature.qualifiers.get('label') == [label]:
            pos['start'] = int(feature.location.start)
            pos['end'] = int(feature.location.end)
    return pos
def get_record_between_labels(record, label1, label2):
    """Slice ``record`` from the start of ``label1`` to the end of ``label2``.

    Parameters
    ----------
    record : sliceable sequence record with labelled features.
    label1 : str
        Label of the feature whose start marks the left boundary.
    label2 : str
        Label of the feature whose end marks the right boundary.

    Returns
    -------
    ``record[start:end]``, or ``None`` (after printing "label not found")
    when either label is not present on the record.
    """
    start = get_positions_by_label(record, label1)['start']
    end = get_positions_by_label(record, label2)['end']
    # Identity test: a coordinate of 0 is valid and must not count as missing.
    if start is None or end is None:
        print("label not found")
        return None
    return record[start:end]
# Extract everything between (and including) the two BsaI site features.
target_record = get_record_between_labels(egfp, "BsaI-F", "BsaI-R")
# print("pos:", target_record)
display(target_record.figure())
target_record.seq
Dseqrecord(-742)
GGTCTCAGTCAATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCCTGACCTACGGCGTGCAGTGCTTCAGCCGCTACCCCGACCACATGAAGCAGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCACCCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAGTAAGGGATGAGACC
CCAGAGTCAGTTACCACTCGTTCCCGCTCCTCGACAAGTGGCCCCACCACGGGTAGGACCAGCTCGACCTGCCGCTGCATTTGCCGGTGTTCAAGTCGCACAGGCCGCTCCCGCTCCCGCTACGGTGGATGCCGTTCGACTGGGACTTCAAGTAGACGTGGTGGCCGTTCGACGGGCACGGGACCGGGTGGGAGCACTGGTGGGACTGGATGCCGCACGTCACGAAGTCGGCGATGGGGCTGGTGTACTTCGTCGTGCTGAAGAAGTTCAGGCGGTACGGGCTTCCGATGCAGGTCCTCGCGTGGTAGAAGAAGTTCCTGCTGCCGTTGATGTTCTGGGCGCGGCTCCACTTCAAGCTCCCGCTGTGGGACCACTTGGCGTAGCTCGACTTCCCGTAGCTGAAGTTCCTCCTGCCGTTGTAGGACCCCGTGTTCGACCTCATGTTGATGTTGTCGGTGTTGCAGATATAGTACCGGCTGTTCGTCTTCTTGCCGTAGTTCCACTTGAAGTTCTAGGCGGTGTTGTAGCTCCTGCCGTCGCACGTCGAGCGGCTGGTGATGGTCGTCTTGTGGGGGTAGCCGCTGCCGGGGCACGACGACGGGCTGTTGGTGATGGACTCGTGGGTCAGGCGGGACTCGTTTCTGGGGTTGCTCTTCGCGCTAGTGTACCAGGACGACCTCAAGCACTGGCGGCGGCCCTAGTGAGAGCCGTACCTGCTCGACATGTTCATTCCCTACTCTGG
Dseq(-742)
GGTC..GACC
CCAG..CTGG
from pydna.readers import read
from primers import create, primers
from pydna.all import pcr
label1 = "BsaI-F"
label2 = "BsaI-R"

# Region between the two BsaI site features of every part plasmid.
promoter_target = get_record_between_labels(promoter, label1, label2)
rbs_target = get_record_between_labels(rbs, label1, label2)
egfp_target = get_record_between_labels(egfp, label1, label2)
terminator_target = get_record_between_labels(terminator, label1, label2)

# For each part: design primers on the target region, then simulate PCR
# against the full plasmid.
fwd, rev = create(str(promoter_target.seq))
promoter_pcr_product = pcr(fwd.seq, rev.seq, promoter)

display(promoter_pcr_product.figure())
fwd, rev = create(str(rbs_target.seq))
rbs_pcr_product = pcr(fwd.seq, rev.seq, rbs)

display(rbs_pcr_product.figure())
fwd, rev = create(str(egfp_target.seq))
egfp_pcr_product = pcr(fwd.seq, rev.seq, egfp)

display(egfp_pcr_product.figure())
fwd, rev = create(str(terminator_target.seq))
terminator_pcr_product = pcr(fwd.seq, rev.seq, terminator)
display(terminator_pcr_product.figure())
5GGTCTCAAAGCTTGACG...CTAGCCTCCAGAGACC3
||||||||||||||||
3GATCGGAGGTCTCTGG5
5GGTCTCAAAGCTTGACG3
|||||||||||||||||
3CCAGAGTTTCGAACTGC...GATCGGAGGTCTCTGG5
5GGTCTCACTCCAGCTG...GAGGAGAAATAGTCATGAGACC3
||||||||||||||||||||||
3CTCCTCTTTATCAGTACTCTGG5
5GGTCTCACTCCAGCTG3
||||||||||||||||
3CCAGAGTGAGGTCGAC...CTCCTCTTTATCAGTACTCTGG5
5GGTCTCAGTCAATGGTGA...TACAAGTAAGGGATGAGACC3
||||||||||||||||||||
3ATGTTCATTCCCTACTCTGG5
5GGTCTCAGTCAATGGTGA3
||||||||||||||||||
3CCAGAGTCAGTTACCACT...ATGTTCATTCCCTACTCTGG5
5GGTCTCAGGGATAGCG...TTGTTGAGCGAATGAGACC3
|||||||||||||||||||
3AACAACTCGCTTACTCTGG5
5GGTCTCAGGGATAGCG3
||||||||||||||||
3CCAGAGTCCCTATCGC...AACAACTCGCTTACTCTGG5
from Bio.Restriction import BsaI
from pydna.assembly import Assembly
from pydna.common_sub_strings import terminal_overlap
# Digest each PCR product with BsaI. Fragment [1] of every digest is named
# after its part so the assembly figure is readable.
promoter_cut_product = promoter_pcr_product.cut(BsaI)
print(len(promoter_cut_product))
promoter_cut_product[1].name = "promoter"
display(promoter_cut_product[1].figure())

rbs_cut_product = rbs_pcr_product.cut(BsaI)
print(len(rbs_cut_product))
rbs_cut_product[1].name = "rbs"
display(rbs_cut_product[1].figure())

egfp_cut_product = egfp_pcr_product.cut(BsaI)
print(len(egfp_cut_product))
egfp_cut_product[1].name = "egfp"
display(egfp_cut_product[1].figure())

terminator_cut_product = terminator_pcr_product.cut(BsaI)
print(len(terminator_cut_product))
terminator_cut_product[1].name = "terminator"
display(terminator_cut_product[1].figure())

# Assemble the four middle fragments through their terminal overlaps.
asm = Assembly((promoter_cut_product[1], rbs_cut_product[1], egfp_cut_product[1], terminator_cut_product[1]), algorithm = terminal_overlap, limit = 4)

candidate = asm.assemble_linear()
print(len(candidate))
candidate[0].figure()
3
3
3
3
1
Dseqrecord(-43)
AAGCTTGACGGCTAGCTCAGTCCTAGGTACAGTGCTAGC
AACTGCCGATCGAGTCAGGATCCATGTCACGATCGGAGG
Dseqrecord(-107)
CTCCAGCTGTCACCGGATGTGCTTTCCGGTCTGATGAGTCCGTGAGGACGAAACAGCCTCTACAAATAATTTTGTTTAATCTAGAGATTAAAGAGGAGAAATA
TCGACAGTGGCCTACACGAAAGGCCAGACTACTCAGGCACTCCTGCTTTGTCGGAGATGTTTATTAAAACAAATTAGATCTCTAATTTCTCCTCTTTATCAGT
Dseqrecord(-728)
GTCAATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCCTGACCTACGGCGTGCAGTGCTTCAGCCGCTACCCCGACCACATGAAGCAGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCACCCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAGTAA
TACCACTCGTTCCCGCTCCTCGACAAGTGGCCCCACCACGGGTAGGACCAGCTCGACCTGCCGCTGCATTTGCCGGTGTTCAAGTCGCACAGGCCGCTCCCGCTCCCGCTACGGTGGATGCCGTTCGACTGGGACTTCAAGTAGACGTGGTGGCCGTTCGACGGGCACGGGACCGGGTGGGAGCACTGGTGGGACTGGATGCCGCACGTCACGAAGTCGGCGATGGGGCTGGTGTACTTCGTCGTGCTGAAGAAGTTCAGGCGGTACGGGCTTCCGATGCAGGTCCTCGCGTGGTAGAAGAAGTTCCTGCTGCCGTTGATGTTCTGGGCGCGGCTCCACTTCAAGCTCCCGCTGTGGGACCACTTGGCGTAGCTCGACTTCCCGTAGCTGAAGTTCCTCCTGCCGTTGTAGGACCCCGTGTTCGACCTCATGTTGATGTTGTCGGTGTTGCAGATATAGTACCGGCTGTTCGTCTTCTTGCCGTAGTTCCACTTGAAGTTCTAGGCGGTGTTGTAGCTCCTGCCGTCGCACGTCGAGCGGCTGGTGATGGTCGTCTTGTGGGGGTAGCCGCTGCCGGGGCACGACGACGGGCTGTTGGTGATGGACTCGTGGGTCAGGCGGGACTCGTTTCTGGGGTTGCTCTTCGCGCTAGTGTACCAGGACGACCTCAAGCACTGGCGGCGGCCCTAGTGAGAGCCGTACCTGCTCGACATGTTCATTCCCT
Dseqrecord(-45)
GGGATAGCGTGACCGGCGCATCGGTCACGCTATTTGTTGAG
ATCGCACTGGCCGCGTAGCCAGTGCGATAAACAACTCGCTT
promoter| 4
\/
/\
4|rbs| 4
\/
/\
4|egfp| 4
\/
/\
4|terminator
# Same assembly, but with fragment [0] of the promoter digest instead of [1].
# NOTE(review): presumably a deliberate counter-example, since the recorded
# run prints 0 candidates. Confirm intent.
asm = Assembly((promoter_cut_product[0], rbs_cut_product[1], egfp_cut_product[1], terminator_cut_product[1]), algorithm = terminal_overlap, limit = 4)

candidate = asm.assemble_linear()
print(len(candidate))
0