Class | Bio::EMBL |
In: |
lib/bio/db/embl/embl.rb
|
Parent: | EMBLDB |
oc | -> | classification |
taxonomy classification |
database references (DR). Returns an array of Bio::Sequence::DBLink objects.
# File lib/bio/db/embl/embl.rb, line 511
# Database references (DR).
#
# Splits the raw DR field into lines and hands each line to
# Bio::Sequence::DBLink.parse_embl_DR_line.
#
# Returns an Array holding one parse result per DR line, in input order.
def dblinks
  dr_lines = get('DR').split(/\n/)
  dr_lines.map do |dr_line|
    Bio::Sequence::DBLink.parse_embl_DR_line(dr_line)
  end
end
returns DIVISION in the ID line.
# File lib/bio/db/embl/embl.rb, line 140
# Returns the value stored under 'DIVISION' in the parsed ID line
# (see #id_line).
def division
  id_line['DIVISION']
end
returns contents in the date (DT) line.
where <DT Hash> is:
{}
keys: 'created' and 'updated'
DT Line; date (2/entry)
# File lib/bio/db/embl/embl.rb, line 182
# Returns the contents of the date (DT) lines.
#
# The raw DT field is split into lines. The first line is stored under
# 'created' and the second under 'updated', each with its leading
# two-character line code removed and surrounding whitespace stripped.
# The resulting Hash is cached in @data['DT'].
#
# An entry with fewer than two DT lines does not raise: the value for a
# missing line is nil.
#
# key:: optional Hash key, 'created' or 'updated'
#
# Returns the whole Hash when +key+ is nil, otherwise the value stored
# under +key+.
def dt(key = nil)
  @data['DT'] ||= begin
    # nil-safe: a missing DT line yields nil instead of NoMethodError
    strip_code = lambda { |line| line && line.sub(/\w{2} /, '').strip }
    created, updated = get('DT').split(/\n/)
    { 'created' => strip_code.call(created),
      'updated' => strip_code.call(updated) }
  end

  key ? @data['DT'][key] : @data['DT']
end
returns ENTRY_NAME in the ID line.
# File lib/bio/db/embl/embl.rb, line 117
# Returns the value stored under 'ENTRY_NAME' in the parsed ID line
# (see #id_line).
def entry
  id_line['ENTRY_NAME']
end
returns contents in the feature table (FT) lines.
same as features method in bio/db/genbank.rb
FT Line; feature table data (>=0)
# File lib/bio/db/embl/embl.rb, line 336
# Returns the contents of the feature table (FT) lines.
#
# On first use the raw text in @orig['FT'] is scanned line by line and
# grouped into one Array per feature: [ feature, position, qualifier, ... ].
# Each group is then replaced by the result of parse_qualifiers, and the
# outer Array is extended with Bio::Features::BackwardCompatibility and
# cached in @data['FT'].
#
# With a block, every cached feature is yielded and the result of the
# +each+ call is returned; without a block the cached Array is returned.
def ft
  unless @data['FT']
    ary = []
    in_quote = false
    @orig['FT'].each_line do |line|
      next if line =~ /^FEATURES/

      body = line[20,60].chomp # feature value (position, /qualifier=)
      if line =~ /^FT {3}(\S+)/
        # a new feature key (source, CDS, ...) opens a new group
        ary.push([ $1, body ]) # [ feature, position, /q="data", ... ]
      elsif body =~ /^ \// && !in_quote
        ary.last.push(body) # /q="data..., /q=data, /q

        # an opened but unterminated quote continues on following lines
        in_quote = true if body =~ /=" / && body !~ /"$/
      else
        # continuation of the previous position or qualifier value
        ary.last.last << body # ...data..., ...data..."

        in_quote = false if body =~ /"$/
      end
    end

    ary.map! { |subary| parse_qualifiers(subary) }

    @data['FT'] = ary.extend(Bio::Features::BackwardCompatibility)
  end

  if block_given?
    @data['FT'].each do |feature|
      yield feature
    end
  else
    @data['FT']
  end
end
returns contents in the ID line.
where <ID Hash> is:
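{'ENTRY_NAME' => String, 'DATA_CLASS' => String, 'MOLECULE_TYPE' => String, 'DIVISION' => String, 'SEQUENCE_LENGTH' => Int, 'SEQUENCE_VERSION' => String, 'TOPOLOGY' => String} ('SEQUENCE_VERSION' and 'TOPOLOGY' are set only for the Rel 89- ID line format)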
ID Line
"ID ENTRY_NAME DATA_CLASS; MOLECULE_TYPE; DIVISION; SEQUENCE_LENGTH BP."
DATA_CLASS = [‘standard’]
MOLECULE_TYPE: DNA RNA XXX
Code ( DIVISION )
EST (ESTs) PHG (Bacteriophage) FUN (Fungi) GSS (Genome survey) HTC (High Throughput cDNAs) HTG (HTGs) HUM (Human) INV (Invertebrates) ORG (Organelles) MAM (Other Mammals) VRT (Other Vertebrates) PLN (Plants) PRO (Prokaryotes) ROD (Rodents) SYN (Synthetic) STS (STSs) UNC (Unclassified) VRL (Viruses)
Rel 89- ID CD789012; SV 4; linear; genomic DNA; HTG; MAM; 500 BP. ID <1>; SV <2>; <3>; <4>; <5>; <6>; <7> BP.
# File lib/bio/db/embl/embl.rb, line 89
# Returns the contents of the ID line.
#
# The ID field is split on "; " separators and parsed once; the resulting
# Hash is cached in @data['ID']. When the field following the entry name
# starts with "SV", the sequence version, topology, molecule type and data
# class are read from the next four fields; otherwise only the molecule
# type is read. Division and sequence length follow in both layouts.
#
# key:: optional Hash key such as 'ENTRY_NAME' or 'DIVISION'
#
# Returns the whole Hash when +key+ is nil, otherwise the value stored
# under +key+.
def id_line(key=nil)
  @data['ID'] ||= begin
    leading, *fields = fetch('ID').split(/; +/)
    info = {}
    info['ENTRY_NAME'], info['DATA_CLASS'] = leading.split(/ +/)

    if fields.first =~ /^SV/
      version, topology, molecule_type, data_class = fields.shift(4)
      info['SEQUENCE_VERSION'] = version.split(' ').last
      info['TOPOLOGY'] = topology
      info['MOLECULE_TYPE'] = molecule_type
      info['DATA_CLASS'] = data_class
    else
      info['MOLECULE_TYPE'] = fields.shift
    end

    division_code, length_field = fields.shift(2)
    info['DIVISION'] = division_code
    info['SEQUENCE_LENGTH'] = length_field.strip.split(' ').first.to_i
    info
  end

  return @data['ID'] unless key

  @data['ID'][key]
end
returns MOLECULE_TYPE in the ID line.
# File lib/bio/db/embl/embl.rb, line 125
# Returns the value stored under 'MOLECULE_TYPE' in the parsed ID line
# (see #id_line).
def molecule
  id_line['MOLECULE_TYPE']
end
returns contents in the OS line.
where <OS Hash> is:
[{'name'=>'Human', 'os'=>'Homo sapiens'}, {'name'=>'Rat', 'os'=>'Rattus norvegicus'}]
OS Line; organism species (>=1)
OS Trifolium repens (white clover)
Typically, OS line shows "Genus species (name)" style:
OS Genus species (name)
Other examples:
OS uncultured bacterium OS xxxxxx metagenome OS Cloning vector xxxxxxxx
Complicated examples:
OS Poeciliopsis gracilis (Poeciliopsis gracilis (Heckel, 1848)) OS Etmopterus sp. B Last & Stevens, 1994 (bristled lanternshark) OS Galaxias sp. D (Allibone et al., 1996) (Pool Burn galaxias) OS Sicydiinae sp. 'Keith et al., 2010' OS Acanthopagrus sp. 'Jean & Lee, 2008' OS Gaussia princeps (T. Scott, 1894) OS Rana sp. 8 Hillis & Wilcox, 2005 OS Contracaecum rudolphii C D'Amelio et al., 2007 OS Partula sp. 'Mt. Marau, Tahiti' OS Leptocephalus sp. 'type II larva' (Smith, 1989) OS Tayloria grandis (D.G.Long) Goffinet & A.J.Shaw, 2002 OS Non-A, non-B hepatitis virus OS Canidae (dog, coyote, wolf, fox) OS Salmonella enterica subsp. enterica serovar 4,[5],12:i:- OS Yersinia enterocolitica (type O:5,27) OS Influenza A virus (A/green-winged teal/OH/72/99(H6N1,4)) OS Influenza A virus (A/Beijing/352/1989,(highgrowth reassortant NIB26)(H3N2)) OS Recombinant Hepatitis C virus H77(5'UTR-NS2)/JFH1_V787A,Q1247L
# File lib/bio/db/embl/embl.rb, line 266
# Returns the contents of the OS line.
#
# The OS field is parsed once into a one-element Array of Hashes with the
# keys 'name' and 'os', cached in @data['OS']. When the field ends in a
# simple "(name)" suffix the two parts are separated; otherwise 'name' is
# nil and 'os' holds the whole field.
#
# num:: optional index into the Array
#
# Without +num+ the Array is returned. With +num+ the selected element is
# formatted as "os (name)", e.g. "Trifolium repens (white clover)", or as
# just "os" when no name was recognised; nil is returned when there is no
# element at +num+.
#
# NOTE: earlier code built this string but discarded it (and used a
# malformed interpolation), so a call with +num+ returned the whole Array.
def os(num = nil)
  unless @data['OS']
    field = fetch('OS')
    matched = /([A-Z][a-z]* *[\w\d \:\'\+\-]+[\w\d]) *\(([\w\d ]+)\)\s*\z/.match(field)
    organism =
      if matched
        {'name' => matched[2], 'os' => matched[1]}
      else
        {'name' => nil, 'os' => field}
      end
    @data['OS'] = [organism]
  end

  return @data['OS'] unless num

  selected = @data['OS'][num]
  return nil unless selected

  # EX. "Trifolium repens (white clover)"
  selected['name'] ? "#{selected['os']} (#{selected['name']})" : selected['os']
end
release number when created
# File lib/bio/db/embl/embl.rb, line 471
# Release number when created.
#
# Passes the 'created' value of #dt to parse_release_version and returns
# element 0 of the result.
def release_created
  created_line = dt['created']
  parse_release_version(created_line)[0]
end
release number when last updated
# File lib/bio/db/embl/embl.rb, line 466
# Release number when last updated.
#
# Passes the 'updated' value of #dt to parse_release_version and returns
# element 0 of the result.
def release_modified
  updated_line = dt['updated']
  parse_release_version(updated_line)[0]
end
returns the nucleotide sequence in this entry.
The sequence is taken from @orig[''] (the lines with a blank line code). bb Line; (blanks) sequence data (>=1)
# File lib/bio/db/embl/embl.rb, line 445
# Returns the nucleotide sequence in this entry.
#
# Fetches the field stored under the empty tag, removes space characters and
# runs of digits from it, and wraps the remainder in a Bio::Sequence::NA.
def seq
  raw_block = fetch('')
  without_spaces = raw_block.gsub(/ /, '')
  residues = without_spaces.gsub(/\d+/, '')
  Bio::Sequence::NA.new(residues)
end
returns sequence header information in the sequence header (SQ) line.
where <SQ Hash> is:
{'ntlen' => Int, 'other' => Int, 'a' => Int, 'c' => Int, 'g' => Int, 't' => Int}
SQ Line; sequence header (1/entry)
SQ Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;
# File lib/bio/db/embl/embl.rb, line 422
# Returns sequence header information from the sequence header (SQ) line.
#
# The SQ field is matched once against the
# "<n> BP; <n> A; <n> C; <n> G; <n> T; <n> other;" layout and the counts are
# cached in @data['SQ'] as a Hash with the keys 'ntlen', 'other', 'a', 'c',
# 'g' and 't'. When the field does not match, every count is 0.
#
# base:: optional key; a String or Symbol, matched case-insensitively
#        (e.g. 'A', 'a' or :a)
#
# Returns the whole Hash when +base+ is nil, otherwise the count stored for
# +base+ (nil for an unknown key).
def sq(base = nil)
  unless @data['SQ']
    matched = /(\d+) BP\; (\d+) A; (\d+) C; (\d+) G; (\d+) T; (\d+) other;/.match(fetch('SQ'))
    # an unmatched line yields zero counts, as nil.to_i did before
    counts = matched ? matched.captures.map { |digits| digits.to_i } : Array.new(6, 0)
    ntlen, a, c, g, t, other = counts
    @data['SQ'] = {'ntlen' => ntlen, 'other' => other,
                   'a' => a, 'c' => c, 'g' => g, 't' => t}
  end

  # to_s lets Symbol keys such as :a reach the String-keyed Hash
  base ? @data['SQ'][base.to_s.downcase] : @data['SQ']
end
returns the version information in the sequence version (SV) line.
SV Line; sequence version (1/entry)
SV Accession.Version
# File lib/bio/db/embl/embl.rb, line 162
# Returns the version information in the sequence version (SV) line.
#
# The SV field, with its first ';' removed, is returned when it is not
# empty. Otherwise the value is assembled from the ID line as
# "ENTRY_NAME.SEQUENCE_VERSION".
def sv
  version = field_fetch('SV').sub(/;/, '')
  return version unless version == ""

  [id_line['ENTRY_NAME'], id_line['SEQUENCE_VERSION']].join('.')
end
converts the entry to Bio::Sequence object
Arguments::
Returns: | Bio::Sequence object |
# File lib/bio/db/embl/embl.rb, line 530
# Converts the entry to a Bio::Sequence object.
#
# Delegates to Bio::Sequence.adapter, passing this entry together with the
# Bio::Sequence::Adapter::EMBL module, and returns its result.
def to_biosequence
  Bio::Sequence.adapter(
    self,
    Bio::Sequence::Adapter::EMBL
  )
end