-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgff3_load_cds.pl
More file actions
78 lines (70 loc) · 2.15 KB
/
gff3_load_cds.pl
File metadata and controls
78 lines (70 loc) · 2.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#/usr/bin/perl
############## need to sort according to Parent##############
use strict;
use Carp;
use Data::Dumper;
use XML::DOM;
use Chado::WriteChadoMac;
use Chado::PrettyPrintDom;
use GFF3::GFF3Rec;
use Model::Worm;
my $gfffile = $ARGV[0];
open my $gfffh, "<", $gfffile;
#need to be a tmpfile in the future
my$xmlfile = $ARGV[1];
open my $xmlfh, ">", $xmlfile;
my $doc = new XML::DOM::Document;
my $root = $doc->createElement("chado");
#append elegans as the default organism
my $worm = new Model::Worm({genus => 'Caenorhabditis',
species => 'elegans',
});
my $organism = $worm->write_organism($doc);
$root->appendChild($organism);
my @unique_ids;
my $rank = 0;
my $last_feature;
my $last_pep;
while (<$gfffh>) {
my $rec = new GFF3::GFF3Rec({line => $_});
#testing
#print $rec->get_seqid(), "\t";
#print $rec->get_source(), "\t";
#print $rec->get_type(), "\t";
#print $rec->get_start(), "\t";
#print $rec->get_end(), "\t";
#print $rec->get_score(), "\t";
#print $rec->get_strand(), "\t";
#print $rec->get_phase(), "\t";
#print "ID=", $rec->get_ID(), ";Parent=", join(",", @{$rec->get_Parent()}), "\n";
unless ($rec->get_worm()->equal($worm)) {
$root->appendChild($rec->get_worm()->write_organism($doc));
}
if (scalar grep { $_ eq $rec->get_ID() } @unique_ids) {
$rank++;
my $featureloc = $rec->write_featureloc($doc, $organism, $rank);
$last_feature->appendChild($featureloc);
} else {
$root->appendChild($last_feature) if $last_feature;
$root->appendChild($last_pep) if $last_pep;
$rank = 0;
push @unique_ids, $rec->get_ID();
my $feature = $rec->write_feature($doc, $organism, 'force');
for my $fr ($rec->write_feature_relationship($doc, $organism)) {
$feature->appendChild($fr);
}
my $featureloc = $rec->write_featureloc($doc, $organism);
$feature->appendChild($featureloc);
if ($rec->get_type() eq 'CDS') {
my $pep = $rec->write_polypeptide($doc, $organism, 'force');
$last_pep = $pep;
}
$last_feature = $feature;
}
}
$root->appendChild($last_feature);
$root->appendChild($last_pep);
$doc->appendChild($root);
pretty_print($root, $xmlfh);
close($gfffh);
close($xmlfh);