×
INTELLIGENT WORK FORUMS
FOR COMPUTER PROFESSIONALS

Log In

Come Join Us!

Are you a
Computer / IT professional?
Join Tek-Tips Forums!
  • Talk With Other Members
  • Be Notified Of Responses
    To Your Posts
  • Keyword Search
  • One-Click Access To Your
    Favorite Forums
  • Automated Signatures
    On Your Posts
  • Best Of All, It's Free!
  • Students Click Here

*Tek-Tips's functionality depends on members receiving e-mail. By joining you are opting in to receive e-mail.

Posting Guidelines

Promoting, selling, recruiting, and posting coursework or theses are forbidden.

Students Click Here

Jobs

Using sed to find matched patterns and append

Using sed to find matched patterns and append

Using sed to find matched patterns and append

(OP)
Hi,

I am a beginner and would like to learn and explore sed, as it is a very powerful tool for crunching huge data sets with one-liners. I have thousands of records that I am working on. Below is a sample:

CODE -->

chr1	OnePiece	Loc	01516	03251	.	-	.	g_id LINC01725 ;	t_id LINC01725:44 ;	g_alias_1 G233008 ;	g_alias_2 RP11-475O6.1 ;	g_alias_3 G233008.1 ;	g_alias_4 9930.1 ;	g_alias_5 G233008.5 ;	g_alias_6 LINC01725 ;	g_alias_7 LOC101927560 ;	t_alias_1 E457273 ;	t_alias_2 E457273.1 ;	t_alias_3 RP11-475O6.1-005 ;	t_alias_4 27496.1 ;	t_alias_5 NONHSAT004171 ;	t_alias_6 NR_119374 ;	t_alias_7 E457273.5 ;	t_alias_8 NR_119374.1 ;
chr1	OnePiece	Loc	49907	50022	.	-	.	g_id LINC01725 ;	t_id LINC01725:44 ;	g_alias_1 G233008 ;	g_alias_2 RP11-475O6.1 ;	g_alias_3 G233008.1 ;	g_alias_4 9930.1 ;	g_alias_5 G233008.5 ;	g_alias_6 LINC01725 ;	g_alias_7 LOC101927560 ;	t_alias_1 E457273 ;	t_alias_2 E457273.1 ;	t_alias_3 RP11-475O6.1-005 ;	t_alias_4 27496.1 ;	t_alias_5 NONHSAT004171 ;	t_alias_6 NR_119374 ;	t_alias_7 E457273.5 ;	t_alias_8 NR_119374.1 ;
chr1	OnePiece	Loc	60408	60546	.	-	.	g_id LINC01725 ;	t_id LINC01725:44 ;	g_alias_1 G233008 ;	g_alias_2 RP11-475O6.1 ;	g_alias_3 G233008.1 ;	g_alias_4 9930.1 ;	g_alias_5 G233008.5 ;	g_alias_6 LINC01725 ;	g_alias_7 LOC101927560 ;	t_alias_1 E457273 ;	t_alias_2 E457273.1 ;	t_alias_3 RP11-475O6.1-005 ;	t_alias_4 27496.1 ;	t_alias_5 NONHSAT004171 ;	t_alias_6 NR_119374 ;	t_alias_7 E457273.5 ;	t_alias_8 NR_119374.1 ;
chr16	OnePiece	Loc	92392	92726	.	-	.	g_id lnc-ZFHX3-27 ;	t_id lnc-ZFHX3-27:11 ;	g_alias_1 G249447 ;	g_alias_2 XLOC_012007 ;	g_alias_3 linc-ZFHX3-2 ;	g_alias_4 G261404.1 ;	g_alias_5 AC009120.4 ;	g_alias_6 176255.2 ;	g_alias_7 G261404.5 ;	g_alias_8 G261404.6 ;	g_alias_9 AC138627.1 ;	g_alias_10 LOC101928035 ;	t_alias_1 E510251 ;	t_alias_2 TCONS_00024274 ;	t_alias_3 E568137.1 ;	t_alias_4 AC009120.4-001 ;	t_alias_5 431686.1 ;	t_alias_6 NONHSAT143655 ;	t_alias_7 NR_104657 ;	t_alias_8 NR_104657.1 ;
chr16	OnePiece	Loc	05905	06165	.	-	.	g_id lnc-ZFHX3-27 ;	t_id lnc-ZFHX3-27:11 ;	g_alias_1 G249447 ;	g_alias_2 XLOC_012007 ;	g_alias_3 linc-ZFHX3-2 ;	g_alias_4 G261404.1 ;	g_alias_5 AC009120.4 ;	g_alias_6 176255.2 ;	g_alias_7 G261404.5 ;	g_alias_8 G261404.6 ;	g_alias_9 AC138627.1 ;	g_alias_10 LOC101928035 ;	t_alias_1 E510251 ;	t_alias_2 TCONS_00024274 ;	t_alias_3 E568137.1 ;	t_alias_4 AC009120.4-001 ;	t_alias_5 431686.1 ;	t_alias_6 NONHSAT143655 ;	t_alias_7 NR_104657 ;	t_alias_8 NR_104657.1 ;
chr16	OnePiece	Loc	10306	10505	.	-	.	g_id lnc-ZFHX3-27 ;	t_id lnc-ZFHX3-27:11 ;	g_alias_1 G249447 ;	g_alias_2 XLOC_012007 ;	g_alias_3 linc-ZFHX3-2 ;	g_alias_4 G261404.1 ;	g_alias_5 AC009120.4 ;	g_alias_6 176255.2 ;	g_alias_7 G261404.5 ;	g_alias_8 G261404.6 ;	g_alias_9 AC138627.1 ;	g_alias_10 LOC101928035 ;	t_alias_1 E510251 ;	t_alias_2 TCONS_00024274 ;	t_alias_3 E568137.1 ;	t_alias_4 AC009120.4-001 ;	t_alias_5 431686.1 ;	t_alias_6 NONHSAT143655 ;	t_alias_7 NR_104657 ;	t_alias_8 NR_104657.1 ;
chr16	OnePiece	Loc	15352	15521	.	-	.	g_id lnc-ZFHX3-27 ;	t_id lnc-ZFHX3-27:11 ;	g_alias_1 G249447 ;	g_alias_2 XLOC_012007 ;	g_alias_3 linc-ZFHX3-2 ;	g_alias_4 G261404.1 ;	g_alias_5 AC009120.4 ;	g_alias_6 176255.2 ;	g_alias_7 G261404.5 ;	g_alias_8 G261404.6 ;	g_alias_9 AC138627.1 ;	g_alias_10 LOC101928035 ;	t_alias_1 E510251 ;	t_alias_2 TCONS_00024274 ;	t_alias_3 E568137.1 ;	t_alias_4 AC009120.4-001 ;	t_alias_5 431686.1 ;	t_alias_6 NONHSAT143655 ;	t_alias_7 NR_104657 ;	t_alias_8 NR_104657.1 ; 

I need to find every occurrence of the pattern " ;" and, for each affected column, wrap the value in double quotes (") and drop the space before the semicolon, so that the output looks as follows:

CODE -->

chr1	OnePiece	Loc	01516	03251	.	-	.	g_id "LINC01725";	t_id "LINC01725:44";	g_alias_1 "G233008";	g_alias_2 "RP11-475O6.1";	g_alias_3 "G233008.1";	g_alias_4 "9930.1";	g_alias_5 "G233008.5";	g_alias_6 "LINC01725";	g_alias_7 "LOC101927560";	t_alias_1 "E457273";	t_alias_2 "E457273.1";	t_alias_3 "RP11-475O6.1-005";	t_alias_4 "27496.1";	t_alias_5 "NONHSAT004171";	t_alias_6 "NR_119374";	t_alias_7 "E457273.5";	t_alias_8 "NR_119374.1";
chr1	OnePiece	Loc	49907	50022	.	-	.	g_id "LINC01725";	t_id "LINC01725:44";	g_alias_1 "G233008";	g_alias_2 "RP11-475O6.1";	g_alias_3 "G233008.1";	g_alias_4 "9930.1";	g_alias_5 "G233008.5";	g_alias_6 "LINC01725";	g_alias_7 "LOC101927560";	t_alias_1 "E457273";	t_alias_2 "E457273.1";	t_alias_3 "RP11-475O6.1-005";	t_alias_4 "27496.1";	t_alias_5 "NONHSAT004171";	t_alias_6 "NR_119374";	t_alias_7 "E457273.5";	t_alias_8 "NR_119374.1";
chr1	OnePiece	Loc	60408	60546	.	-	.	g_id "LINC01725";	t_id "LINC01725:44";	g_alias_1 "G233008";	g_alias_2 "RP11-475O6.1";	g_alias_3 "G233008.1";	g_alias_4 "9930.1";	g_alias_5 "G233008.5";	g_alias_6 "LINC01725";	g_alias_7 "LOC101927560";	t_alias_1 "E457273";	t_alias_2 "E457273.1";	t_alias_3 "RP11-475O6.1-005";	t_alias_4 "27496.1";	t_alias_5 "NONHSAT004171";	t_alias_6 "NR_119374";	t_alias_7 "E457273.5";	t_alias_8 "NR_119374.1";
chr16	OnePiece	Loc	92392	92726	.	-	.	g_id "lnc-ZFHX3-27";	t_id "lnc-ZFHX3-27:11";	g_alias_1 "G249447";	g_alias_2 "XLOC_012007";	g_alias_3 "linc-ZFHX3-2";	g_alias_4 "G261404.1";	g_alias_5 "AC009120.4";	g_alias_6 "176255.2";	g_alias_7 "G261404.5";	g_alias_8 "G261404.6";	g_alias_9 "AC138627.1";	g_alias_10 "LOC101928035";	t_alias_1 "E510251";	t_alias_2 "TCONS_00024274";	t_alias_3 "E568137.1";	t_alias_4 "AC009120.4-001";	t_alias_5 "431686.1";	t_alias_6 "NONHSAT143655";	t_alias_7 "NR_104657";	t_alias_8 "NR_104657.1";
chr16	OnePiece	Loc	05905	06165	.	-	.	g_id "lnc-ZFHX3-27";	t_id "lnc-ZFHX3-27:11";	g_alias_1 "G249447";	g_alias_2 "XLOC_012007";	g_alias_3 "linc-ZFHX3-2";	g_alias_4 "G261404.1";	g_alias_5 "AC009120.4";	g_alias_6 "176255.2";	g_alias_7 "G261404.5";	g_alias_8 "G261404.6";	g_alias_9 "AC138627.1";	g_alias_10 "LOC101928035";	t_alias_1 "E510251";	t_alias_2 "TCONS_00024274";	t_alias_3 "E568137.1";	t_alias_4 "AC009120.4-001";	t_alias_5 "431686.1";	t_alias_6 "NONHSAT143655";	t_alias_7 "NR_104657";	t_alias_8 "NR_104657.1";
chr16	OnePiece	Loc	10306	10505	.	-	.	g_id "lnc-ZFHX3-27";	t_id "lnc-ZFHX3-27:11";	g_alias_1 "G249447";	g_alias_2 "XLOC_012007";	g_alias_3 "linc-ZFHX3-2";	g_alias_4 "G261404.1";	g_alias_5 "AC009120.4";	g_alias_6 "176255.2";	g_alias_7 "G261404.5";	g_alias_8 "G261404.6";	g_alias_9 "AC138627.1";	g_alias_10 "LOC101928035";	t_alias_1 "E510251";	t_alias_2 "TCONS_00024274";	t_alias_3 "E568137.1";	t_alias_4 "AC009120.4-001";	t_alias_5 "431686.1";	t_alias_6 "NONHSAT143655";	t_alias_7 "NR_104657";	t_alias_8 "NR_104657.1";
chr16	OnePiece	Loc	15352	15521	.	-	.	g_id "lnc-ZFHX3-27";	t_id "lnc-ZFHX3-27:11";	g_alias_1 "G249447";	g_alias_2 "XLOC_012007";	g_alias_3 "linc-ZFHX3-2";	g_alias_4 "G261404.1";	g_alias_5 "AC009120.4";	g_alias_6 "176255.2";	g_alias_7 "G261404.5";	g_alias_8 "G261404.6";	g_alias_9 "AC138627.1";	g_alias_10 "LOC101928035";	t_alias_1 "E510251";	t_alias_2 "TCONS_00024274";	t_alias_3 "E568137.1";	t_alias_4 "AC009120.4-001";	t_alias_5 "431686.1";	t_alias_6 "NONHSAT143655";	t_alias_7 "NR_104657";	t_alias_8 "NR_104657.1"; 

I tried playing around with the following command, and it almost worked.

CODE -->

sed 's/ /\"/g' inputfile 

However, I couldn't get the output shown above: the blank space after the field ID must be kept, but my command replaces it with a quote as well. For instance:

The result should be g_id "lnc-ZFHX3-27"; NOT g_id"lnc-ZFHX3-27";

I just need a little bit more help here. Thanks.

RE: Using sed to find matched patterns and append

I suggest you post in one of these other forums, as your question is not AIX-specific; you will get more people's eyes on it. One is for awk, which is a close relative of sed. The other is for General Unix Scripting.

https://www.tek-tips.com/threadminder.cfm?pid=271

https://www.tek-tips.com/threadminder.cfm?pid=822

==================================
advanced cognitive capabilities and other marketing buzzwords explained with sarcastic simplicity


Red Flag This Post

Please let us know here why this post is inappropriate. Reasons such as off-topic, duplicates, flames, illegal, vulgar, or students posting their homework.

Red Flag Submitted

Thank you for helping keep Tek-Tips Forums free from inappropriate posts.
The Tek-Tips staff will check this out and take appropriate action.

Reply To This Thread

Posting in the Tek-Tips forums is a member-only feature.

Click Here to join Tek-Tips and talk with other members! Already a Member? Login

Close Box

Join Tek-Tips® Today!

Join your peers on the Internet's largest technical computer professional community.
It's easy to join and it's free.

Here's Why Members Love Tek-Tips Forums:

Register now while it's still free!

Already a member? Close this window and log in.

Join Us             Close