Skip to content

Instantly share code, notes, and snippets.

@yssymmt
Created September 19, 2022 16:49
Show Gist options
  • Save yssymmt/e16c9b513126d73ce0c55a73ff8f0f5c to your computer and use it in GitHub Desktop.
Save yssymmt/e16c9b513126d73ce0c55a73ff8f0f5c to your computer and use it in GitHub Desktop.
-- Rebuild jumbo.aud07_wordseq from scratch: drop the previous copy, then
-- recreate it empty.
-- NOTE(review): the drop is unconditional, so it presumably fails on a first
-- run when the table does not exist yet -- confirm how this script is invoked.
drop table jumbo.aud07_wordseq;

-- One row per (docid, seqno) token position.
--   docid : document identifier (the join key used when the table is loaded)
--   seqno : position of the token within its document -- TODO confirm
--   cat   : category label taken from jumbo.aud01_org
--   word  : token text; limited to 10 Unicode characters
create table jumbo.aud07_wordseq (
    docid integer,
    seqno integer,
    cat   varchar(10) character set unicode,
    word  varchar(10) character set unicode
)
primary index (docid)
;
-- Load jumbo.aud07_wordseq with the retained tokens of
-- jumbo.aud06_mecab_vertical, each tagged with its document's category.
--
-- Token filter, by part of speech (`pos`):
--   '名詞'   (noun)           kept unless the surface form is a listed noise token
--   '形容詞' (adjective)      always kept
--   '感動詞' (interjection)   always kept
--   '動詞'   (verb)           kept unless `norm` is one of the listed common verbs
--   '助動詞' (auxiliary verb) kept only when the surface form is 'ない' (negation)
-- Rows with any other `pos` are dropped.
--
-- The target column list is spelled out so the load does not depend on the
-- physical column order of jumbo.aud07_wordseq.
insert into jumbo.aud07_wordseq (docid, seqno, cat, word)
select
    tok.docid,
    tok.seqno,
    doc.cat,
    tok.word
from (
    -- Filtered tokens. Source columns are qualified with `m` so that the
    -- predicates below unambiguously test the source `word`, not the derived
    -- `word` alias produced by the CASE expression.
    select
        m.docid,
        m.seqno,
        case
            -- Expand the abbreviation to the full term.
            when m.word = 'バスケット' then 'バスケットボール'
            -- Nouns and interjections keep their surface form ...
            when m.pos in ('名詞', '感動詞') then m.word
            -- ... everything else uses `norm` (presumably the base form
            -- emitted by the tokenizer -- confirm).
            else m.norm
        end as word
    from jumbo.aud06_mecab_vertical m
    where (m.pos = '名詞' and m.word not in ('ん','.','!」','の'))
       or m.pos in ('形容詞', '感動詞')
       or (m.pos = '動詞' and m.norm not in ('いる','する','せる','つく','なる','れる'))
       or (m.pos = '助動詞' and m.word = 'ない')
) tok
left join (
    -- Distinct (docid, cat) pairs.
    -- NOTE(review): the join is on docid only, so a docid that carries more
    -- than one cat would fan out its token rows -- confirm docid -> cat is
    -- one-to-one in jumbo.aud01_org.
    select
        docid,
        cat
    from jumbo.aud01_org
    group by docid, cat
) doc
    on tok.docid = doc.docid
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment