-- Source: GitHub gist yssymmt/e16c9b513126d73ce0c55a73ff8f0f5c (created September 19, 2022 16:49).
-- Note: GitHub flagged this file as containing bidirectional Unicode text that may be
-- interpreted or compiled differently than it appears; review it in an editor that
-- reveals hidden Unicode characters.
-- Remove the previous build of the word-sequence table before it is re-created.
-- NOTE(review): presumably this raises an error when the table does not exist yet
-- (e.g. on a first run) — confirm the calling script tolerates that.
drop table jumbo.aud07_wordseq;
-- Word-sequence table: holds (document, position, category, word) rows.
-- NOTE(review): cat and word are limited to 10 Unicode characters; confirm that no
-- category or token written to this table is longer than that.
create table jumbo.aud07_wordseq (
    docid integer,                             -- document id; also the primary index column
    seqno integer,                             -- sequence number of the word within the document (presumably token order — verify)
    cat   varchar(10) character set unicode,   -- category label
    word  varchar(10) character set unicode    -- word text
) primary index (docid)
;
-- Populate aud07_wordseq with the tokens kept from jumbo.aud06_mecab_vertical,
-- each tagged with the category recorded for its document in jumbo.aud01_org.
-- The target column list is spelled out so the load does not depend on the
-- physical column order of the table.
insert into jumbo.aud07_wordseq (docid, seqno, cat, word)
select
    a1.docid,
    a1.seqno,
    a2.cat,
    a1.word
from (
    -- Tokens to keep, together with the text that is stored as `word`.
    select
        docid,
        seqno,
        case
            when word = 'バスケット' then 'バスケットボール'   -- store "basket" as "basketball"
            when pos in ('名詞', '感動詞') then word           -- nouns and interjections keep `word` as-is
            else norm                                          -- all other kept tokens use `norm` (presumably the base form — verify)
        end as word
    from jumbo.aud06_mecab_vertical
    where (pos = '名詞' and word not in ('ん', '.', '!」', 'の'))                              -- nouns, minus a few noise tokens
       or pos in ('形容詞', '感動詞')                                                          -- adjectives and interjections
       or (pos = '動詞' and norm not in ('いる', 'する', 'せる', 'つく', 'なる', 'れる'))      -- verbs, minus listed generic verbs
       or (pos = '助動詞' and word = 'ない')                                                   -- the auxiliary "ない" only
) a1
left join (
    -- Distinct (docid, cat) pairs.
    -- NOTE(review): if a docid carries more than one cat in aud01_org, every token
    -- of that document is inserted once per cat — confirm docid -> cat is unique.
    select
        docid,
        cat
    from jumbo.aud01_org
    group by docid, cat
) a2
    on a1.docid = a2.docid
;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment