|
|
最近小弟要做一个类似于网页信息抓取的东西。该网站采用GBK编码,而我抓取该网站HTML页面中的内容后,要将它作为某种文件的文件名保存。因为系统为UTF-8编码,所以需要从GBK转换至UTF-8。但我使用iconv函数库进行转换时,却屡次不能成功,所以想请朋友们帮忙看一下这段短代码。
小弟代码写得不是太好,请见谅。
谢谢!
- /*function convert gbk to linux supported utf-8*/
- /*char *name是一个存有gbk编码的中文数据,以\0结尾*/
- int TC_convert_gbk_to_utf8(char *name)
- {
- /*定义两个缓冲区*/
- char buffer1[MAX_VIDEO_NAME_LENGTH];
- char buffer2[MAX_VIDEO_NAME_LENGTH];
- iconv_t cd;
- size_t in_len;
- size_t out_len = MAX_VIDEO_NAME_LENGTH - 1;
- /*作一些初始化*/
- memset(buffer1, '\0', sizeof(buffer1));
- memset(buffer2, '\0', sizeof(buffer2));
- /*get the max length to be converted*/
- in_len = strlen(name) + 1;
-
- /*copy the oringinal code to the temp buffer*/
- strcpy(buffer1, name);
- /*clear the orginale buffer*/
- memset(name, '\0', sizeof(name));
-
- /*create a convert descriptor*/
- cd = iconv_open("UTF-8", "GBK");
- if(cd == (iconv_t)-1)
- {
- printf("cannot get a convert descriptor!\n");
- return -1;
- }
-
- //下边一行代码总是出现段错误,但是name确实数据合法。
- if(iconv(cd, &buffer1, &in_len, &buffer2, &out_len) == 0)
- {
- /*copy back the converted character*/
- /*将内容写至name中带回*/
- strcpy(name, buffer2);
- /*close the descriptor*/
- iconv_close(cd);
- return 0;
- }
- /*close the descriptor*/
- iconv_close(cd);
- return -1;
- }
复制代码 |
|